Uploaded code
This commit is contained in:
450
scripts/migrate.py
Normal file
450
scripts/migrate.py
Normal file
@@ -0,0 +1,450 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sharey Local-to-B2 Migration Script
|
||||
|
||||
This script migrates existing local files and pastes to Backblaze B2
|
||||
while preserving their original IDs and structure.
|
||||
|
||||
Sharey Naming Conventions:
|
||||
- Files: 6-char random ID + original extension (e.g., abc123.jpg)
|
||||
- Pastes: 6-char UUID prefix + .txt extension (e.g., def456.txt)
|
||||
- B2 Structure: files/{file_id} and pastes/{paste_id}.txt
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from b2sdk.v2 import InMemoryAccountInfo, B2Api
|
||||
from config import config
|
||||
except ImportError as e:
|
||||
print(f"❌ Missing dependencies: {e}")
|
||||
print("💡 Make sure you're running this script in the same environment as your Sharey app")
|
||||
print("💡 Run: pip install -r requirements.txt")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class ShareyMigrator:
    """Handles migration of local Sharey files to B2.

    IDs are preserved: a local ``uploads/abc123.jpg`` becomes
    ``files/abc123.jpg`` in B2 and a local paste becomes
    ``pastes/{paste_id}.txt``.
    """

    def __init__(self):
        # B2 handles; populated by initialize_b2().
        self.b2_api = None
        self.bucket = None
        # When True, migrate_all() skips the interactive confirmation
        # prompt (set by the CLI's --force flag).
        self.force = False
        # Running counters for the final summary.
        self.stats = {
            'files_migrated': 0,
            'pastes_migrated': 0,
            'files_skipped': 0,
            'pastes_skipped': 0,
            'errors': 0,
            'total_size': 0
        }
        # Human-readable entries persisted by save_migration_log().
        self.migration_log = []

    def initialize_b2(self) -> bool:
        """Initialize B2 connection.

        Validates the app config, authorizes against B2 and resolves the
        target bucket. Returns True on success, False otherwise.
        """
        print("🔧 Initializing B2 connection...")

        # Validate B2 configuration before attempting any network calls.
        if not config.validate_b2_config():
            print("❌ Invalid B2 configuration. Please check your config.json")
            return False

        try:
            b2_config = config.get_b2_config()
            print(f"📋 Target bucket: {b2_config['bucket_name']}")

            info = InMemoryAccountInfo()
            self.b2_api = B2Api(info)
            self.b2_api.authorize_account("production", b2_config['key_id'], b2_config['key'])
            self.bucket = self.b2_api.get_bucket_by_name(b2_config['bucket_name'])
            print("✅ B2 connection established")
            return True

        except Exception as e:
            print(f"❌ Failed to connect to B2: {e}")
            return False

    def scan_local_directories(self, base_path: str = ".") -> Tuple[List[str], List[str]]:
        """Scan for local uploads and pastes directories.

        Returns a ``(file_paths, paste_paths)`` tuple of absolute-ish
        paths found under ``base_path/uploads`` and ``base_path/pastes``.
        """
        print(f"🔍 Scanning for local files in: {os.path.abspath(base_path)}")

        uploads_dir = os.path.join(base_path, "uploads")
        pastes_dir = os.path.join(base_path, "pastes")

        file_paths = []
        paste_paths = []

        # Scan uploads directory
        if os.path.exists(uploads_dir):
            print(f"📁 Found uploads directory: {uploads_dir}")
            for root, dirs, files in os.walk(uploads_dir):
                for file in files:
                    # Skip hidden files, metadata files, and any Sharey system files
                    if (not file.startswith('.') and
                            not file.endswith('.sharey-meta') and
                            '.sharey-meta' not in file):
                        file_paths.append(os.path.join(root, file))
            print(f" Found {len(file_paths)} files (skipped .sharey-meta files)")
        else:
            print(f"⚠️ No uploads directory found at: {uploads_dir}")

        # Scan pastes directory
        if os.path.exists(pastes_dir):
            print(f"📝 Found pastes directory: {pastes_dir}")
            for root, dirs, files in os.walk(pastes_dir):
                for file in files:
                    if not file.startswith('.'):  # Skip hidden files
                        paste_paths.append(os.path.join(root, file))
            print(f" Found {len(paste_paths)} pastes")
        else:
            print(f"⚠️ No pastes directory found at: {pastes_dir}")

        return file_paths, paste_paths

    def extract_id_from_path(self, file_path: str, base_dir: str) -> str:
        """Extract the file ID (filename without extension) from a path.

        Warns (but still returns the ID) when the ID is not the 6-char
        length Sharey normally generates.
        """
        # Get relative path from base directory
        rel_path = os.path.relpath(file_path, base_dir)

        # Extract filename without extension for ID
        filename = os.path.basename(rel_path)
        file_id = os.path.splitext(filename)[0]

        # Validate ID format (should be 6 characters for Sharey).
        # Fix: report the actual filename instead of a "(unknown)" placeholder.
        if len(file_id) != 6:
            print(f"⚠️ Warning: {filename} has non-standard ID length ({len(file_id)} chars, expected 6)")

        return file_id

    def file_exists_in_b2(self, b2_path: str) -> bool:
        """Check if a file already exists in B2.

        Works across SDK versions; on any error (including "not found")
        it returns False so migration proceeds rather than skipping.
        """
        try:
            # Try different methods depending on B2 SDK version
            if hasattr(self.bucket, 'get_file_info_by_name'):
                file_info = self.bucket.get_file_info_by_name(b2_path)
                return True
            elif hasattr(self.bucket, 'ls'):
                for file_version, _ in self.bucket.ls(b2_path, recursive=False):
                    if file_version.file_name == b2_path:
                        return True
                return False
            else:
                # Fallback - assume doesn't exist to avoid skipping
                return False
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit propagate.
            return False

    def _upload_bytes(self, data: bytes, b2_path: str, content_type: str):
        """Upload raw bytes to B2, coping with SDK API differences.

        Tries ``upload_bytes`` (newer SDK), then ``upload_file`` /
        ``upload`` with a BytesIO wrapper (older SDKs), in that order.
        Raises ``Exception("Upload failed: ...")`` on any failure.
        """
        from io import BytesIO
        try:
            # Method 1: upload_bytes (newer SDK)
            if hasattr(self.bucket, 'upload_bytes'):
                return self.bucket.upload_bytes(
                    data,
                    b2_path,
                    content_type=content_type
                )
            # Method 2: upload with file-like object (older SDK)
            elif hasattr(self.bucket, 'upload_file'):
                return self.bucket.upload_file(
                    BytesIO(data),
                    b2_path,
                    content_type=content_type
                )
            # Method 3: upload with upload source (alternative)
            elif hasattr(self.bucket, 'upload'):
                return self.bucket.upload(
                    BytesIO(data),
                    b2_path,
                    content_type=content_type
                )
            else:
                raise Exception("No compatible upload method found in B2 SDK")
        except Exception as upload_error:
            raise Exception(f"Upload failed: {upload_error}")

    def migrate_file(self, local_path: str, uploads_dir: str, dry_run: bool = False) -> bool:
        """Migrate a single file to B2.

        Returns True on success or skip, False on error; stats and the
        migration log are updated either way.
        """
        try:
            # Extract file ID and determine B2 path: files/{id}{ext}
            file_id = self.extract_id_from_path(local_path, uploads_dir)
            file_extension = os.path.splitext(local_path)[1]
            b2_path = f"files/{file_id}{file_extension}"

            # Idempotency: check if file already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"⏭️ Skipping {file_id} (already exists in B2)")
                self.stats['files_skipped'] += 1
                return True

            # Get file info
            file_size = os.path.getsize(local_path)
            content_type = mimetypes.guess_type(local_path)[0] or 'application/octet-stream'

            print(f"📤 Uploading file: {file_id}{file_extension} ({file_size:,} bytes)")

            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['files_migrated'] += 1
                self.stats['total_size'] += file_size
                return True

            # Upload to B2 via the SDK-compat helper (shared with pastes).
            with open(local_path, 'rb') as file_data:
                data = file_data.read()
            self._upload_bytes(data, b2_path, content_type)

            self.stats['files_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"FILE: {file_id}{file_extension} -> {b2_path}")
            print(f" ✅ Uploaded successfully")
            return True

        except Exception as e:
            print(f" ❌ Failed to upload {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_paste(self, local_path: str, pastes_dir: str, dry_run: bool = False) -> bool:
        """Migrate a single paste to B2 as UTF-8 text.

        Returns True on success or skip, False on error; stats and the
        migration log are updated either way.
        """
        try:
            # Extract paste ID and determine B2 path: pastes/{id}.txt
            paste_id = self.extract_id_from_path(local_path, pastes_dir)
            b2_path = f"pastes/{paste_id}.txt"

            # Idempotency: check if paste already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"⏭️ Skipping paste {paste_id} (already exists in B2)")
                self.stats['pastes_skipped'] += 1
                return True

            # Get paste info
            file_size = os.path.getsize(local_path)

            print(f"📝 Uploading paste: {paste_id} ({file_size:,} bytes)")

            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['pastes_migrated'] += 1
                self.stats['total_size'] += file_size
                return True

            # Read (tolerating bad bytes) and upload as UTF-8 text.
            with open(local_path, 'r', encoding='utf-8', errors='ignore') as file:
                content = file.read()
            self._upload_bytes(content.encode('utf-8'), b2_path,
                               'text/plain; charset=utf-8')

            self.stats['pastes_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"PASTE: {paste_id} -> {b2_path}")
            print(f" ✅ Uploaded successfully")
            return True

        except Exception as e:
            print(f" ❌ Failed to upload paste {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_all(self, base_path: str = ".", dry_run: bool = False,
                    skip_files: bool = False, skip_pastes: bool = False,
                    force: bool = False):
        """Migrate all local files and pastes to B2.

        Args:
            base_path: Sharey root containing uploads/ and pastes/.
            dry_run: Report what would be uploaded without uploading.
            skip_files: Skip file migration.
            skip_pastes: Skip paste migration.
            force: Skip the confirmation prompt (also honored via the
                ``self.force`` attribute set by the CLI).

        Returns True if a migration ran (or dry-ran), False if there was
        nothing to migrate or the user declined.
        """
        if dry_run:
            print("🧪 DRY RUN MODE - No files will actually be uploaded")

        print(f"\n🚀 Starting migration from: {os.path.abspath(base_path)}")
        print("=" * 60)

        # Scan for local files
        file_paths, paste_paths = self.scan_local_directories(base_path)

        if not file_paths and not paste_paths:
            print("❌ No files or pastes found to migrate")
            return False

        total_items = len(file_paths) + len(paste_paths)
        print(f"\n📊 Migration Plan:")
        print(f" Files to migrate: {len(file_paths)}")
        print(f" Pastes to migrate: {len(paste_paths)}")
        print(f" Total items: {total_items}")

        # Fix: --force was previously parsed but never applied; honor it here.
        if not dry_run and not (force or self.force):
            confirm = input(f"\n❓ Proceed with migration? (y/N): ").strip().lower()
            if confirm != 'y':
                print("Migration cancelled")
                return False

        print(f"\n🔄 Starting migration...")
        print("-" * 40)

        # Migrate files
        if file_paths and not skip_files:
            print(f"\n📁 Migrating {len(file_paths)} files...")
            uploads_dir = os.path.join(base_path, "uploads")

            for i, file_path in enumerate(file_paths, 1):
                print(f"[{i}/{len(file_paths)}] ", end="")
                self.migrate_file(file_path, uploads_dir, dry_run)

        # Migrate pastes
        if paste_paths and not skip_pastes:
            print(f"\n📝 Migrating {len(paste_paths)} pastes...")
            pastes_dir = os.path.join(base_path, "pastes")

            for i, paste_path in enumerate(paste_paths, 1):
                print(f"[{i}/{len(paste_paths)}] ", end="")
                self.migrate_paste(paste_path, pastes_dir, dry_run)

        self.print_summary(dry_run)
        self.save_migration_log()
        return True

    def print_summary(self, dry_run: bool = False):
        """Print migration summary (counts, total size, success rate)."""
        print("\n" + "=" * 60)
        print("📊 MIGRATION SUMMARY")
        print("=" * 60)

        if dry_run:
            print("🧪 DRY RUN RESULTS:")

        print(f"✅ Files migrated: {self.stats['files_migrated']}")
        print(f"✅ Pastes migrated: {self.stats['pastes_migrated']}")
        print(f"⏭️ Files skipped: {self.stats['files_skipped']}")
        print(f"⏭️ Pastes skipped: {self.stats['pastes_skipped']}")
        print(f"❌ Errors: {self.stats['errors']}")
        print(f"📦 Total data: {self.stats['total_size']:,} bytes ({self.stats['total_size'] / 1024 / 1024:.2f} MB)")

        # Success rate over attempted items; max(1, ...) avoids div-by-zero.
        success_rate = ((self.stats['files_migrated'] + self.stats['pastes_migrated']) /
                        max(1, self.stats['files_migrated'] + self.stats['pastes_migrated'] + self.stats['errors'])) * 100
        print(f"📈 Success rate: {success_rate:.1f}%")

        if not dry_run and (self.stats['files_migrated'] > 0 or self.stats['pastes_migrated'] > 0):
            print(f"\n🎉 Migration completed successfully!")
            print(f"💡 Your files are now accessible via your Sharey B2 URLs")

    def save_migration_log(self):
        """Save migration log to a timestamped file in the working directory.

        No-op when nothing was logged; failures are reported but never raised.
        """
        if not self.migration_log:
            return

        log_filename = f"migration_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"

        try:
            with open(log_filename, 'w') as f:
                f.write(f"Sharey B2 Migration Log\n")
                f.write(f"Generated: {datetime.now().isoformat()}\n")
                f.write(f"=" * 50 + "\n\n")

                for entry in self.migration_log:
                    f.write(f"{entry}\n")

                f.write(f"\n" + "=" * 50 + "\n")
                f.write(f"SUMMARY:\n")
                f.write(f"Files migrated: {self.stats['files_migrated']}\n")
                f.write(f"Pastes migrated: {self.stats['pastes_migrated']}\n")
                f.write(f"Files skipped: {self.stats['files_skipped']}\n")
                f.write(f"Pastes skipped: {self.stats['pastes_skipped']}\n")
                f.write(f"Errors: {self.stats['errors']}\n")
                f.write(f"Total size: {self.stats['total_size']:,} bytes\n")

            print(f"📄 Migration log saved to: {log_filename}")

        except Exception as e:
            print(f"⚠️ Failed to save migration log: {e}")
|
||||
|
||||
|
||||
def main():
    """Main migration function: parse CLI args, connect to B2, migrate.

    Exits 0 on success, 1 on failure/cancellation (via sys.exit).
    """
    print("🚀 Sharey Local-to-B2 Migration Tool")
    print("=" * 50)

    # Parse command line arguments
    import argparse
    parser = argparse.ArgumentParser(description='Migrate local Sharey files to Backblaze B2')
    parser.add_argument('--path', '-p', default='.', help='Path to Sharey directory (default: current directory)')
    parser.add_argument('--dry-run', '-d', action='store_true', help='Perform a dry run without uploading')
    parser.add_argument('--skip-files', action='store_true', help='Skip file migration')
    parser.add_argument('--skip-pastes', action='store_true', help='Skip paste migration')
    parser.add_argument('--force', '-f', action='store_true', help='Skip confirmation prompt')

    args = parser.parse_args()

    # Initialize migrator
    migrator = ShareyMigrator()
    # Fix: --force was parsed but never used. Set it as an attribute so
    # migrators that honor it skip the confirmation prompt; setting an
    # attribute is harmless on older migrators that ignore it.
    migrator.force = args.force

    # Initialize B2 connection
    if not migrator.initialize_b2():
        print("❌ Failed to initialize B2 connection")
        sys.exit(1)

    # Run migration
    try:
        success = migrator.migrate_all(
            base_path=args.path,
            dry_run=args.dry_run,
            skip_files=args.skip_files,
            skip_pastes=args.skip_pastes
        )

        if success:
            print(f"\n💡 Next steps:")
            print(f" 1. Test your Sharey app to ensure URLs work correctly")
            print(f" 2. Consider backing up your local files before deletion")
            print(f" 3. Update any hardcoded URLs to use the new B2 structure")
            sys.exit(0)
        else:
            sys.exit(1)

    except KeyboardInterrupt:
        print(f"\n⏹️ Migration cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Migration failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user