# Files
# sharey/scripts/migrate.py
# 2025-09-27 17:45:52 +01:00
#
# 451 lines
# 18 KiB
# Python
#!/usr/bin/env python3
"""
Sharey Local-to-B2 Migration Script
This script migrates existing local files and pastes to Backblaze B2
while preserving their original IDs and structure.
Sharey Naming Conventions:
- Files: 6-char random ID + original extension (e.g., abc123.jpg)
- Pastes: 6-char UUID prefix + .txt extension (e.g., def456.txt)
- B2 Structure: files/{file_id} and pastes/{paste_id}.txt
"""
import os
import sys
import mimetypes
from pathlib import Path
from typing import Dict, List, Tuple
import json
from datetime import datetime
# Fail fast with actionable guidance if the B2 SDK or the app's config module
# is missing — this script must run in the same environment as the Sharey app.
try:
    from b2sdk.v2 import InMemoryAccountInfo, B2Api
    from config import config
except ImportError as e:
    print(f"❌ Missing dependencies: {e}")
    print("💡 Make sure you're running this script in the same environment as your Sharey app")
    print("💡 Run: pip install -r requirements.txt")
    sys.exit(1)
class ShareyMigrator:
    """Handles migration of local Sharey files and pastes to Backblaze B2.

    Expected local layout (relative to the chosen base path):
        uploads/  -> uploaded to B2 as files/{file_id}{ext}
        pastes/   -> uploaded to B2 as pastes/{paste_id}.txt

    IDs are derived from the local filenames (Sharey uses 6-char IDs), so the
    original public URLs keep working after migration.
    """

    def __init__(self):
        # B2 handles; populated by initialize_b2().
        self.b2_api = None
        self.bucket = None
        # Running counters for the final summary / saved log.
        self.stats = {
            'files_migrated': 0,
            'pastes_migrated': 0,
            'files_skipped': 0,
            'pastes_skipped': 0,
            'errors': 0,
            'total_size': 0
        }
        # Human-readable entries, persisted by save_migration_log().
        self.migration_log = []

    def initialize_b2(self) -> bool:
        """Authorize against B2 and resolve the target bucket.

        Returns:
            True on success; False if config validation or the B2 calls fail.
        """
        print("🔧 Initializing B2 connection...")
        # Validate B2 configuration before touching the network.
        if not config.validate_b2_config():
            print("❌ Invalid B2 configuration. Please check your config.json")
            return False
        try:
            b2_config = config.get_b2_config()
            print(f"📋 Target bucket: {b2_config['bucket_name']}")
            info = InMemoryAccountInfo()
            self.b2_api = B2Api(info)
            self.b2_api.authorize_account("production", b2_config['key_id'], b2_config['key'])
            self.bucket = self.b2_api.get_bucket_by_name(b2_config['bucket_name'])
            print("✅ B2 connection established")
            return True
        except Exception as e:
            print(f"❌ Failed to connect to B2: {e}")
            return False

    def scan_local_directories(self, base_path: str = ".") -> Tuple[List[str], List[str]]:
        """Recursively collect candidate upload and paste file paths.

        Hidden files and Sharey's own ``.sharey-meta`` sidecar files are
        excluded. Missing directories are reported but not treated as errors.

        Returns:
            (file_paths, paste_paths) — lists of absolute/relative local paths.
        """
        print(f"🔍 Scanning for local files in: {os.path.abspath(base_path)}")
        uploads_dir = os.path.join(base_path, "uploads")
        pastes_dir = os.path.join(base_path, "pastes")
        file_paths = []
        paste_paths = []
        # Scan uploads directory
        if os.path.exists(uploads_dir):
            print(f"📁 Found uploads directory: {uploads_dir}")
            for root, dirs, files in os.walk(uploads_dir):
                for file in files:
                    # Skip hidden files, metadata files, and any Sharey system files
                    if (not file.startswith('.') and
                            not file.endswith('.sharey-meta') and
                            '.sharey-meta' not in file):
                        file_paths.append(os.path.join(root, file))
            print(f" Found {len(file_paths)} files (skipped .sharey-meta files)")
        else:
            print(f"⚠️ No uploads directory found at: {uploads_dir}")
        # Scan pastes directory
        if os.path.exists(pastes_dir):
            print(f"📝 Found pastes directory: {pastes_dir}")
            for root, dirs, files in os.walk(pastes_dir):
                for file in files:
                    if not file.startswith('.'):  # Skip hidden files
                        paste_paths.append(os.path.join(root, file))
            print(f" Found {len(paste_paths)} pastes")
        else:
            print(f"⚠️ No pastes directory found at: {pastes_dir}")
        return file_paths, paste_paths

    def extract_id_from_path(self, file_path: str, base_dir: str) -> str:
        """Derive the Sharey ID from a local path (filename minus extension).

        Warns (but still proceeds) when the ID is not the standard 6 chars.
        """
        # Get relative path from base directory
        rel_path = os.path.relpath(file_path, base_dir)
        # Extract filename without extension for ID
        filename = os.path.basename(rel_path)
        file_id = os.path.splitext(filename)[0]
        # Validate ID format (should be 6 characters for Sharey).
        # BUG FIX: the warning used to print the literal placeholder
        # "(unknown)" instead of the actual filename.
        if len(file_id) != 6:
            print(f"⚠️ Warning: {filename} has non-standard ID length ({len(file_id)} chars, expected 6)")
        return file_id

    def file_exists_in_b2(self, b2_path: str) -> bool:
        """Check whether ``b2_path`` already exists in the target bucket.

        Best-effort: any SDK error is treated as "does not exist" so migration
        proceeds rather than silently skipping files.
        """
        try:
            # Try different methods depending on B2 SDK version
            if hasattr(self.bucket, 'get_file_info_by_name'):
                file_info = self.bucket.get_file_info_by_name(b2_path)
                return True
            elif hasattr(self.bucket, 'ls'):
                for file_version, _ in self.bucket.ls(b2_path, recursive=False):
                    if file_version.file_name == b2_path:
                        return True
                return False
            else:
                # Fallback - assume doesn't exist to avoid skipping
                return False
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit during the existence check.
            return False

    def migrate_file(self, local_path: str, uploads_dir: str, dry_run: bool = False) -> bool:
        """Upload one local file to B2 as ``files/{id}{ext}``.

        Skips files already present in B2. Updates ``self.stats`` and
        ``self.migration_log``. Returns True on success or skip, False on error.
        """
        try:
            # Extract file ID and determine B2 path
            file_id = self.extract_id_from_path(local_path, uploads_dir)
            file_extension = os.path.splitext(local_path)[1]
            b2_path = f"files/{file_id}{file_extension}"
            # Check if file already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"⏭️ Skipping {file_id} (already exists in B2)")
                self.stats['files_skipped'] += 1
                return True
            # Get file info
            file_size = os.path.getsize(local_path)
            content_type = mimetypes.guess_type(local_path)[0] or 'application/octet-stream'
            print(f"📤 Uploading file: {file_id}{file_extension} ({file_size:,} bytes)")
            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['files_migrated'] += 1
                self.stats['total_size'] += file_size
                return True
            # Upload to B2 - try different methods for different SDK versions
            with open(local_path, 'rb') as file_data:
                data = file_data.read()
            try:
                # Method 1: upload_bytes (newer SDK)
                if hasattr(self.bucket, 'upload_bytes'):
                    self.bucket.upload_bytes(
                        data,
                        b2_path,
                        content_type=content_type
                    )
                # Method 2: upload with file-like object (older SDK)
                elif hasattr(self.bucket, 'upload_file'):
                    from io import BytesIO
                    self.bucket.upload_file(
                        BytesIO(data),
                        b2_path,
                        content_type=content_type
                    )
                # Method 3: upload with upload source (alternative)
                elif hasattr(self.bucket, 'upload'):
                    from io import BytesIO
                    self.bucket.upload(
                        BytesIO(data),
                        b2_path,
                        content_type=content_type
                    )
                else:
                    raise Exception("No compatible upload method found in B2 SDK")
            except Exception as upload_error:
                raise Exception(f"Upload failed: {upload_error}")
            self.stats['files_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"FILE: {file_id}{file_extension} -> {b2_path}")
            print(f" ✅ Uploaded successfully")
            return True
        except Exception as e:
            print(f" ❌ Failed to upload {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_paste(self, local_path: str, pastes_dir: str, dry_run: bool = False) -> bool:
        """Upload one local paste to B2 as ``pastes/{id}.txt`` (UTF-8 text).

        Same skip/stats/log semantics as :meth:`migrate_file`.
        """
        try:
            # Extract paste ID and determine B2 path
            paste_id = self.extract_id_from_path(local_path, pastes_dir)
            b2_path = f"pastes/{paste_id}.txt"
            # Check if paste already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"⏭️ Skipping paste {paste_id} (already exists in B2)")
                self.stats['pastes_skipped'] += 1
                return True
            # Get paste info
            file_size = os.path.getsize(local_path)
            print(f"📝 Uploading paste: {paste_id} ({file_size:,} bytes)")
            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['pastes_migrated'] += 1
                self.stats['total_size'] += file_size
                return True
            # Read (tolerating stray bytes) and re-encode as clean UTF-8.
            with open(local_path, 'r', encoding='utf-8', errors='ignore') as file:
                content = file.read()
            data = content.encode('utf-8')
            try:
                # Method 1: upload_bytes (newer SDK)
                if hasattr(self.bucket, 'upload_bytes'):
                    self.bucket.upload_bytes(
                        data,
                        b2_path,
                        content_type='text/plain; charset=utf-8'
                    )
                # Method 2: upload with file-like object (older SDK)
                elif hasattr(self.bucket, 'upload_file'):
                    from io import BytesIO
                    self.bucket.upload_file(
                        BytesIO(data),
                        b2_path,
                        content_type='text/plain; charset=utf-8'
                    )
                # Method 3: upload with upload source (alternative)
                elif hasattr(self.bucket, 'upload'):
                    from io import BytesIO
                    self.bucket.upload(
                        BytesIO(data),
                        b2_path,
                        content_type='text/plain; charset=utf-8'
                    )
                else:
                    raise Exception("No compatible upload method found in B2 SDK")
            except Exception as upload_error:
                raise Exception(f"Upload failed: {upload_error}")
            self.stats['pastes_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"PASTE: {paste_id} -> {b2_path}")
            print(f" ✅ Uploaded successfully")
            return True
        except Exception as e:
            print(f" ❌ Failed to upload paste {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_all(self, base_path: str = ".", dry_run: bool = False,
                    skip_files: bool = False, skip_pastes: bool = False,
                    force: bool = False):
        """Scan local directories and migrate everything found to B2.

        Args:
            base_path: Directory containing ``uploads/`` and ``pastes/``.
            dry_run: Report what would happen without uploading.
            skip_files / skip_pastes: Exclude one category from migration.
            force: Skip the interactive confirmation prompt (new, default off —
                previously the prompt was unconditional, so ``--force`` had no
                effect).

        Returns:
            True if migration ran (or dry-ran); False if nothing was found or
            the user declined the confirmation prompt.
        """
        if dry_run:
            print("🧪 DRY RUN MODE - No files will actually be uploaded")
        print(f"\n🚀 Starting migration from: {os.path.abspath(base_path)}")
        print("=" * 60)
        # Scan for local files
        file_paths, paste_paths = self.scan_local_directories(base_path)
        if not file_paths and not paste_paths:
            print("❌ No files or pastes found to migrate")
            return False
        total_items = len(file_paths) + len(paste_paths)
        print(f"\n📊 Migration Plan:")
        print(f" Files to migrate: {len(file_paths)}")
        print(f" Pastes to migrate: {len(paste_paths)}")
        print(f" Total items: {total_items}")
        if not dry_run and not force:
            confirm = input(f"\n❓ Proceed with migration? (y/N): ").strip().lower()
            if confirm != 'y':
                print("Migration cancelled")
                return False
        print(f"\n🔄 Starting migration...")
        print("-" * 40)
        # Migrate files
        if file_paths and not skip_files:
            print(f"\n📁 Migrating {len(file_paths)} files...")
            uploads_dir = os.path.join(base_path, "uploads")
            for i, file_path in enumerate(file_paths, 1):
                print(f"[{i}/{len(file_paths)}] ", end="")
                self.migrate_file(file_path, uploads_dir, dry_run)
        # Migrate pastes
        if paste_paths and not skip_pastes:
            print(f"\n📝 Migrating {len(paste_paths)} pastes...")
            pastes_dir = os.path.join(base_path, "pastes")
            for i, paste_path in enumerate(paste_paths, 1):
                print(f"[{i}/{len(paste_paths)}] ", end="")
                self.migrate_paste(paste_path, pastes_dir, dry_run)
        self.print_summary(dry_run)
        self.save_migration_log()
        return True

    def print_summary(self, dry_run: bool = False):
        """Print the per-category counters and overall success rate."""
        print("\n" + "=" * 60)
        print("📊 MIGRATION SUMMARY")
        print("=" * 60)
        if dry_run:
            print("🧪 DRY RUN RESULTS:")
        print(f"✅ Files migrated: {self.stats['files_migrated']}")
        print(f"✅ Pastes migrated: {self.stats['pastes_migrated']}")
        print(f"⏭️ Files skipped: {self.stats['files_skipped']}")
        print(f"⏭️ Pastes skipped: {self.stats['pastes_skipped']}")
        print(f"❌ Errors: {self.stats['errors']}")
        print(f"📦 Total data: {self.stats['total_size']:,} bytes ({self.stats['total_size'] / 1024 / 1024:.2f} MB)")
        # Skipped items count as neither success nor failure; max(1, ...)
        # guards against division by zero when nothing was attempted.
        success_rate = ((self.stats['files_migrated'] + self.stats['pastes_migrated']) /
                        max(1, self.stats['files_migrated'] + self.stats['pastes_migrated'] + self.stats['errors'])) * 100
        print(f"📈 Success rate: {success_rate:.1f}%")
        if not dry_run and (self.stats['files_migrated'] > 0 or self.stats['pastes_migrated'] > 0):
            print(f"\n🎉 Migration completed successfully!")
            print(f"💡 Your files are now accessible via your Sharey B2 URLs")

    def save_migration_log(self):
        """Write the migration log and summary to a timestamped text file.

        No-op when nothing was logged. Failures are reported but never raised.
        """
        if not self.migration_log:
            return
        log_filename = f"migration_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        try:
            # BUG FIX: write as UTF-8 explicitly — log entries contain emoji
            # paths/messages that can crash under a non-UTF-8 default locale.
            with open(log_filename, 'w', encoding='utf-8') as f:
                f.write(f"Sharey B2 Migration Log\n")
                f.write(f"Generated: {datetime.now().isoformat()}\n")
                f.write(f"=" * 50 + "\n\n")
                for entry in self.migration_log:
                    f.write(f"{entry}\n")
                f.write(f"\n" + "=" * 50 + "\n")
                f.write(f"SUMMARY:\n")
                f.write(f"Files migrated: {self.stats['files_migrated']}\n")
                f.write(f"Pastes migrated: {self.stats['pastes_migrated']}\n")
                f.write(f"Files skipped: {self.stats['files_skipped']}\n")
                f.write(f"Pastes skipped: {self.stats['pastes_skipped']}\n")
                f.write(f"Errors: {self.stats['errors']}\n")
                f.write(f"Total size: {self.stats['total_size']:,} bytes\n")
            print(f"📄 Migration log saved to: {log_filename}")
        except Exception as e:
            print(f"⚠️ Failed to save migration log: {e}")
def main():
    """CLI entry point: parse arguments, connect to B2, run the migration.

    Exits 0 on success, 1 on failure/cancellation (argparse exits 2 on bad
    arguments).
    """
    print("🚀 Sharey Local-to-B2 Migration Tool")
    print("=" * 50)
    # Parse command line arguments
    import argparse
    import inspect
    parser = argparse.ArgumentParser(description='Migrate local Sharey files to Backblaze B2')
    parser.add_argument('--path', '-p', default='.', help='Path to Sharey directory (default: current directory)')
    parser.add_argument('--dry-run', '-d', action='store_true', help='Perform a dry run without uploading')
    parser.add_argument('--skip-files', action='store_true', help='Skip file migration')
    parser.add_argument('--skip-pastes', action='store_true', help='Skip paste migration')
    parser.add_argument('--force', '-f', action='store_true', help='Skip confirmation prompt')
    args = parser.parse_args()
    # Initialize migrator
    migrator = ShareyMigrator()
    # Initialize B2 connection
    if not migrator.initialize_b2():
        print("❌ Failed to initialize B2 connection")
        sys.exit(1)
    # BUG FIX: --force was parsed but never forwarded, so the confirmation
    # prompt always appeared. Forward it via feature detection so this still
    # works against an older ShareyMigrator without a `force` parameter.
    migrate_kwargs = {
        'base_path': args.path,
        'dry_run': args.dry_run,
        'skip_files': args.skip_files,
        'skip_pastes': args.skip_pastes,
    }
    if 'force' in inspect.signature(migrator.migrate_all).parameters:
        migrate_kwargs['force'] = args.force
    # Run migration
    try:
        success = migrator.migrate_all(**migrate_kwargs)
        if success:
            print(f"\n💡 Next steps:")
            print(f" 1. Test your Sharey app to ensure URLs work correctly")
            print(f" 2. Consider backing up your local files before deletion")
            print(f" 3. Update any hardcoded URLs to use the new B2 structure")
            sys.exit(0)
        else:
            sys.exit(1)
    except KeyboardInterrupt:
        print(f"\n⏹️ Migration cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Migration failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()