#!/usr/bin/env python3
"""
Sharey Local-to-B2 Migration Script

This script migrates existing local files and pastes to Backblaze B2 while
preserving their original IDs and structure.

Sharey Naming Conventions:
- Files: 6-char random ID + original extension (e.g., abc123.jpg)
- Pastes: 6-char UUID prefix + .txt extension (e.g., def456.txt)
- B2 Structure: files/{file_id} and pastes/{paste_id}.txt
"""

import os
import sys
import mimetypes
from pathlib import Path
from typing import Dict, List, Tuple
import json
from datetime import datetime

try:
    from b2sdk.v2 import InMemoryAccountInfo, B2Api
    from config import config
except ImportError as e:
    # The script must run inside the Sharey app environment; bail out early
    # with actionable hints rather than crashing later with a NameError.
    print(f"โŒ Missing dependencies: {e}")
    print("๐Ÿ’ก Make sure you're running this script in the same environment as your Sharey app")
    print("๐Ÿ’ก Run: pip install -r requirements.txt")
    sys.exit(1)


class ShareyMigrator:
    """Handles migration of local Sharey files to B2."""

    def __init__(self):
        # B2 API handle and target bucket; populated by initialize_b2().
        self.b2_api = None
        self.bucket = None
        # Running counters for the final summary report.
        self.stats = {
            'files_migrated': 0,
            'pastes_migrated': 0,
            'files_skipped': 0,
            'pastes_skipped': 0,
            'errors': 0,
            'total_size': 0,
        }
        # Human-readable per-item log entries, flushed by save_migration_log().
        self.migration_log = []

    def initialize_b2(self) -> bool:
        """Initialize the B2 connection.

        Returns:
            True when authorization succeeded and the target bucket was
            resolved; False on any configuration or connection failure.
        """
        print("๐Ÿ”ง Initializing B2 connection...")

        # Validate B2 configuration before attempting any network calls.
        if not config.validate_b2_config():
            print("โŒ Invalid B2 configuration. Please check your config.json")
            return False

        try:
            b2_config = config.get_b2_config()
            print(f"๐Ÿ“‹ Target bucket: {b2_config['bucket_name']}")

            info = InMemoryAccountInfo()
            self.b2_api = B2Api(info)
            self.b2_api.authorize_account("production", b2_config['key_id'], b2_config['key'])
            self.bucket = self.b2_api.get_bucket_by_name(b2_config['bucket_name'])

            print("โœ… B2 connection established")
            return True
        except Exception as e:
            print(f"โŒ Failed to connect to B2: {e}")
            return False

    def scan_local_directories(self, base_path: str = ".") -> Tuple[List[str], List[str]]:
        """Scan for local ``uploads`` and ``pastes`` directories.

        Args:
            base_path: Root of the Sharey installation to scan.

        Returns:
            Tuple of (file_paths, paste_paths) — absolute/relative paths of
            every migratable file and paste found on disk.
        """
        print(f"๐Ÿ” Scanning for local files in: {os.path.abspath(base_path)}")

        uploads_dir = os.path.join(base_path, "uploads")
        pastes_dir = os.path.join(base_path, "pastes")

        file_paths = []
        paste_paths = []

        # Scan uploads directory
        if os.path.exists(uploads_dir):
            print(f"๐Ÿ“ Found uploads directory: {uploads_dir}")
            for root, dirs, files in os.walk(uploads_dir):
                for file in files:
                    # Skip hidden files and any Sharey sidecar metadata files
                    # ('.sharey-meta' anywhere in the name also covers the
                    # plain '.sharey-meta' suffix).
                    if not file.startswith('.') and '.sharey-meta' not in file:
                        file_paths.append(os.path.join(root, file))
            print(f" Found {len(file_paths)} files (skipped .sharey-meta files)")
        else:
            print(f"โš ๏ธ No uploads directory found at: {uploads_dir}")

        # Scan pastes directory
        if os.path.exists(pastes_dir):
            print(f"๐Ÿ“ Found pastes directory: {pastes_dir}")
            for root, dirs, files in os.walk(pastes_dir):
                for file in files:
                    if not file.startswith('.'):  # Skip hidden files
                        paste_paths.append(os.path.join(root, file))
            print(f" Found {len(paste_paths)} pastes")
        else:
            print(f"โš ๏ธ No pastes directory found at: {pastes_dir}")

        return file_paths, paste_paths

    def extract_id_from_path(self, file_path: str, base_dir: str) -> str:
        """Extract the Sharey ID (filename without extension) from a path.

        Warns (but still returns the ID) when the ID is not the standard
        6 characters long, so non-conforming legacy files are flagged
        without aborting the migration.
        """
        # Filename without extension is the Sharey ID.
        filename = os.path.basename(os.path.relpath(file_path, base_dir))
        file_id = os.path.splitext(filename)[0]

        # Validate ID format (should be 6 characters for Sharey).
        # BUGFIX: previously printed the literal placeholder "(unknown)"
        # instead of the offending filename.
        if len(file_id) != 6:
            print(f"โš ๏ธ Warning: {filename} has non-standard ID length ({len(file_id)} chars, expected 6)")

        return file_id

    def file_exists_in_b2(self, b2_path: str) -> bool:
        """Check if a file already exists in B2.

        Tries several SDK-version-specific lookups; any failure is treated
        as "does not exist" so the migration re-uploads rather than skips.
        """
        try:
            # Try different methods depending on B2 SDK version
            if hasattr(self.bucket, 'get_file_info_by_name'):
                # Raises when the file is absent; success means it exists.
                self.bucket.get_file_info_by_name(b2_path)
                return True
            elif hasattr(self.bucket, 'ls'):
                for file_version, _ in self.bucket.ls(b2_path, recursive=False):
                    if file_version.file_name == b2_path:
                        return True
                return False
            else:
                # Fallback - assume doesn't exist to avoid skipping
                return False
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            return False

    def _upload_bytes(self, data: bytes, b2_path: str, content_type: str) -> None:
        """Upload raw bytes to B2, adapting to whichever SDK API is available.

        Raises:
            Exception: wrapped "Upload failed: ..." on any SDK error, or when
            no compatible upload method exists.
        """
        try:
            # Method 1: upload_bytes (newer SDK)
            if hasattr(self.bucket, 'upload_bytes'):
                self.bucket.upload_bytes(data, b2_path, content_type=content_type)
            # Method 2: upload with file-like object (older SDK)
            elif hasattr(self.bucket, 'upload_file'):
                from io import BytesIO
                self.bucket.upload_file(BytesIO(data), b2_path, content_type=content_type)
            # Method 3: upload with upload source (alternative)
            elif hasattr(self.bucket, 'upload'):
                from io import BytesIO
                self.bucket.upload(BytesIO(data), b2_path, content_type=content_type)
            else:
                raise Exception("No compatible upload method found in B2 SDK")
        except Exception as upload_error:
            raise Exception(f"Upload failed: {upload_error}")

    def migrate_file(self, local_path: str, uploads_dir: str, dry_run: bool = False) -> bool:
        """Migrate a single file to B2.

        Returns True on success or skip, False on error (also recorded in
        self.stats / self.migration_log).
        """
        try:
            # Extract file ID and determine B2 path
            file_id = self.extract_id_from_path(local_path, uploads_dir)
            file_extension = os.path.splitext(local_path)[1]
            b2_path = f"files/{file_id}{file_extension}"

            # Check if file already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"โญ๏ธ Skipping {file_id} (already exists in B2)")
                self.stats['files_skipped'] += 1
                return True

            # Get file info
            file_size = os.path.getsize(local_path)
            content_type = mimetypes.guess_type(local_path)[0] or 'application/octet-stream'

            print(f"๐Ÿ“ค Uploading file: {file_id}{file_extension} ({file_size:,} bytes)")

            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['files_migrated'] += 1
                self.stats['total_size'] += file_size
                return True

            # Read the whole file and hand off to the SDK-agnostic uploader.
            with open(local_path, 'rb') as file_data:
                data = file_data.read()
            self._upload_bytes(data, b2_path, content_type)

            self.stats['files_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"FILE: {file_id}{file_extension} -> {b2_path}")
            print(f" โœ… Uploaded successfully")
            return True

        except Exception as e:
            print(f" โŒ Failed to upload {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_paste(self, local_path: str, pastes_dir: str, dry_run: bool = False) -> bool:
        """Migrate a single paste to B2 as UTF-8 text.

        Returns True on success or skip, False on error (also recorded in
        self.stats / self.migration_log).
        """
        try:
            # Extract paste ID and determine B2 path
            paste_id = self.extract_id_from_path(local_path, pastes_dir)
            b2_path = f"pastes/{paste_id}.txt"

            # Check if paste already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"โญ๏ธ Skipping paste {paste_id} (already exists in B2)")
                self.stats['pastes_skipped'] += 1
                return True

            # Get paste info
            file_size = os.path.getsize(local_path)

            print(f"๐Ÿ“ Uploading paste: {paste_id} ({file_size:,} bytes)")

            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['pastes_migrated'] += 1
                self.stats['total_size'] += file_size
                return True

            # Read paste content, normalizing to UTF-8 (errors='ignore' drops
            # undecodable bytes from legacy pastes rather than failing).
            with open(local_path, 'r', encoding='utf-8', errors='ignore') as file:
                content = file.read()
            self._upload_bytes(content.encode('utf-8'), b2_path, 'text/plain; charset=utf-8')

            self.stats['pastes_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"PASTE: {paste_id} -> {b2_path}")
            print(f" โœ… Uploaded successfully")
            return True

        except Exception as e:
            print(f" โŒ Failed to upload paste {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_all(self, base_path: str = ".", dry_run: bool = False,
                    skip_files: bool = False, skip_pastes: bool = False,
                    force: bool = False):
        """Migrate all local files and pastes to B2.

        Args:
            base_path: Root of the Sharey installation.
            dry_run: When True, report what would happen without uploading.
            skip_files: Skip the uploads/ directory.
            skip_pastes: Skip the pastes/ directory.
            force: Skip the interactive confirmation prompt.
                   BUGFIX: previously the --force CLI flag was parsed but
                   never honored; this parameter wires it through.

        Returns:
            True when the migration ran (or dry-ran); False when nothing was
            found or the user cancelled.
        """
        if dry_run:
            print("๐Ÿงช DRY RUN MODE - No files will actually be uploaded")

        print(f"\n๐Ÿš€ Starting migration from: {os.path.abspath(base_path)}")
        print("=" * 60)

        # Scan for local files
        file_paths, paste_paths = self.scan_local_directories(base_path)

        if not file_paths and not paste_paths:
            print("โŒ No files or pastes found to migrate")
            return False

        total_items = len(file_paths) + len(paste_paths)
        print(f"\n๐Ÿ“Š Migration Plan:")
        print(f" Files to migrate: {len(file_paths)}")
        print(f" Pastes to migrate: {len(paste_paths)}")
        print(f" Total items: {total_items}")

        # Interactive confirmation (skipped in dry-run and with --force).
        if not dry_run and not force:
            confirm = input(f"\nโ“ Proceed with migration? (y/N): ").strip().lower()
            if confirm != 'y':
                print("Migration cancelled")
                return False

        print(f"\n๐Ÿ”„ Starting migration...")
        print("-" * 40)

        # Migrate files
        if file_paths and not skip_files:
            print(f"\n๐Ÿ“ Migrating {len(file_paths)} files...")
            uploads_dir = os.path.join(base_path, "uploads")
            for i, file_path in enumerate(file_paths, 1):
                print(f"[{i}/{len(file_paths)}] ", end="")
                self.migrate_file(file_path, uploads_dir, dry_run)

        # Migrate pastes
        if paste_paths and not skip_pastes:
            print(f"\n๐Ÿ“ Migrating {len(paste_paths)} pastes...")
            pastes_dir = os.path.join(base_path, "pastes")
            for i, paste_path in enumerate(paste_paths, 1):
                print(f"[{i}/{len(paste_paths)}] ", end="")
                self.migrate_paste(paste_path, pastes_dir, dry_run)

        self.print_summary(dry_run)
        self.save_migration_log()
        return True

    def print_summary(self, dry_run: bool = False):
        """Print the migration summary derived from self.stats."""
        print("\n" + "=" * 60)
        print("๐Ÿ“Š MIGRATION SUMMARY")
        print("=" * 60)

        if dry_run:
            print("๐Ÿงช DRY RUN RESULTS:")

        print(f"โœ… Files migrated: {self.stats['files_migrated']}")
        print(f"โœ… Pastes migrated: {self.stats['pastes_migrated']}")
        print(f"โญ๏ธ Files skipped: {self.stats['files_skipped']}")
        print(f"โญ๏ธ Pastes skipped: {self.stats['pastes_skipped']}")
        print(f"โŒ Errors: {self.stats['errors']}")
        print(f"๐Ÿ“ฆ Total data: {self.stats['total_size']:,} bytes ({self.stats['total_size'] / 1024 / 1024:.2f} MB)")

        # max(1, ...) guards against division by zero when nothing ran.
        migrated = self.stats['files_migrated'] + self.stats['pastes_migrated']
        success_rate = (migrated / max(1, migrated + self.stats['errors'])) * 100
        print(f"๐Ÿ“ˆ Success rate: {success_rate:.1f}%")

        if not dry_run and migrated > 0:
            print(f"\n๐ŸŽ‰ Migration completed successfully!")
            print(f"๐Ÿ’ก Your files are now accessible via your Sharey B2 URLs")

    def save_migration_log(self):
        """Save the migration log to a timestamped file in the CWD.

        No-op when nothing was logged; failures are reported but non-fatal.
        """
        if not self.migration_log:
            return

        log_filename = f"migration_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"

        try:
            with open(log_filename, 'w') as f:
                f.write("Sharey B2 Migration Log\n")
                f.write(f"Generated: {datetime.now().isoformat()}\n")
                f.write("=" * 50 + "\n\n")

                for entry in self.migration_log:
                    f.write(f"{entry}\n")

                f.write("\n" + "=" * 50 + "\n")
                f.write("SUMMARY:\n")
                f.write(f"Files migrated: {self.stats['files_migrated']}\n")
                f.write(f"Pastes migrated: {self.stats['pastes_migrated']}\n")
                f.write(f"Files skipped: {self.stats['files_skipped']}\n")
                f.write(f"Pastes skipped: {self.stats['pastes_skipped']}\n")
                f.write(f"Errors: {self.stats['errors']}\n")
                f.write(f"Total size: {self.stats['total_size']:,} bytes\n")

            print(f"๐Ÿ“„ Migration log saved to: {log_filename}")
        except Exception as e:
            print(f"โš ๏ธ Failed to save migration log: {e}")


def main():
    """Main migration function: parse CLI args and run the migrator."""
    print("๐Ÿš€ Sharey Local-to-B2 Migration Tool")
    print("=" * 50)

    # Parse command line arguments
    import argparse
    parser = argparse.ArgumentParser(description='Migrate local Sharey files to Backblaze B2')
    parser.add_argument('--path', '-p', default='.', help='Path to Sharey directory (default: current directory)')
    parser.add_argument('--dry-run', '-d', action='store_true', help='Perform a dry run without uploading')
    parser.add_argument('--skip-files', action='store_true', help='Skip file migration')
    parser.add_argument('--skip-pastes', action='store_true', help='Skip paste migration')
    parser.add_argument('--force', '-f', action='store_true', help='Skip confirmation prompt')

    args = parser.parse_args()

    # Initialize migrator
    migrator = ShareyMigrator()

    # Initialize B2 connection
    if not migrator.initialize_b2():
        print("โŒ Failed to initialize B2 connection")
        sys.exit(1)

    # Run migration
    try:
        success = migrator.migrate_all(
            base_path=args.path,
            dry_run=args.dry_run,
            skip_files=args.skip_files,
            skip_pastes=args.skip_pastes,
            # BUGFIX: --force was previously parsed but never passed through.
            force=args.force,
        )

        if success:
            print(f"\n๐Ÿ’ก Next steps:")
            print(f" 1. Test your Sharey app to ensure URLs work correctly")
            print(f" 2. Consider backing up your local files before deletion")
            print(f" 3. Update any hardcoded URLs to use the new B2 structure")
            sys.exit(0)
        else:
            sys.exit(1)

    except KeyboardInterrupt:
        print(f"\nโน๏ธ Migration cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"\nโŒ Migration failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()