# Files
# sharey/scripts/migrate.py
# 2025-09-27 17:45:52 +01:00
#
# 451 lines
# 18 KiB
# Python
#!/usr/bin/env python3
"""
Sharey Local-to-B2 Migration Script
This script migrates existing local files and pastes to Backblaze B2
while preserving their original IDs and structure.
Sharey Naming Conventions:
- Files: 6-char random ID + original extension (e.g., abc123.jpg)
- Pastes: 6-char UUID prefix + .txt extension (e.g., def456.txt)
- B2 Structure: files/{file_id} and pastes/{paste_id}.txt
"""
import os
import sys
import mimetypes
from pathlib import Path
from typing import Dict, List, Tuple
import json
from datetime import datetime
# Fail fast with actionable guidance if the B2 SDK or the app's config module
# is missing — this script must run in the same environment as the Sharey app.
try:
    from b2sdk.v2 import InMemoryAccountInfo, B2Api
    from config import config
except ImportError as e:
    print(f"❌ Missing dependencies: {e}")
    print("💡 Make sure you're running this script in the same environment as your Sharey app")
    print("💡 Run: pip install -r requirements.txt")
    sys.exit(1)
class ShareyMigrator:
    """Handles migration of local Sharey files and pastes to Backblaze B2.

    Expected local layout (relative to the chosen base path):
        uploads/  -> uploaded to B2 as files/{file_id}{ext}
        pastes/   -> uploaded to B2 as pastes/{paste_id}.txt

    IDs are derived from the local filenames (Sharey uses 6-char IDs), so the
    original public URLs keep working after migration.
    """

    def __init__(self):
        # B2 handles; populated by initialize_b2().
        self.b2_api = None
        self.bucket = None
        # Running counters for the final summary / saved log.
        self.stats = {
            'files_migrated': 0,
            'pastes_migrated': 0,
            'files_skipped': 0,
            'pastes_skipped': 0,
            'errors': 0,
            'total_size': 0
        }
        # Human-readable entries, persisted by save_migration_log().
        self.migration_log = []

    def initialize_b2(self) -> bool:
        """Authorize against B2 and resolve the target bucket.

        Returns:
            True on success; False if config validation or the B2 calls fail.
        """
        print("🔧 Initializing B2 connection...")
        # Validate B2 configuration before touching the network.
        if not config.validate_b2_config():
            print("❌ Invalid B2 configuration. Please check your config.json")
            return False
        try:
            b2_config = config.get_b2_config()
            print(f"📋 Target bucket: {b2_config['bucket_name']}")
            info = InMemoryAccountInfo()
            self.b2_api = B2Api(info)
            self.b2_api.authorize_account("production", b2_config['key_id'], b2_config['key'])
            self.bucket = self.b2_api.get_bucket_by_name(b2_config['bucket_name'])
            print("✅ B2 connection established")
            return True
        except Exception as e:
            print(f"❌ Failed to connect to B2: {e}")
            return False

    def scan_local_directories(self, base_path: str = ".") -> Tuple[List[str], List[str]]:
        """Recursively collect candidate upload and paste file paths.

        Hidden files and Sharey's own ``.sharey-meta`` sidecar files are
        excluded. Missing directories are reported but not treated as errors.

        Returns:
            (file_paths, paste_paths) — lists of absolute/relative local paths.
        """
        print(f"🔍 Scanning for local files in: {os.path.abspath(base_path)}")
        uploads_dir = os.path.join(base_path, "uploads")
        pastes_dir = os.path.join(base_path, "pastes")
        file_paths = []
        paste_paths = []
        # Scan uploads directory
        if os.path.exists(uploads_dir):
            print(f"📁 Found uploads directory: {uploads_dir}")
            for root, dirs, files in os.walk(uploads_dir):
                for file in files:
                    # Skip hidden files, metadata files, and any Sharey system files
                    if (not file.startswith('.') and
                            not file.endswith('.sharey-meta') and
                            '.sharey-meta' not in file):
                        file_paths.append(os.path.join(root, file))
            print(f" Found {len(file_paths)} files (skipped .sharey-meta files)")
        else:
            print(f"⚠️ No uploads directory found at: {uploads_dir}")
        # Scan pastes directory
        if os.path.exists(pastes_dir):
            print(f"📝 Found pastes directory: {pastes_dir}")
            for root, dirs, files in os.walk(pastes_dir):
                for file in files:
                    if not file.startswith('.'):  # Skip hidden files
                        paste_paths.append(os.path.join(root, file))
            print(f" Found {len(paste_paths)} pastes")
        else:
            print(f"⚠️ No pastes directory found at: {pastes_dir}")
        return file_paths, paste_paths

    def extract_id_from_path(self, file_path: str, base_dir: str) -> str:
        """Derive the Sharey ID from a local path (filename minus extension).

        Warns (but still proceeds) when the ID is not the standard 6 chars.
        """
        # Get relative path from base directory
        rel_path = os.path.relpath(file_path, base_dir)
        # Extract filename without extension for ID
        filename = os.path.basename(rel_path)
        file_id = os.path.splitext(filename)[0]
        # Validate ID format (should be 6 characters for Sharey).
        # BUG FIX: the warning used to print the literal placeholder
        # "(unknown)" instead of the actual filename.
        if len(file_id) != 6:
            print(f"⚠️ Warning: {filename} has non-standard ID length ({len(file_id)} chars, expected 6)")
        return file_id

    def file_exists_in_b2(self, b2_path: str) -> bool:
        """Check whether ``b2_path`` already exists in the target bucket.

        Best-effort: any SDK error is treated as "does not exist" so migration
        proceeds rather than silently skipping files.
        """
        try:
            # Try different methods depending on B2 SDK version
            if hasattr(self.bucket, 'get_file_info_by_name'):
                file_info = self.bucket.get_file_info_by_name(b2_path)
                return True
            elif hasattr(self.bucket, 'ls'):
                for file_version, _ in self.bucket.ls(b2_path, recursive=False):
                    if file_version.file_name == b2_path:
                        return True
                return False
            else:
                # Fallback - assume doesn't exist to avoid skipping
                return False
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit during the existence check.
            return False

    def migrate_file(self, local_path: str, uploads_dir: str, dry_run: bool = False) -> bool:
        """Upload one local file to B2 as ``files/{id}{ext}``.

        Skips files already present in B2. Updates ``self.stats`` and
        ``self.migration_log``. Returns True on success or skip, False on error.
        """
        try:
            # Extract file ID and determine B2 path
            file_id = self.extract_id_from_path(local_path, uploads_dir)
            file_extension = os.path.splitext(local_path)[1]
            b2_path = f"files/{file_id}{file_extension}"
            # Check if file already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"⏭️ Skipping {file_id} (already exists in B2)")
                self.stats['files_skipped'] += 1
                return True
            # Get file info
            file_size = os.path.getsize(local_path)
            content_type = mimetypes.guess_type(local_path)[0] or 'application/octet-stream'
            print(f"📤 Uploading file: {file_id}{file_extension} ({file_size:,} bytes)")
            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['files_migrated'] += 1
                self.stats['total_size'] += file_size
                return True
            # Upload to B2 - try different methods for different SDK versions
            with open(local_path, 'rb') as file_data:
                data = file_data.read()
            try:
                # Method 1: upload_bytes (newer SDK)
                if hasattr(self.bucket, 'upload_bytes'):
                    self.bucket.upload_bytes(
                        data,
                        b2_path,
                        content_type=content_type
                    )
                # Method 2: upload with file-like object (older SDK)
                elif hasattr(self.bucket, 'upload_file'):
                    from io import BytesIO
                    self.bucket.upload_file(
                        BytesIO(data),
                        b2_path,
                        content_type=content_type
                    )
                # Method 3: upload with upload source (alternative)
                elif hasattr(self.bucket, 'upload'):
                    from io import BytesIO
                    self.bucket.upload(
                        BytesIO(data),
                        b2_path,
                        content_type=content_type
                    )
                else:
                    raise Exception("No compatible upload method found in B2 SDK")
            except Exception as upload_error:
                raise Exception(f"Upload failed: {upload_error}")
            self.stats['files_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"FILE: {file_id}{file_extension} -> {b2_path}")
            print(f" ✅ Uploaded successfully")
            return True
        except Exception as e:
            print(f" ❌ Failed to upload {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_paste(self, local_path: str, pastes_dir: str, dry_run: bool = False) -> bool:
        """Upload one local paste to B2 as ``pastes/{id}.txt`` (UTF-8 text).

        Same skip/stats/log semantics as :meth:`migrate_file`.
        """
        try:
            # Extract paste ID and determine B2 path
            paste_id = self.extract_id_from_path(local_path, pastes_dir)
            b2_path = f"pastes/{paste_id}.txt"
            # Check if paste already exists in B2
            if self.file_exists_in_b2(b2_path):
                print(f"⏭️ Skipping paste {paste_id} (already exists in B2)")
                self.stats['pastes_skipped'] += 1
                return True
            # Get paste info
            file_size = os.path.getsize(local_path)
            print(f"📝 Uploading paste: {paste_id} ({file_size:,} bytes)")
            if dry_run:
                print(f" [DRY RUN] Would upload to: {b2_path}")
                self.stats['pastes_migrated'] += 1
                self.stats['total_size'] += file_size
                return True
            # Read (tolerating stray bytes) and re-encode as clean UTF-8.
            with open(local_path, 'r', encoding='utf-8', errors='ignore') as file:
                content = file.read()
            data = content.encode('utf-8')
            try:
                # Method 1: upload_bytes (newer SDK)
                if hasattr(self.bucket, 'upload_bytes'):
                    self.bucket.upload_bytes(
                        data,
                        b2_path,
                        content_type='text/plain; charset=utf-8'
                    )
                # Method 2: upload with file-like object (older SDK)
                elif hasattr(self.bucket, 'upload_file'):
                    from io import BytesIO
                    self.bucket.upload_file(
                        BytesIO(data),
                        b2_path,
                        content_type='text/plain; charset=utf-8'
                    )
                # Method 3: upload with upload source (alternative)
                elif hasattr(self.bucket, 'upload'):
                    from io import BytesIO
                    self.bucket.upload(
                        BytesIO(data),
                        b2_path,
                        content_type='text/plain; charset=utf-8'
                    )
                else:
                    raise Exception("No compatible upload method found in B2 SDK")
            except Exception as upload_error:
                raise Exception(f"Upload failed: {upload_error}")
            self.stats['pastes_migrated'] += 1
            self.stats['total_size'] += file_size
            self.migration_log.append(f"PASTE: {paste_id} -> {b2_path}")
            print(f" ✅ Uploaded successfully")
            return True
        except Exception as e:
            print(f" ❌ Failed to upload paste {local_path}: {e}")
            self.stats['errors'] += 1
            self.migration_log.append(f"ERROR: {local_path} -> {e}")
            return False

    def migrate_all(self, base_path: str = ".", dry_run: bool = False,
                    skip_files: bool = False, skip_pastes: bool = False,
                    force: bool = False):
        """Scan local directories and migrate everything found to B2.

        Args:
            base_path: Directory containing ``uploads/`` and ``pastes/``.
            dry_run: Report what would happen without uploading.
            skip_files / skip_pastes: Exclude one category from migration.
            force: Skip the interactive confirmation prompt (new, default off —
                previously the prompt was unconditional, so ``--force`` had no
                effect).

        Returns:
            True if migration ran (or dry-ran); False if nothing was found or
            the user declined the confirmation prompt.
        """
        if dry_run:
            print("🧪 DRY RUN MODE - No files will actually be uploaded")
        print(f"\n🚀 Starting migration from: {os.path.abspath(base_path)}")
        print("=" * 60)
        # Scan for local files
        file_paths, paste_paths = self.scan_local_directories(base_path)
        if not file_paths and not paste_paths:
            print("❌ No files or pastes found to migrate")
            return False
        total_items = len(file_paths) + len(paste_paths)
        print(f"\n📊 Migration Plan:")
        print(f" Files to migrate: {len(file_paths)}")
        print(f" Pastes to migrate: {len(paste_paths)}")
        print(f" Total items: {total_items}")
        if not dry_run and not force:
            confirm = input(f"\n❓ Proceed with migration? (y/N): ").strip().lower()
            if confirm != 'y':
                print("Migration cancelled")
                return False
        print(f"\n🔄 Starting migration...")
        print("-" * 40)
        # Migrate files
        if file_paths and not skip_files:
            print(f"\n📁 Migrating {len(file_paths)} files...")
            uploads_dir = os.path.join(base_path, "uploads")
            for i, file_path in enumerate(file_paths, 1):
                print(f"[{i}/{len(file_paths)}] ", end="")
                self.migrate_file(file_path, uploads_dir, dry_run)
        # Migrate pastes
        if paste_paths and not skip_pastes:
            print(f"\n📝 Migrating {len(paste_paths)} pastes...")
            pastes_dir = os.path.join(base_path, "pastes")
            for i, paste_path in enumerate(paste_paths, 1):
                print(f"[{i}/{len(paste_paths)}] ", end="")
                self.migrate_paste(paste_path, pastes_dir, dry_run)
        self.print_summary(dry_run)
        self.save_migration_log()
        return True

    def print_summary(self, dry_run: bool = False):
        """Print the per-category counters and overall success rate."""
        print("\n" + "=" * 60)
        print("📊 MIGRATION SUMMARY")
        print("=" * 60)
        if dry_run:
            print("🧪 DRY RUN RESULTS:")
        print(f"✅ Files migrated: {self.stats['files_migrated']}")
        print(f"✅ Pastes migrated: {self.stats['pastes_migrated']}")
        print(f"⏭️ Files skipped: {self.stats['files_skipped']}")
        print(f"⏭️ Pastes skipped: {self.stats['pastes_skipped']}")
        print(f"❌ Errors: {self.stats['errors']}")
        print(f"📦 Total data: {self.stats['total_size']:,} bytes ({self.stats['total_size'] / 1024 / 1024:.2f} MB)")
        # Skipped items count as neither success nor failure; max(1, ...)
        # guards against division by zero when nothing was attempted.
        success_rate = ((self.stats['files_migrated'] + self.stats['pastes_migrated']) /
                        max(1, self.stats['files_migrated'] + self.stats['pastes_migrated'] + self.stats['errors'])) * 100
        print(f"📈 Success rate: {success_rate:.1f}%")
        if not dry_run and (self.stats['files_migrated'] > 0 or self.stats['pastes_migrated'] > 0):
            print(f"\n🎉 Migration completed successfully!")
            print(f"💡 Your files are now accessible via your Sharey B2 URLs")

    def save_migration_log(self):
        """Write the migration log and summary to a timestamped text file.

        No-op when nothing was logged. Failures are reported but never raised.
        """
        if not self.migration_log:
            return
        log_filename = f"migration_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        try:
            # BUG FIX: write as UTF-8 explicitly — log entries contain emoji
            # paths/messages that can crash under a non-UTF-8 default locale.
            with open(log_filename, 'w', encoding='utf-8') as f:
                f.write(f"Sharey B2 Migration Log\n")
                f.write(f"Generated: {datetime.now().isoformat()}\n")
                f.write(f"=" * 50 + "\n\n")
                for entry in self.migration_log:
                    f.write(f"{entry}\n")
                f.write(f"\n" + "=" * 50 + "\n")
                f.write(f"SUMMARY:\n")
                f.write(f"Files migrated: {self.stats['files_migrated']}\n")
                f.write(f"Pastes migrated: {self.stats['pastes_migrated']}\n")
                f.write(f"Files skipped: {self.stats['files_skipped']}\n")
                f.write(f"Pastes skipped: {self.stats['pastes_skipped']}\n")
                f.write(f"Errors: {self.stats['errors']}\n")
                f.write(f"Total size: {self.stats['total_size']:,} bytes\n")
            print(f"📄 Migration log saved to: {log_filename}")
        except Exception as e:
            print(f"⚠️ Failed to save migration log: {e}")
def main():
    """CLI entry point: parse arguments, connect to B2, run the migration.

    Exits 0 on success, 1 on failure/cancellation (argparse exits 2 on bad
    arguments).
    """
    print("🚀 Sharey Local-to-B2 Migration Tool")
    print("=" * 50)
    # Parse command line arguments
    import argparse
    import inspect
    parser = argparse.ArgumentParser(description='Migrate local Sharey files to Backblaze B2')
    parser.add_argument('--path', '-p', default='.', help='Path to Sharey directory (default: current directory)')
    parser.add_argument('--dry-run', '-d', action='store_true', help='Perform a dry run without uploading')
    parser.add_argument('--skip-files', action='store_true', help='Skip file migration')
    parser.add_argument('--skip-pastes', action='store_true', help='Skip paste migration')
    parser.add_argument('--force', '-f', action='store_true', help='Skip confirmation prompt')
    args = parser.parse_args()
    # Initialize migrator
    migrator = ShareyMigrator()
    # Initialize B2 connection
    if not migrator.initialize_b2():
        print("❌ Failed to initialize B2 connection")
        sys.exit(1)
    # BUG FIX: --force was parsed but never forwarded, so the confirmation
    # prompt always appeared. Forward it via feature detection so this still
    # works against an older ShareyMigrator without a `force` parameter.
    migrate_kwargs = {
        'base_path': args.path,
        'dry_run': args.dry_run,
        'skip_files': args.skip_files,
        'skip_pastes': args.skip_pastes,
    }
    if 'force' in inspect.signature(migrator.migrate_all).parameters:
        migrate_kwargs['force'] = args.force
    # Run migration
    try:
        success = migrator.migrate_all(**migrate_kwargs)
        if success:
            print(f"\n💡 Next steps:")
            print(f" 1. Test your Sharey app to ensure URLs work correctly")
            print(f" 2. Consider backing up your local files before deletion")
            print(f" 3. Update any hardcoded URLs to use the new B2 structure")
            sys.exit(0)
        else:
            sys.exit(1)
    except KeyboardInterrupt:
        print(f"\n⏹️ Migration cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Migration failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()