Initial commit: SheepOp LLM - Transformer-based language model implementation
- Complete transformer implementation from scratch
- Training pipeline with gradient accumulation and mixed precision
- Optimized inference with KV caching
- Multi-format data processing (PDFs, images, code, text)
- Comprehensive documentation
- Apache 2.0 license
- Example training plots included in docs/images/
This commit is contained in:
291
setup_storage.py
Executable file
291
setup_storage.py
Executable file
@@ -0,0 +1,291 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Move large files to external storage and create symbolic links.
|
||||
Helps manage large datasets and checkpoints on systems with limited space.
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
|
||||
def create_storage_structure(storage_root: str,
                             subdirs=("data", "checkpoints", "checkpoints_test")):
    """Create the storage root directory and its subdirectories.

    Args:
        storage_root: Path (string or path-like) of the storage root. It is
            created together with any missing parents.
        subdirs: Names of the subdirectories to create beneath the root.
            Defaults to the three directories this script relocates.

    Returns:
        The storage root as a ``Path``.

    Raises:
        OSError: If any directory cannot be created (for example because of
            missing permissions). Existing directories are not an error.
    """
    storage_path = Path(storage_root)
    storage_path.mkdir(parents=True, exist_ok=True)

    # parents=True lets callers pass nested names such as "runs/exp1".
    for name in subdirs:
        (storage_path / name).mkdir(parents=True, exist_ok=True)

    print(f"✅ Created storage structure at: {storage_path}")
    return storage_path
|
||||
|
||||
|
||||
def move_and_link(source_dir: Path, target_dir: Path, link_name: str, dry_run: bool = False):
    """
    Move a directory's contents to storage and replace it with a symlink.

    Every entry of ``source_dir`` (regular files, sub-directories, hidden
    files, Python modules, ``__pycache__``) is moved into ``target_dir``.
    The now-empty ``source_dir`` is then removed and re-created as a symbolic
    link pointing at ``target_dir``.

    Nothing is ever deleted without having been moved first: if an entry with
    the same name already exists in ``target_dir`` it is left where it is,
    the source directory is kept, and the function reports failure.

    Args:
        source_dir: Source directory in project
        target_dir: Target directory in storage (created if missing)
        link_name: Name for the symbolic link (same as source_dir name).
            Not used by the implementation; kept so existing callers that
            pass it keep working.
        dry_run: If True, only show what would be done

    Returns:
        True if ``source_dir`` is (or, for a dry run, would become) a symlink
        to storage; False if the source is missing, is not a directory, has
        name conflicts with storage, or a filesystem operation failed.
    """
    source_dir = Path(source_dir)
    target_dir = Path(target_dir)

    # A previous run leaves a symlink behind. Treat that as "already done":
    # iterating through the link and then removing it would fail.
    if source_dir.is_symlink():
        print(f"ℹ️ {source_dir} is already a symlink: {os.readlink(source_dir)}")
        return True

    if not source_dir.exists():
        print(f"⚠️ Source directory doesn't exist: {source_dir}")
        return False

    if not source_dir.is_dir():
        print(f"⚠️ Source is not a directory: {source_dir}")
        return False

    if dry_run:
        print(f"\n[DRY RUN] Would move contents from {source_dir} to {target_dir}")
        print(f" Would replace {source_dir} with symlink -> {target_dir}")
        return True

    try:
        target_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"❌ Error creating {target_dir}: {e}")
        return False

    # Snapshot the listing first: entries are removed while we loop.
    conflicts = []
    for item in sorted(source_dir.iterdir()):
        target_item = target_dir / item.name
        # is_symlink() also catches a dangling link occupying the name.
        if target_item.exists() or target_item.is_symlink():
            print(f"⚠️ Skipping {item.name} (already exists in storage)")
            conflicts.append(item.name)
            continue

        print(f"📦 Moving {item.name}...")
        try:
            # shutil.move renames when possible and falls back to
            # copy-then-delete across filesystems.
            shutil.move(str(item), str(target_item))
        except OSError as e:
            print(f"❌ Error moving {item.name}: {e}")
            return False

    if conflicts:
        # Removing the directory now would destroy the skipped entries.
        print(f"⚠️ Could not remove {source_dir}: "
              f"{len(conflicts)} item(s) already exist in storage")
        return False

    # Replace source directory with symlink
    # Step 1: Remove original directory. rmdir() (not rmtree) so that anything
    # unexpectedly left behind aborts the operation instead of being deleted.
    try:
        source_dir.rmdir()
    except OSError as e:
        print(f"⚠️ Could not remove {source_dir}: {e}")
        return False

    # Step 2: Create symlink. The target is made absolute because a relative
    # link target would be interpreted relative to the link's own directory.
    link_target = target_dir.resolve()
    try:
        source_dir.symlink_to(link_target, target_is_directory=True)
    except OSError as e:
        print(f"❌ Error creating link: {e}")
        return False

    print(f"✅ Created symbolic link: {source_dir} -> {link_target}")
    return True
|
||||
|
||||
|
||||
def _relocate_data_dir(project_data: Path, storage_data: Path, dry_run: bool) -> bool:
    """Move the project's ``data/`` directory into storage and symlink it.

    ``__init__.py`` is copied to storage (unless one is already there) and
    every other entry except ``__pycache__`` is moved. The directory is only
    removed and replaced by a symlink when at least one entry was moved and
    no entry had to be skipped, so nothing is deleted without a copy in
    storage (the one exception: a source ``__init__.py`` is dropped when
    storage already has its own).

    Returns:
        True on success (or for a dry run), False if entries conflicted with
        existing storage content or a filesystem operation failed.
    """
    # is_symlink() must be tested before exists(): exists() follows the link
    # and is False for a dangling one, which would make symlink_to() raise.
    if project_data.is_symlink():
        print(f" ℹ️ data/ is already a symlink: {os.readlink(project_data)}")
        return True

    if not project_data.exists():
        print(" ℹ️ data/ directory doesn't exist, creating symlink...")
        if dry_run:
            return True
        try:
            storage_data.mkdir(parents=True, exist_ok=True)
            project_data.symlink_to(storage_data)
        except OSError as e:
            print(f" ❌ Error creating data/ symlink: {e}")
            return False
        print(f" ✅ Created data/ symlink -> {storage_data}")
        return True

    print(" Moving data files to storage (keeping __init__.py)...")
    moved_files = []
    conflicts = []
    try:
        # Copy __init__.py to storage first
        init_file = project_data / "__init__.py"
        storage_init = storage_data / "__init__.py"
        if init_file.exists():
            if storage_init.exists():
                print(" ℹ️ __init__.py already exists in storage")
            elif dry_run:
                print(f" [DRY RUN] Would copy: __init__.py -> {storage_init}")
            else:
                storage_data.mkdir(parents=True, exist_ok=True)
                shutil.copy2(init_file, storage_init)
                print(" ✅ Copied __init__.py to storage")

        # Move all other files. The listing is materialised up front because
        # entries disappear from the directory while we loop.
        for item in sorted(project_data.iterdir()):
            # __init__.py was handled above; __pycache__ is regenerable.
            if item.name in ("__init__.py", "__pycache__"):
                continue

            target_item = storage_data / item.name
            if dry_run:
                print(f" [DRY RUN] Would move: {item.name} -> {target_item}")
                moved_files.append(item.name)
            elif target_item.exists() or target_item.is_symlink():
                print(f" ⚠️ Already exists: {item.name}")
                conflicts.append(item.name)
            else:
                storage_data.mkdir(parents=True, exist_ok=True)
                shutil.move(str(item), str(target_item))
                moved_files.append(item.name)
                print(f" ✅ Moved: {item.name}")

        if dry_run:
            return True

        if conflicts:
            # Deleting data/ now would destroy the entries that were skipped.
            print(f" ⚠️ Keeping data/ in place: {len(conflicts)} item(s) "
                  f"already exist in storage")
            return False

        if not moved_files:
            # Nothing was relocated; leave the directory as it is.
            return True

        # Replace data/ with symlink. Only __init__.py / __pycache__ remain.
        shutil.rmtree(project_data)
        project_data.symlink_to(storage_data)
    except OSError as e:
        print(f" ❌ Error moving data: {e}")
        return False

    print(f" ✅ Replaced data/ with symlink -> {storage_data}")
    return True


def _relocate_checkpoint_dir(project_dir: Path, storage_dir: Path,
                             name: str, dry_run: bool) -> bool:
    """Relocate one checkpoint directory via ``move_and_link``.

    A directory that is already a symlink (e.g. from a previous run) or that
    does not exist is left alone and counts as success.
    """
    if project_dir.is_symlink():
        print(f" ℹ️ {name}/ is already a symlink: {os.readlink(project_dir)}")
        return True
    if not project_dir.exists():
        return True

    print(f" Moving {name} to storage...")
    return bool(move_and_link(project_dir, storage_dir, name, dry_run))


def main():
    """Command-line entry point.

    Parses the arguments, creates the storage directory structure, then moves
    ``data/``, ``checkpoints/`` and ``checkpoints_test/`` from the project
    root to the storage root, leaving symlinks behind.

    Returns:
        Process exit status: 0 on success or after a dry run, 1 if the
        storage structure could not be created or any relocation failed.
    """
    parser = argparse.ArgumentParser(
        description='Move large files to external storage and create symbolic links',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Dry run (show what would be done)
python3 setup_storage.py --storage /mnt/storage/sheepOp --dry-run

# Move data and checkpoints to storage
python3 setup_storage.py --storage /mnt/storage/sheepOp

# Only move data, not checkpoints
python3 setup_storage.py --storage /mnt/storage/sheepOp --skip-checkpoints
"""
    )

    parser.add_argument(
        '--storage',
        type=str,
        default='/mnt/storage/sheepOp',
        help='Storage root directory (default: /mnt/storage/sheepOp)'
    )

    parser.add_argument(
        '--project-root',
        type=str,
        default='.',
        help='Project root directory (default: current directory)'
    )

    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without actually doing it'
    )

    parser.add_argument(
        '--skip-checkpoints',
        action='store_true',
        help='Skip moving checkpoints (only move data)'
    )

    parser.add_argument(
        '--skip-data',
        action='store_true',
        help='Skip moving data (only move checkpoints)'
    )

    args = parser.parse_args()

    # Absolute paths, so the symlinks created later do not depend on the
    # directory the script was started from.
    project_root = Path(args.project_root).resolve()
    storage_root = Path(args.storage).resolve()

    print(f"🚀 Setting up storage links")
    print(f" Project root: {project_root}")
    print(f" Storage root: {storage_root}")
    print(f" Dry run: {args.dry_run}\n")

    # Create storage structure (always create, even in dry-run, to check permissions)
    try:
        create_storage_structure(args.storage)
    except OSError as e:
        if args.dry_run:
            print(f"⚠️ Could not create storage structure (will be created during actual run): {e}")
        else:
            print(f"❌ Error creating storage structure: {e}")
            return 1

    success = True

    # Move data
    if not args.skip_data:
        print(f"\n📁 Processing data directory...")
        data_ok = _relocate_data_dir(
            project_root / "data", storage_root / "data", args.dry_run
        )
        success = data_ok and success

    # Move checkpoints
    if not args.skip_checkpoints:
        print(f"\n💾 Processing checkpoints...")
        for name in ("checkpoints", "checkpoints_test"):
            dir_ok = _relocate_checkpoint_dir(
                project_root / name, storage_root / name, name, args.dry_run
            )
            success = dir_ok and success

    if args.dry_run:
        print(f"\n✅ Dry run complete. Use without --dry-run to execute.")
        return 0

    if not success:
        print(f"\n❌ Some operations failed. Please check the errors above.")
        return 1

    print(f"\n✅ Storage setup complete!")
    print(f"\n📋 Next steps:")
    print(f" 1. Your data files are now in: {storage_root}/data/")
    print(f" 2. Your checkpoints will be saved to: {storage_root}/checkpoints/")
    print(f" 3. Links are created in your project directory")
    print(f" 4. Training will automatically use the storage location")
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    raise SystemExit(main())
|
||||
|
||||
Reference in New Issue
Block a user