From 8fc6aa5a1e8d2db4f6a8aa7c84677ef0e2828336 Mon Sep 17 00:00:00 2001 From: Carlos Gutierrez Date: Sun, 16 Nov 2025 16:55:58 -0500 Subject: [PATCH] fixing memory --- training/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/training/__init__.py b/training/__init__.py index 1b761e7..5cf1d9c 100644 --- a/training/__init__.py +++ b/training/__init__.py @@ -303,9 +303,12 @@ class Trainer: except KeyboardInterrupt: print("\n\nāš ļø Training interrupted by user!") print(f"šŸ’¾ Saving checkpoint at epoch {self.current_epoch + 1}...") - self.save_checkpoint() - print(f"āœ… Checkpoint saved! You can resume with:") - print(f" python3 train.py --data --resume {self.save_dir}/checkpoint_epoch_{self.current_epoch}.pt") + try: + self.save_checkpoint() + print(f"āœ… Checkpoint saved! You can resume with:") + print(f" python3 train.py --data --resume {self.save_dir}/checkpoint_epoch_{self.current_epoch}.pt") + except Exception as e: + print(f"āš ļø Warning: Could not save checkpoint: {e}") # Generate plots before exiting print("\nšŸ“Š Generating training plots...") @@ -316,7 +319,9 @@ class Trainer: except Exception as e: print(f"Warning: Could not generate plots: {e}") - raise + # Exit cleanly instead of re-raising + print("\nāœ… Training interrupted successfully. Exiting...") + return def save_checkpoint(self, is_best: bool = False, model_config: dict = None): """Save model checkpoint."""