lof310
diff --git a/‎README.md‎
Lines changed: 7 additions & 12 deletions b/‎README.md‎
Lines changed: 7 additions & 12 deletions
diff --git a/‎arch_eval/__init__.py‎
Lines changed: 0 additions & 4 deletions b/‎arch_eval/__init__.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎arch_eval/__pycache__/__init__.cpython-313.pyc‎
1004 Bytes b/‎arch_eval/__pycache__/__init__.cpython-313.pyc‎
1004 Bytes
diff --git a/‎arch_eval/__pycache__/profiler.cpython-313.pyc‎
2.53 KB b/‎arch_eval/__pycache__/profiler.cpython-313.pyc‎
2.53 KB
diff --git a/‎arch_eval/core/benchmark.py‎
Lines changed: 0 additions & 2 deletions b/‎arch_eval/core/benchmark.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎arch_eval/core/config.py‎
Lines changed: 20 additions & 2 deletions b/‎arch_eval/core/config.py‎
Lines changed: 20 additions & 2 deletions
diff --git a/‎arch_eval/core/trainer.py‎
Lines changed: 38 additions & 29 deletions b/‎arch_eval/core/trainer.py‎
Lines changed: 38 additions & 29 deletions
@@ -1,11 +1,13 @@
-# arch_eval library
+# arch_eval
 
 [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
 [![PyTorch](https://img.shields.io/badge/PyTorch-1.9+-ee4c2c.svg)](https://pytorch.org/)
 [![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)](https://opensource.org/licenses/Apache-2.0)
 [![GitHub Repo](https://img.shields.io/badge/GitHub-lof310%2Farch__eval-blue)](https://github.com/lof310/arch_eval)
+[![Stars](https://img.shields.io/github/stars/lof310/arch_eval)](https://github.com/lof310/arch_eval/stargazers)
+[![Downloads](https://img.shields.io/github/downloads/lof310/arch_eval/total)](https://github.com/lof310/arch_eval/releases)
 
-**arch_eval** is a High-Level library for Efficient and Fast Architecture Evaluation and Comparison of Machine Learning models. It provides a unified interface for training, benchmarking, and hyperparameter optimization with features like distributed training, mixed precision, and real-time visualization.
+A high-level library for efficient and fast architecture evaluation and comparison of machine learning models. It provides a unified interface for training, benchmarking, and hyperparameter optimization, with features such as distributed training, mixed precision, and real-time visualization.
 
 ## Features
 
@@ -15,11 +17,10 @@
 - **Advanced Mixed Precision**: AMP with float16, bfloat16, and experimental FP8 support.
 - **Gradient Checkpointing**: Reduce memory footprint for large models.
 - **Rich Visualization**: Real-time training windows, video recording of metrics, and publication‑ready plots.
-- **Logging**: DirectIntegration with Weights & Biases.
+- **Logging**: Integration with Weights & Biases.
 - **Hyperparameter Optimization**: Grid search and random search out of the box.
 - **Extensible Plugin System**: Custom hooks and callbacks for maximum flexibility.
-- **Robust Data Handling**: Supports PyTorch Datasets, synthetic data, torchvision datasets, Hugging Face datasets, and streaming.
-- **Production-Ready**: Configurable timeouts, retry logic and deterministic execution.
+- **Data Handling**: Supports PyTorch Datasets, synthetic data, torchvision datasets, Hugging Face datasets, and streaming.
 
 ## Installation
 
@@ -37,11 +38,6 @@ pip install -e .
 pip install .
 ```
 
-Or Install directly with pip
-```bash
-pip install arch_eval
-```
-
 ## Quick Start
 
 ### 1. Train a Single Model
@@ -68,8 +64,7 @@ class MLP(nn.Module):
         self.net = nn.Sequential(
             nn.Linear(input_size, hidden),
             nn.GELU(),
-            nn.Linear(hidden, num_classes),
-            nn.Softmax(dim=-1)
+            nn.Linear(hidden, num_classes)
         )
 
     def forward(self, x):
 
@@ -11,8 +11,6 @@
 from arch_eval.logging.logger_config import setup_logging
 from arch_eval.plugins.manager import PluginManager
 
-# from arch_eval.interpret import permutation_importance, attention_weights
-
 _plugin_manager = PluginManager()
 _plugin_manager.discover_plugins()
 
@@ -27,6 +25,4 @@
     "init_distributed",
     "cleanup_distributed",
     "HyperparameterOptimizer",
-    "permutation_importance",
-    "attention_weights",
 ]
@@ -19,7 +19,6 @@
 
 logger = logging.getLogger(__name__)
 
-
 def _train_single_process(args):
     """Helper for process-based parallelism with memory cleanup."""
     model_info, config = args
@@ -61,7 +60,6 @@ def _train_single_process(args):
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
-
 class Benchmark:
     """Benchmark multiple models for comparison."""
 
 
@@ -1,28 +1,33 @@
 """Configuration dataclasses for Trainer and Benchmark."""
 
 import os
+import warnings
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import torch
 
+
 class TaskType(str, Enum):
     REGRESSION = "regression"
     CLASSIFICATION = "classification"
     NEXT_TOKEN_PREDICTION = "next-token-prediction"
 
+
 class DistributedBackend(str, Enum):
     NONE = "none"
     DATAPARALLEL = "dp"
     DISTRIBUTED = "ddp"
     FSDP = "fsdp"
 
+
 class MixedPrecisionDtype(str, Enum):
     FLOAT16 = "float16"
     BFLOAT16 = "bfloat16"
     FP8 = "fp8"  # experimental
 
+
 def _serialize_callable(obj: Any) -> Any:
     """Convert a callable to a serializable representation."""
     if obj is None:
@@ -34,6 +39,7 @@ def _serialize_callable(obj: Any) -> Any:
     warnings.warn(f"Callable {obj} may not be picklable.")
     return str(obj)
 
+
 def _deserialize_callable(rep: Any) -> Any:
     """Restore a callable from its serialized representation."""
     if rep is None or not isinstance(rep, tuple):
@@ -47,6 +53,7 @@ def _deserialize_callable(rep: Any) -> Any:
             raise ValueError(f"Could not restore function {module_name}.{func_name}: {e}")
     return rep
 
+
 def _serialize_dtype(dtype: torch.dtype) -> str:
     """Convert torch.dtype to string."""
     return str(dtype).split('.')[-1]
@@ -56,6 +63,7 @@ def _deserialize_dtype(dtype_str: str) -> torch.dtype:
     """Convert string back to torch.dtype."""
     return getattr(torch, dtype_str)
 
+
 @dataclass
 class BaseConfig:
     """Base configuration with common fields."""
@@ -112,7 +120,8 @@ def __post_init__(self):
         if self.seed is not None:
             torch.manual_seed(self.seed)
             torch.cuda.manual_seed_all(self.seed)
-            import numpy as np, random
+            import numpy as np
+            import random
             np.random.seed(self.seed)
             random.seed(self.seed)
         if self.deterministic:
@@ -138,6 +147,7 @@ def __setstate__(self, state):
             state['dtype'] = _deserialize_dtype(state['dtype'])
         self.__dict__.update(state)
 
+
 @dataclass
 class TrainingConfig(BaseConfig):
     """Configuration for Trainer."""
@@ -217,6 +227,13 @@ class TrainingConfig(BaseConfig):
     # Profiling
     profiler: Optional[Dict[str, Any]] = None  # e.g. {"enabled": True, "activities": ["cpu", "cuda"], "schedule": {...}}
 
+    # Memory
+    gc_collect_interval: int = 50
+
+    # Confusion matrix
+    log_confusion_matrix: bool = False
+    confusion_matrix_labels: Optional[List[str]] = None
+
     def __post_init__(self):
         super().__post_init__()
         if self.log_to_wandb and self.wandb_project is None:
@@ -250,7 +267,7 @@ def __post_init__(self):
                 raise ConfigurationError("distributed_world_size must be >= 1")
         if self.mixed_precision_dtype == MixedPrecisionDtype.FP8:
             try:
-                import transformer_engine.pytorch as te
+                import transformer_engine.pytorch as te  # noqa
             except ImportError:
                 raise ConfigurationError("FP8 requires NVIDIA Transformer Engine installed.")
 
@@ -286,6 +303,7 @@ def __setstate__(self, state):
             state['callbacks'] = restored_callbacks
         super().__setstate__(state)
 
+
 @dataclass
 class BenchmarkConfig(BaseConfig):
     """Configuration for Benchmark."""
 
@@ -2,7 +2,6 @@
 
 import logging
 import os
-import time
 from collections import defaultdict
 from contextlib import AbstractContextManager
 from typing import Any, Dict, List, Optional, Tuple, Union
@@ -19,9 +18,9 @@
 from arch_eval.distributed import cleanup_distributed, get_wrapped_model, init_distributed
 from arch_eval.logging.logger_config import LoggerAdapter
 from arch_eval.metrics.calculator import MetricCalculator
-from arch_eval.plugins.manager import PluginManager, hook
+from arch_eval.plugins.manager import PluginManager
 from arch_eval.profiler import profiler_context
-from arch_eval.utils.device import memory_summary
+from arch_eval.utils.device import memory_summary, auto_device
 from arch_eval.viz.viz import PlotSaver, RealtimeWindow, VideoRecorder
 
 logger = logging.getLogger(__name__)
@@ -61,13 +60,15 @@ def __init__(self, model: nn.Module, config: TrainingConfig):
         else:
             self.model = model
 
-        self._validate_model()
         self.device = torch.device(config.device)
         self.model = self.model.to(self.device).to(config.dtype)
 
         self.dataset_handler = DatasetHandler(config)
         self.train_loader, self.val_loader, self.test_loader = self.dataset_handler.prepare_loaders()
 
+        # Validate model with a real batch (if train_loader exists)
+        self._validate_model_with_data()
+
         self.metric_calculator = MetricCalculator(
             config.task, config.device, output_transform=config.model_output_transform
         )
@@ -81,15 +82,15 @@ def __init__(self, model: nn.Module, config: TrainingConfig):
         self.amp_dtype = self._get_amp_dtype()
         self.scaler = torch.cuda.amp.GradScaler() if self.use_amp and config.grad_scaler else None
 
-        # Gradient checkpointing (experimental) TODO
+        # Gradient checkpointing (experimental)
         if config.gradient_checkpointing:
             self._apply_gradient_checkpointing()
 
         # Visualization
         self.window = None
         if config.realtime:
             try:
-                self.window = RealtimeWindow(config)
+                self.window = RealtimeWindow(config, metric_names=config.viz_metrics)
                 if getattr(self.window, "disabled", False):
                     self.window = None
             except Exception as e:
@@ -120,6 +121,9 @@ def __init__(self, model: nn.Module, config: TrainingConfig):
         self.accumulation_steps = config.gradient_accumulation_steps
         self.current_accum_step = 0
 
+        # Initialize checkpoint best metric
+        self.checkpoint_best_metric = None
+
         self.logger.info(f"Trainer initialized on {self.device}\n{memory_summary()}")
 
     def _get_amp_dtype(self):
@@ -135,10 +139,7 @@ def _get_amp_dtype(self):
             return torch.float16
 
     def _apply_gradient_checkpointing(self):
-        """Experimental: attempts to enable gradient checkpointing on specified modules.
-        This is not a standard PyTorch feature; models must implement it internally.
-        The current implementation sets a '_gradient_checkpointing' attribute on modules,
-        which may be used by custom layers. For most models, this will have no effect."""
+        """Experimental: attempts to enable gradient checkpointing on specified modules."""
         if self.config.gradient_checkpointing_modules:
             for name in self.config.gradient_checkpointing_modules:
                 module = dict(self.model.named_modules()).get(name)
@@ -150,14 +151,24 @@ def _apply_gradient_checkpointing(self):
                     module._gradient_checkpointing = True
         self.logger.warning("Gradient checkpointing is experimental and may not work as expected.")
 
-    def _validate_model(self):
-        shape = self.config.input_shape or (1, 10)
-        dummy = torch.randn(1, *shape).to(torch.device(self.config.device))
+    def _validate_model_with_data(self):
+        """Run a forward pass on a single batch to ensure the model accepts the data."""
+        if self.train_loader is None:
+            self.logger.warning("No training loader – skipping model validation.")
+            return
         try:
+            # Get one batch
+            data, targets = next(iter(self.train_loader))
+            data = data.to(self.device)
+            targets = targets.to(self.device)
+            self.model.eval()
             with torch.no_grad():
-                self.model(dummy)
+                _ = self.model(data)
+            self.model.train()
+            self.logger.info("Model validation passed.")
         except Exception as e:
-            raise ModelError(f"Model validation failed: {e}")
+            raise ModelError(f"Model validation failed on a real batch: {e}. "
+                             "Check that your model's input size matches the dataset features.")
 
     def _setup_optimizers(self):
         self.optimizers = []
@@ -231,7 +242,7 @@ def _setup_loss_function(self):
 
     def _compute_loss(self, output, targets):
         if isinstance(output, tuple) and len(output) == 2:
-            return output[1] # Assume second element is loss
+            return output[1]  # Assume second element is loss
         else:
             return self.criterion(output, targets)
 
@@ -298,8 +309,6 @@ def train(self) -> Dict[str, List[float]]:
                 self.window.close()
             if self.config.log_to_wandb:
                 wandb.finish()
-            if hasattr(self, "tb_writer") and self.tb_writer:
-                self.tb_writer.close()
             if self.config.distributed_backend != DistributedBackend.NONE:
                 cleanup_distributed()
 
@@ -334,7 +343,6 @@ def _train_epoch(self) -> Dict[str, float]:
             else:
                 loss.backward()
 
-#            self.plugin_manager.execute_hook("on_backward", self, loss.item() * self.accumulation_steps) # FIXME: Possible Overhead
             self.current_accum_step += 1
 
             if self.current_accum_step % self.accumulation_steps == 0:
@@ -397,7 +405,6 @@ def _evaluate(self, loader: DataLoader, split: str) -> Dict[str, float]:
         total_loss = 0.0
         metric_accum = defaultdict(float)
         count = 0
-        autocast = torch.cuda.amp.autocast if self.use_amp else NullContext
 
         # Reset confusion matrix accumulator if needed
         if self.config.log_confusion_matrix and split == "val":
@@ -437,8 +444,8 @@ def _evaluate(self, loader: DataLoader, split: str) -> Dict[str, float]:
                 wandb.log({
                     f"confusion_matrix/{split}": wandb.plot.confusion_matrix(
                         probs=None,
-                        y_true=self.metric_calculator._all_targets,
-                        preds=self.metric_calculator._all_preds,
+                        y_true=np.array(self.metric_calculator._all_targets),
+                        preds=np.array(self.metric_calculator._all_preds),
                         class_names=class_names
                     )
                 }, step=self.current_epoch)
@@ -475,9 +482,6 @@ def _log_metrics(self, metrics: Dict[str, float], step: int):
 
         if self.config.log_to_wandb:
             wandb.log(metrics, step=step)
-        if hasattr(self, "tb_writer") and self.tb_writer:
-            for k, v in metrics.items():
-                self.tb_writer.add_scalar(k, v, step)
 
         self.plugin_manager.execute_hook("on_log", self, metrics, step)
 
@@ -502,13 +506,18 @@ def _save_checkpoint(self, epoch: int, metrics: Dict[str, float]):
             current = metrics.get(self.config.checkpoint_metric)
             if current is not None:
                 mode = "min" if "loss" in self.config.checkpoint_metric else "max"
-                improved = (mode == "min" and current < self.checkpoint_best_metric) or (
-                    mode == "max" and current > self.checkpoint_best_metric
-                )
-                if improved:
+                if self.checkpoint_best_metric is None:
                     self.checkpoint_best_metric = current
                     is_best = True
                     save_this = True
+                else:
+                    improved = (mode == "min" and current < self.checkpoint_best_metric) or (
+                        mode == "max" and current > self.checkpoint_best_metric
+                    )
+                    if improved:
+                        self.checkpoint_best_metric = current
+                        is_best = True
+                        save_this = True
         else:
             if epoch % self.config.save_frequency == 0:
                 save_this = True