Commit 4190fef

Merge pull request #433 from Modalities/uv_support_for_different_cuda_versions
feat: Added cuda version selection to uv build.
2 parents 0596085 + 7fd495c commit 4190fef

3 files changed: 75 additions & 10 deletions

README.md

Lines changed: 4 additions & 3 deletions
````diff
@@ -44,11 +44,11 @@ It is recommended to install Modalities via uv or install PyTorch, psutil and Ni
 # Get uv (tested with uv version 0.9.13)
 curl -LsSf https://astral.sh/uv/install.sh | sh
 
-uv sync
+uv sync --extra [cpu|cu126|cu128|cu130] # Get CUDA version via nvidia-smi
 source .venv/bin/activate
 
 # For developers: use [tests,linting] and install pre-commit hooks
-uv sync --extra tests --extra linting
+uv sync --extra [cpu|cu126|cu128|cu130] --extra tests --extra linting
 pre-commit install --install-hooks
 ```
 
@@ -60,7 +60,8 @@ conda create -n modalities python=3.13
 conda activate modalities
 
 # Install PyTorch, psutil, Ninja and Flash Attention
-pip install "torch<2.11.0"
+# For PyTorch, select the correct index URL for your CUDA/CPU setup from https://pytorch.org/get-started/locally/ e.g.:
+pip install "torch>=2.10,<2.11.0" torchvision --index-url https://download.pytorch.org/whl/cu130
 pip install psutil ninja # Ninja lowers compilation time of flash attention significantly
 pip install flash-attn==2.8.3 --no-build-isolation
 ```
````
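
The `# Get CUDA version via nvidia-smi` hint can be scripted. Below is a minimal sketch, not part of this commit; the file name, the banner parsing, and the version-to-extra mapping are all assumptions about how one might pick the matching extra automatically:

```python
# pick_extra.py -- hypothetical helper, not shipped with Modalities.
# Maps the "CUDA Version: X.Y" line of the nvidia-smi banner to one of
# the extras defined in pyproject.toml (cpu, cu126, cu128, cu130).
import re
import shutil
import subprocess


def pick_extra() -> str:
    if shutil.which("nvidia-smi") is None:
        return "cpu"  # no NVIDIA driver visible -> CPU-only wheels
    banner = subprocess.run(["nvidia-smi"], capture_output=True, text=True).stdout
    match = re.search(r"CUDA Version:\s*(\d+)\.(\d+)", banner)
    if match is None:
        return "cpu"
    cuda = (int(match.group(1)), int(match.group(2)))
    # Round down to the newest extra the installed driver still supports.
    for extra, minimum in (("cu130", (13, 0)), ("cu128", (12, 8)), ("cu126", (12, 6))):
        if cuda >= minimum:
            return extra
    return "cpu"


if __name__ == "__main__":
    print(pick_extra())
```

The result can be fed straight into the new command, e.g. `uv sync --extra "$(python pick_extra.py)"`.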

pyproject.toml

Lines changed: 66 additions & 6 deletions
```diff
@@ -6,7 +6,6 @@ description = "Modalities, a PyTorch-native framework for distributed and reprod
 readme = "README.md"
 dependencies = [
     "numpy",
-    "torch<2.11.0",
     "ninja",
     "packaging",
     "tqdm",
@@ -25,25 +24,86 @@ dependencies = [
     "matplotlib",
     "wandb",
     "einops>=0.7.0",
-    "flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'",
     "debugpy", # For VSCode debugging support
 ]
 
 [project.urls]
 Homepage = "https://github.com/Modalities/modalities"
 Issues = "https://github.com/Modalities/modalities/issues"
 
-[project.optional-dependencies]
-linting = ["pre-commit"]
-tests = ["pytest", "pytest-cov", "debugpy"]
-
 [project.scripts]
 modalities = "modalities.__main__:main"
 
 [build-system]
 requires = ["setuptools >= 61.0.0"]
 build-backend = "setuptools.build_meta"
 
+[project.optional-dependencies]
+linting = ["pre-commit"]
+tests = ["pytest", "pytest-cov", "debugpy"]
+
+cpu = ["torch>=2.10,<2.11.0", "torchvision"]
+cu126 = [
+    "torch>=2.10,<2.11.0",
+    "torchvision",
+    "flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'"
+]
+cu128 = [
+    "torch>=2.10,<2.11.0",
+    "torchvision",
+    "flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'"
+]
+cu130 = [
+    "torch>=2.10,<2.11.0",
+    "torchvision",
+    "flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'"
+]
+
+[tool.uv]
+conflicts = [
+    [
+        { extra = "cpu" },
+        { extra = "cu126" },
+        { extra = "cu128" },
+        { extra = "cu130" },
+    ],
+]
+
+[tool.uv.sources]
+torch = [
+    { index = "pytorch-cpu", extra = "cpu" },
+    { index = "pytorch-cu126", extra = "cu126" },
+    { index = "pytorch-cu128", extra = "cu128" },
+    { index = "pytorch-cu130", extra = "cu130" },
+]
+torchvision = [
+    { index = "pytorch-cpu", extra = "cpu" },
+    { index = "pytorch-cu126", extra = "cu126" },
+    { index = "pytorch-cu128", extra = "cu128" },
+    { index = "pytorch-cu130", extra = "cu130" },
+]
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu126"
+url = "https://download.pytorch.org/whl/cu126"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu130"
+url = "https://download.pytorch.org/whl/cu130"
+explicit = true
+
+
 [tool.uv.extra-build-dependencies]
 flash-attn = [
     { requirement = "torch", match-runtime = true },
```
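
The `[tool.uv]` tables do the heavy lifting here: `conflicts` declares the four extras mutually exclusive, so one lockfile can carry all of them without ever co-installing two torch builds, and the `explicit = true` indexes are consulted only for the packages pinned to them in `[tool.uv.sources]`. A quick post-sync check, offered as a sketch assuming the synced environment is active (the `+cu130` local version tag is how PyTorch labels wheels from its cu130 index):

```python
# verify_torch.py -- illustrative check that the installed wheel matches
# the extra selected at `uv sync` time.
import torch

print(torch.__version__)          # e.g. "2.10.0+cu130" after `uv sync --extra cu130`
print(torch.version.cuda)         # CUDA toolkit the wheel was built against; None for the cpu extra
print(torch.cuda.is_available())  # True only with a compatible driver present
```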

src/modalities/utils/mfu.py

Lines changed: 5 additions & 1 deletion
```diff
@@ -14,7 +14,7 @@
 # https://www.nvidia.com/en-us/data-center/h100/
 #
 # NOTE: These values are valid for fp16 and bf16 only
-PEAK_PERFORMANCE = {"A100": 312e12, "H100": 989e12, "GH200": 989e12}
+PEAK_PERFORMANCE = {"A100": 312e12, "H100": 989e12, "GH200": 989e12, "B200": 2.25e15}
 
 
 class MFUCalculatorABC:
@@ -130,6 +130,10 @@ def _get_theoretical_gpu_peak_performance(model_parts: FSDPX | list[FSDP2], worl
             single_gpu_peak_performance = MFUCalculatorABC._get_theoretical_gpu_peak_performance_single(
                 precision, "GH200"
             )
+        elif device_name.startswith("NVIDIA B200"):
+            single_gpu_peak_performance = MFUCalculatorABC._get_theoretical_gpu_peak_performance_single(
+                precision, "B200"
+            )
         else:
             warnings.warn(f"Could not get theoretical GPU peak performance for unknown device = {device_name}.")
             return None
```
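
For context, `PEAK_PERFORMANCE` feeds the MFU (Model FLOPs Utilization) ratio: achieved training throughput divided by the combined theoretical peak of all participating GPUs. A minimal sketch of that arithmetic follows; the function name and call shape are illustrative, not the module's API:

```python
# Peak dense fp16/bf16 throughput in FLOP/s, mirroring the table in mfu.py.
PEAK_PERFORMANCE = {"A100": 312e12, "H100": 989e12, "GH200": 989e12, "B200": 2.25e15}


def mfu(achieved_flops_per_sec: float, device: str, world_size: int) -> float:
    """Model FLOPs Utilization: achieved throughput over theoretical peak."""
    return achieved_flops_per_sec / (PEAK_PERFORMANCE[device] * world_size)


# A job sustaining 1.5e15 model FLOP/s across two B200s: 1.5e15 / 4.5e15.
print(f"{mfu(1.5e15, 'B200', world_size=2):.1%}")  # -> 33.3%
```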
