ElementAI · RaymondLi0 · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/.agents/skills b/.agents/skills
@@ -0,0 +1 @@
+../skills
diff --git a/.claude/settings.json b/.claude/settings.json
@@ -0,0 +1,14 @@
+{
+  "hooks": {
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "printf '{\"hookSpecificOutput\":{\"hookEventName\":\"UserPromptSubmit\",\"additionalContext\":\"MANDATORY WORKFLOW — never skip or reorder: (1) Read the artifact first (commit, file, error, PR). (2) Identify and invoke the relevant skill via the Skill tool BEFORE forming any answer or plan — even when the answer seems obvious. (3) Only then answer using the skill context. Skipping step 2 is not allowed.\"}}'"
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.claude/skills b/.claude/skills
@@ -0,0 +1 @@
+../skills
diff --git a/.cursorrules b/.cursorrules
@@ -0,0 +1 @@
+See CLAUDE.md for all repository guidelines.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -0,0 +1,80 @@
+megatron/core/ @NVIDIA/core-adlr @NVIDIA/core-nemo
+
+megatron/core/models/common/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/gpt
+
+megatron/core/models/gpt/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/gpt
+
+megatron/core/models/multimodal/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/multi-modal
+
+megatron/core/models/mamba/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-model
+megatron/core/ssm/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-model
+
+megatron/core/models/hybrid/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-model
+
+megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets
+
+megatron/core/tokenizers/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/tokenizers
+
+megatron/core/distributed/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/data-parallelism
+megatron/core/distributed/fsdp/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp
+
+megatron/core/transformer/fsdp_dtensor_checkpoint.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp
+
+megatron/core/dist_checkpointing/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-checkpointing
+
+megatron/core/optimizer/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mcore-optimizer
+
+megatron/core/optimizer/distrib_optimizer.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-optimizer
+megatron/core/optimizer/layer_wise_optimizer.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-optimizer
+megatron/core/optimizer/param_layout.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-optimizer
+
+megatron/core/optimizer/emerging_optimizers.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mcore-emerging-optimizers
+megatron/core/optimizer/muon.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mcore-emerging-optimizers
+megatron/core/optimizer/qk_clip.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mcore-emerging-optimizers @NVIDIA/transformer
+
+megatron/core/inference/modelopt_support @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/post-training
+
+megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets
+
+megatron/core/pipeline_parallel/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/pipeline-parallelism
+
+megatron/core/transformer/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/transformer
+
+megatron/core/transformer/moe/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mixture-of-experts-adlr @NVIDIA/mixture-of-experts-devtech
+
+megatron/core/inference/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/inference
+
+megatron/inference/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/inference-interface
+
+megatron/core/parallel_state.py @NVIDIA/core-adlr @NVIDIA/core-nemo
+
+megatron/core/post_training/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/post-training
+
+megatron/post_training/ @NVIDIA/post-training
+
+megatron/core/transformer/cuda_graphs.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/cuda-graphs
+
+megatron/training/ @NVIDIA/training-adlr @NVIDIA/training-nemo
+megatron/training/arguments.py
+
+.gitlab/ @NVIDIA/ci
+.github/ @NVIDIA/ci
+.github/oncall_schedule.json @NVIDIA/mcore-oncall-rotation
+.gitlab-ci.yml @NVIDIA/ci
+docker/  @NVIDIA/ci
+tests/functional_tests/python_test_utils/ @NVIDIA/ci
+tests/functional_tests/shell_test_utils/ @NVIDIA/ci
+tests/test_utils/recipes/ @NVIDIA/ci
+tests/unit_tests/run_ci_test.sh @NVIDIA/ci
+
+# API Backwards Compatibility Check
+scripts/check_api_backwards_compatibility.py @NVIDIA/ci
+scripts/README_API_COMPAT.md @NVIDIA/ci
+.github/workflows/check_api_backwards_compatibility_workflow.yml @NVIDIA/ci
+docs/api-backwards-compatibility-check.md @NVIDIA/ci
+tests/unit_tests/test_api_backwards_compat_setup.py @NVIDIA/ci
+
+megatron/rl/ @NVIDIA/reinforcement-learning
+examples/rl/ @NVIDIA/reinforcement-learning
+tests/unit_tests/test_rl_utils.py @NVIDIA/reinforcement-learning
+train_rl.py @NVIDIA/reinforcement-learning
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,29 @@
+---
+name: Bug report
+about: Create a report to help us improve the repository or project
+title: ""
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+
+A clear and concise description of what the bug is. Tag @NVIDIA/mcore-oncall
+to get oncall's attention to this issue.
+
+**Steps/Code to reproduce bug**
+
+Please list *minimal* steps or code snippet for us to be able to reproduce the bug.
+
+A helpful guide on how to craft a minimal bug report: http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports.
+
+
+**Expected behavior**
+
+A clear and concise description of what you expected to happen.
+
+
+**Additional context**
+
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,2 @@
+blank_issues_enabled: false
+
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,23 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ""
+labels: enhancement
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+Tag @NVIDIA/mcore-oncall
+to get oncall's attention to this issue.
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,13 @@
+---
+name: QUESTION
+about: Ask a question about Megatron-LM that is not a bug, regression or enhancement
+  request
+title: "[QUESTION]"
+labels: ''
+assignees: ''
+
+---
+
+**Your question**
+Ask a clear and concise question about Megatron-LM. Tag @NVIDIA/mcore-oncall
+to get oncall's attention to this issue.
diff --git a/.github/ISSUE_TEMPLATE/regression.md b/.github/ISSUE_TEMPLATE/regression.md
@@ -0,0 +1,40 @@
+---
+name: REGRESSION
+about: Report a regression in speed or accuracy due to a Megatron-LM update
+title: "[REGRESSION]"
+labels: ''
+assignees: ''
+
+---
+
+**Describe the regression**
+A clear and concise description of what the regression is. Tag @NVIDIA/mcore-oncall
+to get oncall's attention to this issue.
+
+**To Reproduce**
+Steps to reproduce the behavior. The easier it is to reproduce, the faster it will get maintainer attention.
+
+**Previous performance**
+What speed or accuracy did you previously see?
+
+**New performance**
+What speed or accuracy do you see after the update?
+
+**Stack trace/logs**
+If applicable, add the stack trace or logs related to the regression.
+
+**Environment (please complete the following information):**
+ - Previous Megatron-LM commit ID
+ - New Megatron-LM commit ID
+ - Previous PyTorch version
+ - New PyTorch version
+ - Previous CUDA version
+ - New CUDA version
+ - Previous NCCL version
+ - New NCCL version
+
+**Proposed fix**
+If you have a proposal for how to fix the issue, state it here or link to a PR.
+
+**Additional context**
+Add any other context about the problem here.