SemiAnalysisAI · Oseltamivir · Jun 4, 2026 · Jun 4, 2026 · Jun 5, 2026 · Jun 5, 2026
@@ -1801,7 +1801,7 @@ dsv4-fp4-b200-vllm-agentic:
       - { tp: 8, ep: 8, dp-attn: true, offloading: cpu,  conc-list: [64, 128, 256] }
 
 dsv4-fp4-b200-trt:
-  image: ghcr.io#semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-9aa3715
+  image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc15.post1
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: b200-dsv4
@@ -1814,15 +1814,15 @@ dsv4-fp4-b200-trt:
       osl: 1024
       search-space:
       - { tp: 8, conc-start: 1, conc-end: 32 }
-      - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 2048 }
+      - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 128 }
     - isl: 8192
       osl: 1024
       search-space:
       - { tp: 8, conc-start: 1, conc-end: 32 }
-      - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 1024 }
+      - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 256 }
 
 dsv4-fp4-b200-trt-mtp:
-  image: ghcr.io#semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-9aa3715
+  image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc15.post1
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: b200-dsv4
@@ -1835,7 +1835,7 @@ dsv4-fp4-b200-trt-mtp:
       osl: 1024
       search-space:
       - { tp: 8, conc-start: 1, conc-end: 32, spec-decoding: mtp }
-      - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 512, spec-decoding: mtp }
+      - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 128, spec-decoding: mtp }
     - isl: 8192
       osl: 1024
       search-space:

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -3459,6 +3459,13 @@
     - "Add 1k1k/8k1k FP8 recipe set under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1648
 
+- config-keys:
+    - dsv4-fp4-b200-trt
+    - dsv4-fp4-b200-trt-mtp
+  description:
+    - "Update B200 DeepSeek-V4-Pro TRT image to the official nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc15.post1 (non-MTP and MTP), replacing the older ghcr.io#semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-9aa3715 build. The official release uses the V1 KV-cache manager (use_kv_cache_manager_v2=False), avoiding the custom feat/deepseek_v4 build's V2 doubling of max_num_requests that caused OOM at conc 256 with dp-attn enabled on B200, and runs the overlap scheduler natively (mirrors the B300 setup in PR #1636 / run 26999118817). Also lowers the conc-end upper bounds of the dp-attn search-space entries in both configs."
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1664
+
 - config-keys:
     - minimaxm2.5-fp8-b300-dynamo-vllm
   description: