justbin-coder
diff --git a/‎.github/workflows/math_e2e.yml‎
Lines changed: 5 additions & 0 deletions b/‎.github/workflows/math_e2e.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/workflows/math_e2e_rollout_logprobs.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/math_e2e_rollout_logprobs.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 2 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎README.md‎
Lines changed: 33 additions & 12 deletions b/‎README.md‎
Lines changed: 33 additions & 12 deletions
@@ -67,6 +67,11 @@ jobs:
                 export REPO_PATH=$(pwd)
                 bash tests/e2e_tests/math/sglang/run_pipeline.sh
 
+            - name: vLLM Pipeline mode
+              run: |
+                export REPO_PATH=$(pwd)
+                bash tests/e2e_tests/math/vllm/run_pipeline.sh
+
     qwen-grpo-test-sglang044:
         runs-on: rlinf
         container:
 
@@ -66,3 +66,9 @@ jobs:
               run: |
                 export REPO_PATH=$(pwd)
                 bash tests/e2e_tests/math/sglang/run_pipeline.sh qwen2.5-1.5b-grpo-pipeline-rollout-logprobs.yaml
+
+            - name: vLLM Pipeline mode
+              run: |
+                export REPO_PATH=$(pwd)
+                bash tests/e2e_tests/math/vllm/run_pipeline.sh qwen2.5-1.5b-grpo-pipeline-rollout-logprobs.yaml
+
@@ -3,9 +3,8 @@ repos:
     rev: "v0.12.9"
     hooks:
       - id: ruff
-        args: ["--preview"]
+        args: ["--preview", "--fix"]
       - id: ruff-format
-        args: ["--check"]
 
   - repo: https://github.com/commit-check/commit-check
     rev: "v0.10.2"
 
@@ -11,6 +11,13 @@
 <a href="https://github.com/RLinf/misc/blob/main/pic/wechat.jpg?raw=true"><img src="https://img.shields.io/badge/微信-green?logo=wechat&amp"></a>
 </div>
 
+<div align="center">
+
+[![English](https://img.shields.io/badge/lang-English-blue.svg)](README.md)
+[![简体中文](https://img.shields.io/badge/语言-简体中文-red.svg)](README.zh-CN.md)
+
+</div>
+
 <h1 align="center">
   <sub>RLinf: Reinforcement Learning Infrastructure for Agentic AI</sub>
 </h1>
@@ -25,6 +32,7 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
 ## What's NEW!
 - [2025/09] <img src="https://github.githubassets.com/images/icons/emoji/unicode/1f525.png" width="18" /> [Example Gallery](https://rlinf.readthedocs.io/en/latest/rst_source/examples/index.html) is updated, users can find various off-the-shelf examples!
 - [2025/09] The paper [RLinf: Flexible and Efficient Large-scale Reinforcement Learning via Macro-to-Micro Flow Transformation](https://arxiv.org/abs/2509.15965) is released.
+- [2025/09] The [report on RLinf by Machine Heart](https://mp.weixin.qq.com/s/Xtv4gDu3lhDDGadLrzt6Aw) is released.
 - [2025/08] RLinf is open-sourced. The formal v0.1 will be released soon.
 
 ## Key Features
@@ -68,7 +76,7 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
 <div align="center">
 <table>
   <tr>
-    <th colspan="5" style="text-align:center;"><strong>OpenVLA-OFT model results on ManiSkill3</strong></th>
+    <th colspan="5" style="text-align:center;"><strong>OpenVLA and OpenVLA-OFT model results on ManiSkill3</strong></th>
   </tr>
   <tr>
     <th>Model</th>
@@ -120,10 +128,10 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
   </tr>
   <tr>
     <th>Model</th>
-    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-spatial">Spatial</a></th>
-    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-goal">Goal</a></th>
-    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-object">Object</a></th>
-    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-long">Long</a></th>
+    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-spatial"><img src="docs/source-en/_static/svg/hf-logo.svg" alt="HF" width="16" height="16" style="vertical-align: middle;">Spatial</a></th>
+    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-goal"><img src="docs/source-en/_static/svg/hf-logo.svg" alt="HF" width="16" height="16" style="vertical-align: middle;">Goal</a></th>
+    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-object"><img src="docs/source-en/_static/svg/hf-logo.svg" alt="HF" width="16" height="16" style="vertical-align: middle;">Object</a></th>
+    <th><a href="https://huggingface.co/RLinf/RLinf-OpenVLAOFT-GRPO-LIBERO-long"><img src="docs/source-en/_static/svg/hf-logo.svg" alt="HF" width="16" height="16" style="vertical-align: middle;">Long</a></th>
     <th>Average</th>
   </tr>
   <tr>
@@ -166,9 +174,9 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
   </tr>
   <tr>
     <th>Model</th>
-    <th><a href="https://huggingface.co/datasets/RLinf/AIME24">AIME 24</a></th>
-    <th><a href="https://huggingface.co/datasets/RLinf/AIME25">AIME 25</a></th>
-    <th><a href="https://huggingface.co/datasets/RLinf/GPQA-diamond">GPQA-diamond</a></th>
+    <th>AIME 24</th>
+    <th>AIME 25</th>
+    <th>GPQA-diamond</th>
     <th>Average</th>
   </tr>
   <tr>
@@ -211,9 +219,9 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
   </tr>
   <tr>
     <th>Model</th>
-    <th><a href="https://huggingface.co/datasets/RLinf/AIME24">AIME 24</a></th>
-    <th><a href="https://huggingface.co/datasets/RLinf/AIME25">AIME 25</a></th>
-    <th><a href="https://huggingface.co/datasets/RLinf/GPQA-diamond">GPQA-diamond</a></th>
+    <th>AIME 24</th>
+    <th>AIME 25</th>
+    <th>GPQA-diamond</th>
     <th>Average</th>
   </tr>
   <tr>
@@ -330,7 +338,20 @@ If you find **RLinf** helpful, please cite the paper:
 }
 ```
 
-If you use RL+VLA in RLinf, you can also cite our empirical study paper:
+If you use RL+VLA in RLinf, you can also cite our technical report and empirical study paper:
+
+```bibtex
+@misc{zang2025rlinfvlaunifiedefficientframework,
+      title={RLinf-VLA: A Unified and Efficient Framework for VLA+RL Training}, 
+      author={Hongzhi Zang and Mingjie Wei and Si Xu and Yongji Wu and Zhen Guo and Yuanqing Wang and Hao Lin and Liangzhi Shi and Yuqing Xie and Zhexuan Xu and Zhihao Liu and Kang Chen and Wenhao Tang and Quanlu Zhang and Weinan Zhang and Chao Yu and Yu Wang},
+      year={2025},
+      eprint={2510.06710},
+      archivePrefix={arXiv},
+      primaryClass={cs.RO},
+      url={https://arxiv.org/abs/2510.06710}, 
+}
+```
+
 ```bibtex
 @misc{liu2025rlbringvlageneralization,
   title={What Can RL Bring to VLA Generalization? An Empirical Study},