<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AnyTask: an Automated Task and Data Generation Framework for Advancing Sim-to-Real Policy Learning</title>
<!-- <link rel="icon" href="https://rai-inst.com/wp-content/uploads/2024/12/cropped-RAI-FavIcon-32x32.png" sizes="32x32"> -->
<link rel="icon" href="https://rai-inst.com/wp-content/uploads/2024/12/cropped-RAI-FavIcon-32x32.png" sizes="32x32">
<link rel="icon" href="https://rai-inst.com/wp-content/uploads/2024/12/cropped-RAI-FavIcon-192x192.png"
sizes="192x192">
<link rel="apple-touch-icon" href="https://rai-inst.com/wp-content/uploads/2024/12/cropped-RAI-FavIcon-180x180.png">
<link rel="stylesheet" href="style.css" />
</head>
<body>
<div id="content" style="display:block;">
<!-- HERO -->
<!--<header class="full-page-image" id="hero">
<video id="bg-video" preload="auto" autoplay muted playsinline loop
poster="assets/images/teaser_poster.jpg">
<source src="assets/videos/teaser_ball_3x_compressed.mp4" type="video/mp4" />
</video>
<div class="overlay"></div>
<div class="bottom-overlay" style="padding:0 24px;">
<h1>AnyTask:<br />an Automated Task and Data Generation Framework for Advancing Sim-to-Real Policy
Learning</h1>
</div>
<div class="scroll-indicator" onclick="scrollToContent()">
↓
</div>
</header>-->
<!-- MAIN CONTENT BODY -->
<main class="main-content">
<!--<div class="sub-hero-text">Scaling Robot Learning with Automated Simulation and Data Synthesis</div> -->
<div class="hero-text">AnyTask</div>
<div class="sub-hero-text">An Automated Task and Data Generation Framework for Advancing Sim-to-Real Policy
Learning</div>
<!-- Authors -->
<div class="authors">
Ran Gong<sup>1*</sup>,
Xiaohan Zhang<sup>1*</sup>,
Jinghuan Shang<sup>1*</sup>,
Maria Vittoria Minniti<sup>1*</sup>,
Jigarkumar Patel<sup>1</sup>,
Valerio Pepe<sup>1</sup>,
Riedana Yan<sup>1</sup>,
Ahmet Gundogdu<sup>1</sup>,
Ivan Kapelyukh<sup>1</sup>,
Ali Abbas<sup>1</sup>,
Xiaoqiang Yan<sup>1</sup>,
Harsh Patel<sup>1</sup>,
Laura Herlant<sup>1</sup>,
Karl Schmeckpeper<sup>1</sup>
<div class="affiliation">
<sup>1</sup>Robotics and AI Institute, Boston, MA, USA
</div>
<div class="equal-contribution">
<i>* Equal Contribution</i>
</div>
</div>
<!-- <div class="main-video-container" style="margin: 20px 0; text-align: center;">
<video style="width: 100%; max-width: 800px;" controls muted playsinline loop>
<source src="assets/videos/ReLIC.mp4" type="video/mp4" />
</video>
</div> -->
<div class="quick-links">
<!-- Links Placeholder -->
</div>
<div class="tagline" id="abstract">Abstract</div>
<div class="section">
<p>
Generalist robot learning remains constrained by data: large-scale, diverse, and high-quality
interaction data are expensive to collect in the real world.
While simulation has become a promising way to scale up data collection, the related tasks,
including simulation task design, task-aware scene generation,
expert demonstration synthesis, and sim-to-real transfer, still demand substantial human effort.
</p>
<p>
We present <strong>AnyTask</strong>, an automated framework that pairs massively parallel GPU
simulation with foundation models to design diverse manipulation tasks
and synthesize robot data. We introduce three AnyTask agents that generate expert demonstrations,
aiming to solve as many tasks as possible:
</p>
<ul>
<li><strong>ViPR</strong>: A novel task and motion planning agent with VLM-in-the-loop Parallel
Refinement.</li>
<li><strong>ViPR-Eureka</strong>: A reinforcement learning agent with generated dense rewards and
LLM-guided contact sampling.</li>
<li><strong>ViPR-RL</strong>: A hybrid planning and learning approach that jointly produces
high-quality demonstrations with only sparse rewards.</li>
</ul>
<p>
We train behavior cloning policies on generated data, validate them in simulation, and deploy them
directly on real robot hardware.
The policies generalize to novel object poses, achieving <strong>44% average success</strong> across
a suite of real-world pick-and-place,
drawer opening, contact-rich pushing, and long-horizon manipulation tasks.
</p>
</div>
<div class="tagline" id="overview">System Overview</div>
<div class="section">
<img src="assets/images/system_overview.jpg" alt="System Overview"
style="width: 100%; margin: 20px 0 10px 0;">
<p class="figure-caption">
<strong>Figure 1: AnyTask System Overview.</strong> The pipeline first produces simulated
manipulation tasks using an object database and high-level task types. It automatically generates
task descriptions and simulation code, then efficiently collects data via ViPR, ViPR-RL, and
ViPR-Eureka
agents within massively parallel environments. Online domain randomization ensures diverse scenes
and visual observations, allowing policies trained on this simulated data to transfer zero-shot to
the real world.
</p>
</div>
<div class="tagline" id="databases">Object Database</div>
<div class="section">
<div class="main-video-container" style="margin: 20px 0; text-align: center;">
<video style="width: 100%; max-width: 800px;" controls muted playsinline loop>
<source src="assets/videos/object_database.mp4" type="video/mp4" />
</video>
</div>
<p class="figure-caption">
<strong>Figure 2: Object Database.</strong> We generate diverse manipulation tasks using an
object database.
</p>
</div>
<div class="tagline" id="agents">AnyTask Agents</div>
<!-- TAMP -->
<div class="video-gallery-section" id="gallery-section-tamp">
<div class="gallery-caption-container">
<p class="figure-caption gallery-caption">
<b>1. ViPR Agent:</b> A novel task and motion planning agent with VLM-in-the-loop Parallel
Refinement.
</p>
</div>
<div class="video-gallery-container single-video">
<div class="video-gallery" style="justify-content: center;">
<video style="width: 100%; max-width: 800px;" controls muted playsinline loop>
<source src="assets/videos/agents/tamp.mp4" type="video/mp4" />
</video>
</div>
</div>
</div>
<!-- Eureka -->
<div class="video-gallery-section" id="gallery-section-eureka">
<div class="gallery-caption-container">
<p class="figure-caption gallery-caption">
<b>2. ViPR-Eureka Agent:</b> A reinforcement learning agent with generated dense rewards and
LLM-guided contact sampling.
</p>
</div>
<div class="video-gallery-container single-video">
<div class="video-gallery" style="justify-content: center;">
<video style="width: 100%; max-width: 800px;" controls muted playsinline loop>
<source src="assets/videos/agents/eureka.mp4" type="video/mp4" />
</video>
</div>
</div>
</div>
<!-- RL -->
<div class="video-gallery-section" id="gallery-section-rl">
<div class="gallery-caption-container">
<p class="figure-caption gallery-caption">
<b>3. ViPR-RL Agent:</b> A hybrid planning and learning approach that jointly produces
high-quality
demonstrations with only sparse rewards.
</p>
</div>
<div class="video-gallery-container single-video">
<div class="video-gallery" style="justify-content: center;">
<video style="width: 100%; max-width: 800px;" controls muted playsinline loop>
<source src="assets/videos/agents/tamprl.mp4" type="video/mp4" />
</video>
</div>
</div>
</div>
<div class="section">
<!-- Side-by-Side Comparison Slider -->
<div class="section-subtitle">Sim Real Comparison</div>
<div class="video-compare-container">
<!-- Background Video (Real World) -->
<video src="assets/videos/side_by_side/real_put_strawberry_into_closed_drawer_small.mp4" muted
playsinline poster="assets/images/real_robot_sr.png"></video>
<!-- Foreground Video (Simulation) - Clipped -->
<video class="video-clipped"
src="assets/videos/side_by_side/sim_put_strawberry_into_closed_drawer_padded.mp4" muted
playsinline></video>
<!-- Labels -->
<div class="compare-label label-left">Simulation</div>
<div class="compare-label label-right">Real World</div>
<!-- Slider Control -->
<div class="slider-control">
<div class="slider-handle">
<svg viewBox="0 0 24 24" width="16" height="16" fill="#333">
<path d="M8 5v14l11-7z" />
</svg> <!-- Placeholder icon; the simpler arrow glyph below is used instead -->
<span style="font-size: 12px; color: #333;">↔</span>
</div>
</div>
<input type="range" min="0" max="100" value="50" class="slider-input">
<!-- Replay Button -->
<button id="replayBtn" class="replay-button"
style="position: absolute; top: 10px; right: 10px; z-index: 10; padding: 5px 10px; background: rgba(0,0,0,0.5); color: white; border: none; border-radius: 5px; cursor: pointer; font-size: 12px;">
Replay
</button>
</div>
<div style="text-align: center; font-size: 14px; color: #666; font-style: italic; margin-top: 10px;">
Drag the slider to compare Simulation (Left) vs Real World (Right)
</div>
<script src="assets/js/video_comparison.js"></script>
<div class="tagline" id="results">Results & Sim-to-Real</div>
<div class="video-gallery-section" id="gallery-section-sim2real">
<div class="video-gallery-container">
<div class="video-gallery" id="sim2real-gallery">
<div class="video-wrapper">
<div class="video-title">Lift Peach</div>
<video class="gallery-video" src="assets/videos/sim2real/real_lift_peach_small.mp4"
autoplay muted playsinline loop controls></video>
</div>
<div class="video-wrapper">
<div class="video-title">Lift Banana</div>
<video class="gallery-video" src="assets/videos/sim2real/real_pick_banana_small.mp4"
autoplay muted playsinline loop controls></video>
</div>
<div class="video-wrapper">
<div class="video-title">Push Pear to Center</div>
<video class="gallery-video"
src="assets/videos/sim2real/real_push_pear_to_center_small.mp4" autoplay muted
playsinline loop controls></video>
</div>
<div class="video-wrapper">
<div class="video-title">Put Strawberry Into Bowl</div>
<video class="gallery-video"
src="assets/videos/sim2real/real_put_strawberry_into_bowl_small.mp4" autoplay muted
playsinline loop controls></video>
</div>
<div class="video-wrapper">
<div class="video-title">Stack Banana on Can</div>
<video class="gallery-video"
src="assets/videos/sim2real/real_stack_banana_on_can_small.mp4" autoplay muted
playsinline loop controls></video>
</div>
</div>
</div>
<div class="gallery-nav-controls" style="text-align: center; margin-top: 10px;">
<button class="gallery-nav left" id="scrollLeftBtnSim2Real"><</button>
<button class="gallery-nav right" id="scrollRightBtnSim2Real">></button>
</div>
<div style="text-align: center; margin: 50px 0 20px 0;">
<img src="assets/images/real_robot_sr.png" alt="Real Robot Success Rate"
style="max-width: 40%; border-radius: 15px; box-shadow: 0 4px 12px rgba(0,0,0,.15);">
</div>
<div class="gallery-caption-container">
<p class="figure-caption gallery-caption">
<b>Sim-to-Real Transfer:</b> We directly deploy the policies trained in simulation to the
real
robot. All videos are shown at <b>original speed (1x)</b>.
</p>
</div>
</div>
<p>
The policies generalize to novel object poses, achieving <strong>44% average success</strong> across
a suite of real-world pick-and-place,
drawer opening, contact-rich pushing, and long-horizon manipulation tasks.
</p>
</div>
<!-- End of main content -->
</main>
<footer class="footer">Some website materials are adapted from <a href="https://www.videomimic.net/"
target="_blank">VideoMimic</a> and <a href="https://transic-robot.github.io/"
target="_blank">TRANSIC</a>
</footer>
</div>
<!-- ============================================================ -->
<!-- JS -->
<!-- ============================================================ -->
<script src="assets/js/scroll_button.js"></script>
<script>
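// Smooth-scrolls the page down to the main content; referenced by the scroll
// indicator's onclick in the (currently commented-out) hero header above.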
function scrollToContent() {
document.querySelector('.main-content').scrollIntoView({ behavior: 'smooth' });
}
</script>
</body>
</html>