From aef16c38525a6ac69a2490324a7a073e71bf5e80 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Wed, 10 Jun 2026 15:33:41 -0500 Subject: [PATCH 1/2] update cuopt_on_nemoclaw for nemoclaw 0.0.55 --- cuopt_on_nemoclaw/SETUP.md | 20 +- cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh | 247 ++++++--- .../always-tool-discovery/SKILL.md | 98 ++++ .../openclaw-skills/cuopt-first/SKILL.md | 117 ++++ .../cuopt-model-mapper/SKILL.md | 14 +- .../openclaw-skills/cuopt-python-api/SKILL.md | 107 ++++ .../openclaw-skills/cuopt-remote-env/SKILL.md | 161 ++++++ .../openclaw-skills/cuopt-sandbox/SKILL.md | 503 +++++++++++++++--- .../SKILL.md | 16 +- .../optimization-intent-router/SKILL.md | 2 +- .../optimization-mode-router/SKILL.md | 9 +- .../tabular-optimization-ingestion/SKILL.md | 7 +- cuopt_on_nemoclaw/smoke_lp.py | 73 +++ cuopt_on_nemoclaw/smoke_milp.py | 63 +++ cuopt_on_nemoclaw/smoke_vrp.py | 95 ++++ 15 files changed, 1372 insertions(+), 160 deletions(-) create mode 100644 cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md create mode 100644 cuopt_on_nemoclaw/smoke_lp.py create mode 100644 cuopt_on_nemoclaw/smoke_milp.py create mode 100644 cuopt_on_nemoclaw/smoke_vrp.py diff --git a/cuopt_on_nemoclaw/SETUP.md b/cuopt_on_nemoclaw/SETUP.md index 4a004d9..f0ff795 100644 --- a/cuopt_on_nemoclaw/SETUP.md +++ b/cuopt_on_nemoclaw/SETUP.md @@ -44,12 +44,25 @@ it can be any existing sandbox. 
## What the setup script does -- **add** — Add cuOpt to an existing sandbox: apply-policy → install → install-skill → test +- **add** — Add cuOpt to an existing sandbox: apply-policy → install → install-skill → `test --smoke` - **apply-policy** — Merges cuOpt network rules into a running sandbox's policy - **install** — Creates a Python venv (`/sandbox/.openclaw-data/cuopt`), installs `cuopt_sh_client`, `cuopt-cu13`, and `grpcio`, and stamps the cuOpt venv activation file (`/sandbox/.bash_profile`) - **install-activation** — Re-stamps `/sandbox/.bash_profile` without reinstalling the venv (use after changing `CUOPT_HOST`, `CUOPT_PORT`, or `CUOPT_VENV`) - **install-skill** — Uploads skill files from `openclaw-skills/` into the sandbox, then vendors the upstream cuOpt skills (numerical optimization for LP/MILP/QP, routing, server, formulation, user-rules, skill-evolution) from `github.com/NVIDIA/cuopt/tree/release/26.06/skills` so the agent can read them without outbound HTTPS. Override the upstream ref via `CUOPT_SKILLS_REF` (default `release/26.06`); narrow what gets installed via `CUOPT_SKILLS_SKIP` (comma-separated globs, default `cuopt-install,*developer*,*-api-c`). Finally, the step writes a fresh `skills.entries.cuopt-sandbox.config.lastInstallAt` timestamp into `~/.openclaw/openclaw.json` so the gateway's config-reload watcher invalidates the cached `<available_skills>` snapshot — without this, skills uploaded after the agent's first run never appear in the prompt (see [How `<available_skills>` is cached](#how-available_skills-is-cached) below). -- **test** — Smoke tests PyPI access and cuOpt server connectivity from inside the sandbox +- **test** — Connectivity probe from inside the sandbox (`probe_cuopt.py` + pip check). Does **not** run solve smokes. +- **test --smoke** — Probe plus end-to-end LP/MILP/VRP solves via `/sandbox/smoke_*.py` when `install-skill` has uploaded them. LP/MILP run only if gRPC is reachable; VRP only if REST is reachable (per the probe's `available:` line). 
+ +### Version compatibility + +`nemoclaw_cuopt_setup.sh` was last verified against **nemoclaw v0.0.55** and **openshell v0.0.44**. If your installed versions differ, the script prints a non-fatal banner at startup. Silence it with `NEMOCLAW_VERSION_CHECK=0`. + +The public NemoClaw installer defaults to the `lkg` ref, which currently points at the same commit as **v0.0.55**. To pin explicitly: + +```bash +NEMOCLAW_INSTALL_TAG=v0.0.55 \ + curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash -s -- \ + --non-interactive --yes-i-accept-third-party-software +``` ## Getting cuOpt data into the sandbox @@ -116,7 +129,8 @@ Each subdirectory containing a `SKILL.md` will be uploaded. Then re-run: | What | Path | |------|------| | Setup script | `cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh` | -| Endpoint probe | `cuopt_on_nemoclaw/probe_cuopt.py` (uploaded to `/sandbox/probe_cuopt.py`; reports both REST and gRPC reachability in one call) | +| Endpoint probe | `cuopt_on_nemoclaw/probe_cuopt.py` → `/sandbox/probe_cuopt.py` (REST + gRPC reachability) | +| Smoke tests | `smoke_lp.py`, `smoke_milp.py`, `smoke_vrp.py` → `/sandbox/` (pre-built; agent runs as-is — see skills) | | Skill source files | `cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md` | | cuOpt venv in sandbox | `/sandbox/.openclaw-data/cuopt/` | diff --git a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh index 851624a..bdc57a2 100755 --- a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh +++ b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh @@ -17,7 +17,7 @@ # NemoClaw cuOpt sandbox setup # # Subcommands: -# add [NAME] Add cuOpt to a sandbox: policy + install + skill + test. +# add [NAME] Add cuOpt to a sandbox: policy + install + skill + test --smoke. # apply-policy [NAME] Add cuOpt network policy to a running sandbox. # install [NAME] Install cuOpt packages in the sandbox venv and # stamp the activation file (see install-activation). 
@@ -35,7 +35,8 @@ # `add` runs against any sandbox reuse the cache # and install offline. # clear-wheel-cache Remove $CUOPT_WHEEL_CACHE. -# test [NAME] Smoke-test PyPI + cuOpt server reachability. +# test [NAME] Probe REST/gRPC reachability from the sandbox (default). +# test --smoke [NAME] Probe + LP/MILP/VRP solve smokes when installed and reachable. # # Flags: # -y, --yes Skip confirmation prompts (for CI/CD). @@ -89,7 +90,8 @@ # nemoclaw delete cuopt && nemoclaw create cuopt # Recreate sandbox # ./nemoclaw_cuopt_setup.sh add cuopt # Now installs offline (fast) # ./nemoclaw_cuopt_setup.sh apply-policy bob # Just fix network policy -# ./nemoclaw_cuopt_setup.sh test cuopt # Re-run smoke test +# ./nemoclaw_cuopt_setup.sh test cuopt # Connectivity probe only +# ./nemoclaw_cuopt_setup.sh test --smoke cuopt # Probe + solve smokes # # Version compatibility: # The TESTED_NEMOCLAW_VERSION / TESTED_OPENSHELL_VERSION constants below @@ -98,9 +100,12 @@ # the installed tools differ (non-fatal). To install the exact tested # NemoClaw build: # -# NEMOCLAW_INSTALL_TAG=v \ +# NEMOCLAW_INSTALL_TAG=v0.0.55 \ # curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash # +# The public installer defaults to the `lkg` ref, which currently matches +# v0.0.55; use the tag above to pin even after `lkg` moves forward. +# # Silence the banner with NEMOCLAW_VERSION_CHECK=0. # ============================================================================= set -euo pipefail @@ -172,13 +177,17 @@ CUOPT_TEST_SANDBOX_GRPC="" # a newer release end-to-end. Used by check_versions() to surface a # non-fatal warning banner if the installed tools drift ahead. 
# -# To install the exact tested NemoClaw build: -# NEMOCLAW_INSTALL_TAG=v${TESTED_NEMOCLAW_VERSION} \ +# To install the exact tested NemoClaw build (openshell is bundled with the +# NemoClaw release this script was verified against): +# NEMOCLAW_INSTALL_TAG=v0.0.55 \ # curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash # +# The public installer defaults to `lkg`, which currently resolves to the same +# commit as v0.0.55. Pin the tag if you need reproducibility after lkg moves. +# # Silence the banner with NEMOCLAW_VERSION_CHECK=0. -TESTED_NEMOCLAW_VERSION="0.0.48" -TESTED_OPENSHELL_VERSION="0.0.39" +TESTED_NEMOCLAW_VERSION="0.0.55" +TESTED_OPENSHELL_VERSION="0.0.44" # ── NemoClaw / OpenShell version compatibility check ───────────── # Non-fatal. Prints a warning banner when the installed tool version @@ -337,6 +346,18 @@ sandbox_exec_root() { docker exec -u root "$container" "$@" } +# sandbox_run_script +# Read a bash script from stdin and run it in the sandbox container. +# Prefer this over piping to `openshell sandbox connect` for batch +# commands — connect echoes the script to the terminal (OpenShell +# 0.0.44+ bracketed-paste / line-echo behavior). +sandbox_run_script() { + local sandbox="$1" + local container + container=$(find_sandbox_container "$sandbox") || return $? + docker exec -i -u sandbox -e HOME=/sandbox "$container" bash +} + # upload_wheel_cache # Copy the CONTENTS of into , flat # (no wrapping directory). 
Replaces `openshell sandbox upload`, which @@ -1087,7 +1108,7 @@ cmd_install() { "exit" ) - printf '%s\n' "${commands[@]}" | openshell sandbox connect "$sandbox" + printf '%s\n' "${commands[@]}" | sandbox_run_script "$sandbox" local cuopt_ip="host.openshell.internal" [[ -n "$CUOPT_HOST" ]] && cuopt_ip="$CUOPT_HOST" @@ -1244,8 +1265,15 @@ INNER_EOF } # ── test ────────────────────────────────────────────────────────── +# Modes: +# probe (default) — pip check + probe_cuopt.py only +# smoke — probe + LP/MILP/VRP solve scripts when installed and reachable cmd_test() { local sandbox="${1:-$CUOPT_SANDBOX}" + local mode="${2:-probe}" + local run_solves=false + [[ "$mode" == smoke || "$mode" == --smoke ]] && run_solves=true + local venv="/sandbox/${CUOPT_VENV}" local grpc_host="host.openshell.internal" local cuopt_url="http://host.openshell.internal:${CUOPT_PORT}" @@ -1281,14 +1309,18 @@ cmd_test() { fi echo "Host services: REST=$(if $has_rest; then echo UP; else echo DOWN; fi) gRPC=$(if $has_grpc; then echo UP; else echo DOWN; fi)" - echo "Smoke-testing sandbox: $sandbox (venv: $venv) ..." - - # probe_cuopt.py reports REST and gRPC reachability in one call. We pass - # CUOPT_SERVER_HOST/PORT (REST) and CUOPT_REMOTE_HOST/PORT (gRPC) so the - # probe checks the same endpoints we just verified are listening on the - # host. The probe's exit code is non-zero only when *both* are unreachable - # from inside the sandbox — `|| true` prevents that from breaking the - # heredoc's overall exit status. + if $run_solves; then + echo "Testing sandbox: $sandbox (venv: $venv) — probe + solve smokes ..." + else + echo "Testing sandbox: $sandbox (venv: $venv) — connectivity probe only ..." + fi + + local solves_flag=false + $run_solves && solves_flag=true + + # probe_cuopt.py reports REST and gRPC reachability. 
Solve smokes run only + # In test --smoke mode, only when scripts exist, and only when the probe's + # `available:` line shows the matching service (REST for VRP, gRPC for LP/MILP). local sandbox_cmds=" source ${venv}/bin/activate echo '--- pip check ---' @@ -1296,20 +1328,68 @@ python3 -c \"import cuopt_sh_client; print('cuopt_sh_client', cuopt_sh_client.__ echo '' echo '--- cuOpt endpoint probe (REST=${cuopt_url}, gRPC=${grpc_host}:${CUOPT_GRPC_PORT}) ---' -CUOPT_SERVER_HOST=${grpc_host} CUOPT_SERVER_PORT=${CUOPT_PORT} \\ -CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ -python3 /sandbox/probe_cuopt.py || true +PROBE_OUT=\$(CUOPT_SERVER_HOST=${grpc_host} CUOPT_SERVER_PORT=${CUOPT_PORT} \\ + CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ + python3 /sandbox/probe_cuopt.py 2>&1) || true +echo \"\$PROBE_OUT\" + +if [[ ${solves_flag} == true ]]; then + echo '' + echo '--- cuOpt solve smokes (test --smoke) ---' + GRPC_OK=false + REST_OK=false + echo \"\$PROBE_OUT\" | grep -qE '^available:.*grpc' && GRPC_OK=true + echo \"\$PROBE_OUT\" | grep -qE '^available:.*rest' && REST_OK=true + + if [[ \$GRPC_OK == true ]]; then + if [[ -f /sandbox/smoke_lp.py ]]; then + echo '--- remote LP smoke (smoke_lp.py) ---' + CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ + python3 /sandbox/smoke_lp.py || true + echo '' + else + echo 'LP smoke skipped (/sandbox/smoke_lp.py missing; run install-skill)' + echo '' + fi + if [[ -f /sandbox/smoke_milp.py ]]; then + echo '--- remote MILP smoke (smoke_milp.py) ---' + CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ + python3 /sandbox/smoke_milp.py || true + echo '' + fi + else + echo 'LP/MILP smokes skipped (gRPC not reachable from sandbox — see probe above)' + echo '' + fi + + if [[ \$REST_OK == true ]]; then + if [[ -f /sandbox/smoke_vrp.py ]]; then + echo '--- REST VRP smoke (smoke_vrp.py) ---' + CUOPT_SERVER_HOST=${grpc_host} 
CUOPT_SERVER_PORT=${CUOPT_PORT} \\ + python3 /sandbox/smoke_vrp.py || true + echo '' + else + echo 'VRP smoke skipped (/sandbox/smoke_vrp.py missing; run install-skill)' + echo '' + fi + else + echo 'VRP smoke skipped (REST not reachable from sandbox — see probe above)' + echo '' + fi +fi -echo '' -exit " # Capture the sandbox output so we can both display it AND parse it for # reachability ('unreachable' literal from probe_cuopt.py). `tee` keeps # the live UX intact; mktemp avoids clobbering anything else in /tmp. local probe_log probe_log="$(mktemp /tmp/cuopt-probe-XXXXXX.log)" - echo "$sandbox_cmds" | openshell sandbox connect "$sandbox" 2>&1 \ - | tee "$probe_log" + if ! printf '%s' "$sandbox_cmds" | sandbox_run_script "$sandbox" 2>&1 \ + | tee "$probe_log"; then + rm -f "$probe_log" + echo "error: sandbox test script failed (is sandbox '${sandbox}' running?)" >&2 + return 1 + fi echo "Test complete." # Detect probe failures per service. Only treat as a failure if the @@ -1423,6 +1503,38 @@ skill_is_skipped() { return 1 } +# Upload a single file to /sandbox/. openshell upload treats DEST +# as a directory; passing a file path creates a wrongly named directory. +upload_sandbox_file() { + local sandbox="$1" + local src="$2" + local base + base="$(basename "$src")" + local dest="/sandbox/${base}" + + if [[ ! -f "$src" ]]; then + echo " warning: ${base} not found at $src — skipping" >&2 + return 1 + fi + + sandbox_exec "$sandbox" rm -rf "$dest" 2>/dev/null || true + + echo " Uploading ${base} -> ${dest}" + if ! 
openshell sandbox upload "$sandbox" "$src" "/sandbox/" 2>&1; then + echo " Upload failed — falling back to inline base64 copy via sandbox_exec" + local file_b64 + file_b64="$(base64 -w 0 < "$src")" + if sandbox_exec "$sandbox" \ + bash -c "echo '${file_b64}' | base64 -d > '${dest}'" 2>/dev/null; then + echo " ${base} written via fallback" + else + echo " warning: failed to write ${base} into sandbox" >&2 + return 1 + fi + fi + return 0 +} + # ── install-skill ───────────────────────────────────────────────── cmd_install_skill() { local sandbox="${1:-$CUOPT_SANDBOX}" @@ -1565,21 +1677,29 @@ cmd_install_skill() { guardrail_content="$(cat <<'GUARDRAIL' --- name: cuopt-setup -description: "Read this FIRST when working with cuOpt. If you cannot read the cuopt-sandbox skill at ~/.openclaw/skills/cuopt-sandbox/SKILL.md, this tells you where the real file lives." +description: "Read this FIRST in every sandbox session. Tool catalog: if you only see tool_search/tool_describe/tool_call, run tool_search (query read or shell) — read/write/exec are NOT missing. cuOpt path: read /sandbox/.openclaw/skills/cuopt-first/SKILL.md then cuopt-sandbox. MILP imports: cuopt.linear_programming.problem (NOT from cuopt import milp) — see cuopt-python-api. If ~ paths fail, use absolute paths below." --- -# cuOpt sandbox skill — path resolution +# cuOpt sandbox — tools + skill paths + +## Tools (compact catalog) + +NemoClaw hides `read`, `write`, `exec` behind `tool_search` / +`tool_describe` / `tool_call`. **Do not claim files are unreadable.** +Run `tool_call` → `tool_search` with `query: "read"` or `"shell"`, +then `tool_call` the real tool. 
Full guide: +`/sandbox/.openclaw/skills/always-tool-discovery/SKILL.md` -The cuopt-sandbox skill (sandbox-specific setup, networking, venv) is -installed at an absolute path that always works: +## cuOpt skills + +The cuopt-sandbox skill (sandbox setup, networking, venv) is installed at: /sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md -Read that file FIRST for sandbox-specific cuOpt setup, then consult the -per-task sibling skills it points at (cuopt-user-rules, -cuopt-numerical-optimization-api-python, cuopt-routing-api-python, -numerical-optimization-formulation, etc.) which live in the same -`/sandbox/.openclaw/skills/` directory. +Start with `/sandbox/.openclaw/skills/cuopt-first/SKILL.md` for optimization +tasks. **Python MILP/LP imports:** `/sandbox/.openclaw/skills/cuopt-python-api/SKILL.md` +(never `from cuopt import milp`). Read cuopt-sandbox for full wiring, then sibling skills in the same +directory (cuopt-user-rules, cuopt-numerical-optimization-api-python, etc.). ## Why this guardrail exists @@ -1645,6 +1765,9 @@ if extra_dir not in existing: load["extraDirs"] = existing load.setdefault("watch", True) load.setdefault("watchDebounceMs", 250) +# skills.priority is NOT valid on OpenClaw 2026.5.x (added in a later PR). +# Remove it if a prior install-skill run wrote one — it breaks config validate. +skills.pop("priority", None) # Drop the obsolete sentinel from the prior mechanism if present so the # config stays clean. The new loader ignores skills.entries.X.config # for discovery purposes. @@ -1680,40 +1803,12 @@ print(" skills.load.extraDirs=" + json.dumps(existing)) echo "Skills installed." - # Upload the combined REST/gRPC probe directly to /sandbox/. The probe is - # not a skill (it's run by `cmd_test`), so it doesn't need to live under - # the skills tree. Direct upload is preferred when policy allows it. - # - # IMPORTANT: `openshell sandbox upload` treats DEST as a *directory* and - # lands the file at DEST/. Passing a file path (e.g. 
- # `/sandbox/probe_cuopt.py`) creates a directory with that name containing - # the real file inside — Python then errors with "can't find '__main__' - # module" when invoked against the directory. So we pass `/sandbox/` and - # let the basename come from SRC. - # - # We also defensively `rm -rf` any prior file or directory at the - # destination before uploading, and fall back to an inline base64 copy - # via sandbox_exec if the upload fails outright. - local probe="$SCRIPT_DIR/probe_cuopt.py" - if [[ -f "$probe" ]]; then - sandbox_exec "$sandbox" \ - rm -rf /sandbox/probe_cuopt.py 2>/dev/null || true - - echo " Uploading probe_cuopt.py -> /sandbox/probe_cuopt.py" - if ! openshell sandbox upload "$sandbox" "$probe" "/sandbox/" 2>&1; then - echo " Upload failed — falling back to inline base64 copy via sandbox_exec" - local probe_b64 - probe_b64="$(base64 -w 0 < "$probe")" - if sandbox_exec "$sandbox" \ - bash -c "echo '${probe_b64}' | base64 -d > /sandbox/probe_cuopt.py" 2>/dev/null; then - echo " probe_cuopt.py written via fallback" - else - echo " warning: failed to write probe_cuopt.py into sandbox" >&2 - fi - fi - else - echo " warning: probe_cuopt.py not found at $probe — skipping" >&2 - fi + # Sandbox helper scripts (not skills): probe + smoke tests for agents and + # cmd_test. Uploaded to /sandbox/ directly when policy allows. + local helper + for helper in probe_cuopt.py smoke_lp.py smoke_milp.py smoke_vrp.py; do + upload_sandbox_file "$sandbox" "$SCRIPT_DIR/$helper" || true # best-effort: helper already warned; a bare non-zero return would abort install-skill under `set -e` + done } @@ -1730,7 +1825,7 @@ cmd_add() { # something needs attention, a compact post-mortem of what cmd_test # actually saw. local test_rc=0 - cmd_test "$sandbox" || test_rc=$? + cmd_test "$sandbox" smoke || test_rc=$? 
print_activation_banner "$sandbox" if [[ $test_rc -ne 0 ]]; then print_service_status_summary "$sandbox" "$test_rc" @@ -1771,7 +1866,21 @@ main() { install-skill) cmd_install_skill "${1:-}" ;; cache-wheels) cmd_cache_wheels "${1:-}" ;; clear-wheel-cache) cmd_clear_wheel_cache ;; - test) cmd_test "${1:-}" ;; + test) + local t_sandbox="" t_smoke=false + while [[ $# -gt 0 ]]; do + case "$1" in + --smoke) t_smoke=true; shift ;; + -*) echo "unknown test flag: $1" >&2; exit 1 ;; + *) t_sandbox="$1"; shift ;; + esac + done + if $t_smoke; then + cmd_test "${t_sandbox:-$CUOPT_SANDBOX}" smoke + else + cmd_test "${t_sandbox:-$CUOPT_SANDBOX}" probe + fi + ;; add) cmd_add "${1:-}" ;; help|-h|--help) usage ;; *) diff --git a/cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md new file mode 100644 index 0000000..9c52909 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md @@ -0,0 +1,98 @@ +--- +name: always-tool-discovery +description: READ FIRST in every NemoClaw sandbox session. STOP before "I can't read files", "no read tool", "no exec", "can't list directory", "please paste ls/cat output", or "I only have tool_search/tool_describe/tool_call". Real tools (read, write, exec, edit, process) are NOT missing — tools.toolSearch hides them. Use tool_call tool_search query shell or read, then tool_describe, then tool_call the real tool. Never ask the user to paste filesystem output. Applies to ALL tasks including cuOpt, CSV uploads, and scheduling — run tool_search before claiming any file or shell capability is absent. 
+--- + +# Sandbox Tool Discovery (NemoClaw / OpenClaw catalog) + +**If you find yourself thinking "I don't have an exec / read / python / +shell / file / process tool exposed in this session", you are wrong and +must stop.** In a NemoClaw sandbox, OpenClaw is configured with +`tools.toolSearch: true`, which hides `exec`, `read`, `write`, `edit`, +`process`, and every other real tool behind the three meta-tools +(`tool_search`, `tool_describe`, `tool_call`). They are not missing — +they are one `tool_search` call away. + +The correct response to "my tool list looks too short" is to run +`tool_search`. It is **not** to: + +- Apologize to the user and ask them to paste shell output. +- Fall back to hand calculation, hand-rolled algorithms, or "I'll just + reason about it". +- Search the web for an answer the local filesystem already has. +- Give up and report "this sandbox seems to have no command execution". +- Claim a file is unreadable, missing, or that you can't see what's in + a directory. + +If your tool list shows only `tool_search`, `tool_describe`, and +`tool_call`, that is **the expected state** — the real tools are +reachable via the catalog, not absent. + +## How the catalog works + +OpenClaw's compact tool catalog (enabled by `tools.toolSearch: true`) +keeps the model's tool-context small by replacing the full per-tool +schema with three meta-tools: + +| Meta-tool | Purpose | +|-----------------|------------------------------------------------------------------------| +| `tool_search` | Find tools by free-text query (e.g. `"shell"`, `"file"`, `"process"`). | +| `tool_describe` | Return the parameter schema for a named tool. Call once per new tool. | +| `tool_call` | Actually invoke a tool by name with `{name, arguments}`. | + +NemoClaw configures OpenClaw this way by default starting at v0.0.55, so +in any current sandbox the compact catalog is on. 
If your tool list +already includes `exec` / `read` / `write` directly, the compact +catalog is disabled for this session; call those tools directly and +ignore the rest of this skill. + +## Use them in this order + +1. **`tool_search`** with `{"query": ""}` and `{"limit": 20}` lists the + full catalog. `{"query": "shell"}`, `{"query": "file"}`, + `{"query": "read"}`, `{"query": "process"}` narrow by topic. +2. **`tool_describe`** with `{"name": "<tool>"}` returns the parameter + schema. Call this once per new tool. +3. **`tool_call`** with `{"name": "<tool>", "arguments": {…}}` runs it. + +## Capabilities you'll typically need + +| Task | Search term | Tool name | Typical args | +|---|---|---|---| +| Shell (`ls`, `python3`, …) | `shell` / `exec` | `exec` | `{"command": "ls -1 /sandbox/"}` | +| Read a file | `read` / `file` | `read` | `{"path": "/sandbox/foo.csv"}` | +| Write / edit | `write` / `edit` | `write` / `edit` | see `tool_describe` | +| Poll background job | `process` | `process` | see `tool_describe` | + +## Worked example — read a CSV the user uploaded + +Tool list shows only `tool_search`, `tool_describe`, `tool_call`: + +```json +{"name": "tool_search", "arguments": {"query": "read"}} +{"name": "tool_describe", "arguments": {"name": "read"}} +{"name": "tool_call", "arguments": {"name": "read", "arguments": {"path": "/sandbox/teams.csv"}}} +``` + +## Worked example — run probe_cuopt.py + +```json +{"name": "tool_search", "arguments": {"query": "shell"}} +{"name": "tool_describe", "arguments": {"name": "exec"}} +{"name": "tool_call", "arguments": { + "name": "exec", + "arguments": {"command": "bash -lc 'python3 /sandbox/probe_cuopt.py'"} +}} +``` + +## Anti-pattern (from real sessions) + +> I can't read files in this sandbox — I only see tool_search, +> tool_describe, and tool_call. Can you paste the CSV contents? + +Wrong. Run `tool_search` → `read` → read the file yourself. 
+ +## Related skills + +- `cuopt-first` / `cuopt-sandbox` — after you have `read`/`exec`, use + these for cuOpt optimization tasks. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md new file mode 100644 index 0000000..a9d99a1 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md @@ -0,0 +1,117 @@ +--- +name: cuopt-first +description: MANDATORY in NemoClaw sandbox. If you only see tool_search/tool_describe/tool_call, run tool_search (query read or shell) BEFORE claiming you cannot read files or run commands — see always-tool-discovery. STOP before any schedule/heuristic/draft-plan output. FIRST actions: (1) tool_search→exec if needed, (2) probe_cuopt.py, (3) cuopt-remote-env + smoke with CUOPT_REMOTE_HOST/PORT. Do NOT use from cuopt import milp — MILP is cuopt.linear_programming.problem.Problem with INTEGER vars; see cuopt-python-api. Do NOT wait for user to say "use cuOpt". Triggers: schedule, league, CSV upload, minimize, assign, MILP, doubleheader, ImportError, ModuleNotFoundError, wrong import path. 
+--- + +# cuOpt First (NemoClaw Sandbox) + +**This sandbox exists to solve with cuOpt.** If the user's request is an +optimization problem — schedule, assign, route, minimize, maximize, +allocate under constraints — **cuOpt is turn one, not turn two after the +user corrects you.** + +## The rule + +| Allowed before cuOpt smoke passes | **Not allowed** before cuOpt smoke passes | +|---|---| +| `probe_cuopt.py` | A schedule, roster, or assignment output | +| Read `cuopt-remote-env`, set `CUOPT_REMOTE_*` | Greedy / heuristic / backtracking solver code | +| LP smoke test (`Using remote GPU backend` + `Optimal`) | "Here's a draft plan while we set up cuOpt" | +| Read CSV/Excel to infer **columns and constraints** | Capacity-only "probably feasible/infeasible" verdict | +| Ask **one** blocking clarification question | `ortools`, `pulp`, hand-rolled MIP search | +| Write `/sandbox/solve.py` **for cuOpt** | Apologizing for using a heuristic — you should not have used one | + +**Interpretation ≠ answer.** You may inspect `teams.csv` to learn that +rows are teams and `games.csv` lists matchups. You may **not** emit a +completed season schedule, slot assignments, or "I built a heuristic +schedule with doubleheaders minimized" until cuOpt returns a solver +status on the real model. + +## Mandatory first actions (in order) + +Do these **before** any optimization answer, **even if** the user +uploaded data and asked a natural-language planning question: + +0. **Tools (every session).** If your tool list is only + `tool_search` / `tool_describe` / `tool_call`, run `tool_search` + (`query: "read"` to read CSVs, `query: "shell"` for probe/exec). + **Never ask the user to paste file contents.** See + `always-tool-discovery`. +1. **`tool_call exec`** → `bash -lc 'python3 /sandbox/probe_cuopt.py'` +2. Read **`cuopt-remote-env`** → export `CUOPT_REMOTE_HOST` / + `CUOPT_REMOTE_PORT` in the same shell as Python +3. 
Run **LP smoke test** → `python3 /sandbox/smoke_lp.py` with + `CUOPT_REMOTE_*` → confirm `Using remote GPU backend` + `status=Optimal` +4. For MILP scheduling: also run `/sandbox/smoke_milp.py` (same env vars) +5. Read **`cuopt-python-api`** — copy import lines for `/sandbox/solve.py`; + **never** `from cuopt import milp` +6. Read **`cuopt-sandbox`** Four Gates + problem-family routing +7. **Then** formulate and submit the real problem to cuOpt + +Steps 1–3 are infrastructure, not "skipping ahead of data inspection." +They take seconds. Run them in parallel with skimming file headers if +needed — but **never** skip them. + +## Scheduling / league / CSV tasks + +Language like "build the season schedule", "assign games to slots", +"minimize doubleheaders", "balance home/away", uploaded league tables +→ **MILP via cuOpt gRPC**, not a custom Python scheduler. + +Default flow: + +1. Gates above (probe → env → smoke) +2. **`cuopt-python-api`** — mandatory imports (do not guess `cuopt.milp`) +3. `numerical-optimization-formulation` + `cuopt-numerical-optimization-api-python` +4. cuOpt `Problem` with binary/integer assignment vars + cuOpt solve +5. Report `Problem.Status.name`, objective, assignments + +Do **not** write a nested-loop or greedy assigner "to test feasibility" +or "to give the user something quickly." + +**One solve at a time:** after you submit to cuOpt, poll until +`time_limit` returns a terminal status (`Optimal`, `FeasibleFound`, +time-limit stop, etc.). Do not start a second job because the first +"is taking too long" — see `cuopt-sandbox` "Long-running solves". + +## Anti-pattern — wrong import → heuristic fallback + +> `from cuopt import milp` failed. I'll use a greedy scheduler instead. + +**Wrong.** MILP uses `from cuopt.linear_programming.problem import Problem, +INTEGER`. Run the verify one-liner in `cuopt-python-api`, then build the +real model. Import errors are not cuOpt outages. 
+ +## Anti-pattern — heuristic first, cuOpt after user correction + +> You're right — I should have started with cuOpt. That was my miss. I +> used a heuristic first when this should be treated as a MILP scheduling +> problem. I'll model this with cuOpt now. + +**This message means you already failed.** The user should never have +had to send the correction. The heuristic output should never have +been produced. Do not pattern-match on apologizing — **prevent** the +heuristic pass entirely by running steps 1–3 before any assigner code. + +## Anti-pattern — "explore structure" before cuOpt + +> Let me first run a quick feasibility check / greedy pass to understand +> the constraint structure, then I'll move to cuOpt. + +Wrong. cuOpt **is** the feasibility and optimization engine here. +Structure exploration belongs in formulation (variables, constraints), +not in a parallel non-cuOpt solver. + +## When orchestration skills apply + +If `optimization-from-data-orchestrator` or `tabular-optimization-ingestion` +is in play: they govern **interpretation**, not permission to skip cuOpt. +In this sandbox, add **step 0: cuopt-first gates** before any step that +could produce user-visible optimization output. + +## Related skills + +- `cuopt-python-api` — copy-paste imports; no `cuopt.milp` +- `cuopt-sandbox` — full sandbox wiring (gates, routing, long-running solves) +- `cuopt-remote-env` — env vars and cudaError diagnostics +- `always-tool-discovery` — reaching `read`/`exec` when tools are compact diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md index 888689e..89dfadd 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md @@ -41,6 +41,11 @@ If those conditions are not met, first use: For the fast path, map directly from the interpreted data into cuOpt structures. 
Do not introduce a replayable intermediate artifact unless the user asks for replayability, auditability, export, or reuse. +**In NemoClaw sandbox:** before building the cuOpt model, confirm +`cuopt-first` gates completed (probe → `CUOPT_REMOTE_*` → smoke test). +Do not build a parallel heuristic assigner "first" — cuOpt is the first +and only solver for assignments/schedules. + ## Workflow ### 1. Confirm the minimum viable formulation @@ -190,19 +195,22 @@ If relevant, also mention which constraint or resource appears to be most limiti - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + before any gRPC Python solve - For QP: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + before any gRPC Python solve - For routing: - use `routing-formulation` - then use `cuopt-routing-api-python` - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + before any gRPC Python solve ## Success criterion diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md new file mode 100644 index 0000000..5ebe79e --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md @@ -0,0 +1,107 @@ +--- +name: cuopt-python-api +description: STOP on ImportError, ModuleNotFoundError, or guessed imports (from cuopt import milp, import cuopt.milp, cuopt.solve). 
LP/MILP/QP share ONE Python entrypoint cuopt.linear_programming.problem.Problem — MILP uses INTEGER variables, not a separate milp module. With CUOPT_REMOTE_HOST/PORT set before Python starts, remote gRPC MILP works in this sandbox. One failed import is NOT proof cuOpt is unavailable — run the verify command below, then read cuopt-numerical-optimization-api-python. Triggers: scheduling MILP, assignment, import failed, wrong API path. +--- + +# cuOpt Python API — sandbox copy-paste (LP / MILP / QP) + +**Do not invent import paths.** There is no `cuopt.milp`, no +`from cuopt import milp`, and no top-level `cuopt.Problem`. + +LP, MILP, and QP all use the same class: + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings +``` + +| Task | How | +|---|---| +| LP | `vtype=CONTINUOUS` | +| MILP (schedule, assign, roster) | `vtype=INTEGER` (binary = `lb=0, ub=1`) | +| QP | same `Problem` + quadratic objective (see API skill) | + +Remote solve: export `CUOPT_REMOTE_*` in the **same** shell as Python +(see `cuopt-remote-env`). Success log includes `Using remote GPU backend`. + +## Verify API (run before declaring import broken) + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 -c "from cuopt.linear_programming.problem import Problem, INTEGER; print(\"api_ok\")"' +``` + +If this prints `api_ok`, the SDK is installed — your earlier import path +was wrong, not cuOpt. 
+ +## Pre-installed smoke scripts (do not rewrite — run as-is) + +| Script | Path | When | +|---|---|---| +| LP + remote gRPC | `/sandbox/smoke_lp.py` | Gate 3 (always) | +| MILP + remote gRPC | `/sandbox/smoke_milp.py` | Scheduling / assignment tasks | +| VRP REST | `/sandbox/smoke_vrp.py` | Routing tasks only | + +**LP smoke:** + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_lp.py' +``` + +**MILP smoke** (same env vars): + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_milp.py' +``` + +**VRP smoke** (REST — no `CUOPT_REMOTE_*`): + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 /sandbox/smoke_vrp.py' +``` + +## Scheduling / assignment skeleton + +```python +from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +p = Problem("assign") +assign = {} # (entity, slot) -> Var +for e in entities: + for s in slots: + assign[e, s] = p.addVariable( + vtype=INTEGER, lb=0, ub=1, name=f"x_{e}_{s}" + ) +# ... one-hot constraints, objective ... +settings = SolverSettings() +settings.set_parameter("time_limit", 600) # solver ALWAYS returns within this window +p.solve(settings) +print(p.Status.name, p.ObjValue) # Optimal, FeasibleFound, TimeLimit, etc. — all terminal +``` + +Full patterns: `cuopt-numerical-optimization-api-python`. **One job at a +time:** poll the same solve until `time_limit` returns — see +`cuopt-sandbox` "Long-running solves". + +## Anti-pattern — wrong import → give up + +> `from cuopt import milp` failed. I see `linear_programming` in the +> package but I'll pivot to a heuristic scheduler instead. 
+ +**Wrong.** MILP **is** `linear_programming.Problem` + `INTEGER`. Run the +verify command, copy the imports above, set `CUOPT_REMOTE_*`, solve with +cuOpt. An import mistake is not a solver outage. + +## Related + +- `cuopt-remote-env` — env vars + LP smoke test +- `cuopt-first` — gates before any schedule output +- `cuopt-numerical-optimization-api-python` — full LP/MILP/QP examples diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md new file mode 100644 index 0000000..dfd00fd --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md @@ -0,0 +1,161 @@ +--- +name: cuopt-remote-env +description: STOP if you see cudaErrorInsufficientDriver, "CUDA driver version is insufficient", RMM/CUDA init errors, ImportError/from cuopt import milp/wrong Python API path, or a failed cuOpt smoke test — BEFORE declaring cuOpt blocked, offering "prep work while cuOpt gets fixed", or pivoting to heuristics/backtracking. In the NemoClaw sandbox there is NO local GPU; every Python LP/MILP/QP solve defaults to LOCAL CUDA unless you export CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT before the Python process starts. LP/MILP/QP use cuopt.linear_programming.problem.Problem (MILP = INTEGER vars, not cuopt.milp). The probe (probe_cuopt.py) does NOT set these vars for you. Passing Gate 1 (grpc reachable) is NOT proof remote Python is configured. You MUST export CUOPT_REMOTE_HOST=host.openshell.internal and CUOPT_REMOTE_PORT=5001 inside bash -lc, run the LP smoke test, and confirm "Using remote GPU backend" in output before any real model or any "cuOpt unavailable" claim. Also triggers on remote execution env vars, gRPC Python API, local solve mistaken for server failure. +--- + +# cuOpt Remote Execution Environment (NemoClaw Sandbox) + +**There is no GPU in this sandbox.** The Python SDK's default for +`Problem.solve()` is a **local CUDA solve**. 
Local CUDA **always fails +here** — usually as `cudaErrorInsufficientDriver` or "CUDA driver +version is insufficient for CUDA runtime version". + +That error almost always means **you forgot the remote env vars**, not +that cuOpt or OpenShell is broken. + +## The rule in one sentence + +**Before any `p.solve()` / LP / MILP / QP Python call: export +`CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` in the same shell that +starts Python, then confirm `Using remote GPU backend` in the log.** + +The connectivity probe (`probe_cuopt.py`) **does not** set these for +you. It only checks that the gRPC port answers. + +## Python imports — copy exactly (LP / MILP / QP) + +**There is no `from cuopt import milp`.** MILP scheduling uses the same +module as LP: + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings +``` + +If an import fails, run this **before** declaring the API broken: + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 -c "from cuopt.linear_programming.problem import Problem, INTEGER; print(\"api_ok\")"' +``` + +Full fragments + MILP smoke: **`cuopt-python-api`**. + +## Mandatory exports (LP / MILP / QP via Python SDK or `cuopt_cli`) + +```bash +export CUOPT_REMOTE_HOST=host.openshell.internal +export CUOPT_REMOTE_PORT=5001 +``` + +| Variable | Value | Never use | +|---|---|---| +| `CUOPT_REMOTE_HOST` | `host.openshell.internal` | `localhost`, `127.0.0.1`, `0.0.0.0` | +| `CUOPT_REMOTE_PORT` | `5001` | `5000` (that's REST) | + +Env vars must be set **in the same process tree** as the Python +interpreter. Exporting them in a prior `tool_call exec` does not carry +over to the next one. 
Inline them in every solve command: + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_lp.py' +``` + +Use `bash -lc` (login shell) so the cuOpt venv and paths are active. +Bare `bash -c`, `sh -c`, or non-login `tool_call exec` shells often +skip `/sandbox/.bash_profile` and leave the venv inactive. + +## Gate checklist — complete before ANY real solve + +Copy this checklist into your reasoning and fill it in: + +| Step | Done? | Evidence | +|---|---|---| +| Probe returned `grpc` or `rest grpc` | ☐ | `available:` line from `probe_cuopt.py` | +| Wrote solver script to a **file** (not inline heredoc) | ☐ | `/sandbox/solve.py` for real models; use pre-installed `/sandbox/smoke_lp.py` for Gate 3 | +| Ran with `bash -lc` + venv activate + **`export CUOPT_REMOTE_*`** | ☐ | command includes both exports | +| Log contains **`Using remote GPU backend`** | ☐ | paste the line | +| Smoke LP returned **`Optimal`** | ☐ | status + objective | + +**Only after all five rows are checked** may you build the user's real +model. Tell the user cuOpt is server-side broken only when rows 1–4 are checked and row 5 still fails. + +If row 4 is missing and you see `cudaErrorInsufficientDriver` → go +back to row 3 (env vars), **not** to prep work or heuristics. 
+ +## Error → meaning → action + +| What you see | What it means | What to do | +|---|---|---| +| `cudaErrorInsufficientDriver` / "driver version is insufficient" **without** `Using remote GPU backend` | **Local solve** — env vars missing or wrong shell | Set `CUOPT_REMOTE_*`, use `bash -lc`, rerun smoke test | +| No `Using remote GPU backend`, no CUDA error yet | Env vars not picked up | Same fix — exports must be in the same `bash -lc` line as `python3` | +| `Using remote GPU backend` + `Optimal` | Remote path works | Proceed to real model (`cuopt-sandbox` Gate 4) | +| `Using remote GPU backend` + `cudaErrorNoDevice` / `Remote … failed` | Client OK; **host** GPU broken | Operator fixes host service — still not a local-env problem | +| Probe `available: grpc` only | Port reachable | **Not sufficient** — still run smoke test **with env vars** | + +## REST path (VRP / routing) — different vars + +Routing uses REST, not `CUOPT_REMOTE_*`. Pass host/port explicitly: + +```python +CuOptServiceSelfHostClient(ip="host.openshell.internal", port="5000") +``` + +See `cuopt-sandbox` for routing. This skill applies to **gRPC Python +LP/MILP/QP** solves. + +## Anti-pattern — "blocked, let me do prep work" (from real sessions) + +> The probe reached gRPC. The smoke test failed with +> `cudaErrorInsufficientDriver`. So I should not claim a valid optimized +> schedule from cuOpt. Best next step: fix/enable the cuOpt runtime, or +> I can do prep work — validate data, summarize rules, draft the model. + +**Wrong.** You have not tested remote execution yet. The smoke test +failed because it was a **local** solve. Fix: + +1. Export `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`. +2. Rerun smoke test. +3. Only if smoke fails **with** `Using remote GPU backend` in the log + is there a real server-side blocker. + +Offering prep work / capacity analysis / model drafting as a substitute +for steps 1–2 is **not allowed** when the probe shows gRPC available. 
+Prep work is fine **in parallel after** remote smoke passes, or when +the user explicitly asks for it — not as a bypass for missing env vars. + +## Anti-pattern — "I'll set env vars later" + +> The probe confirms gRPC. I'll draft the MILP first and set remote env +> vars when I'm ready to solve. + +Wrong. Set env vars **before the first smoke test**, not after the full +model is written. The smoke test exists precisely to catch missing env +vars before you invest in formulation. + +## Pre-installed smoke test (Gate 3) + +Run `/sandbox/smoke_lp.py` — **do not rewrite it**. It is uploaded by +`nemoclaw_cuopt_setup.sh` with the correct imports and a tiny LP model. + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_lp.py' +``` + +Expected: log line `Using remote GPU backend`, then `status=Optimal objective=10.0 …`. + +For MILP scheduling tasks, also run `/sandbox/smoke_milp.py` with the +same env vars. For routing, run `/sandbox/smoke_vrp.py` (REST, no +`CUOPT_REMOTE_*`). See `cuopt-python-api`. 
+ +## Related skills + +- `cuopt-python-api` — import lines; anti-pattern for `cuopt.milp` +- `cuopt-sandbox` — full gate sequence (probe → env → smoke → model) +- `always-tool-discovery` — how to reach `exec`/`read` when tools are hidden diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md index 41a3a96..cba3a8c 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-sandbox -description: Read FIRST whenever a request involves linear programming (LP), mixed-integer linear programming (MILP), quadratic programming (QP), or vehicle routing (VRP, TSP, PDP) — including small / pedagogical / made-up problems ("invent an LP example", "show me a small MILP", "find optimal X under constraints", "minimize cost", "plan deliveries", "schedule with constraints"). In this sandbox, cuOpt is the default solver and must be used whenever it is available; this skill points at the per-task skills (cuopt-user-rules, cuopt-numerical-optimization-api-python, cuopt-routing-api-python, numerical-optimization-formulation, etc.) and covers sandbox-specific setup (venv at /sandbox/.openclaw-data/cuopt, host.openshell.internal networking, REST 5000 / gRPC 5001). +description: STOP before schedule/heuristic output OR claiming files/shell are unavailable. If only tool_search/tool_describe/tool_call are visible, run tool_search (read/shell) first — see always-tool-discovery. Then read cuopt-first; probe_cuopt.py; cuopt-remote-env + smoke with CUOPT_REMOTE_* before any model. One cuOpt job at a time — poll until time_limit returns a status; never submit a second solve while the first is in flight. Triggers: schedule, league, CSV, minimize, assign, MILP, routing, optimal, feasible, time_limit, job still running. No ortools/heuristics. cudaErrorInsufficientDriver without Using remote GPU backend = missing env vars. 
--- # NVIDIA cuOpt (Sandbox) @@ -8,47 +8,76 @@ description: Read FIRST whenever a request involves linear programming (LP), mix Solve optimization problems using NVIDIA cuOpt from inside the sandbox. This skill covers **sandbox-specific** setup — networking, venv, and connectivity. +> **Read `cuopt-first` before this section if you have not already.** It +> defines what you must **not** output (heuristic schedules, greedy +> assigners, feasibility verdicts) before the probe → env → smoke gates +> below complete. + +## Zero optimization output before Gate 3 + +Do not send the user any of the following until the LP smoke test +passes with `Using remote GPU backend` (see Four gates below): + +- A season schedule, slot assignment, or roster +- "Here's a heuristic / greedy / draft plan" +- "Feasible" / "infeasible" / "capacity is sufficient" as the answer +- Python that assigns games, routes, or resources **outside** cuOpt's API +- An apology for not using cuOpt — prevent the miss instead + +Reading uploaded files to identify columns and constraints is fine. +**Emitting an optimization result is not**, until cuOpt solves the model. + > **In this sandbox, LP/MILP must be solved through the remote cuOpt service > on the host, not the local CUDA runtime.** There is no GPU inside the -> sandbox. If you see `cudaErrorInsufficientDriver` or similar CUDA errors, -> you accidentally invoked a local solve — set `CUOPT_REMOTE_HOST` and -> `CUOPT_REMOTE_PORT` to use the remote service instead. - -## Finding the shell, file, and editing tools (NemoClaw catalog) +> sandbox. If you see `cudaErrorInsufficientDriver` **without** +> `Using remote GPU backend` in the same run, you accidentally invoked a +> local solve — set `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` to use the +> remote service instead. 
If you **do** see `Using remote GPU backend` +> followed by `cudaErrorNoDevice` or `Remote LP solve failed`, the client +> path is correct and the **host cuOpt service** has no usable GPU — report +> that to the operator; do not fall back to heuristics or hand search. +> +> **Full env-var checklist and error table:** read the `cuopt-remote-env` +> skill — it is mandatory for every gRPC Python LP/MILP/QP solve in this +> sandbox. + +## Finding the shell, file, and editing tools + +**If your tool list shows only `tool_search`, `tool_describe`, and +`tool_call`, you still have `read`, `write`, and `exec` — run +`tool_search` first.** Full walkthrough: `always-tool-discovery` skill. +Do **not** tell the user you cannot read files or ask them to paste +`ls`/`cat` output. + +Minimal pattern (read a CSV or run probe): + +```json +{"name": "tool_search", "arguments": {"query": "read"}} +{"name": "tool_describe", "arguments": {"name": "read"}} +{"name": "tool_call", "arguments": {"name": "read", "arguments": {"path": "/sandbox/teams.csv"}}} +``` -This sandbox runs under NemoClaw, which by default exposes only three -meta-tools to the model — `tool_search`, `tool_describe`, `tool_call` — -and hides every real tool (`exec`, `read`, `write`, `edit`, `process`, -…) behind that catalog. If your tool list shows only those three, the -real tools are not missing; they are reachable via the catalog. +For shell/probe, use `"query": "shell"` → `exec` → +`{"command": "bash -lc 'python3 /sandbox/probe_cuopt.py'"}`. -Use them in this order: +Every command below assumes this catalog path when real tools are not +directly listed. Report a setup problem only if `tool_search` with +`{"query": ""}` returns nothing beyond the three meta-tools. -1. `tool_search` with `{query: ""}` and `{limit: 20}` lists the catalog; - `{query: "shell"}` or `{query: "file"}` narrows by topic. -2. `tool_describe` with `{name: ""}` returns the parameter schema. 
- Call this once before the first invocation of any new tool. -3. `tool_call` with `{name: "", arguments: {…}}` runs it. +Concrete one-shot for the cuOpt capability probe (the very first +thing this skill expects you to run): -The capabilities the rest of this skill assumes: +```json +{"name": "tool_search", "arguments": {"query": "shell"}} +{"name": "tool_describe", "arguments": {"name": "exec"}} +{"name": "tool_call", "arguments": { + "name": "exec", + "arguments": {"command": "bash -lc 'python3 /sandbox/probe_cuopt.py'"} +}} +``` -| Capability | Search term | Tool name | Typical args | -|---|---|---|---| -| Run any shell command (ls, source, python3, …) | `shell` / `exec` | `exec` | `{command: "ls -1 …"}` | -| Read a known file path | `read` | `read` | `{path: "/sandbox/…"}` | -| Write / edit a file | `write` / `edit` | `write` / `edit` | see `tool_describe` | - -Every shell example below — `ls -1 /sandbox/.openclaw/skills/`, sourcing -the venv, `python3 /sandbox/probe_cuopt.py`, the LP smoke test — is -dispatched as `tool_call` with `name: "exec"` and -`arguments: {command: ""}`. - -If `tool_search` with an empty query returns only the three catalog -tools themselves, no real tools were registered for this session — that -is a gateway setup problem, not a discovery problem; report it to the -operator. If your tool list already includes `exec` / `read` / etc. -directly, the compact catalog is disabled; call those tools directly and -ignore this section. +If you finished reading this section without running that probe once, +you have not yet done the work this skill exists for. Run it. ## Always use cuOpt when it is available @@ -83,6 +112,26 @@ pedagogy defeats the purpose. Always: When in doubt, run cuOpt. +## Evidence standard — no cuOpt, no verdict + +Do not tell the user a schedule/plan is **infeasible**, **impossible**, +or **cannot be satisfied** unless cuOpt returned an explicit solver +status (`Infeasible`, `InfeasibleOrUnbounded`, etc.) 
for a model you +actually submitted. + +The following are **not** acceptable substitutes for a cuOpt infeasibility +proof: + +- Backtracking, branch-and-bound, or exhaustive search you wrote yourself. +- `ortools`, `pulp`, or any other non-cuOpt solver. +- Hand reasoning ("the constraints clearly conflict"). +- A heuristic that failed to find a feasible assignment. + +If cuOpt has not yet run successfully on the real model, say **"I have +not yet solved this with cuOpt"** — not "it's infeasible". If you ran a +non-cuOpt exploratory search, label it explicitly as a **non-authoritative +heuristic** with caveats and still pursue the cuOpt path. + ## Remote-first — never try a local solve before the remote service There is no GPU in this sandbox. Every cuOpt Python entry point that @@ -102,24 +151,39 @@ solves are routed to a host-side cuOpt service over the wire. check, and a stale "REST only" or "gRPC only" reading will pin you to a suboptimal path. Only the full `rest grpc` result is durable enough to reuse for the rest of the session.** -2. Pick the remote interface from the table (rest, gRPC, or both) that - matches your problem class. **Use it first.** - - Routing (VRP, TSP, PDP) → REST. Open `cuopt-server-api-python` and - reuse a starter from its `assets/vrp_*/` cookbook. - - LP / MILP / QP → **prefer gRPC** via Python SDK or `cuopt_cli` - whenever the probe shows gRPC available. Fall back to REST via - `cuopt_sh` / `cuopt_sh_client` only when gRPC is not. Both route to - the same host service, but gRPC is the native path for these - problem classes (binary protocol, lower per-call overhead, better - streaming behavior). A previous session decision to use REST does - not justify reusing it after a re-probe reveals gRPC. -3. The **only** legitimate evidence that cuOpt is unavailable for your + **Gate 1 only proves the endpoint is reachable — not that solves work.** +2. **Stop and answer the post-probe checklist** (see "Four gates before + modeling" below). 
Pick the interface, name the sibling skill you will + read next, and confirm the problem family (MILP vs routing vs LP). + **Do not write model code until you have written down those three + answers.** +3. **Set remote env vars and run the smoke test** (Gates 2–3). For LP / + MILP / QP: read `cuopt-remote-env`, export `CUOPT_REMOTE_HOST` / + `CUOPT_REMOTE_PORT` in the same `bash -lc` line as Python, run the + minimal LP smoke test, confirm `Using remote GPU backend` + `Optimal`. + For routing when REST is available: minimal health/submit from the VRP + cookbook with explicit `host.openshell.internal:5000`. + **`cudaErrorInsufficientDriver` without `Using remote GPU backend` + means env vars were not set — retry Gate 2; do NOT declare cuOpt + blocked or offer prep work as a substitute.** +4. Read the sibling skills named in step 2 (`numerical-optimization-formulation` + + `cuopt-numerical-optimization-api-python` for MILP/scheduling, etc.). + **Only now** formulate and submit the real problem. +5. The **only** legitimate evidence that cuOpt is unavailable for your task is a fresh `probe_cuopt.py` result whose `available:` line is `none`, *or* the matching column in the capability table marks the - required interface as "Decline". The following do **not** count and - never permit skipping cuOpt: + required interface as "Decline", *or* the smoke test fails with a + server-side error after env/venv are confirmed correct. The following + do **not** count and never permit skipping cuOpt: - a failed `import cuopt` / `from cuopt import routing` / any `ModuleNotFoundError` in the current interpreter + - any **other** Python solver library being missing + (`ortools`, `pulp`, `scipy.optimize`, `cvxpy`, `pyomo`, `mip`, …). + Those are **not cuOpt**, and their absence has nothing to do with + whether the cuOpt service is reachable. 
If you typed + `from ortools…` (or any of the above) and got `ModuleNotFoundError`, + do not propose installing them, do not write your own backtracking / + branch-and-bound / simplex search — run the cuOpt probe. - the problem being small, toy-sized, pedagogical, or "obvious" - a probe result from earlier in the session that wasn't `rest grpc` (re-probe — the operator may have started a service since) @@ -128,8 +192,8 @@ solves are routed to a host-side cuOpt service over the wire. If you have any of these and no fresh `none` probe, you are still required to use cuOpt. The sandbox has no GPU, so once you do reach the "local cuOpt is the only candidate" branch (a real `none` - probe), it will almost certainly fail anyway — proceed to step 4. -4. **If every cuOpt path fails**, stop. Explain to the user exactly + probe), it will almost certainly fail anyway — proceed to step 6. +6. **If every cuOpt path fails**, stop. Explain to the user exactly which probe / interface / payload failed and what's needed (operator action, network policy, etc.). **Do not** silently fall back to brute force, hand calculation, exhaustive search, a non-cuOpt @@ -148,7 +212,12 @@ read the sibling skills installed alongside this one in `/sandbox/.openclaw/skills/`. Names follow stable suffix patterns upstream, so prefer pattern-based discovery over memorizing exact names: +- `cuopt-first` — **Read before anything else for optimization tasks:** + no heuristic/schedule output before probe → env → smoke - `cuopt-user-rules` — Read FIRST: behavior rules, clarify before coding, verify results +- `cuopt-remote-env` — **Mandatory before any gRPC Python LP/MILP/QP solve:** + `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`, smoke test, cudaError diagnostics +- `always-tool-discovery` — **Every session:** `tool_search` → `read`/`exec` when catalog is compact - Any `*-formulation` skill — How to go from problem text to formulation (LP / MILP / QP, vehicle routing, etc.) 
- Any `cuopt-*-api-python` skill — Solve through the Python SDK @@ -287,6 +356,108 @@ source /sandbox/.openclaw-data/cuopt/bin/activate && \ python3 /sandbox/probe_cuopt.py --json ``` +## Four gates before modeling + +The probe, **remote env vars**, the smoke test, and the sibling-skill +read are **four separate gates**. Passing one does not skip the others. +A common failure mode is probing successfully (`available: grpc`), running +a smoke test **without** `CUOPT_REMOTE_*` exports, getting +`cudaErrorInsufficientDriver`, and incorrectly declaring cuOpt blocked +or offering "prep work" — see `cuopt-remote-env` for the full error table. + +**Gate 1 — Endpoint reachable (probe).** Run `probe_cuopt.py`. Record: + +| Question | Your answer (write it out before proceeding) | +|---|---| +| `available:` line | `rest` / `grpc` / `rest grpc` / `none` | +| Problem class for this task | LP / MILP / QP / routing | +| Interface you will use | gRPC Python SDK / REST / `cuopt_cli` | +| Sibling skill to read next | e.g. `cuopt-numerical-optimization-api-python` | + +**`available: grpc` means the TCP port answered — not that env vars are +set, not that remote solves succeed, and not that you may skip the +smoke test or read `cuopt-remote-env`.** The probe does not export +`CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT` for you. + +**Gate 2 — Remote env vars set (mandatory for gRPC Python LP/MILP/QP).** +Read `cuopt-remote-env` and complete its checklist before any +`p.solve()`. Minimum: + +```bash +export CUOPT_REMOTE_HOST=host.openshell.internal +export CUOPT_REMOTE_PORT=5001 +``` + +These must appear in the **same** `bash -lc '…'` command as `python3`, +not in a prior shell invocation. Skip this gate → local CUDA → +`cudaErrorInsufficientDriver` → **not** a server failure. + +**Gate 3 — Remote solve works (smoke test with Gate 2 env vars).** +Run the minimal LP in "Quick connectivity smoke test". Expected: +`Using remote GPU backend` + `Optimal`. 
+ +| Smoke outcome | Meaning | Next action | +|---|---|---| +| `Optimal` + `Using remote GPU backend` | Path works | Proceed to Gate 4 | +| `cudaErrorInsufficientDriver` **without** remote backend log | Gate 2 skipped | Read `cuopt-remote-env`; set exports; retry | +| Shell/heredoc/`File name too long`/`SyntaxError` | Script packaging bug | Write script to file; retry with env vars | +| No `Using remote GPU backend`, no CUDA error | Env vars not in same shell | Inline exports in `bash -lc`; retry | +| `Using remote GPU backend` then `cudaErrorNoDevice` | Host GPU broken | Operator action; **not** missing env vars | +| Connection refused on probe | Service down | Operator starts service | + +**Do not tell the user cuOpt is unavailable, and do not offer prep-work +substitutes (data validation, capacity checks, model drafting), until +Gate 3 passes OR smoke fails with `Using remote GPU backend` already +in the log** (proving the client path is correct and the fault is +server-side). + +**Gate 4 — Read the right skills.** Open the formulation + API skills +from the table before writing solver code. **Read `cuopt-python-api` +first** and copy its import lines — do not guess `from cuopt import milp`. +For scheduling / assignment / league timetable problems, that is almost always MILP via +`numerical-optimization-formulation` + +`cuopt-numerical-optimization-api-python` — **not** vehicle routing +unless the user explicitly gave locations, vehicles, and a travel matrix. + +Only after Gates 1–4 pass may you build the real model. 
+ +### Problem family quick routing + +| User language | Problem class | Skills to read | Interface (typical) | +|---|---|---|---| +| Schedule, timetable, league, roster, assign slots/shifts/games | MILP (assignment/scheduling) | `numerical-optimization-formulation`, `cuopt-numerical-optimization-api-python` | gRPC Python SDK | +| Product mix, blend, allocate budget | LP or MILP | same | gRPC | +| Deliveries, routes, trucks, TSP, VRP, PDP | Routing | `routing-formulation`, `cuopt-server-api-python` | REST | +| Minimize cost / maximize profit with linear constraints | LP / MILP / QP per formulation skill | formulation + `cuopt-numerical-optimization-api-python` | gRPC | + +When unsure between MILP scheduling and VRP: if the decisions are +*who plays whom when* or *which resource gets which task*, it's MILP. +If the decisions are *which stops each vehicle visits in what order*, +it's routing. + +**Anti-pattern — probe then heuristic (from real sessions):** + +> I probed cuOpt first and found gRPC available. My first Python script +> failed with a shell error, so I tried backtracking to test feasibility +> structure and concluded the schedule is likely infeasible. + +Wrong on four counts: (1) probe passing is Gate 1 only — smoke test +(Gate 3) was skipped; (2) a shell/heredoc failure is not a solver +failure — retry with a file-based script; (3) backtracking is not an +acceptable substitute for cuOpt when the service is reachable; (4) +infeasibility requires a cuOpt solver status, not a heuristic search. + +**Anti-pattern — cudaErrorInsufficientDriver → "cuOpt blocked" → prep work:** + +> Smoke test failed with `cudaErrorInsufficientDriver`. I should not +> claim a valid schedule from cuOpt. Let me do prep work — validate +> data, summarize rules, draft the model — while the runtime gets fixed. + +Wrong: Gate 2 (`CUOPT_REMOTE_*`) was skipped, so the smoke test hit +**local CUDA**, not the gRPC server. Read `cuopt-remote-env`, set env +vars, rerun smoke. 
Do not offer prep work as a bypass for missing env +vars when `available: grpc`. + ## How to invoke each interface — sandbox-specific delta For complete API docs, modeling patterns, and examples, read the upstream @@ -295,17 +466,17 @@ sibling skills listed at the top of this file. Below is only what's ### gRPC path (Python SDK and `cuopt_cli`) -The Python SDK and `cuopt_cli` solve through the gRPC server. Set: +**Read `cuopt-remote-env` first** — it is the canonical checklist for +`CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`, the smoke test command, and +the error→action table. Summary: ```bash export CUOPT_REMOTE_HOST=host.openshell.internal export CUOPT_REMOTE_PORT=5001 ``` -before the Python or CLI process starts. If you see `Using remote GPU -backend` in the solver output, the remote path engaged. If you see -`cudaErrorInsufficientDriver` instead, the env vars didn't take effect and -the client tried to solve locally — there is no GPU here, so it fails. +before the Python or CLI process starts, in the same `bash -lc` line as +`python3`. Success marker: `Using remote GPU backend` in the log. For modeling, status checking, and examples → the matching upstream skill in `/sandbox/.openclaw/skills/` — typically a `cuopt-*-api-python` @@ -395,6 +566,157 @@ The `cuopt-routing-api-python` skill describes the GPU-backed Python API and is **not** the right reference inside this sandbox — use the REST path instead. +## Long-running solves — one job, poll to completion + +cuOpt MILP / VRP solves can take tens of seconds to several minutes. +Under NemoClaw's `exec` tool, any command that exceeds `yieldMs` is +moved to a background process; the agent then has to poll it via the +`process` tool to retrieve the final result. 
**That polling is your job +to do silently — it is not a checkpoint that requires user input.** + +### One job at a time — never submit while one is in flight + +When you submit a solve (gRPC `Problem.solve()`, REST +`get_optimized_routes()`, `cuopt_sh`, etc.), **do not start another +solve until the current one returns a terminal response.** + +| In flight | Allowed | **Not allowed** | +|---|---|---| +| Python process still running / `reqId` not finished | Poll same process or repoll same `reqId` | New `python3 /sandbox/solve.py`, new REST POST, new gRPC solve | +| Exec backgrounded, no exit yet | `tool_call process` on **that** handle | Kill + resubmit, "try simpler model" as a second job | +| Waiting on REST `reqId` | `client.repoll(reqId)` | Submit a fresh payload while the first job runs | + +**Why:** cancelling or abandoning job A does not free the GPU — the +server keeps solving until **A's** `time_limit` expires. Job B then +runs concurrently, wastes GPU, and you lose A's final status/incumbent. + +### `time_limit` means you always get a response + +If you set `time_limit` (Python: +`settings.set_parameter("time_limit", N)`; REST: +`solver_config.time_limit`), cuOpt **will stop and return within that +window** — even when the problem does not converge to optimality. + +You are waiting for a **terminal solver status**, not necessarily +`Optimal`: + +| Status (examples) | Meaning | +|---|---| +| `Optimal` | Proven optimal (within tolerances) | +| `FeasibleFound` / `PrimalFeasible` | Feasible solution, may not be optimal | +| `TimeLimit` / time-limit reached | Best effort within budget — **still a valid response** | +| `Infeasible` | No feasible solution | + +Silence or a hung client past ~2 × `time_limit` is a **bug or poll +failure**, not "MILP might run forever". Keep polling the **same** +submission; do not open a second one because the first "seems slow". 
+ +Three failure modes to avoid — all surface as "it's taking a while, I'll +do something else": + +1. **Interrupting the user** — pausing to ask "should I keep + waiting?" / "should I take the current incumbent?". Wastes the + user's turn; addressed by the rules below. +2. **Cancelling the solve** — killing the Python process, terminating + the `tool_call process` handle, or calling `CancelJob` on the gRPC + server. **This is worse**, because it does not actually stop the + work — the server-side solve keeps consuming GPU until its own + `time_limit` fires, and there is no recovery path back to that + `job_id` from a new client (see + `cpp/docs/grpc-job-management-proposal.md` in nvidia-cuopt for the + in-flight design that would fix this; today no `ListJobs` RPC + exists). A cancel-and-retry loop just queues a *second* concurrent + solve on the same GPU while the first one runs to completion + unobserved. +3. **Submitting a second job** — starting a new solve because the first + "hasn't returned yet". The first job is still running server-side; + you now have two GPU jobs and no clean result from either. + +Concrete rules: + +- If you started a cuOpt solve and it is still running, your only valid + next actions are: (a) `tool_call process` to poll, or (b) wait and + poll again — **on that same job**. **Never** submit a second solve + in parallel. **Do not** return to the user with "should I keep + waiting?", "should I take the current incumbent?", or "let me know if + you want me to continue". The user already asked for the solution; + pausing to re-confirm wastes their time and frequently means the + solver finishes in the gap and the user has to type "yes finish" to + unblock work that already completed. +- cuOpt's MILP solver respects `SolverSettings.time_limit` (default in + this sandbox: 120s unless you override). The solver will stop + itself and return a status within that budget — convergence to + `Optimal` is not required. 
You do not need to "decide when to stop" + — `time_limit` decided that already. Poll until the process exits or + you hit a generous wall clock (e.g. 2 × the configured solver time + limit), then report the **terminal status** (including + `FeasibleFound` / time-limit stops). +- If a feasible incumbent is visible in partial output but the solver + has not exited, that is **not** a finished solve. Keep polling. Only + report `Optimal` / `FeasibleFound` / `Infeasible` etc. once the + Python process actually exits and you can read `Problem.Status.name` + from the final output (or from a file the script wrote on exit). +- **"Report early" and "cancel" are different actions.** Reporting + early means surfacing the current incumbent in chat *while the + solver keeps running*; cancelling means killing the Python process, + ending the `tool_call process` handle, sending SIGTERM, or calling + `CancelJob` on the gRPC service. The rules below permit the former + in narrow cases; they **do not permit cancelling a running solve + just because it feels slow**. +- The only legitimate reasons to **report early** are: (1) the process + is genuinely hung (no output movement for > 2 × `time_limit`, no + completion); (2) the user explicitly asked you to stop or take what + you have now; (3) the wall-clock budget for the *whole task* (not + the solver) is about to expire. If you do report early, **keep the + solver running** unless one of the cancellation reasons below also + holds — the incumbent may improve while you write the message. +- The only legitimate reasons to **cancel** a running solve are: (1) + the user explicitly said "cancel" / "kill it" / "stop"; (2) the + process is genuinely hung *and* you have already tried to read its + output once without success. 
"It's taking longer than I expected", + "I want to try a smaller time limit", and "the user might want a + different formulation" are **not** cancel reasons — let the current + `time_limit` fire, then iterate on the result. Cancelling and + resubmitting "with a smaller time limit" is almost always wrong: + the original solve does not actually stop, so you are now running + two solves on one GPU. +- If you must report a partial result, label it explicitly: + "intermediate incumbent, solver still running" — and keep polling + while you compose the message; do not pause for confirmation, and do + not cancel after sending. + +Antipatterns to avoid (both from real sessions): + +**Forced-choice pause** — + +> Still running, but it's on track. +> If you want the fastest path, I can stop at the current feasible +> solution and give you that immediately. If you want the +> better-quality schedule, I should let cuOpt finish. +> Recommendation: let it finish. + +This is a forced-choice pause. The agent already knows the right +answer ("let it finish"); it should just keep polling and return when +the solve actually completes. The user does not need to confirm a +recommendation the agent itself already made. + +**Cancel-and-retry** — + +> The MILP has been running for about 3 minutes and the gRPC call +> hasn't returned. I'll cancel this attempt and re-submit with a +> shorter `time_limit` and a simpler formulation so we get *something* +> back quickly. + +This is wrong on three counts: (1) the original solve does not +actually stop when the local Python process is killed — the +server-side GPU keeps working until *its* `time_limit` fires; +(2) the original `job_id` is now unrecoverable, so any improving +incumbent the server eventually produces is lost; (3) the "retry" +queues a second concurrent solve on the same GPU, racing the +unobserved first one. 
The correct action is to keep polling, let the +configured `time_limit` decide when to stop, and report whatever +status the solver returns. + ## Script execution hygiene For any solver script longer than a one-liner, write it to a file first @@ -403,13 +725,22 @@ badly with the `tool_call → exec → shell → Python` quoting chain — quote collapse across layer boundaries, and each broken inline script costs a full sandbox round-trip before the failure is even visible. +**A shell/script packaging failure is never evidence that cuOpt failed +and never a reason to pivot to backtracking, `ortools`, or hand search.** +If your first cuOpt attempt dies with `File name too long`, `SyntaxError`, +`source: not found`, or a mangled heredoc, fix the execution path and +retry — starting with the smoke test if you haven't passed Gate 3 yet. + Recommended pattern: ```bash cat > /sandbox/solve.py <<'PY' # … solver code … PY -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && python3 /sandbox/solve.py' +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/solve.py' ``` Use `bash -lc` (not bare `sh`) for any command that calls `source`; the @@ -419,9 +750,12 @@ default shell behind `tool_call exec` can be `dash`, which doesn't have Failure symptoms that mean script construction is broken — **not** cuOpt. If you see any of these, stop debugging the solver and switch to the -file pattern above: +file pattern above. **Do not abandon the cuOpt path.** - `source: not found` → wrap with `bash -lc '...'`. +- `File name too long` → heredoc/command string blew past shell limits; + write the script to `/sandbox/solve.py` with `write`/`edit` and run + that file instead. - `SyntaxError` on a Python line containing an unquoted URL, path, or shell metacharacter → quoting collapsed somewhere across the layers. 
- `NameError` on a token that should obviously be a string literal @@ -443,35 +777,47 @@ extraction code rather than extrapolating from a different problem class: The LP/MILP and routing shapes are different. Do not assume one based on having read the other. -## Quick connectivity smoke test (LP) +## Quick connectivity smoke tests -After the connectivity probes pass, run this minimal LP to verify the full -remote-solve path works end to end. Expected: `Optimal`, objective `10`, -`x = 2`, `y = 2`, with `Using remote GPU backend` in the solver log. +**Gate 3 — mandatory before any real LP/MILP/QP model.** Requires Gate 2 +env vars (`cuopt-remote-env`). Run the **pre-installed** scripts at +`/sandbox/` — do not rewrite them (correct imports are already inside): -```python -from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -p = Problem("smoke") -x = p.addVariable(lb=0, vtype=CONTINUOUS, name="x") -y = p.addVariable(lb=0, vtype=CONTINUOUS, name="y") -p.addConstraint(x + y <= 4) -p.addConstraint(x <= 2) -p.addConstraint(y <= 3) -p.setObjective(3*x + 2*y, sense=MAXIMIZE) -p.solve(SolverSettings()) -print(p.Status.name, p.ObjValue, x.getValue(), y.getValue()) +| Script | Use | +|---|---| +| `smoke_lp.py` | Gate 3 for all gRPC LP/MILP/QP work | +| `smoke_milp.py` | Extra check for scheduling / assignment (INTEGER path) | +| `smoke_vrp.py` | Routing only — REST, no `CUOPT_REMOTE_*` | + +**LP (Gate 3):** + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_lp.py' ``` -If this fails, do not move on to a real problem — fix connectivity first -(see Troubleshooting below). +Expected: `Using remote GPU backend`, then `status=Optimal objective=10.0 …`. + +**MILP (scheduling tasks):** same env vars, `python3 /sandbox/smoke_milp.py`. 
+ +**VRP:** `python3 /sandbox/smoke_vrp.py` — expects `status=0 solution_cost=…`. + +Write **real models** to `/sandbox/solve.py`; use the smoke scripts only +for connectivity checks. + +If any smoke test fails, do not move on to a real problem — diagnose using the +smoke-outcome table in "Four gates before modeling" and `cuopt-remote-env`. +Do **not** pivot to heuristic search; declare cuOpt blocked only if +`Using remote GPU backend` was already present in the failing run. ## Troubleshooting | Symptom | Cause | Fix | |---------|-------|-----| -| `cudaErrorInsufficientDriver` or CUDA errors | Accidentally invoked local solve instead of remote service | Set `CUOPT_REMOTE_HOST=host.openshell.internal` and `CUOPT_REMOTE_PORT=5001` before solving | +| `cudaErrorInsufficientDriver` without `Using remote GPU backend` | Accidentally invoked local solve instead of remote service | Set `CUOPT_REMOTE_HOST=host.openshell.internal` and `CUOPT_REMOTE_PORT=5001` before solving; use `bash -lc` | +| `Using remote GPU backend` then `cudaErrorNoDevice` / `Remote LP solve failed` | Client path OK; host cuOpt gRPC service has no visible GPU | Operator fixes host GPU / container runtime. Do **not** fall back to heuristics — report blocker and stop | | `from cuopt import routing` fails with CUDA / RMM init error | There is no GPU in this sandbox; routing has no remote-aware Python wrapper | Use REST instead: see "Vehicle routing (VRP, TSP, PDP) — REST only in this sandbox" above and `cuopt-server-api-python`'s `assets/vrp_*/` cookbook. Do **not** fall back to brute force or non-cuOpt methods | | `403 Forbidden` | Wrong address or sandbox policy missing port | Use `host.openshell.internal`, not `localhost`. If address is correct, ask operator to run `nemoclaw_cuopt_setup.sh apply-policy` | | `Connection refused` on `:5000` | REST service not running or host firewall blocking the port | Check if REST is needed; gRPC alone (5001) is sufficient for LP/MILP.
If REST is needed, ask operator to start it | @@ -479,4 +825,5 @@ If this fails, do not move on to a real problem — fix connectivity first | Connection timeout / hang | Server not running or host firewall blocking Docker | Ask operator to verify from host: `ss -tlnp \| grep 500` | | Timeout through `10.200.0.1:3128` | Sandbox proxy cannot reach the destination | Ask operator to verify sandbox network policy includes the cuOpt ports | | `ModuleNotFoundError` | Venv not activated — common in non-login shells (`bash -c '…'`) because `.bash_profile` only fires for login shells | Wrap the call in `bash -lc '…'` (preferred) or `source /sandbox/.openclaw-data/cuopt/bin/activate` before the python invocation | +| `ModuleNotFoundError: No module named 'cuopt.milp'` or `from cuopt import milp` fails | **Wrong import path** — MILP is not a separate package | Use `from cuopt.linear_programming.problem import Problem, INTEGER` — see `cuopt-python-api`; run its verify one-liner before pivoting | | No `Using remote GPU backend` in output | Remote env vars not set or not picked up | Ensure `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` are exported before the Python process starts | diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md index e57e45b..4bba337 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md @@ -20,11 +20,16 @@ Skip this skill when the user is clearly asking for non-optimization analytics, ## Sequence +**Step 0 (NemoClaw sandbox only — do not skip):** `cuopt-first` → +probe → `cuopt-remote-env` → smoke test. **No user-visible schedule, +assignment, heuristic plan, or feasibility verdict before step 0 +completes.** Data files may be read for column/constraint discovery only. 
+ Run these in order, but skip any step already settled from context. Default to fast mode; surface replayable/auditable mode only on a real signal (reruns, audit, export, recurring planning). 1. **`optimization-intent-router`** — decide whether this is optimization at all and which family (LP / MILP / QP / routing). If non-optimization, stop the optimization flow. 2. **`optimization-mode-router`** — *only if* there is a signal that replayability, audit, export, or recurring runs may matter. Otherwise stay in fast mode silently. -3. **`tabular-optimization-ingestion`** — identify row grain and table roles, infer likely objective and constraint fields, refine the family classification if the data clearly supports a different one, and surface any blockers. +3. **`tabular-optimization-ingestion`** — identify row grain and table roles, infer likely objective and constraint fields, refine the family classification if the data clearly supports a different one, and surface any blockers. **Output interpretation only — not a schedule or heuristic solve.** 4. **`cuopt-model-mapper`** — ask at most the final blocking clarification, then map directly into cuOpt and solve. Family-specific handoffs after step 4: @@ -33,8 +38,15 @@ Family-specific handoffs after step 4: ## Guardrails -- Do not skip intent classification and jump directly to cuOpt from raw data. +- **In NemoClaw sandbox:** run `cuopt-first` step 0 (probe → env → smoke) + before any optimization **answer** — ingestion steps do not authorize + heuristic schedules or feasibility substitutes. +- Do not skip intent classification and jump directly to cuOpt modeling from raw data. + The step 0 infrastructure gates are the one exception: they run first, and they are fast and mandatory. - Do not ask a long questionnaire before inspecting the uploaded data. - Do not trigger replayable/auditable mode by default — only when the user signals reuse, audit, export, or recurring runs. - Do not let ingestion become solver construction; the steps stay distinct.
- Do not use cuOpt for descriptive analytics tasks. +- **Do not produce a heuristic/greedy/backtracking schedule during steps + 1–3** as a stand-in for cuOpt; the first solver that emits assignments + must be cuOpt after step 0 passes. diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md index 101564a..bebd430 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md @@ -63,7 +63,7 @@ Weaker but meaningful signals: - "How should we allocate this?" - "How can we reduce cost while meeting demand?" -When weaker signals appear, inspect whether there are real constraints and decisions. If yes, treat it as optimization. +When weaker signals appear, inspect whether there are real constraints and decisions. If yes, treat it as optimization — and in the NemoClaw sandbox, **`cuopt-first` applies immediately** (probe before any schedule/heuristic output). 
## Route classification diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md index 648457e..9264f27 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md @@ -120,17 +120,20 @@ After selecting a mode, hand off based on problem type: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + before any gRPC Python solve - If the request is QP: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + before any gRPC Python solve - If the request is routing (VRP / TSP / PDP): - use `routing-formulation` - then use `cuopt-routing-api-python` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` first: routing is REST-only there + (use `cuopt-server-api-python`; `CUOPT_REMOTE_*` / gRPC does not apply) - If the user is asking about server usage or deployment rather than solving a model directly: - use `cuopt-server-common` or `cuopt-server-api-python` as appropriate diff --git a/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md index c1e14c3..82d62cc 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md @@ -13,6 +13,12 @@ The purpose of this skill is to bridge the gap between messy uploaded data and s This skill does **not** solve the optimization
problem itself. It inspects the data, infers likely modeling roles, and identifies what still needs clarification. +**It does not authorize heuristic, greedy, or backtracking schedules as +answers.** In the NemoClaw sandbox, read `cuopt-first`: the first solver +that produces assignments or a schedule must be cuOpt after probe → env → +smoke gates pass. Ingestion output is a modeling interpretation (entities, +objective fields, constraints) — never a completed plan. + This skill refines the optimization interpretation using the uploaded data; it does not replace the earlier intent decision unless the data clearly contradicts it. ## Purpose @@ -144,7 +150,6 @@ Examples: - Are demands mandatory or forecast-only? - Must decisions be integers? - Are these time windows hard constraints? -- Is this travel matrix symmetric? - Can unmet demand be allowed with penalty? - Is profit net profit or revenue only? diff --git a/cuopt_on_nemoclaw/smoke_lp.py b/cuopt_on_nemoclaw/smoke_lp.py new file mode 100644 index 0000000..504ee47 --- /dev/null +++ b/cuopt_on_nemoclaw/smoke_lp.py @@ -0,0 +1,73 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Remote LP smoke test for the NemoClaw cuOpt sandbox. + +Verifies gRPC remote execution for LP (not MILP routing, not local CUDA). + +Requires CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT in the environment when +Python starts (set them in the same ``bash -lc`` line as this script). + +Success markers in combined stdout/stderr: + Using remote GPU backend + status=Optimal objective=10.0 + +Exit code: 0 on success, 1 on failure. 
+""" + +from __future__ import annotations + +import sys +from os import environ + +DEFAULT_HOST = "host.openshell.internal" +DEFAULT_PORT = "5001" + +OK_STATUSES = frozenset({"Optimal", "PrimalFeasible"}) + + +def _require_remote_env() -> None: + host = environ.get("CUOPT_REMOTE_HOST") + port = environ.get("CUOPT_REMOTE_PORT") + if not host or not port: + print( + "error: CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT must be set " + "before Python starts.\n" + "example:\n" + " bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && " + f"export CUOPT_REMOTE_HOST={DEFAULT_HOST} && " + f"export CUOPT_REMOTE_PORT={DEFAULT_PORT} && " + "python3 /sandbox/smoke_lp.py'", + file=sys.stderr, + ) + sys.exit(1) + + +def main() -> int: + _require_remote_env() + + from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE + from cuopt.linear_programming.solver_settings import SolverSettings + + p = Problem("smoke_lp") + x = p.addVariable(lb=0, vtype=CONTINUOUS, name="x") + y = p.addVariable(lb=0, vtype=CONTINUOUS, name="y") + p.addConstraint(x + y <= 4) + p.addConstraint(x <= 2) + p.addConstraint(y <= 3) + p.setObjective(3 * x + 2 * y, sense=MAXIMIZE) + p.solve(SolverSettings()) + + status = p.Status.name + if status not in OK_STATUSES: + print(f"status={status} FAIL", file=sys.stderr) + return 1 + + print( + f"status={status} objective={p.ObjValue} " + f"x={x.getValue()} y={y.getValue()}" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/cuopt_on_nemoclaw/smoke_milp.py b/cuopt_on_nemoclaw/smoke_milp.py new file mode 100644 index 0000000..859ad6c --- /dev/null +++ b/cuopt_on_nemoclaw/smoke_milp.py @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Remote MILP smoke test for the NemoClaw cuOpt sandbox. 
+ +MILP uses the same ``cuopt.linear_programming.problem.Problem`` class as LP +with ``vtype=INTEGER`` — there is no ``from cuopt import milp``. + +Requires CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT when Python starts. + +Success markers: + Using remote GPU backend + status=Optimal (or FeasibleFound) + +Exit code: 0 on success, 1 on failure. +""" + +from __future__ import annotations + +import sys +from os import environ + +OK_STATUSES = frozenset({"Optimal", "FeasibleFound", "PrimalFeasible"}) + + +def _require_remote_env() -> None: + if not environ.get("CUOPT_REMOTE_HOST") or not environ.get("CUOPT_REMOTE_PORT"): + print( + "error: export CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT before " + "running (see /sandbox/.openclaw/skills/cuopt-remote-env/SKILL.md)", + file=sys.stderr, + ) + sys.exit(1) + + +def main() -> int: + _require_remote_env() + + from cuopt.linear_programming.problem import Problem, INTEGER, MAXIMIZE + from cuopt.linear_programming.solver_settings import SolverSettings + + # Need at least one constraint so the CSR matrix (A_offsets) is built. + # Tiny 2-variable integer problem (same shape as milp_basic, smaller nums). 
+ p = Problem("smoke_milp") + x = p.addVariable(vtype=INTEGER, lb=0, ub=10, name="x") + y = p.addVariable(vtype=INTEGER, lb=0, ub=10, name="y") + p.addConstraint(x + y <= 4) + p.addConstraint(x <= 2) + p.setObjective(x + 2 * y, sense=MAXIMIZE) + settings = SolverSettings() + settings.set_parameter("time_limit", 60) + p.solve(settings) + + status = p.Status.name + if status not in OK_STATUSES: + print(f"status={status} FAIL", file=sys.stderr) + return 1 + + print(f"status={status} objective={p.ObjValue} x={x.getValue()} y={y.getValue()}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/cuopt_on_nemoclaw/smoke_vrp.py b/cuopt_on_nemoclaw/smoke_vrp.py new file mode 100644 index 0000000..7d7fdb2 --- /dev/null +++ b/cuopt_on_nemoclaw/smoke_vrp.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""REST VRP smoke test for the NemoClaw cuOpt sandbox. + +Routing uses REST on port 5000 (not CUOPT_REMOTE_* gRPC vars). +Do not use ``from cuopt import routing`` — there is no GPU in the sandbox. + +Env (defaults shown): + CUOPT_SERVER_HOST=host.openshell.internal + CUOPT_SERVER_PORT=5000 + +Success markers: + status=0 + solution_cost present in solver_response + +Exit code: 0 on success, 1 on failure. +""" + +from __future__ import annotations + +import json +import sys +import time +from os import environ +from typing import Any + +DEFAULT_HOST = "host.openshell.internal" +DEFAULT_PORT = "5000" + +# Minimal valid payload (same shape as vrp_minimal cookbook). 
+PAYLOAD: dict[str, Any] = { + "cost_matrix_data": { + "data": { + "0": [ + [0, 1, 1], + [1, 0, 1], + [1, 1, 0], + ] + } + }, + "task_data": {"task_locations": [1, 2]}, + "fleet_data": {"vehicle_locations": [[0, 0]]}, + "solver_config": {"time_limit": 30}, +} + + +def _repoll(client: Any, solution: dict[str, Any], tries: int = 120) -> dict[str, Any]: + if "reqId" not in solution or "response" in solution: + return solution + req_id = solution["reqId"] + for _ in range(tries): + solution = client.repoll(req_id, response_type="dict") + if "response" in solution: + return solution + time.sleep(1) + return solution + + +def main() -> int: + host = environ.get("CUOPT_SERVER_HOST", DEFAULT_HOST) + port = environ.get("CUOPT_SERVER_PORT", DEFAULT_PORT) + + from cuopt_sh_client import CuOptServiceSelfHostClient + + client = CuOptServiceSelfHostClient( + ip=host, + port=str(port), + polling_timeout=60, + timeout_exception=False, + ) + solution = client.get_optimized_routes(PAYLOAD) + solution = _repoll(client, solution) + + if "response" not in solution: + print( + "error: no response from REST VRP (still polling or server error)", + file=sys.stderr, + ) + print(json.dumps(solution, indent=2), file=sys.stderr) + return 1 + + sr = solution["response"].get("solver_response", {}) + status = sr.get("status") + cost = sr.get("solution_cost") + if status != 0: + print(f"status={status} FAIL", file=sys.stderr) + print(json.dumps(solution, indent=2), file=sys.stderr) + return 1 + + print(f"status={status} solution_cost={cost} host={host} port={port}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 7e2b588cacd7f5707201adedfed0c5dd258acd54 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 15 Jun 2026 13:34:51 -0400 Subject: [PATCH 2/2] align cuopt_on_nemoclaw skills with guidance and update to v0.0.64 --- cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh | 194 +++- .../always-tool-discovery/SKILL.md | 98 -- .../openclaw-skills/cuopt-first/SKILL.md | 117 --- 
.../cuopt-model-mapper/SKILL.md | 25 +- .../openclaw-skills/cuopt-python-api/SKILL.md | 107 --- .../openclaw-skills/cuopt-remote-env/SKILL.md | 161 ---- .../openclaw-skills/cuopt-sandbox/SKILL.md | 865 ++---------------- .../cuopt-sandbox/references/activation.md | 33 + .../references/environment-and-networking.md | 54 ++ .../references/gates-and-first-actions.md | 77 ++ .../references/intent-and-triggers.md | 71 ++ .../cuopt-sandbox/references/interfaces.md | 38 + .../references/long-running-jobs.md | 47 + .../references/python-imports.md | 48 + .../references/remote-env-and-smoke.md | 71 ++ .../references/routing-rest-only.md | 37 + .../references/troubleshooting.md | 37 + .../SKILL.md | 65 +- .../optimization-intent-router/SKILL.md | 41 +- .../optimization-mode-router/SKILL.md | 24 +- .../tabular-optimization-ingestion/SKILL.md | 30 +- cuopt_on_nemoclaw/smoke_milp.py | 2 +- 22 files changed, 869 insertions(+), 1373 deletions(-) delete mode 100644 cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md delete mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md delete mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md delete mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md create mode 100644 
cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md diff --git a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh index bdc57a2..1e0e07d 100755 --- a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh +++ b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh @@ -27,7 +27,9 @@ # install-activation [NAME] Re-stamp the cuOpt venv activation file # (/sandbox/.bash_profile). Use after changing # CUOPT_HOST, CUOPT_PORT, or CUOPT_VENV. -# install-skill [NAME] Upload the cuOpt skill into the sandbox. +# install-skill [NAME] Upload the cuOpt skill into the sandbox and append +# tool-search file-access notes to workspace TOOLS.md +# when not already present. # cache-wheels [NAME] Snapshot a sandbox's already-installed wheels # into $CUOPT_WHEEL_CACHE. NAME must already have # cuOpt installed (run `add` or `install` against @@ -100,11 +102,10 @@ # the installed tools differ (non-fatal). To install the exact tested # NemoClaw build: # -# NEMOCLAW_INSTALL_TAG=v0.0.55 \ +# NEMOCLAW_INSTALL_TAG=v0.0.64 \ # curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash # -# The public installer defaults to the `lkg` ref, which currently matches -# v0.0.55; use the tag above to pin even after `lkg` moves forward. +# The public installer defaults to the `lkg` ref, which moves. # # Silence the banner with NEMOCLAW_VERSION_CHECK=0. 
# ============================================================================= @@ -179,14 +180,13 @@ CUOPT_TEST_SANDBOX_GRPC="" # # To install the exact tested NemoClaw build (openshell is bundled with the # NemoClaw release this script was verified against): -# NEMOCLAW_INSTALL_TAG=v0.0.55 \ +# NEMOCLAW_INSTALL_TAG=v0.0.64 \ # curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash # -# The public installer defaults to `lkg`, which currently resolves to the same -# commit as v0.0.55. Pin the tag if you need reproducibility after lkg moves. +# The public installer defaults to the `lkg` ref, which moves. # # Silence the banner with NEMOCLAW_VERSION_CHECK=0. -TESTED_NEMOCLAW_VERSION="0.0.55" +TESTED_NEMOCLAW_VERSION="0.0.64" TESTED_OPENSHELL_VERSION="0.0.44" # ── NemoClaw / OpenShell version compatibility check ───────────── @@ -1535,6 +1535,144 @@ upload_sandbox_file() { return 0 } +# ── install_workspace_tools_md (helper) ─────────────────────────── +# Append a managed block to /sandbox/.openclaw/workspace/TOOLS.md when OpenClaw +# compact tool-search mode is active (tools.toolSearch not false). Skips — and +# strips any existing block — when direct tools are enabled. OpenClaw injects +# TOOLS.md on every turn via Project Context. +install_workspace_tools_md() { + local sandbox="$1" + local container + if ! 
container=$(find_sandbox_container "$sandbox"); then + echo " warning: cannot update TOOLS.md (sandbox container not running)" >&2 + return 1 + fi + + local inner_script + inner_script=$(cat <<'INNER_EOF' +set -eu +tools_md=/sandbox/.openclaw/workspace/TOOLS.md +begin='# >> cuopt tool help' +end='# << cuopt tool help' + +tool_mode=$(python3 - <<'PY' +import json + +cfg_path = "/sandbox/.openclaw/openclaw.json" +try: + with open(cfg_path) as f: + cfg = json.load(f) +except (FileNotFoundError, json.JSONDecodeError, OSError): + print("compact") + raise SystemExit(0) + +ts = (cfg.get("tools") or {}).get("toolSearch") +if ts is False: + print("direct") +elif isinstance(ts, dict) and ts.get("enabled") is False: + print("direct") +else: + print("compact") +PY +) + +write_managed_block() { + cat <<'BLOCK_EOF' +# >> cuopt tool help +With only `tool_search_code` exposed, `read` and `exec` still exist - reach them via `openclaw.tools.search`, then `describe`, then `call` inside a `tool_search_code` run. Try that before asking the user to paste file contents. +`read` requires an exact path (no globs). Use the path the user gave, or run `find` through `exec` if you need to discover files under `/sandbox`. +# << cuopt tool help +BLOCK_EOF +} + +# Drop trailing blank lines so strip+re-append does not accumulate spacing. 
+trim_trailing_blank_lines() { + local file="$1" + [ -f "$file" ] || return 0 + tmp="${file}.trim.$$" + awk ' + { lines[NR] = $0 } + END { + n = NR + while (n > 0 && lines[n] == "") { + n-- + } + for (i = 1; i <= n; i++) { + print lines[i] + } + } + ' "$file" > "$tmp" + mv "$tmp" "$file" +} + +append_managed_block() { + trim_trailing_blank_lines "$tools_md" + if [ -s "$tools_md" ]; then + printf '\n' >> "$tools_md" + fi + write_managed_block >> "$tools_md" +} + +mkdir -p /sandbox/.openclaw/workspace +if [ -f "$tools_md" ]; then + tmp="${tools_md}.tmp.$$" + awk -v begin="$begin" -v end="$end" ' + $0 == begin { skip=1; next } + skip && $0 == end { skip=0; next } + !skip { print } + ' "$tools_md" > "$tmp" + mv "$tmp" "$tools_md" + trim_trailing_blank_lines "$tools_md" +fi + +if [ "$tool_mode" = "direct" ]; then + echo skipped-direct + exit 0 +fi + +if [ ! -f "$tools_md" ]; then + cat > "$tools_md" <<'HEADER_EOF' +# TOOLS.md - Local Notes + +HEADER_EOF +fi + +append_managed_block +echo updated +INNER_EOF +) + + local inner_b64 + inner_b64=$(printf '%s' "$inner_script" | base64 -w 0) + + local err_log result + err_log=$(mktemp) + result=$(sandbox_exec "$sandbox" \ + sh -c "echo '$inner_b64' | base64 -d | sh" 2>"$err_log") || { + echo " warning: could not update TOOLS.md in container '$container'" >&2 + if [[ -s "$err_log" ]]; then + sed 's/^/ /' "$err_log" >&2 + fi + rm -f "$err_log" + return 1 + } + rm -f "$err_log" + + case "$result" in + updated) + echo " TOOLS.md cuOpt tool help block updated" + ;; + skipped-direct) + echo " TOOLS.md cuOpt tool help skipped (tools.toolSearch is false)" + ;; + *) + echo " warning: unexpected TOOLS.md update result: $result" >&2 + return 1 + ;; + esac + return 0 +} + # ── install-skill ───────────────────────────────────────────────── cmd_install_skill() { local sandbox="${1:-$CUOPT_SANDBOX}" @@ -1677,40 +1815,33 @@ cmd_install_skill() { guardrail_content="$(cat <<'GUARDRAIL' --- name: cuopt-setup -description: "Read this FIRST in 
every sandbox session. Tool catalog: if you only see tool_search/tool_describe/tool_call, run tool_search (query read or shell) — read/write/exec are NOT missing. cuOpt path: read /sandbox/.openclaw/skills/cuopt-first/SKILL.md then cuopt-sandbox. MILP imports: cuopt.linear_programming.problem (NOT from cuopt import milp) — see cuopt-python-api. If ~ paths fail, use absolute paths below." +description: "NemoClaw cuOpt sandbox entry — probe/smoke before schedule output; absolute skill paths under /sandbox/.openclaw/skills/." --- -# cuOpt sandbox — tools + skill paths +# cuOpt sandbox — skill paths -## Tools (compact catalog) +## Schedule / assignment workflow -NemoClaw hides `read`, `write`, `exec` behind `tool_search` / -`tool_describe` / `tool_call`. **Do not claim files are unreadable.** -Run `tool_call` → `tool_search` with `query: "read"` or `"shell"`, -then `tool_call` the real tool. Full guide: -`/sandbox/.openclaw/skills/always-tool-discovery/SKILL.md` +Read (in order): -## cuOpt skills + /sandbox/.openclaw/skills/optimization-from-data-orchestrator/SKILL.md + /sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md -The cuopt-sandbox skill (sandbox setup, networking, venv) is installed at: +Routing + cuOpt-first rules: +`/sandbox/.openclaw/skills/cuopt-sandbox/references/activation.md` + +## cuOpt skills /sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md -Start with `/sandbox/.openclaw/skills/cuopt-first/SKILL.md` for optimization -tasks. **Python MILP/LP imports:** `/sandbox/.openclaw/skills/cuopt-python-api/SKILL.md` -(never `from cuopt import milp`). Read cuopt-sandbox for full wiring, then sibling skills in the same -directory (cuopt-user-rules, cuopt-numerical-optimization-api-python, etc.). +**Python MILP/LP imports:** +`/sandbox/.openclaw/skills/cuopt-sandbox/references/python-imports.md` +(use `cuopt.linear_programming.problem`, not `from cuopt import milp`). ## Why this guardrail exists -OpenClaw compacts skill paths to `~/…` in the system prompt. 
When you try -to read `~/.openclaw/skills/cuopt-sandbox/SKILL.md`, the `~` may expand to -`/root/` or another directory that is not readable. If that happens: - -1. Do NOT give up on using cuOpt. -2. Read the skill from the absolute path above. -3. Also check `/sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md` (symlink - to the same file). +OpenClaw compacts skill paths to `~/…` in the system prompt. Use absolute +paths under `/sandbox/.openclaw/skills/` when `~` paths fail. GUARDRAIL )" @@ -1801,6 +1932,9 @@ print(" skills.load.extraDirs=" + json.dumps(existing)) echo " until skills.load.extraDirs includes /sandbox/.openclaw/skills" >&2 fi + install_workspace_tools_md "$sandbox" \ + || echo " warning: could not update workspace TOOLS.md (non-fatal)" >&2 + echo "Skills installed." # Sandbox helper scripts (not skills): probe + smoke tests for agents and diff --git a/cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md deleted file mode 100644 index 9c52909..0000000 --- a/cuopt_on_nemoclaw/openclaw-skills/always-tool-discovery/SKILL.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -name: always-tool-discovery -description: READ FIRST in every NemoClaw sandbox session. STOP before "I can't read files", "no read tool", "no exec", "can't list directory", "please paste ls/cat output", or "I only have tool_search/tool_describe/tool_call". Real tools (read, write, exec, edit, process) are NOT missing — tools.toolSearch hides them. Use tool_call tool_search query shell or read, then tool_describe, then tool_call the real tool. Never ask the user to paste filesystem output. Applies to ALL tasks including cuOpt, CSV uploads, and scheduling — run tool_search before claiming any file or shell capability is absent. 
---- - -# Sandbox Tool Discovery (NemoClaw / OpenClaw catalog) - -**If you find yourself thinking "I don't have an exec / read / python / -shell / file / process tool exposed in this session", you are wrong and -must stop.** In a NemoClaw sandbox, OpenClaw is configured with -`tools.toolSearch: true`, which hides `exec`, `read`, `write`, `edit`, -`process`, and every other real tool behind the three meta-tools -(`tool_search`, `tool_describe`, `tool_call`). They are not missing — -they are one `tool_search` call away. - -The correct response to "my tool list looks too short" is to run -`tool_search`. It is **not** to: - -- Apologize to the user and ask them to paste shell output. -- Fall back to hand calculation, hand-rolled algorithms, or "I'll just - reason about it". -- Search the web for an answer the local filesystem already has. -- Give up and report "this sandbox seems to have no command execution". -- Claim a file is unreadable, missing, or that you can't see what's in - a directory. - -If your tool list shows only `tool_search`, `tool_describe`, and -`tool_call`, that is **the expected state** — the real tools are -reachable via the catalog, not absent. - -## How the catalog works - -OpenClaw's compact tool catalog (enabled by `tools.toolSearch: true`) -keeps the model's tool-context small by replacing the full per-tool -schema with three meta-tools: - -| Meta-tool | Purpose | -|-----------------|------------------------------------------------------------------------| -| `tool_search` | Find tools by free-text query (e.g. `"shell"`, `"file"`, `"process"`). | -| `tool_describe` | Return the parameter schema for a named tool. Call once per new tool. | -| `tool_call` | Actually invoke a tool by name with `{name, arguments}`. | - -NemoClaw configures OpenClaw this way by default starting at v0.0.55, so -in any current sandbox the compact catalog is on. 
If your tool list -already includes `exec` / `read` / `write` directly, the compact -catalog is disabled for this session; call those tools directly and -ignore the rest of this skill. - -## Use them in this order - -1. **`tool_search`** with `{"query": ""}` and `{"limit": 20}` lists the - full catalog. `{"query": "shell"}`, `{"query": "file"}`, - `{"query": "read"}`, `{"query": "process"}` narrows by topic. -2. **`tool_describe`** with `{"name": ""}` returns the parameter - schema. Call this once per new tool. -3. **`tool_call`** with `{"name": "", "arguments": {…}}` runs it. - -## Capabilities you'll typically need - -| Task | Search term | Tool name | Typical args | -|---|---|---|---| -| Shell (`ls`, `python3`, …) | `shell` / `exec` | `exec` | `{"command": "ls -1 /sandbox/"}` | -| Read a file | `read` / `file` | `read` | `{"path": "/sandbox/foo.csv"}` | -| Write / edit | `write` / `edit` | `write` / `edit` | see `tool_describe` | -| Poll background job | `process` | `process` | see `tool_describe` | - -## Worked example — read a CSV the user uploaded - -Tool list shows only `tool_search`, `tool_describe`, `tool_call`: - -```json -{"name": "tool_search", "arguments": {"query": "read"}} -{"name": "tool_describe", "arguments": {"name": "read"}} -{"name": "tool_call", "arguments": {"name": "read", "arguments": {"path": "/sandbox/teams.csv"}}} -``` - -## Worked example — run probe_cuopt.py - -```json -{"name": "tool_search", "arguments": {"query": "shell"}} -{"name": "tool_describe", "arguments": {"name": "exec"}} -{"name": "tool_call", "arguments": { - "name": "exec", - "arguments": {"command": "bash -lc 'python3 /sandbox/probe_cuopt.py'"} -}} -``` - -## Anti-pattern (from real sessions) - -> I can't read files in this sandbox — I only see tool_search, -> tool_describe, and tool_call. Can you paste the CSV contents? - -Wrong. Run `tool_search` → `read` → read the file yourself. 
- -## Related skills - -- `cuopt-first` / `cuopt-sandbox` — after you have `read`/`exec`, use - these for cuOpt optimization tasks. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md deleted file mode 100644 index a9d99a1..0000000 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-first/SKILL.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -name: cuopt-first -description: MANDATORY in NemoClaw sandbox. If you only see tool_search/tool_describe/tool_call, run tool_search (query read or shell) BEFORE claiming you cannot read files or run commands — see always-tool-discovery. STOP before any schedule/heuristic/draft-plan output. FIRST actions: (1) tool_search→exec if needed, (2) probe_cuopt.py, (3) cuopt-remote-env + smoke with CUOPT_REMOTE_HOST/PORT. Do NOT use from cuopt import milp — MILP is cuopt.linear_programming.problem.Problem with INTEGER vars; see cuopt-python-api. Do NOT wait for user to say "use cuOpt". Triggers: schedule, league, CSV upload, minimize, assign, MILP, doubleheader, ImportError, ModuleNotFoundError, wrong import path. 
---- - -# cuOpt First (NemoClaw Sandbox) - -**This sandbox exists to solve with cuOpt.** If the user's request is an -optimization problem — schedule, assign, route, minimize, maximize, -allocate under constraints — **cuOpt is turn one, not turn two after the -user corrects you.** - -## The rule - -| Allowed before cuOpt smoke passes | **Not allowed** before cuOpt smoke passes | -|---|---| -| `probe_cuopt.py` | A schedule, roster, or assignment output | -| Read `cuopt-remote-env`, set `CUOPT_REMOTE_*` | Greedy / heuristic / backtracking solver code | -| LP smoke test (`Using remote GPU backend` + `Optimal`) | "Here's a draft plan while we set up cuOpt" | -| Read CSV/Excel to infer **columns and constraints** | Capacity-only "probably feasible/infeasible" verdict | -| Ask **one** blocking clarification question | `ortools`, `pulp`, hand-rolled MIP search | -| Write `/sandbox/solve.py` **for cuOpt** | Apologizing for using a heuristic — you should not have used one | - -**Interpretation ≠ answer.** You may inspect `teams.csv` to learn that -rows are teams and `games.csv` lists matchups. You may **not** emit a -completed season schedule, slot assignments, or "I built a heuristic -schedule with doubleheaders minimized" until cuOpt returns a solver -status on the real model. - -## Mandatory first actions (in order) - -Do these **before** any optimization answer, **even if** the user -uploaded data and asked a natural-language planning question: - -0. **Tools (every session).** If your tool list is only - `tool_search` / `tool_describe` / `tool_call`, run `tool_search` - (`query: "read"` to read CSVs, `query: "shell"` for probe/exec). - **Never ask the user to paste file contents.** See - `always-tool-discovery`. -1. **`tool_call exec`** → `bash -lc 'python3 /sandbox/probe_cuopt.py'` -2. Read **`cuopt-remote-env`** → export `CUOPT_REMOTE_HOST` / - `CUOPT_REMOTE_PORT` in the same shell as Python -3. 
Run **LP smoke test** → `python3 /sandbox/smoke_lp.py` with - `CUOPT_REMOTE_*` → confirm `Using remote GPU backend` + `status=Optimal` -4. For MILP scheduling: also run `/sandbox/smoke_milp.py` (same env vars) -5. Read **`cuopt-python-api`** — copy import lines for `/sandbox/solve.py`; - **never** `from cuopt import milp` -6. Read **`cuopt-sandbox`** Four Gates + problem-family routing -7. **Then** formulate and submit the real problem to cuOpt - -Steps 1–3 are infrastructure, not "skipping ahead of data inspection." -They take seconds. Run them in parallel with skimming file headers if -needed — but **never** skip them. - -## Scheduling / league / CSV tasks - -Language like "build the season schedule", "assign games to slots", -"minimize doubleheaders", "balance home/away", uploaded league tables -→ **MILP via cuOpt gRPC**, not a custom Python scheduler. - -Default flow: - -1. Gates above (probe → env → smoke) -2. **`cuopt-python-api`** — mandatory imports (do not guess `cuopt.milp`) -3. `numerical-optimization-formulation` + `cuopt-numerical-optimization-api-python` -4. cuOpt `Problem` with binary/integer assignment vars + cuOpt solve -5. Report `Problem.Status.name`, objective, assignments - -Do **not** write a nested-loop or greedy assigner "to test feasibility" -or "to give the user something quickly." - -**One solve at a time:** after you submit to cuOpt, poll until -`time_limit` returns a terminal status (`Optimal`, `FeasibleFound`, -time-limit stop, etc.). Do not start a second job because the first -"is taking too long" — see `cuopt-sandbox` "Long-running solves". - -## Anti-pattern — wrong import → heuristic fallback - -> `from cuopt import milp` failed. I'll use a greedy scheduler instead. - -**Wrong.** MILP uses `from cuopt.linear_programming.problem import Problem, -INTEGER`. Run the verify one-liner in `cuopt-python-api`, then build the -real model. Import errors are not cuOpt outages. 
- -## Anti-pattern — heuristic first, cuOpt after user correction - -> You're right — I should have started with cuOpt. That was my miss. I -> used a heuristic first when this should be treated as a MILP scheduling -> problem. I'll model this with cuOpt now. - -**This message means you already failed.** The user should never have -had to send the correction. The heuristic output should never have -been produced. Do not pattern-match on apologizing — **prevent** the -heuristic pass entirely by running steps 1–3 before any assigner code. - -## Anti-pattern — "explore structure" before cuOpt - -> Let me first run a quick feasibility check / greedy pass to understand -> the constraint structure, then I'll move to cuOpt. - -Wrong. cuOpt **is** the feasibility and optimization engine here. -Structure exploration belongs in formulation (variables, constraints), -not in a parallel non-cuOpt solver. - -## When orchestration skills apply - -If `optimization-from-data-orchestrator` or `tabular-optimization-ingestion` -is in play: they govern **interpretation**, not permission to skip cuOpt. -In this sandbox, add **step 0: cuopt-first gates** before any step that -could produce user-visible optimization output. 
- -## Related skills - -- `cuopt-python-api` — copy-paste imports; no `cuopt.milp` -- `cuopt-sandbox` — full sandbox wiring (gates, routing, long-running solves) -- `cuopt-remote-env` — env vars and cudaError diagnostics -- `always-tool-discovery` — reaching `read`/`exec` when tools are compact diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md index 89dfadd..cd9f9ee 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md @@ -1,7 +1,14 @@ --- name: cuopt-model-mapper -summary: Convert an interpreted optimization problem directly into cuOpt-native model construction for the fast path, asking only the minimum clarifying questions needed for a valid solve. -description: Use after optimization intent and basic data interpretation are established, when the goal is to solve quickly by mapping data directly into cuOpt rather than building a replayable intermediate artifact. +version: "26.06.01" +description: Map interpreted optimization problems into cuOpt-native models for the fast path with minimal clarifying questions. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- @@ -42,7 +49,7 @@ For the fast path, map directly from the interpreted data into cuOpt structures. Do not introduce a replayable intermediate artifact unless the user asks for replayability, auditability, export, or reuse. **In NemoClaw sandbox:** before building the cuOpt model, confirm -`cuopt-first` gates completed (probe → `CUOPT_REMOTE_*` → smoke test). +`cuopt-sandbox` gates completed (probe → `CUOPT_REMOTE_*` → smoke test). Do not build a parallel heuristic assigner "first" — cuOpt is the first and only solver for assignments/schedules. 
@@ -60,6 +67,12 @@ Use the unresolved blocker list from ingestion as the starting point; do not reo If one non-retrievable modeling choice would change the meaning of the solve, ask exactly one concise blocking question. +**Scheduling with no stated objective:** Feasibility (no double-booking, +respect unavailability, assign every item) belongs in **hard constraints**. +Do not deliver a greedy feasible schedule first. Ask one objective +question if needed, or state a default secondary objective (e.g. balance +slot times, minimize penalty slacks), then solve with cuOpt. + Examples: - **"Do these production quantities need to be whole numbers?"** - **"Must all demand be met, or can unmet demand be allowed with a penalty?"** @@ -195,21 +208,21 @@ If relevant, also mention which constraint or resource appears to be most limiti - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) before any gRPC Python solve - For QP: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) before any gRPC Python solve - For routing: - use `routing-formulation` - then use `cuopt-routing-api-python` - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + - in sandbox contexts, follow `cuopt-sandbox` (gates; routing is REST-only, so no `CUOPT_REMOTE_*`) before any routing solve ## Success criterion diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md deleted file mode 100644 index 5ebe79e..0000000 ---
a/cuopt_on_nemoclaw/openclaw-skills/cuopt-python-api/SKILL.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -name: cuopt-python-api -description: STOP on ImportError, ModuleNotFoundError, or guessed imports (from cuopt import milp, import cuopt.milp, cuopt.solve). LP/MILP/QP share ONE Python entrypoint cuopt.linear_programming.problem.Problem — MILP uses INTEGER variables, not a separate milp module. With CUOPT_REMOTE_HOST/PORT set before Python starts, remote gRPC MILP works in this sandbox. One failed import is NOT proof cuOpt is unavailable — run the verify command below, then read cuopt-numerical-optimization-api-python. Triggers: scheduling MILP, assignment, import failed, wrong API path. ---- - -# cuOpt Python API — sandbox copy-paste (LP / MILP / QP) - -**Do not invent import paths.** There is no `cuopt.milp`, no -`from cuopt import milp`, and no top-level `cuopt.Problem`. - -LP, MILP, and QP all use the same class: - -```python -from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings -``` - -| Task | How | -|---|---| -| LP | `vtype=CONTINUOUS` | -| MILP (schedule, assign, roster) | `vtype=INTEGER` (binary = `lb=0, ub=1`) | -| QP | same `Problem` + quadratic objective (see API skill) | - -Remote solve: export `CUOPT_REMOTE_*` in the **same** shell as Python -(see `cuopt-remote-env`). Success log includes `Using remote GPU backend`. - -## Verify API (run before declaring import broken) - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 -c "from cuopt.linear_programming.problem import Problem, INTEGER; print(\"api_ok\")"' -``` - -If this prints `api_ok`, the SDK is installed — your earlier import path -was wrong, not cuOpt. 
- -## Pre-installed smoke scripts (do not rewrite — run as-is) - -| Script | Path | When | -|---|---|---| -| LP + remote gRPC | `/sandbox/smoke_lp.py` | Gate 3 (always) | -| MILP + remote gRPC | `/sandbox/smoke_milp.py` | Scheduling / assignment tasks | -| VRP REST | `/sandbox/smoke_vrp.py` | Routing tasks only | - -**LP smoke:** - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - export CUOPT_REMOTE_HOST=host.openshell.internal && \ - export CUOPT_REMOTE_PORT=5001 && \ - python3 /sandbox/smoke_lp.py' -``` - -**MILP smoke** (same env vars): - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - export CUOPT_REMOTE_HOST=host.openshell.internal && \ - export CUOPT_REMOTE_PORT=5001 && \ - python3 /sandbox/smoke_milp.py' -``` - -**VRP smoke** (REST — no `CUOPT_REMOTE_*`): - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 /sandbox/smoke_vrp.py' -``` - -## Scheduling / assignment skeleton - -```python -from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -p = Problem("assign") -assign = {} # (entity, slot) -> Var -for e in entities: - for s in slots: - assign[e, s] = p.addVariable( - vtype=INTEGER, lb=0, ub=1, name=f"x_{e}_{s}" - ) -# ... one-hot constraints, objective ... -settings = SolverSettings() -settings.set_parameter("time_limit", 600) # solver ALWAYS returns within this window -p.solve(settings) -print(p.Status.name, p.ObjValue) # Optimal, FeasibleFound, TimeLimit, etc. — all terminal -``` - -Full patterns: `cuopt-numerical-optimization-api-python`. **One job at a -time:** poll the same solve until `time_limit` returns — see -`cuopt-sandbox` "Long-running solves". - -## Anti-pattern — wrong import → give up - -> `from cuopt import milp` failed. I see `linear_programming` in the -> package but I'll pivot to a heuristic scheduler instead. 
- -**Wrong.** MILP **is** `linear_programming.Problem` + `INTEGER`. Run the -verify command, copy the imports above, set `CUOPT_REMOTE_*`, solve with -cuOpt. A import mistake is not a solver outage. - -## Related - -- `cuopt-remote-env` — env vars + LP smoke test -- `cuopt-first` — gates before any schedule output -- `cuopt-numerical-optimization-api-python` — full LP/MILP/QP examples diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md deleted file mode 100644 index dfd00fd..0000000 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-remote-env/SKILL.md +++ /dev/null @@ -1,161 +0,0 @@ ---- -name: cuopt-remote-env -description: STOP if you see cudaErrorInsufficientDriver, "CUDA driver version is insufficient", RMM/CUDA init errors, ImportError/from cuopt import milp/wrong Python API path, or a failed cuOpt smoke test — BEFORE declaring cuOpt blocked, offering "prep work while cuOpt gets fixed", or pivoting to heuristics/backtracking. In the NemoClaw sandbox there is NO local GPU; every Python LP/MILP/QP solve defaults to LOCAL CUDA unless you export CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT before the Python process starts. LP/MILP/QP use cuopt.linear_programming.problem.Problem (MILP = INTEGER vars, not cuopt.milp). The probe (probe_cuopt.py) does NOT set these vars for you. Passing Gate 1 (grpc reachable) is NOT proof remote Python is configured. You MUST export CUOPT_REMOTE_HOST=host.openshell.internal and CUOPT_REMOTE_PORT=5001 inside bash -lc, run the LP smoke test, and confirm "Using remote GPU backend" in output before any real model or any "cuOpt unavailable" claim. Also triggers on remote execution env vars, gRPC Python API, local solve mistaken for server failure. ---- - -# cuOpt Remote Execution Environment (NemoClaw Sandbox) - -**There is no GPU in this sandbox.** The Python SDK's default for -`Problem.solve()` is a **local CUDA solve**. 
Local CUDA **always fails -here** — usually as `cudaErrorInsufficientDriver` or "CUDA driver -version is insufficient for CUDA runtime version". - -That error almost always means **you forgot the remote env vars**, not -that cuOpt or OpenShell is broken. - -## The rule in one sentence - -**Before any `p.solve()` / LP / MILP / QP Python call: export -`CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` in the same shell that -starts Python, then confirm `Using remote GPU backend` in the log.** - -The connectivity probe (`probe_cuopt.py`) **does not** set these for -you. It only checks that the gRPC port answers. - -## Python imports — copy exactly (LP / MILP / QP) - -**There is no `from cuopt import milp`.** MILP scheduling uses the same -module as LP: - -```python -from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings -``` - -If an import fails, run this **before** declaring the API broken: - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 -c "from cuopt.linear_programming.problem import Problem, INTEGER; print(\"api_ok\")"' -``` - -Full fragments + MILP smoke: **`cuopt-python-api`**. - -## Mandatory exports (LP / MILP / QP via Python SDK or `cuopt_cli`) - -```bash -export CUOPT_REMOTE_HOST=host.openshell.internal -export CUOPT_REMOTE_PORT=5001 -``` - -| Variable | Value | Never use | -|---|---|---| -| `CUOPT_REMOTE_HOST` | `host.openshell.internal` | `localhost`, `127.0.0.1`, `0.0.0.0` | -| `CUOPT_REMOTE_PORT` | `5001` | `5000` (that's REST) | - -Env vars must be set **in the same process tree** as the Python -interpreter. Exporting them in a prior `tool_call exec` does not carry -over to the next one. 
Inline them in every solve command: - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - export CUOPT_REMOTE_HOST=host.openshell.internal && \ - export CUOPT_REMOTE_PORT=5001 && \ - python3 /sandbox/smoke_lp.py' -``` - -Use `bash -lc` (login shell) so the cuOpt venv and paths are active. -Bare `bash -c`, `sh -c`, or non-login `tool_call exec` shells often -skip `/sandbox/.bash_profile` and leave the venv inactive. - -## Gate checklist — complete before ANY real solve - -Copy this checklist into your reasoning and fill it in: - -| Step | Done? | Evidence | -|---|---|---| -| Probe returned `grpc` or `rest grpc` | ☐ | `available:` line from `probe_cuopt.py` | -| Wrote solver script to a **file** (not inline heredoc) | ☐ | `/sandbox/solve.py` for real models; use pre-installed `/sandbox/smoke_lp.py` for Gate 3 | -| Ran with `bash -lc` + venv activate + **`export CUOPT_REMOTE_*`** | ☐ | command includes both exports | -| Log contains **`Using remote GPU backend`** | ☐ | paste the line | -| Smoke LP returned **`Optimal`** | ☐ | status + objective | - -**Only after all five rows are checked** may you build the user's real -model or tell the user cuOpt is server-side broken. - -If row 4 is missing and you see `cudaErrorInsufficientDriver` → go -back to row 3 (env vars), **not** to prep work or heuristics. 
- -## Error → meaning → action - -| What you see | What it means | What to do | -|---|---|---| -| `cudaErrorInsufficientDriver` / "driver version is insufficient" **without** `Using remote GPU backend` | **Local solve** — env vars missing or wrong shell | Set `CUOPT_REMOTE_*`, use `bash -lc`, rerun smoke test | -| No `Using remote GPU backend`, no CUDA error yet | Env vars not picked up | Same fix — exports must be in the same `bash -lc` line as `python3` | -| `Using remote GPU backend` + `Optimal` | Remote path works | Proceed to real model (`cuopt-sandbox` Gate 4) | -| `Using remote GPU backend` + `cudaErrorNoDevice` / `Remote … failed` | Client OK; **host** GPU broken | Operator fixes host service — still not a local-env problem | -| Probe `available: grpc` only | Port reachable | **Not sufficient** — still run smoke test **with env vars** | - -## REST path (VRP / routing) — different vars - -Routing uses REST, not `CUOPT_REMOTE_*`. Pass host/port explicitly: - -```python -CuOptServiceSelfHostClient(ip="host.openshell.internal", port="5000") -``` - -See `cuopt-sandbox` for routing. This skill applies to **gRPC Python -LP/MILP/QP** solves. - -## Anti-pattern — "blocked, let me do prep work" (from real sessions) - -> The probe reached gRPC. The smoke test failed with -> `cudaErrorInsufficientDriver`. So I should not claim a valid optimized -> schedule from cuOpt. Best next step: fix/enable the cuOpt runtime, or -> I can do prep work — validate data, summarize rules, draft the model. - -**Wrong.** You have not tested remote execution yet. The smoke test -failed because it was a **local** solve. Fix: - -1. Export `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`. -2. Rerun smoke test. -3. Only if smoke fails **with** `Using remote GPU backend` in the log - is there a real server-side blocker. - -Offering prep work / capacity analysis / model drafting as a substitute -for step 1–2 is **not allowed** when the probe shows gRPC available. 
-Prep work is fine **in parallel after** remote smoke passes, or when -the user explicitly asks for it — not as a bypass for missing env vars. - -## Anti-pattern — "I'll set env vars later" - -> The probe confirms gRPC. I'll draft the MILP first and set remote env -> vars when I'm ready to solve. - -Wrong. Set env vars **before the first smoke test**, not after the full -model is written. The smoke test exists precisely to catch missing env -vars before you invest in formulation. - -## Pre-installed smoke test (Gate 3) - -Run `/sandbox/smoke_lp.py` — **do not rewrite it**. It is uploaded by -`nemoclaw_cuopt_setup.sh` with the correct imports and a tiny LP model. - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - export CUOPT_REMOTE_HOST=host.openshell.internal && \ - export CUOPT_REMOTE_PORT=5001 && \ - python3 /sandbox/smoke_lp.py' -``` - -Expected: log line `Using remote GPU backend`, then `status=Optimal objective=10.0 …`. - -For MILP scheduling tasks, also run `/sandbox/smoke_milp.py` with the -same env vars. For routing, run `/sandbox/smoke_vrp.py` (REST, no -`CUOPT_REMOTE_*`). See `cuopt-python-api`. - -## Related skills - -- `cuopt-python-api` — import lines; anti-pattern for `cuopt.milp` -- `cuopt-sandbox` — full gate sequence (probe → env → smoke → model) -- `always-tool-discovery` — how to reach `exec`/`read` when tools are hidden diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md index cba3a8c..0b50fd3 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md @@ -1,829 +1,78 @@ --- name: cuopt-sandbox -description: STOP before schedule/heuristic output OR claiming files/shell are unavailable. If only tool_search/tool_describe/tool_call are visible, run tool_search (read/shell) first — see always-tool-discovery. 
Then read cuopt-first; probe_cuopt.py; cuopt-remote-env + smoke with CUOPT_REMOTE_* before any model. One cuOpt job at a time — poll until time_limit returns a status; never submit a second solve while the first is in flight. Triggers: schedule, league, CSV, minimize, assign, MILP, routing, optimal, feasible, time_limit, job still running. No ortools/heuristics. cudaErrorInsufficientDriver without Using remote GPU backend = missing env vars. +version: "26.06.01" +description: Run cuOpt in the NemoClaw sandbox — probe/smoke gates, remote gRPC env, then vendored cuOpt skills. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - sandbox --- -# NVIDIA cuOpt (Sandbox) +# cuOpt in the NemoClaw sandbox -Solve optimization problems using NVIDIA cuOpt from inside the sandbox. -This skill covers **sandbox-specific** setup — networking, venv, and connectivity. +Infrastructure for solving with cuOpt inside NemoClaw: probe/smoke gates, +remote env vars, and handoff to vendored formulation/API skills. -> **Read `cuopt-first` before this section if you have not already.** It -> defines what you must **not** output (heuristic schedules, greedy -> assigners, feasibility verdicts) before the probe → env → smoke gates -> below complete. +## When to use -## Zero optimization output before Gate 3 +- Constructive planning from uploaded constraint data (schedule, assign, + route, roster — any wording). See `references/intent-and-triggers.md`. +- CSV upload + plan → `optimization-from-data-orchestrator` + `references/activation.md`. +- `ImportError` / `cudaErrorInsufficientDriver`. 
-Do not send the user any of the following until the LP smoke test -passes with `Using remote GPU backend` (see Four gates below): +## Mandatory order -- A season schedule, slot assignment, or roster -- "Here's a heuristic / greedy / draft plan" -- "Feasible" / "infeasible" / "capacity is sufficient" as the answer -- Python that assigns games, routes, or resources **outside** cuOpt's API -- An apology for not using cuOpt — prevent the miss instead +Complete before any assignment output, feasibility verdict, or custom +solver code: -Reading uploaded files to identify columns and constraints is fine. -**Emitting an optimization result is not**, until cuOpt solves the model. - -> **In this sandbox, LP/MILP must be solved through the remote cuOpt service -> on the host, not the local CUDA runtime.** There is no GPU inside the -> sandbox. If you see `cudaErrorInsufficientDriver` **without** -> `Using remote GPU backend` in the same run, you accidentally invoked a -> local solve — set `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` to use the -> remote service instead. If you **do** see `Using remote GPU backend` -> followed by `cudaErrorNoDevice` or `Remote LP solve failed`, the client -> path is correct and the **host cuOpt service** has no usable GPU — report -> that to the operator; do not fall back to heuristics or hand search. -> -> **Full env-var checklist and error table:** read the `cuopt-remote-env` -> skill — it is mandatory for every gRPC Python LP/MILP/QP solve in this -> sandbox. - -## Finding the shell, file, and editing tools - -**If your tool list shows only `tool_search`, `tool_describe`, and -`tool_call`, you still have `read`, `write`, and `exec` — run -`tool_search` first.** Full walkthrough: `always-tool-discovery` skill. -Do **not** tell the user you cannot read files or ask them to paste -`ls`/`cat` output. 
- -Minimal pattern (read a CSV or run probe): - -```json -{"name": "tool_search", "arguments": {"query": "read"}} -{"name": "tool_describe", "arguments": {"name": "read"}} -{"name": "tool_call", "arguments": {"name": "read", "arguments": {"path": "/sandbox/teams.csv"}}} -``` - -For shell/probe, use `"query": "shell"` → `exec` → -`{"command": "bash -lc 'python3 /sandbox/probe_cuopt.py'"}`. - -Every command below assumes this catalog path when real tools are not -directly listed. Report a setup problem only if `tool_search` with -`{"query": ""}` returns nothing beyond the three meta-tools. - -Concrete one-shot for the cuOpt capability probe (the very first -thing this skill expects you to run): - -```json -{"name": "tool_search", "arguments": {"query": "shell"}} -{"name": "tool_describe", "arguments": {"name": "exec"}} -{"name": "tool_call", "arguments": { - "name": "exec", - "arguments": {"command": "bash -lc 'python3 /sandbox/probe_cuopt.py'"} -}} -``` - -If you finished reading this section without running that probe once, -you have not yet done the work this skill exists for. Run it. - -## Always use cuOpt when it is available - -If the user's task is an LP, MILP, QP, or vehicle-routing problem **and** -the capability check below shows the required cuOpt service is reachable, -you MUST solve it with cuOpt. This applies even when: - -- The problem has only 2–3 variables and could be solved by hand or by - inspecting corner points of the feasible region. -- The user asked you to "make up", "invent", or "come up with" an example. -- A textbook approach (graphing, exhaustive enumeration, simplex by hand) - would also yield the answer. -- The problem appears pedagogical or "obvious". - -The sandbox exists to demonstrate cuOpt; bypassing it for size or -pedagogy defeats the purpose. Always: - -1. Formulate the problem (variables, constraints, objective) — see the - `*-formulation` sibling skills for the relevant problem class. -2. 
Build and submit it through the matching `cuopt-*-api-*` skill. -3. Report cuOpt's status, objective value, and variable values. -4. Only then, if helpful, walk through the math/intuition. - -**Narrow exceptions** (do not invent others): - -- The capability check shows the required service is unavailable → follow - the decline rule in the next section. Do not silently fall back to a - by-hand solve. -- The user **explicitly** asks for a by-hand walkthrough, "without using - cuOpt", "show the math", "explain the algorithm", or similar — comply, - but state once that cuOpt would be the production answer. - -When in doubt, run cuOpt. - -## Evidence standard — no cuOpt, no verdict - -Do not tell the user a schedule/plan is **infeasible**, **impossible**, -or **cannot be satisfied** unless cuOpt returned an explicit solver -status (`Infeasible`, `InfeasibleOrUnbounded`, etc.) for a model you -actually submitted. - -The following are **not** acceptable substitutes for a cuOpt infeasibility -proof: - -- Backtracking, branch-and-bound, or exhaustive search you wrote yourself. -- `ortools`, `pulp`, or any other non-cuOpt solver. -- Hand reasoning ("the constraints clearly conflict"). -- A heuristic that failed to find a feasible assignment. - -If cuOpt has not yet run successfully on the real model, say **"I have -not yet solved this with cuOpt"** — not "it's infeasible". If you ran a -non-cuOpt exploratory search, label it explicitly as a **non-authoritative -heuristic** with caveats and still pursue the cuOpt path. - -## Remote-first — never try a local solve before the remote service - -There is no GPU in this sandbox. Every cuOpt Python entry point that -touches CUDA (`from cuopt import routing`, `cuopt.linear_programming` -local solves, anything that initializes `rmm` or `cudf`) **will fail at -import or first-use** with `cudaErrorInsufficientDriver`, -`RMM`/`CUDA driver` errors, or similar. 
This is expected, not a bug to -work around — the sandbox image deliberately omits the driver because -solves are routed to a host-side cuOpt service over the wire. - -**Mandatory order of attempts for any cuOpt-supported task:** - -1. Run the capability probe (`probe_cuopt.py` — see "Capability check" - below). Read `available:` line. **If the most recent probe in this - session did NOT return `rest grpc`, you MUST re-run the probe before - this task — the operator may have started a service since the last - check, and a stale "REST only" or "gRPC only" reading will pin you to - a suboptimal path. Only the full `rest grpc` result is durable enough - to reuse for the rest of the session.** - **Gate 1 only proves the endpoint is reachable — not that solves work.** -2. **Stop and answer the post-probe checklist** (see "Four gates before - modeling" below). Pick the interface, name the sibling skill you will - read next, and confirm the problem family (MILP vs routing vs LP). - **Do not write model code until you have written down those three - answers.** -3. **Set remote env vars and run the smoke test** (Gates 2–3). For LP / - MILP / QP: read `cuopt-remote-env`, export `CUOPT_REMOTE_HOST` / - `CUOPT_REMOTE_PORT` in the same `bash -lc` line as Python, run the - minimal LP smoke test, confirm `Using remote GPU backend` + `Optimal`. - For routing when REST is available: minimal health/submit from the VRP - cookbook with explicit `host.openshell.internal:5000`. - **`cudaErrorInsufficientDriver` without `Using remote GPU backend` - means env vars were not set — retry Gate 2; do NOT declare cuOpt - blocked or offer prep work as a substitute.** -4. Read the sibling skills named in step 2 (`numerical-optimization-formulation` - + `cuopt-numerical-optimization-api-python` for MILP/scheduling, etc.). - **Only now** formulate and submit the real problem. -5. 
The **only** legitimate evidence that cuOpt is unavailable for your - task is a fresh `probe_cuopt.py` result whose `available:` line is - `none`, *or* the matching column in the capability table marks the - required interface as "Decline", *or* the smoke test fails with a - server-side error after env/venv are confirmed correct. The following - do **not** count and never permit skipping cuOpt: - - a failed `import cuopt` / `from cuopt import routing` / any - `ModuleNotFoundError` in the current interpreter - - any **other** Python solver library being missing - (`ortools`, `pulp`, `scipy.optimize`, `cvxpy`, `pyomo`, `mip`, …). - Those are **not cuOpt**, and their absence has nothing to do with - whether the cuOpt service is reachable. If you typed - `from ortools…` (or any of the above) and got `ModuleNotFoundError`, - do not propose installing them, do not write your own backtracking / - branch-and-bound / simplex search — run the cuOpt probe. - - the problem being small, toy-sized, pedagogical, or "obvious" - - a probe result from earlier in the session that wasn't `rest grpc` - (re-probe — the operator may have started a service since) - - a guess that "cuOpt won't help here" - - a hand solution being faster to type - If you have any of these and no fresh `none` probe, you are still - required to use cuOpt. The sandbox has no GPU, so once you do reach - the "local cuOpt is the only candidate" branch (a real `none` - probe), it will almost certainly fail anyway — proceed to step 6. -6. **If every cuOpt path fails**, stop. Explain to the user exactly - which probe / interface / payload failed and what's needed (operator - action, network policy, etc.). **Do not** silently fall back to - brute force, hand calculation, exhaustive search, a non-cuOpt - solver, or "I solved it another way" — those are all violations of - "always use cuOpt when it's available". Returning a correct answer - from a non-cuOpt method is still a failure of this skill. 
- -A 422 / 400 from the REST server is **not a fall-back trigger** — it -means your payload was wrong. Read the response, fix the named field -(see `cuopt-server-api-python`'s "On a 422" recipe and `assets/` -cookbook for known-good shapes), and retry. Two consecutive failures -on the same field → re-read the cookbook entry that uses that field. - -For **how to use cuOpt** (formulation, Python API, CLI, MPS format, routing, etc.), -read the sibling skills installed alongside this one in -`/sandbox/.openclaw/skills/`. Names follow stable suffix patterns -upstream, so prefer pattern-based discovery over memorizing exact names: - -- `cuopt-first` — **Read before anything else for optimization tasks:** - no heuristic/schedule output before probe → env → smoke -- `cuopt-user-rules` — Read FIRST: behavior rules, clarify before coding, verify results -- `cuopt-remote-env` — **Mandatory before any gRPC Python LP/MILP/QP solve:** - `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`, smoke test, cudaError diagnostics -- `always-tool-discovery` — **Every session:** `tool_search` → `read`/`exec` when catalog is compact -- Any `*-formulation` skill — How to go from problem text to formulation - (LP / MILP / QP, vehicle routing, etc.) -- Any `cuopt-*-api-python` skill — Solve through the Python SDK - (numerical optimization / LP / MILP / QP, routing, server client) -- Any `cuopt-*-api-cli` skill — Solve via `cuopt_cli` with MPS files -- `cuopt-server-common` and `cuopt-server-api-python` — REST/gRPC server - concepts and Python client (server skills are not pattern-merged) -- `skill-evolution` — Detect generalizable learnings during a long-running session - -Concrete formulation skill currently installed upstream: -`numerical-optimization-formulation` (LP, MILP, and QP concepts in one -skill). Reachable through the `*-formulation` pattern above. 
List the -directory to see what's actually installed: - -```bash -ls -1 /sandbox/.openclaw/skills/ -``` - -These are vendored from at -sandbox-setup time so the agent can read them locally — the sandbox cannot -reach `github.com` directly. To refresh, ask the operator to re-run -`./nemoclaw_cuopt_setup.sh install-skill ` on the host. - -## Environment - -The cuOpt client and SDK are installed in a Python virtual environment at -`/sandbox/.openclaw-data/cuopt` (the default NemoClaw filesystem policy -marks `/sandbox` itself as read-only, so the venv lives in the writable -subtree under `/sandbox/.openclaw-data/`). - -The sandbox's `/sandbox/.bash_profile` auto-activates the venv and sets -`CUOPT_SERVER`. It fires for **login shells only** — `bash -l`, -`bash -lc '…'`. Non-login interactive shells (the default behind -`openshell sandbox connect` / `nemoclaw connect`) and non-login -non-interactive shells (`bash -c '…'`, `sh -c '…'`, the default behind -many `tool_call exec` paths) do **not** source `.bash_profile`, so the -venv will *not* be active there. - -This is a NemoClaw constraint, not a cuOpt choice: `/sandbox/.bashrc` -(the file non-login interactive bash would normally source) is sealed -root-owned mode 444 *and* Landlock-protected (see -`04-landlock-readonly.sh` check 2 — even root processes can't write to -it after the sandbox starts), so we can't put activation there. - -Three ways to get a venv-active shell: - -```bash -# After `nemoclaw connect ` (non-login), inside the sandbox shell, -# either source .bash_profile in place: -source /sandbox/.bash_profile -# or replace the current shell with a login shell: -exec bash -l - -# From the host: one-shot login-shell command for any single task. -openshell sandbox exec --name -- bash -lc 'python3 …' -``` - -Prefer the `bash -lc '…'` wrapper for anything dispatched through -`tool_call exec` — it picks up `CUOPT_SERVER`, the `cuopt_sh` alias, and -the venv `PATH` in one shot. 
- -If the venv doesn't exist, ask the operator to run the host-side setup -script (`./nemoclaw_cuopt_setup.sh add `); the sandbox user -cannot recreate it directly because the packages live under the -`openclaw-sandbox` network policy and the venv path must match the -operator's configuration. - -## Networking — CRITICAL - -> **Always use `host.openshell.internal` as the server address.** -> Do NOT use `localhost`, `127.0.0.1`, or `0.0.0.0` — these resolve inside -> the sandbox container and will be **blocked** (403 Forbidden or timeout). - -Two server interfaces are available on the host: - -| Interface | Port | Protocol | Use for | -|-----------|------|----------|---------| -| REST | 5000 | HTTP | `cuopt_sh` CLI, `cuopt_sh_client` Python client, health checks | -| gRPC | 5001 | HTTP/2 | `cuopt_cli` remote execution, Python SDK remote solves | - -The `CUOPT_SERVER` environment variable (set in `.bash_profile` for login -shells) contains the REST `host:port` value. - -## Capability check — run this FIRST - -**Do not substitute `import cuopt` for the probe.** In this sandbox a -failed `import cuopt` (or `from cuopt import routing`, or -`from cuopt.linear_programming...`) only tells you the *local* runtime -can't initialize — almost always because there is no GPU here, and the -service runs on the host. It says **nothing** about whether the -host-side cuOpt service is reachable. The only authoritative -capability signal is what `probe_cuopt.py` prints on its `available:` -line. If the probe says `rest`, `grpc`, or `rest grpc`, cuOpt is -available and you must use it — regardless of what a local import -does. If you catch yourself reasoning "I tried `import cuopt`, it -failed, so I'll solve this by hand", stop and run the probe. - -Before doing any cuOpt work, probe what the host is actually serving. 
-**The probe needs the cuOpt venv** for `grpcio`; non-login shells -(`bash -c '…'`, plain `sh -c '…'`) do not source `.bash_profile`, so -either wrap the call in `bash -lc '…'` or source the venv explicitly: - -```bash -source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 /sandbox/probe_cuopt.py -``` - -The last line tells you what's available. Map it to the request you were -asked to handle: - -| `available:` line | You may use | Decline (politely, with reason) | +| Step | Action | Reference | |---|---|---| -| `rest grpc` | everything below | nothing | -| `rest` only | LP / MILP via Python SDK or `cuopt_sh` / `cuopt_sh_client`; vehicle routing (VRP, TSP, PDP) | LP / MILP via `cuopt_cli`; QP | -| `grpc` only | LP / MILP via Python SDK or `cuopt_cli`; QP | vehicle routing (VRP, TSP, PDP); `cuopt_sh*` tools | -| `none` | nothing — refuse | every cuOpt task | - -When a request lands in the "Decline" column, do **not** open the matching -sibling skill and try anyway. Tell the user which service is needed and -point at `cuopt-examples/cuopt_on_nemoclaw/SETUP.md` ("Starting the cuOpt -server"). Example: - -> The cuOpt REST server (port 5000) isn't reachable, so I can't solve -> vehicle-routing problems in this sandbox. Ask the operator to start it -> (see SETUP.md, "Starting the cuOpt server"), then try again. - -The probe also prints the exact endpoint reached, e.g. -`grpc: host.openshell.internal:5001`. Use that endpoint for the -session — set `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT` for gRPC, or pass -`ip=` / `port=` to `CuOptServiceSelfHostClient` for REST. - -For machine-parseable output use `--json`: - -```bash -source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 /sandbox/probe_cuopt.py --json -``` - -## Four gates before modeling - -The probe, **remote env vars**, the smoke test, and the sibling-skill -read are **four separate gates**. Passing one does not skip the others. 
-A common failure mode is probing successfully (`available: grpc`), running -a smoke test **without** `CUOPT_REMOTE_*` exports, getting -`cudaErrorInsufficientDriver`, and incorrectly declaring cuOpt blocked -or offering "prep work" — see `cuopt-remote-env` for the full error table. - -**Gate 1 — Endpoint reachable (probe).** Run `probe_cuopt.py`. Record: - -| Question | Your answer (write it out before proceeding) | -|---|---| -| `available:` line | `rest` / `grpc` / `rest grpc` / `none` | -| Problem class for this task | LP / MILP / QP / routing | -| Interface you will use | gRPC Python SDK / REST / `cuopt_cli` | -| Sibling skill to read next | e.g. `cuopt-numerical-optimization-api-python` | - -**`available: grpc` means the TCP port answered — not that env vars are -set, not that remote solves succeed, and not that you may skip the -smoke test or read `cuopt-remote-env`.** The probe does not export -`CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT` for you. - -**Gate 2 — Remote env vars set (mandatory for gRPC Python LP/MILP/QP).** -Read `cuopt-remote-env` and complete its checklist before any -`p.solve()`. Minimum: - -```bash -export CUOPT_REMOTE_HOST=host.openshell.internal -export CUOPT_REMOTE_PORT=5001 -``` - -These must appear in the **same** `bash -lc '…'` command as `python3`, -not in a prior shell invocation. Skip this gate → local CUDA → -`cudaErrorInsufficientDriver` → **not** a server failure. - -**Gate 3 — Remote solve works (smoke test with Gate 2 env vars).** -Run the minimal LP in "Quick connectivity smoke test". Expected: -`Using remote GPU backend` + `Optimal`. 
- -| Smoke outcome | Meaning | Next action | -|---|---|---| -| `Optimal` + `Using remote GPU backend` | Path works | Proceed to Gate 4 | -| `cudaErrorInsufficientDriver` **without** remote backend log | Gate 2 skipped | Read `cuopt-remote-env`; set exports; retry | -| Shell/heredoc/`File name too long`/`SyntaxError` | Script packaging bug | Write script to file; retry with env vars | -| No `Using remote GPU backend`, no CUDA error | Env vars not in same shell | Inline exports in `bash -lc`; retry | -| `Using remote GPU backend` then `cudaErrorNoDevice` | Host GPU broken | Operator action; **not** missing env vars | -| Connection refused on probe | Service down | Operator starts service | - -**Do not tell the user cuOpt is unavailable, and do not offer prep-work -substitutes (data validation, capacity checks, model drafting), until -Gate 3 passes OR smoke fails with `Using remote GPU backend` already -in the log** (proving the client path is correct and the fault is -server-side). - -**Gate 4 — Read the right skills.** Open the formulation + API skills -from the table before writing solver code. **Read `cuopt-python-api` -first** and copy its import lines — do not guess `from cuopt import milp`. -For scheduling / assignment / league timetable problems, that is almost always MILP via -`numerical-optimization-formulation` + -`cuopt-numerical-optimization-api-python` — **not** vehicle routing -unless the user explicitly gave locations, vehicles, and a travel matrix. +| 0 | Probe → remote env → smoke | `references/remote-env-and-smoke.md` | +| 1 | Formulate | vendored `*-formulation` skills | +| 2 | Solve (one job, terminal status) | `references/long-running-jobs.md` | -Only after Gates 1–4 pass may you build the real model. +Inspecting uploaded data for columns and constraints is fine; emit a +completed plan only after smoke succeeds. 
-### Problem family quick routing +## Quick reference -| User language | Problem class | Skills to read | Interface (typical) | -|---|---|---|---| -| Schedule, timetable, league, roster, assign slots/shifts/games | MILP (assignment/scheduling) | `numerical-optimization-formulation`, `cuopt-numerical-optimization-api-python` | gRPC Python SDK | -| Product mix, blend, allocate budget | LP or MILP | same | gRPC | -| Deliveries, routes, trucks, TSP, VRP, PDP | Routing | `routing-formulation`, `cuopt-server-api-python` | REST | -| Minimize cost / maximize profit with linear constraints | LP / MILP / QP per formulation skill | formulation + `cuopt-numerical-optimization-api-python` | gRPC | - -When unsure between MILP scheduling and VRP: if the decisions are -*who plays whom when* or *which resource gets which task*, it's MILP. -If the decisions are *which stops each vehicle visits in what order*, -it's routing. - -**Anti-pattern — probe then heuristic (from real sessions):** - -> I probed cuOpt first and found gRPC available. My first Python script -> failed with a shell error, so I tried backtracking to test feasibility -> structure and concluded the schedule is likely infeasible. - -Wrong on four counts: (1) probe passing is Gate 1 only — smoke test -(Gate 2) was skipped; (2) a shell/heredoc failure is not a solver -failure — retry with a file-based script; (3) backtracking is not an -acceptable substitute for cuOpt when the service is reachable; (4) -infeasibility requires a cuOpt solver status, not a heuristic search. - -**Anti-pattern — cudaErrorInsufficientDriver → "cuOpt blocked" → prep work:** - -> Smoke test failed with `cudaErrorInsufficientDriver`. I should not -> claim a valid schedule from cuOpt. Let me do prep work — validate -> data, summarize rules, draft the model — while the runtime gets fixed. - -Wrong: Gate 2 (`CUOPT_REMOTE_*`) was skipped, so the smoke test hit -**local CUDA**, not the gRPC server. 
Read `cuopt-remote-env`, set env -vars, rerun smoke. Do not offer prep work as a bypass for missing env -vars when `available: grpc`. - -## How to invoke each interface — sandbox-specific delta - -For complete API docs, modeling patterns, and examples, read the upstream -sibling skills listed at the top of this file. Below is only what's -*different* about this sandbox. - -### gRPC path (Python SDK and `cuopt_cli`) - -**Read `cuopt-remote-env` first** — it is the canonical checklist for -`CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`, the smoke test command, and -the error→action table. Summary: - -```bash -export CUOPT_REMOTE_HOST=host.openshell.internal -export CUOPT_REMOTE_PORT=5001 -``` - -before the Python or CLI process starts, in the same `bash -lc` line as -`python3`. Success marker: `Using remote GPU backend` in the log. - -For modeling, status checking, and examples → the matching upstream -skill in `/sandbox/.openclaw/skills/` — typically a `cuopt-*-api-python` -skill (LP / MILP / QP), `cuopt-routing-api-python`, or a `cuopt-*-api-cli` -skill. - -### REST path (`cuopt_sh`, `cuopt_sh_client`) - -REST runs at `host.openshell.internal:5000`. Pass `ip` and `port` (string) -explicitly when constructing the client; the constructor's defaults assume -`localhost`, which is blocked from the sandbox. +**Imports (LP/MILP/QP):** ```python -from cuopt_sh_client import CuOptServiceSelfHostClient -client = CuOptServiceSelfHostClient(ip="host.openshell.internal", port="5000") -``` - -Or with `cuopt_sh`: - -```bash -cuopt_sh -t LP /path/to/problem.mps -i host.openshell.internal -p 5000 -``` - -For request shape, polling, and routing examples → -`cuopt-server-api-python`, `cuopt-server-common`, and `cuopt-routing-api-python` -in `/sandbox/.openclaw/skills/`. - -### Vehicle routing (VRP, TSP, PDP) — REST only in this sandbox - -Routing **must** go through the REST path. 
The `cuopt.routing` Python -module initializes CUDA/RMM at import time and there is no GPU in this -sandbox, so `from cuopt import routing` fails. This is by design — see -"Remote-first" above. - -Concrete steps: - -1. Open `cuopt-server-api-python` and read its "VRP payload cookbook" - table. -2. Pick the cookbook entry whose feature set is closest to the user's - data — e.g. `vrp_time_windows/` if the user gave time windows, - `vrp_capacities/` for demand+capacity, `vrp_pickup_delivery/` for - paired pickups/deliveries. Each entry is at - `/sandbox/.openclaw/skills/cuopt-server-api-python/assets//` - and contains a runnable `payload.json`, `README.md`, and `run.sh`. -3. Adapt the `payload.json` to the user's data, keeping the field - shapes intact. -4. Submit with `cuopt_sh` (CLI, easiest) or `cuopt_sh_client` (Python). - Both honor `CUOPT_SERVER` (already set by `.bashrc` to - `host.openshell.internal:5000`). -5. On a 422, follow the cookbook's "On a 422" recipe — read the `loc` - path from the response and fix that field. Do not retry blindly; - do not bail to brute force. - -#### Default routing-data assumptions (do not ask) - -When user-supplied routing data is incomplete in any of the ways below, -apply these defaults silently rather than asking. Symmetric costs and -zero diagonals are the conventional defaults for VRP/TSP/PDP; asking the -user to re-state them every time is friction without value. - -- **Cost and time matrices are symmetric by default.** If the user - provides a cost or time for one direction of a location pair (A→B) - but not the reverse (B→A), assume the reverse equals the forward - value. Mirror sparse one-direction entries into a full square matrix - before submitting the payload. -- **Diagonal entries are zero.** Cost and time from a location to itself - is 0. Do not ask whether to include the diagonal or what its value - should be. 
-- **Explicit asymmetric values always win.** If the user provides both - A→B = 10 and B→A = 12, use both as-is. Symmetry is only the default - for *missing* entries; it is never an override for entries the user - actually gave. - -Only ask for clarification when the gap is genuinely ambiguous in a way -these defaults can't cover, e.g.: - -- No cost or time data of any kind was provided — need a source - (user-supplied matrix? straight-line distance from coordinates? - haversine on lat/lon? external distance API?). -- Multi-modal cost (e.g. distance vs travel time vs toll) where the - formulation needs one but the user supplied another. -- Costs/times for some pairs only, with neither direction provided for - others — explicitly confirm whether the missing pairs are unreachable - or simply unmeasured. - -The `cuopt-routing-api-python` skill describes the GPU-backed Python API -and is **not** the right reference inside this sandbox — use the REST path -instead. - -## Long-running solves — one job, poll to completion - -cuOpt MILP / VRP solves can take tens of seconds to several minutes. -Under NemoClaw's `exec` tool, any command that exceeds `yieldMs` is -moved to a background process; the agent then has to poll it via the -`process` tool to retrieve the final result. 
**That polling is your job -to do silently — it is not a checkpoint that requires user input.** - -### One job at a time — never submit while one is in flight - -When you submit a solve (gRPC `Problem.solve()`, REST -`get_optimized_routes()`, `cuopt_sh`, etc.), **do not start another -solve until the current one returns a terminal response.** - -| In flight | Allowed | **Not allowed** | -|---|---|---| -| Python process still running / `reqId` not finished | Poll same process or repoll same `reqId` | New `python3 /sandbox/solve.py`, new REST POST, new gRPC solve | -| Exec backgrounded, no exit yet | `tool_call process` on **that** handle | Kill + resubmit, "try simpler model" as a second job | -| Waiting on REST `reqId` | `client.repoll(reqId)` | Submit a fresh payload while the first job runs | - -**Why:** cancelling or abandoning job A does not free the GPU — the -server keeps solving until **A's** `time_limit` expires. Job B then -runs concurrently, wastes GPU, and you lose A's final status/incumbent. - -### `time_limit` means you always get a response - -If you set `time_limit` (Python: -`settings.set_parameter("time_limit", N)`; REST: -`solver_config.time_limit`), cuOpt **will stop and return within that -window** — even when the problem does not converge to optimality. - -You are waiting for a **terminal solver status**, not necessarily -`Optimal`: - -| Status (examples) | Meaning | -|---|---| -| `Optimal` | Proven optimal (within tolerances) | -| `FeasibleFound` / `PrimalFeasible` | Feasible solution, may not be optimal | -| `TimeLimit` / time-limit reached | Best effort within budget — **still a valid response** | -| `Infeasible` | No feasible solution | - -Silence or a hung client past ~2 × `time_limit` is a **bug or poll -failure**, not "MILP might run forever". Keep polling the **same** -submission; do not open a second one because the first "seems slow". 
- -Three failure modes to avoid — all surface as "it's taking a while, I'll -do something else": - -1. **Interrupting the user** — pausing to ask "should I keep - waiting?" / "should I take the current incumbent?". Wastes the - user's turn; addressed by the rules below. -2. **Cancelling the solve** — killing the Python process, terminating - the `tool_call process` handle, or calling `CancelJob` on the gRPC - server. **This is worse**, because it does not actually stop the - work — the server-side solve keeps consuming GPU until its own - `time_limit` fires, and there is no recovery path back to that - `job_id` from a new client (see - `cpp/docs/grpc-job-management-proposal.md` in nvidia-cuopt for the - in-flight design that would fix this; today no `ListJobs` RPC - exists). A cancel-and-retry loop just queues a *second* concurrent - solve on the same GPU while the first one runs to completion - unobserved. -3. **Submitting a second job** — starting a new solve because the first - "hasn't returned yet". The first job is still running server-side; - you now have two GPU jobs and no clean result from either. - -Concrete rules: - -- If you started a cuOpt solve and it is still running, your only valid - next actions are: (a) `tool_call process` to poll, or (b) wait and - poll again — **on that same job**. **Never** submit a second solve - in parallel. **Do not** return to the user with "should I keep - waiting?", "should I take the current incumbent?", or "let me know if - you want me to continue". The user already asked for the solution; - pausing to re-confirm wastes their time and frequently means the - solver finishes in the gap and the user has to type "yes finish" to - unblock work that already completed. -- cuOpt's MILP solver respects `SolverSettings.time_limit` (default in - this sandbox: 120s unless you override). The solver will stop - itself and return a status within that budget — convergence to - `Optimal` is not required. 
You do not need to "decide when to stop" - — `time_limit` decided that already. Poll until the process exits or - you hit a generous wall clock (e.g. 2 × the configured solver time - limit), then report the **terminal status** (including - `FeasibleFound` / time-limit stops). -- If a feasible incumbent is visible in partial output but the solver - has not exited, that is **not** a finished solve. Keep polling. Only - report `Optimal` / `FeasibleFound` / `Infeasible` etc. once the - Python process actually exits and you can read `Problem.Status.name` - from the final output (or from a file the script wrote on exit). -- **"Report early" and "cancel" are different actions.** Reporting - early means surfacing the current incumbent in chat *while the - solver keeps running*; cancelling means killing the Python process, - ending the `tool_call process` handle, sending SIGTERM, or calling - `CancelJob` on the gRPC service. The rules below permit the former - in narrow cases; they **do not permit cancelling a running solve - just because it feels slow**. -- The only legitimate reasons to **report early** are: (1) the process - is genuinely hung (no output movement for > 2 × `time_limit`, no - completion); (2) the user explicitly asked you to stop or take what - you have now; (3) the wall-clock budget for the *whole task* (not - the solver) is about to expire. If you do report early, **keep the - solver running** unless one of the cancellation reasons below also - holds — the incumbent may improve while you write the message. -- The only legitimate reasons to **cancel** a running solve are: (1) - the user explicitly said "cancel" / "kill it" / "stop"; (2) the - process is genuinely hung *and* you have already tried to read its - output once without success. 
"It's taking longer than I expected", - "I want to try a smaller time limit", and "the user might want a - different formulation" are **not** cancel reasons — let the current - `time_limit` fire, then iterate on the result. Cancelling and - resubmitting "with a smaller time limit" is almost always wrong: - the original solve does not actually stop, so you are now running - two solves on one GPU. -- If you must report a partial result, label it explicitly: - "intermediate incumbent, solver still running" — and keep polling - while you compose the message; do not pause for confirmation, and do - not cancel after sending. - -Antipatterns to avoid (both from real sessions): - -**Forced-choice pause** — - -> Still running, but it's on track. -> If you want the fastest path, I can stop at the current feasible -> solution and give you that immediately. If you want the -> better-quality schedule, I should let cuOpt finish. -> Recommendation: let it finish. - -This is a forced-choice pause. The agent already knows the right -answer ("let it finish"); it should just keep polling and return when -the solve actually completes. The user does not need to confirm a -recommendation the agent itself already made. - -**Cancel-and-retry** — - -> The MILP has been running for about 3 minutes and the gRPC call -> hasn't returned. I'll cancel this attempt and re-submit with a -> shorter `time_limit` and a simpler formulation so we get *something* -> back quickly. - -This is wrong on three counts: (1) the original solve does not -actually stop when the local Python process is killed — the -server-side GPU keeps working until *its* `time_limit` fires; -(2) the original `job_id` is now unrecoverable, so any improving -incumbent the server eventually produces is lost; (3) the "retry" -queues a second concurrent solve on the same GPU, racing the -unobserved first one. 
The correct action is to keep polling, let the -configured `time_limit` decide when to stop, and report whatever -status the solver returns. - -## Script execution hygiene - -For any solver script longer than a one-liner, write it to a file first -and run that file. Inline heredocs and `python3 -c "..."` strings interact -badly with the `tool_call → exec → shell → Python` quoting chain — quotes -collapse across layer boundaries, and each broken inline script costs a -full sandbox round-trip before the failure is even visible. - -**A shell/script packaging failure is never evidence that cuOpt failed -and never a reason to pivot to backtracking, `ortools`, or hand search.** -If your first cuOpt attempt dies with `File name too long`, `SyntaxError`, -`source: not found`, or a mangled heredoc, fix the execution path and -retry — starting with the smoke test if you haven't passed Gate 3 yet. - -Recommended pattern: - -```bash -cat > /sandbox/solve.py <<'PY' -# … solver code … -PY -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - export CUOPT_REMOTE_HOST=host.openshell.internal && \ - export CUOPT_REMOTE_PORT=5001 && \ - python3 /sandbox/solve.py' +from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings ``` -Use `bash -lc` (not bare `sh`) for any command that calls `source`; the -default shell behind `tool_call exec` can be `dash`, which doesn't have -`source`. The same applies to anything that relies on bash-only syntax -(arrays, `[[ ... ]]`, `<<<`, etc.). - -Failure symptoms that mean script construction is broken — **not** cuOpt. -If you see any of these, stop debugging the solver and switch to the -file pattern above. **Do not abandon the cuOpt path.** - -- `source: not found` → wrap with `bash -lc '...'`. 
-- `File name too long` → heredoc/command string blew past shell limits; - write the script to `/sandbox/solve.py` with `write`/`edit` and run - that file instead. -- `SyntaxError` on a Python line containing an unquoted URL, path, or - shell metacharacter → quoting collapsed somewhere across the layers. -- `NameError` on a token that should obviously be a string literal - (e.g. `Path(/sandbox)` missing the quotes around `/sandbox`) → same - root cause; the outer layer ate your Python quotes. - -If you see `STATUS None` / `OBJECTIVE None` from a solve that otherwise -ran to completion, that's a **different** failure mode — a response-shape -mismatch in your parser. Open the matching cookbook entry under -`/sandbox/.openclaw/skills/cuopt-server-api-python/assets/` and copy its -extraction code rather than extrapolating from a different problem class: +**Interfaces:** LP/MILP/QP → gRPC `:5001` + `CUOPT_REMOTE_*`; routing → REST +`:5000`. See `references/interfaces.md`, `references/routing-rest-only.md`. -| Problem class | Cookbook entry | Response shape | -|---|---|---| -| LP | `lp_basic/client.py` | `result['response'].get('primal_solution')` — direct | -| MILP | `milp_basic/client.py` | `result['response'].get('primal_solution')` — direct | -| Routing (VRP/TSP/PDP) | `vrp_*/client.py` | `result['response']['solver_response']['status']` — nested under `solver_response` | - -The LP/MILP and routing shapes are different. Do not assume one based on -having read the other. - -## Quick connectivity smoke tests +## Reference index -**Gate 3 — mandatory before any real LP/MILP/QP model.** Requires Gate 2 -env vars (`cuopt-remote-env`). 
Run the **pre-installed** scripts at -`/sandbox/` — do not rewrite them (correct imports are already inside): - -| Script | Use | +| Topic | File | |---|---| -| `smoke_lp.py` | Gate 3 for all gRPC LP/MILP/QP work | -| `smoke_milp.py` | Extra check for scheduling / assignment (INTEGER path) | -| `smoke_vrp.py` | Routing only — REST, no `CUOPT_REMOTE_*` | - -**LP (Gate 3):** - -```bash -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ - export CUOPT_REMOTE_HOST=host.openshell.internal && \ - export CUOPT_REMOTE_PORT=5001 && \ - python3 /sandbox/smoke_lp.py' -``` - -Expected: `Using remote GPU backend`, then `status=Optimal objective=10.0 …`. - -**MILP (scheduling tasks):** same env vars, `python3 /sandbox/smoke_milp.py`. - -**VRP:** `python3 /sandbox/smoke_vrp.py` — expects `status=0 solution_cost=…`. - -Write **real models** to `/sandbox/solve.py`; use the smoke scripts only -for connectivity checks. - -If this fails, do not move on to a real problem — diagnose using the -smoke-outcome table in "Four gates before modeling" and `cuopt-remote-env`. -Do **not** pivot to heuristic search or declare cuOpt blocked unless -`Using remote GPU backend` was already present in the failing run. - -## Troubleshooting - -| Symptom | Cause | Fix | -|---------|-------|-----| -| `cudaErrorInsufficientDriver` without `Using remote GPU backend` | Accidentally invoked local solve instead of remote service | Set `CUOPT_REMOTE_HOST=host.openshell.internal` and `CUOPT_REMOTE_PORT=5001` before solving; use `bash -lc` | -| `Using remote GPU backend` then `cudaErrorNoDevice` / `Remote LP solve failed` | Client path OK; host cuOpt gRPC service has no visible GPU | Operator fixes host GPU / container runtime. 
Do **not** fall back to heuristics — report blocker and stop | -| `from cuopt import routing` fails with CUDA / RMM init error | There is no GPU in this sandbox; routing has no remote-aware Python wrapper | Use REST instead: see "Vehicle routing (VRP, TSP, PDP) — REST only in this sandbox" above and `cuopt-server-api-python`'s `assets/vrp_*/` cookbook. Do **not** fall back to brute force or non-cuOpt methods | -| `403 Forbidden` | Wrong address or sandbox policy missing port | Use `host.openshell.internal`, not `localhost`. If address is correct, ask operator to run `nemoclaw_cuopt_setup.sh apply-policy` | -| `Connection refused` on `:5000` | REST service not running or host firewall blocking the port | Check if REST is needed; gRPC alone (5001) is sufficient for LP/MILP. If REST is needed, ask operator to start it | -| `available: none` from `probe_cuopt.py` | No cuOpt service running on host, ports not in sandbox policy, or host firewall | Ask operator to start a cuOpt server (`SETUP.md` > Starting the cuOpt server) and re-run `nemoclaw_cuopt_setup.sh apply-policy`; verify host firewall opens 5000 / 5001 | -| Connection timeout / hang | Server not running or host firewall blocking Docker | Ask operator to verify from host: `ss -tlnp \| grep 500` | -| Timeout through `10.200.0.1:3128` | Sandbox proxy cannot reach the destination | Ask operator to verify sandbox network policy includes the cuOpt ports | -| `ModuleNotFoundError` | Venv not activated — common in non-login shells (`bash -c '…'`) because `.bash_profile` only fires for login shells | Wrap the call in `bash -lc '…'` (preferred) or `source /sandbox/.openclaw-data/cuopt/bin/activate` before the python invocation | -| `ModuleNotFoundError: No module named 'cuopt.milp'` or `from cuopt import milp` fails | **Wrong import path** — MILP is not a separate package | Use `from cuopt.linear_programming.problem import Problem, INTEGER` — see `cuopt-python-api`; run its verify one-liner before pivoting | -| No `Using 
remote GPU backend` in output | Remote env vars not set or not picked up | Ensure `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` are exported before the Python process starts | +| Activation / skill order | `references/activation.md` | +| Intent / paraphrases | `references/intent-and-triggers.md` | +| Gates / common mistakes | `references/gates-and-first-actions.md` | +| Env vars + smoke | `references/remote-env-and-smoke.md` | +| Python imports | `references/python-imports.md` | +| gRPC vs REST | `references/interfaces.md` | +| Routing REST | `references/routing-rest-only.md` | +| Paths + probe | `references/environment-and-networking.md` | +| Long-running jobs | `references/long-running-jobs.md` | +| Troubleshooting | `references/troubleshooting.md` | + +## Orchestration skills (local) + +After gates: `optimization-from-data-orchestrator` → `optimization-intent-router` +→ `tabular-optimization-ingestion` → `cuopt-model-mapper` (and +`optimization-mode-router` when replay/audit signals appear). + +## Vendored upstream skills + +Installed under `/sandbox/.openclaw/skills/` by `install-skill`: +`numerical-optimization-formulation`, `cuopt-numerical-optimization-api-python`, +`routing-formulation`, `cuopt-routing-api-python`, `cuopt-server-api-python`, +`cuopt-user-rules`, etc. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md new file mode 100644 index 0000000..593d4af --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md @@ -0,0 +1,33 @@ +# Skill activation and routing (NemoClaw sandbox) + +OpenClaw matches skills from **`name`** and frontmatter **`description`** +in `<available_skills>`. Behavioral rules live here and in sibling skills +— not stuffed into `description`. 
+ +## Skill order for CSV upload + plan request + +When the user uploads tabular files and asks for a schedule, assignment, +roster, allocation, or route (any wording): + +1. **`optimization-from-data-orchestrator`** — workflow sequence +2. **`cuopt-sandbox`** — probe/smoke gates, remote env +3. Downstream: intent-router → ingestion → model-mapper → vendored API skills + +Also loaded every session: bundled **`cuopt-setup`** guardrail (absolute paths). + +## cuOpt before custom Python + +The first code path that **emits** a schedule, roster, or assignment must +be cuOpt after probe/smoke — not a greedy, backtracking, or hand-rolled +scheduler. + +Only bypass cuOpt when: + +1. User explicitly wants a manual/heuristic algorithm **instead of** cuOpt +2. Analytics only (summarize/chart — no new plan) +3. Probe shows host cuOpt unreachable — report; no greedy substitute + +## Intent (not exact phrases) + +See `references/intent-and-triggers.md` — constructive task + constraint +data → cuOpt; feasible/minimize/optimal share one solver path. 
diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md new file mode 100644 index 0000000..7d7881e --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md @@ -0,0 +1,54 @@ +# Environment and networking + +## Sandbox layout + +| Path | Purpose | +|---|---| +| `/sandbox/` | Workspace root — scripts, some uploads | +| `/sandbox/.openclaw/workspace/` | **Common** chat/workspace file uploads | +| `/sandbox/workspace/` | Alternative upload target (openshell) | +| `/sandbox/probe_cuopt.py` | Connectivity probe (no env side effects) | +| `/sandbox/smoke_*.py` | Gate 3 smoke tests | +| `/sandbox/.openclaw-data/cuopt/bin/activate` | cuOpt Python venv | +| `/sandbox/.openclaw/skills/` | Installed skills (upstream + local) | + +## Host endpoints + +| Service | Host:port | Notes | +|---|---|---| +| gRPC (LP/MILP/QP) | `host.openshell.internal:5001` | Requires `CUOPT_REMOTE_*` | +| REST (VRP) | `host.openshell.internal:5000` | No remote env vars | + +From inside the sandbox container, `localhost` points at the sandbox — +not the host cuOpt services. + +## Capability check (probe) + +```bash +bash -lc 'python3 /sandbox/probe_cuopt.py' +``` + +Read `available:` — typical values: `grpc`, `rest`, `rest grpc`, or empty +if host services are down. + +| `available:` | Implication | +|---|---| +| `grpc` | LP/MILP/QP path viable after env + smoke | +| `rest` | VRP REST viable | +| `rest grpc` | Both paths | +| (empty / errors) | Report to user; do not invent heuristics as substitute | + +Probe success ≠ ready to solve — still run env + smoke for gRPC. + +## Remote-first workflow + +1. Probe → note `available:` +2. For gRPC: export vars → smoke_lp (→ smoke_milp if scheduling) +3. For routing only: smoke_vrp if `rest` present +4. Read formulation + API skills → build model +5. 
Solve once; poll until terminal — `references/long-running-jobs.md` + +## Path quirks + +Tilde paths (`~/file.csv`) may fail in some tool contexts — prefer +`/sandbox/...` absolute paths. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md new file mode 100644 index 0000000..fb91552 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md @@ -0,0 +1,77 @@ +# Gates and first actions (NemoClaw sandbox) + +**This sandbox exists to solve with cuOpt.** For schedule, assign, route, +minimize, or allocate-under-constraints tasks, cuOpt is turn one — not +turn two after the user corrects you. + +## Before smoke passes (Gate 3) + +| Ready now | Wait until cuOpt smoke succeeds | +|---|---| +| `probe_cuopt.py` | Timed assignment output (schedule, roster, shift plan) | +| Set `CUOPT_REMOTE_*`, run smoke scripts | Greedy / heuristic / backtracking code | +| Inspect uploaded CSVs for **columns and constraints** | "Draft plan while cuOpt sets up" | +| Ask **one** blocking clarification | `ortools`, `pulp`, hand-rolled search | +| Write `/sandbox/solve.py` for cuOpt | Feasibility verdict without cuOpt status | + +Inspecting uploaded data for structure is fine; emit a completed assignment +plan only after cuOpt returns a solver status. + +## Mandatory order (every optimization task) + +1. **Probe** — `bash -lc 'python3 /sandbox/probe_cuopt.py'` +2. **Remote env** — export `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT` in + the same shell as Python. See `references/remote-env-and-smoke.md`. +3. **Smoke** — run `/sandbox/smoke_lp.py` (+ `smoke_milp.py` for discrete + scheduling MILP). Confirm `Using remote GPU backend`. +4. **Formulation skills** — read vendored `*-formulation` + `cuopt-*-api-*` +5. 
**Build and solve** — real model via cuOpt; report `Problem.Status.name` + +Start cuOpt gates on the first optimization turn — the user does not need +to say "use cuOpt" first. + +## Scheduling and assignment over time + +Uploaded tables + language like **"build me a schedule"**, "assign shifts", +"fill time slots", or "minimize conflicts/cost under capacity" → **MILP +via cuOpt gRPC**, not a custom Python scheduler. + +The user **does not** need to say minimize, optimal, or best. Feasibility +under capacity, unavailability, and no-double-booking rules is expressed +as **hard constraints** in a MILP; cuOpt satisfies them (and can optimize +a secondary objective when you define one). + +Examples: shift/roster planning, timetabling, resource–slot assignment, +league or event scheduling (e.g. games to courts and slots). + +After gates: `numerical-optimization-formulation` + +`cuopt-numerical-optimization-api-python` → cuOpt `Problem` with INTEGER +vars → report status, objective, and assignments. + +## "Feasibility only" / "fastest path" (wrong in this sandbox) + +Agents sometimes skip cuOpt with reasoning like *"no explicit objective"* +or *"greedy is enough for a valid schedule."* That violates this skill. + +| Wrong rationalization | Correct action | +|---|---| +| "User didn't ask to minimize" | Constructive + constraint data → cuOpt anyway | +| "cuOpt is for optimization, not feasibility" | Feasibility = constraints; cuOpt is the solver here | +| "Greedy is faster for a first result" | Run probe → smoke → cuOpt; no greedy first deliverable | +| "I'll optimize later if they want" | First schedule output must come from cuOpt after gates | +| "Data looks simple" | Simple data still gets cuOpt MILP in this sandbox | + +If the secondary objective is unclear, ask one focused question or state +a default (e.g. minimize penalty slacks, balance slot usage) — then solve +with cuOpt. 
+ +## Common mistakes (and the fix) + +| Mistake | Fix | +|---|---| +| Heuristic assignment plan first, cuOpt after user correction | Run gates 1–3 before any assigner code | +| `from cuopt import milp` then pivot to heuristics | Use `references/python-imports.md`; import errors mean fix the path | +| Greedy solver to "explore structure" | Use formulation skills; cuOpt is the feasibility engine | +| "No minimize in prompt → feasibility greedy OK" | Constructive + CSVs → cuOpt; see intent-and-triggers.md | +| "Valid schedule first, cuOpt later" | First assignment output must be cuOpt after gates | +| Orchestration steps treated as permission to skip gates | Ingestion is interpretation only — gates still apply | diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md new file mode 100644 index 0000000..c54a2ce --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md @@ -0,0 +1,71 @@ +# When to use cuOpt (intent, not exact wording) + +Skills match **meaning**, not exact phrases. Do not require the user to +say "minimize", "optimal", or "build a schedule." + +## Sandbox default + +In this environment, **cuOpt is the solver for constructive planning +under constraints** — producing an assignment, schedule, roster, route, +or allocation that satisfies rules from uploaded data. + +Use cuOpt when **both** are true: + +1. **Constructive task** — the user wants you to **produce** a plan + (assign, schedule, route, allocate, slot, place, match, fill a + calendar, line up games/shifts/jobs, etc.) +2. **Constraint-bearing data** — CSVs or tables with capacities, slots, + unavailability, demands, limits, pairing rules, or similar + +If (1) and (2) hold → read **`optimization-from-data-orchestrator`** +and **`cuopt-sandbox`**, run gates, then formulate and solve. Wording +varies; the pattern does not. 
+ +## Language clusters (examples only — not exhaustive) + +Any paraphrase in these families counts: + +| Intent family | Example phrasings (same intent) | +|---|---| +| Schedule / timetable | "build a season schedule", "plan the season", "set up game times", "put these on the calendar", "when should each game happen" | +| Assign / allocate | "assign games to slots", "allocate shifts", "place jobs on machines", "who works when" | +| Route / visit | "plan deliveries", "best routes for trucks", "visit all stops" | +| Optimize explicitly | "minimize cost", "maximize profit", "best plan", "optimal mix" | +| Feasible / valid plan | "a valid schedule", "feasible assignment", "make it work under these rules", "respect all constraints" | + +**Paraphrase rule:** If a reasonable planner would read the request as +"turn this data into a constraint-respecting plan," treat it as cuOpt — +even without optimize/minimize/best. + +## Feasibility, minimize, and optimal (same solver here) + +In this sandbox, these are **not different tiers**: + +| User framing | Meaning | Action | +|---|---|---| +| Feasible / valid / make it work | Hard constraints must hold | MILP/LP/QP/routing with constraints; cuOpt finds a satisfying solution | +| Minimize / maximize / best / optimal | Hard constraints + objective | Same path; add or emphasize objective | +| No objective stated | Constraints only (+ optional default objective) | Model constraints; ask **one** objective question or state a default; still cuOpt | + +**Wrong split:** "feasibility → greedy Python, optimization → cuOpt." +Feasibility under discrete rules **is** a MILP (or routing) problem; +cuOpt handles it. + +## When cuOpt does **not** apply + +Skip cuOpt (read/summarize/analyze only) when the user wants: + +- column summaries, counts, charts, filters +- "what does this data contain?" 
+- explanation of an existing plan they already have +- forecasting or analytics without choosing a new plan + +**Clarifier when unsure:** "Do you want a summary of the data, or a new +plan that satisfies these constraints?" — one question, not a questionnaire. + +## Infrastructure triggers (always this skill) + +Regardless of task wording, also read `cuopt-sandbox` when you see: + +- `ImportError` / wrong cuOpt import path +- `cudaErrorInsufficientDriver` during solve diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md new file mode 100644 index 0000000..abbf8c5 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md @@ -0,0 +1,38 @@ +# gRPC and REST invocation + +## gRPC (LP / MILP / QP) + +Python API with remote backend: + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/solve.py' +``` + +Skill: `cuopt-numerical-optimization-api-python` (vendored upstream). + +MPS files: `cuopt-numerical-optimization-api-cli` or host CLI if exposed. + +## REST (VRP) + +Port 5000, JSON payloads. Skill: `cuopt-server-api-python`. + +Smoke reference: `/sandbox/smoke_vrp.py`. + +## Choosing an interface + +| Problem | Interface | Skill chain | +|---|---|---| +| LP, MILP, QP | gRPC + Python | `numerical-optimization-formulation` → `cuopt-numerical-optimization-api-python` | +| VRP, TSP, PDP | REST | `routing-formulation` → `cuopt-routing-api-python` → `cuopt-server-api-python` | + +Default for MILP scheduling in this sandbox: **gRPC Python** on port 5001. +For MILP, use REST only when the user explicitly wants the server JSON workflow. 
+ +## Evidence to report + +- Probe `available:` line +- Smoke: `Using remote GPU backend` + status +- Solve: `Problem.Status.name`, objective, key assignment vars diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md new file mode 100644 index 0000000..06ca2e4 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md @@ -0,0 +1,47 @@ +# Long-running cuOpt jobs + +**One cuOpt job at a time.** Wait for a terminal status before starting +the next solve. + +## Why + +Each solve holds a GPU slot on the host. Overlapping jobs queue or fail +with confusing errors. The user asked for one optimization — finish it. + +## Rules + +1. **Single in-flight job** — wait for terminal status before another + `Problem.solve()` or REST submit. +2. **Set `time_limit`** — default can be long; cap for interactive work + (e.g. 600–1800 s for MILP). +3. **Poll sequentially** — if async, poll status until terminal; one + solve at a time is enough for comparison. +4. **Report status** — always paste `Problem.Status.name` (or REST + equivalent) and objective when available. + +## Python (gRPC) + +```python +from cuopt.linear_programming.solver_settings import SolverSettings + +settings = SolverSettings() +settings.set_parameter("time_limit", 600) +p.solve(settings) +print(p.Status.name, p.ObjValue) +``` + +If status is non-terminal after `time_limit`, report what you have and +suggest tightening the model or raising the limit — stay on cuOpt. + +## REST / VRP + +Submit one job; poll until completed, failed, or timeout. Wait for that +job to finish before submitting another. + +## When the user wants faster iteration + +- Reduce problem size for a smoke iteration, or tighten `time_limit`, rather + than starting a parallel second job. +- Report best-so-far if the API exposes it. 
+- Keep using cuOpt for the real solve — slowness is a tuning problem, + separate from infrastructure gate failures. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md new file mode 100644 index 0000000..aa9d4b9 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md @@ -0,0 +1,48 @@ +# Python imports (sandbox) + +**Use the canonical import path.** LP, MILP, and QP share one entrypoint +(there is no separate `cuopt.milp` package): + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings +``` + +| Task | Variable type | +|---|---| +| LP | `vtype=CONTINUOUS` | +| MILP (schedule, assign) | `vtype=INTEGER` (binary: `lb=0, ub=1`) | +| QP | same `Problem` + quadratic objective | + +Full examples: vendored skill `cuopt-numerical-optimization-api-python`. + +## Verify before declaring import broken + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 -c "from cuopt.linear_programming.problem import Problem, INTEGER; print(\"api_ok\")"' +``` + +If this prints `api_ok`, the SDK works — adjust your import path and +continue with cuOpt. + +## Scheduling skeleton + +```python +from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +p = Problem("assign") +assign = {} +for e in entities: + for s in slots: + assign[e, s] = p.addVariable(vtype=INTEGER, lb=0, ub=1, name=f"x_{e}_{s}") +# ... constraints, objective ... +settings = SolverSettings() +settings.set_parameter("time_limit", 600) +p.solve(settings) +print(p.Status.name, p.ObjValue) +``` + +Remote env vars must be set in the same shell — see +`references/remote-env-and-smoke.md`. 
diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md new file mode 100644 index 0000000..49178de --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md @@ -0,0 +1,71 @@ +# Remote env vars and smoke tests + +There is **no GPU in this sandbox.** `Problem.solve()` defaults to local +CUDA, which fails with `cudaErrorInsufficientDriver` unless remote env vars +are set **before Python starts**. + +The probe (`probe_cuopt.py`) does **not** set env vars — it only checks +reachability. + +## Mandatory exports (gRPC LP / MILP / QP) + +```bash +export CUOPT_REMOTE_HOST=host.openshell.internal +export CUOPT_REMOTE_PORT=5001 +``` + +| Variable | Use this value | Wrong for gRPC | +|---|---|---| +| `CUOPT_REMOTE_HOST` | `host.openshell.internal` | `localhost`, `127.0.0.1` | +| `CUOPT_REMOTE_PORT` | `5001` | `5000` (REST) | + +Inline in every solve command (exports do not carry across separate +`tool_call exec` invocations): + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_lp.py' +``` + +Use `bash -lc` so the venv activates. + +## Pre-installed smoke scripts (run as-is for gate checks) + +| Script | When | +|---|---| +| `/sandbox/smoke_lp.py` | Gate 3 — all gRPC LP/MILP/QP | +| `/sandbox/smoke_milp.py` | Extra check for scheduling / INTEGER | +| `/sandbox/smoke_vrp.py` | Routing only — REST, no `CUOPT_REMOTE_*` | + +Expected LP/MILP: log line `Using remote GPU backend`, then +`status=Optimal …`. 
+ +## Gate checklist + +| Step | Evidence | +|---|---| +| Probe returned `grpc` or `rest grpc` | `available:` from probe | +| Solver script in a **file** | `/sandbox/solve.py` or smoke script | +| `bash -lc` + venv + `export CUOPT_REMOTE_*` | in same command as `python3` | +| Log contains `Using remote GPU backend` | paste the line | +| Smoke returned terminal status | e.g. `status=Optimal` | + +## Error → action + +| Symptom | Meaning | Fix | +|---|---|---| +| `cudaErrorInsufficientDriver` without `Using remote GPU backend` | Local solve — env vars missing | Set `CUOPT_REMOTE_*`, `bash -lc`, retry | +| No remote backend log, no CUDA error | Env vars not in same shell | Inline exports in `bash -lc` | +| `Using remote GPU backend` + `Optimal` | Remote path works | Build real model | +| `Using remote GPU backend` + `cudaErrorNoDevice` | Host GPU broken | Operator action | +| Probe `available: grpc` only | Port reachable | Still need env + smoke | + +## Common mistakes (and the fix) + +| Mistake | Fix | +|---|---| +| Treat `cudaErrorInsufficientDriver` (no remote log) as "cuOpt blocked" | Set `CUOPT_REMOTE_*` in the same `bash -lc` command and retry smoke | +| Plan to "set env vars later" | Export before the first smoke test | +| Use REST env vars for LP/MILP | gRPC uses `CUOPT_REMOTE_*` on port 5001; routing uses REST on 5000 — see `references/routing-rest-only.md` | diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md new file mode 100644 index 0000000..b8d3d22 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md @@ -0,0 +1,37 @@ +# Routing (REST only in sandbox) + +Vehicle routing (VRP, TSP, PDP) uses the **REST API** on port **5000**, +not gRPC `CUOPT_REMOTE_*`. 
+ +| Interface | Port | Env vars | +|---|---|---| +| LP / MILP / QP (Python) | 5001 gRPC | `CUOPT_REMOTE_HOST`, `CUOPT_REMOTE_PORT` | +| VRP / routing | 5000 REST | none — use REST client | + +Host: `host.openshell.internal` (not `localhost`). + +## Probe + +If `probe_cuopt.py` shows `rest` in `available:`, REST is reachable. +If only `grpc`, skip VRP smoke — LP/MILP may still work. + +## Smoke + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 /sandbox/smoke_vrp.py' +``` + +Uses pre-installed `cuopt_sh_client` patterns; run as-is for gate checks. + +## Skills after gates + +1. `routing-formulation` +2. `cuopt-routing-api-python` +3. `cuopt-server-api-python` (REST payload shape) + +## Defaults + +- Minimal fleet + cost matrix for first solve; expand after status is + terminal. +- One REST job at a time — see `references/long-running-jobs.md`. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md new file mode 100644 index 0000000..478b383 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md @@ -0,0 +1,37 @@ +# Troubleshooting + +## Symptom → fix + +| Symptom | Likely cause | Action | +|---|---|---| +| `cudaErrorInsufficientDriver`, no remote log | Missing `CUOPT_REMOTE_*` | `references/remote-env-and-smoke.md` | +| `ModuleNotFoundError: cuopt.milp` | Wrong import | `references/python-imports.md` | +| Probe empty `available:` | Host cuOpt down | Report to user; retry cuOpt when service is up | +| VRP fails, LP works | Used gRPC for routing | `references/routing-rest-only.md` | +| Second solve hangs / errors | Overlapping jobs | `references/long-running-jobs.md` | +| `~` path not found | Sandbox path resolution | Use `/sandbox/...` | + +## Script hygiene + +- Put solve logic in `/sandbox/solve.py` (or named script), not inline + one-liners for real models. 
+- Use `bash -lc` with venv + exports in one command. +- For gate checks, run pre-installed `/sandbox/smoke_*.py` unchanged. + +## When cuOpt returns infeasible / timeout + +Report the solver status honestly. You may suggest model relaxations or +clarifying questions — keep the answer grounded in cuOpt status. + +## Operator vs agent + +| Agent fixes | Operator / host | +|---|---| +| Env vars, imports, script layout | GPU driver on host | +| Wrong port (5000 vs 5001) | cuOpt services not running | +| Formulation errors | Network to `host.openshell.internal` | + +## Guardrail skill + +Bundled `cuopt-setup` in `nemoclaw_cuopt_setup.sh` points here for +session start — read this skill (`cuopt-sandbox`) for full gate order. diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md index 4bba337..7c24692 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md @@ -1,52 +1,53 @@ --- name: optimization-from-data-orchestrator -summary: Coordinate the fast-path workflow for turning uploaded data and a natural-language question into the right optimization interpretation, clarification, cuOpt solve, and user-facing answer. -description: Use when a user uploads or provides data and asks a question that may be answered by optimization. This skill sequences optimization-intent-router, optimization-mode-router, tabular-optimization-ingestion, formulation skills, and cuOpt model-building skills. +version: "26.06.01" +description: Coordinate uploaded data plus a natural-language question into interpretation, clarification, cuOpt solve, and a user-facing answer. 
+license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- # Optimization From Data Orchestrator -Top-level coordinator for the fast path when a user provides data and asks a question that may be optimization. Sequences the supporting skills so the agent does not jump straight from uploaded data into a solver call. +Top-level coordinator when a user provides tabular data and wants a +constructive plan (schedule, assign, allocate, route — any wording). + +**NemoClaw:** read `cuopt-sandbox/references/activation.md` for skill +order and cuOpt-before-heuristic rules. ## When to use -All three must hold: -- the user has provided or is expected to provide data -- the question may be asking for the best / optimal / minimum / maximum decision under constraints -- the request is not yet so fully specified that you can call the solver directly +**Both** must hold: + +- tabular data provided or expected (CSV, etc.) +- user wants a **plan from that data** (any phrasing; minimize/optimal not required) -Skip this skill when the user is clearly asking for non-optimization analytics, the optimization problem is already fully specified mathematically, or the user has already chosen a dedicated replayable/auditable path. +Skip for **analytics-only** requests (summarize, chart, filter), problems that are +already fully specified mathematically, or when the user has explicitly chosen the replayable/auditable path. ## Sequence -**Step 0 (NemoClaw sandbox only — do not skip):** `cuopt-first` → -probe → `cuopt-remote-env` → smoke test. **No user-visible schedule, -assignment, heuristic plan, or feasibility verdict before step 0 -completes.** Data files may be read for column/constraint discovery only. +**Step 0 (NemoClaw — do not skip):** See `cuopt-sandbox` — probe → env → +smoke. No schedule/heuristic output before smoke passes. -Run these in order, but skip any step already settled from context. 
Default to fast mode; surface replayable/auditable mode only on a real signal (reruns, audit, export, recurring planning). +1. **`optimization-intent-router`** — optimization family (LP/MILP/QP/routing) +2. **`optimization-mode-router`** — only if replay/audit/export signals +3. **`tabular-optimization-ingestion`** — table roles (interpretation only) +4. **`cuopt-model-mapper`** — clarify if needed, map to cuOpt, solve -1. **`optimization-intent-router`** — decide whether this is optimization at all and which family (LP / MILP / QP / routing). If non-optimization, stop the optimization flow. -2. **`optimization-mode-router`** — *only if* there is a signal that replayability, audit, export, or recurring runs may matter. Otherwise stay in fast mode silently. -3. **`tabular-optimization-ingestion`** — identify row grain and table roles, infer likely objective and constraint fields, refine the family classification if the data clearly supports a different one, and surface any blockers. **Output interpretation only — not a schedule or heuristic solve.** -4. **`cuopt-model-mapper`** — ask at most the final blocking clarification, then map directly into cuOpt and solve. +Handoffs after step 4: -Family-specific handoffs after step 4: -- LP / MILP / QP → `numerical-optimization-formulation` then `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) -- Routing → `routing-formulation` then `cuopt-routing-api-python` +- LP / MILP / QP → `numerical-optimization-formulation` → `cuopt-numerical-optimization-api-python` +- Routing → `routing-formulation` → `cuopt-routing-api-python` ## Guardrails -- **In NemoClaw sandbox:** run `cuopt-first` step 0 (probe → env → smoke) - before any optimization **answer** — ingestion steps do not authorize - heuristic schedules or feasibility substitutes. 
-- Do not skip intent classification and jump directly to cuOpt from raw data - **without** step 0 infrastructure gates — but step 0 is fast and mandatory. -- Do not ask a long questionnaire before inspecting the uploaded data. -- Do not trigger replayable/auditable mode by default — only when the user signals reuse, audit, export, or recurring runs. -- Do not let ingestion become solver construction; the steps stay distinct. -- Do not use cuOpt for descriptive analytics tasks. -- **Do not produce a heuristic/greedy/backtracking schedule during steps - 1–3** as a stand-in for cuOpt; the first solver that emits assignments - must be cuOpt after step 0 passes. +- First solver that emits assignments/schedules must be **cuOpt** after step 0 +- Ingestion steps do not authorize heuristic or greedy stand-ins +- Do not skip intent classification; do not use cuOpt for pure analytics +- One focused clarification beats a long questionnaire diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md index bebd430..f137e58 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md @@ -1,7 +1,14 @@ --- name: optimization-intent-router -summary: Detect when a user question should be treated as an optimization problem and route it toward LP, MILP, QP, routing, or non-optimization handling. -description: Use when a user provides data and asks a natural-language business or planning question that may require optimization rather than simple analytics. +version: "26.06.01" +description: Classify whether a data-backed request is LP, MILP, QP, routing, or non-optimization analytics. 
+license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- @@ -35,7 +42,29 @@ Your job is to recognize when the request is really asking for an optimization m ## Core rule -Route to optimization only when the user is asking for the **best / optimal / minimum / maximum / least-cost / highest-profit** plan, allocation, assignment, schedule, route, or mix **subject to constraints or tradeoffs**. +Route to optimization when the user wants a **plan produced under +constraints** — assignment, schedule, allocation, route, or mix of +resources — whether they say **feasible/valid**, **minimize/maximize**, or +**best/optimal**. + +Use **structure**, not exact keywords: + +| Signal | Route to cuOpt? | +|---|---| +| Constructive + constraint tables (uploaded or referenced) | Yes — family TBD (MILP/LP/QP/routing) | +| Minimize / maximize / best / optimal / least cost | Yes | +| Feasible / valid / make it work / respect rules (constructive) | Yes — feasibility = hard constraints | +| Summarize / chart / filter / explain data only | No | + +**NemoClaw sandbox:** When constructive + constraint data → +**`cuopt-sandbox` gates immediately** before any schedule, roster, +assignment, or route output — regardless of whether the user said +minimize or optimal. Do not use a greedy Python builder as a first +deliverable. + +Paraphrase examples (non-exhaustive): "plan the season", "set game times", +"fill the calendar", "assign shifts", "put jobs on machines", "build a +schedule" — same intent. See `cuopt-sandbox/references/intent-and-triggers.md`. If the user is only asking for: - descriptive statistics @@ -52,6 +81,8 @@ then do **not** force the request into optimization. 
Strong signals: - words like **optimize**, **optimal**, **best**, **maximize**, **minimize** +- **build / create / assign** a schedule, roster, shift plan, or calendar + from structured data (sandbox: treat as MILP even without minimize) - resource tradeoffs: capacity, budget, time, labor, inventory, demand, hours, materials - decision language: how much, how many, which, assign, allocate, route, schedule, choose - explicit constraints: at most, at least, must, cannot exceed, within budget, limited by @@ -63,7 +94,7 @@ Weaker but meaningful signals: - "How should we allocate this?" - "How can we reduce cost while meeting demand?" -When weaker signals appear, inspect whether there are real constraints and decisions. If yes, treat it as optimization — and in the NemoClaw sandbox, **`cuopt-first` applies immediately** (probe before any schedule/heuristic output). +When weaker signals appear, inspect whether there are real constraints and decisions. If yes, treat it as optimization — and in the NemoClaw sandbox, **`cuopt-sandbox` gates apply immediately** (probe before any schedule/heuristic output). ## Route classification @@ -98,6 +129,8 @@ Common examples: - workforce scheduling with headcounts - assignment with binary decisions - product counts that must be whole +- **slot/resource scheduling** (games, shifts, appointments → time slots + and resources) — including when the user only says "build a schedule" ### Route to QP Use QP when: diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md index 9264f27..df33c89 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md @@ -1,7 +1,15 @@ --- name: optimization-mode-router -summary: Decide whether to default to fast direct-to-cuOpt mode or ask whether the user wants replayable/auditable mode for reruns, review, export, or audit. 
-description: Use when a user asks a question that may be answered by solving an optimization problem from uploaded or provided data, and you need to decide whether to proceed directly to cuOpt or preserve a structured reusable model artifact. +version: "26.06.01" +description: Choose fast direct-to-cuOpt solve versus replayable or auditable model artifact mode. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration +origin: skill-evolution --- # Optimization Mode Router @@ -32,8 +40,14 @@ Read this skill when all of the following are true: ## Default behavior - Default to **Fast mode**. +- Default to **direct cuOpt solve** for one-off requests from uploaded CSVs + (schedule, assignment, allocation, routing) — proceed to + `cuopt-model-mapper` without asking fast vs replayable unless the user + signals audit/export/rerun. - Do **not** ask about replayability/auditability unless there is a real signal that it matters. - Avoid turning a straightforward optimization request into a heavy upfront questionnaire. +- **NemoClaw sandbox:** Fast mode means cuOpt after `cuopt-sandbox` gates — + never a custom greedy/heuristic builder as the solve path. 
## Two modes @@ -120,19 +134,19 @@ After selecting a mode, hand off based on problem type: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) - - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) before any gRPC Python solve - If the request is QP: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` - - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) before any gRPC Python solve - If the request is routing (VRP / TSP / PDP): - use `routing-formulation` - then use `cuopt-routing-api-python` - - in sandbox contexts, follow `cuopt-sandbox` then `cuopt-remote-env` + - in sandbox contexts, follow `cuopt-sandbox` gates before any routing solve; routing uses REST on port 5000, not gRPC `CUOPT_REMOTE_*` (see `cuopt-sandbox/references/routing-rest-only.md`) - If the user is asking about server usage or deployment rather than solving a model directly: diff --git a/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md index 82d62cc..b285bec 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md @@ -1,7 +1,14 @@ --- name: tabular-optimization-ingestion -summary: Inspect uploaded or provided tabular data, infer likely optimization structure, and identify the smallest set of clarifications needed before building a cuOpt model. -description: Use when a user provides CSV, Excel, JSON-like tables, or similar structured data and asks a question that may become an LP, MILP, QP, or routing problem. 
+version: "26.06.01" +description: Infer optimization structure from uploaded tables and identify minimal clarifications before cuOpt modeling. 
+license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- @@ -14,7 +21,7 @@ The purpose of this skill is to bridge the gap between messy uploaded data and s This skill does **not** solve the optimization problem itself. It inspects the data, infers likely modeling roles, and identifies what still needs clarification. **It does not authorize heuristic, greedy, or backtracking schedules as -answers.** In the NemoClaw sandbox, read `cuopt-first`: the first solver +answers.** In the NemoClaw sandbox, read `cuopt-sandbox`: the first solver that produces assignments or a schedule must be cuOpt after probe → env → smoke gates pass. Ingestion output is a modeling interpretation (entities, objective fields, constraints) — never a completed plan. @@ -237,7 +244,22 @@ Likely interpretation: - travel table defines movement cost/time - likely problem family = routing -### Example 3: historical transaction table +### Example 3: time-slot / resource assignment (scheduling MILP) + +Files include patterns such as: +- `games.csv` or `jobs.csv` — items to place (events, tasks, orders) +- `time_slots.csv` or `shifts.csv` — when placement can occur +- `courts.csv`, `machines.csv`, or `rooms.csv` — resources +- `teams.csv` or `workers.csv` — entities tied to shared agents (coaches, operators) +- `*_unavailability.csv` — blocked (resource, slot) or (agent, slot) pairs + +Likely interpretation: +- decision = assign each item to a (slot, resource) or similar binary/integer placement +- hard constraints = no double-booking, unavailability, capacity, one game per team per slot +- likely problem family = **MILP** (even if user only says "build a schedule" or "valid plan") +- **NemoClaw:** read `optimization-from-data-orchestrator` + `cuopt-sandbox` before any custom scheduler code + +### Example 4: historical transaction table File includes: - `sales_history.csv` with `order_id`, `date`, `region`, `revenue`, 
`units_sold` diff --git a/cuopt_on_nemoclaw/smoke_milp.py b/cuopt_on_nemoclaw/smoke_milp.py index 859ad6c..1c85b68 100644 --- a/cuopt_on_nemoclaw/smoke_milp.py +++ b/cuopt_on_nemoclaw/smoke_milp.py @@ -26,7 +26,7 @@ def _require_remote_env() -> None: if not environ.get("CUOPT_REMOTE_HOST") or not environ.get("CUOPT_REMOTE_PORT"): print( "error: export CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT before " - "running (see /sandbox/.openclaw/skills/cuopt-remote-env/SKILL.md)", + "running (see /sandbox/.openclaw/skills/cuopt-sandbox/references/remote-env-and-smoke.md)", file=sys.stderr, ) sys.exit(1)