diff --git a/cuopt_on_nemoclaw/SETUP.md b/cuopt_on_nemoclaw/SETUP.md index 4a004d9..f0ff795 100644 --- a/cuopt_on_nemoclaw/SETUP.md +++ b/cuopt_on_nemoclaw/SETUP.md @@ -44,12 +44,25 @@ it can be any existing sandbox. ## What the setup script does -- **add** — Add cuOpt to an existing sandbox: apply-policy → install → install-skill → test +- **add** — Add cuOpt to an existing sandbox: apply-policy → install → install-skill → `test --smoke` - **apply-policy** — Merges cuOpt network rules into a running sandbox's policy - **install** — Creates a Python venv (`/sandbox/.openclaw-data/cuopt`), installs `cuopt_sh_client`, `cuopt-cu13`, and `grpcio`, and stamps the cuOpt venv activation file (`/sandbox/.bash_profile`) - **install-activation** — Re-stamps `/sandbox/.bash_profile` without reinstalling the venv (use after changing `CUOPT_HOST`, `CUOPT_PORT`, or `CUOPT_VENV`) - **install-skill** — Uploads skill files from `openclaw-skills/` into the sandbox, then vendors the upstream cuOpt skills (numerical optimization for LP/MILP/QP, routing, server, formulation, user-rules, skill-evolution) from `github.com/NVIDIA/cuopt/tree/release/26.06/skills` so the agent can read them without outbound HTTPS. Override the upstream ref via `CUOPT_SKILLS_REF` (default `release/26.06`); narrow what gets installed via `CUOPT_SKILLS_SKIP` (comma-separated globs, default `cuopt-install,*developer*,*-api-c`). Finally, the step writes a fresh `skills.entries.cuopt-sandbox.config.lastInstallAt` timestamp into `~/.openclaw/openclaw.json` so the gateway's config-reload watcher invalidates the cached `` snapshot — without this, skills uploaded after the agent's first run never appear in the prompt (see [How `` is cached](#how-available_skills-is-cached) below). -- **test** — Smoke tests PyPI access and cuOpt server connectivity from inside the sandbox +- **test** — Connectivity probe from inside the sandbox (`probe_cuopt.py` + pip check). Does **not** run solve smokes. 
+- **test --smoke** — Probe plus end-to-end LP/MILP/VRP solves via `/sandbox/smoke_*.py` when `install-skill` has uploaded them. LP/MILP run only if gRPC is reachable; VRP only if REST is reachable (per the probe's `available:` line). + +### Version compatibility + +`nemoclaw_cuopt_setup.sh` was last verified against **nemoclaw v0.0.64** and **openshell v0.0.44**. If your installed versions differ, the script prints a non-fatal banner at startup. Silence it with `NEMOCLAW_VERSION_CHECK=0`. + +The public NemoClaw installer defaults to the `lkg` ref, which moves and may not match the tested release. To pin the tested release explicitly (the variable must be set on `bash`, which runs the installer, not on `curl`): + +```bash +curl -fsSL https://www.nvidia.com/nemoclaw.sh | \ + NEMOCLAW_INSTALL_TAG=v0.0.64 bash -s -- \ + --non-interactive --yes-i-accept-third-party-software +``` ## Getting cuOpt data into the sandbox @@ -116,7 +129,8 @@ Each subdirectory containing a `SKILL.md` will be uploaded. Then re-run: | What | Path | |------|------| | Setup script | `cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh` | -| Endpoint probe | `cuopt_on_nemoclaw/probe_cuopt.py` (uploaded to `/sandbox/probe_cuopt.py`; reports both REST and gRPC reachability in one call) | +| Endpoint probe | `cuopt_on_nemoclaw/probe_cuopt.py` → `/sandbox/probe_cuopt.py` (REST + gRPC reachability) | +| Smoke tests | `smoke_lp.py`, `smoke_milp.py`, `smoke_vrp.py` → `/sandbox/` (pre-built; agent runs as-is — see skills) | | Skill source files | `cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md` | | cuOpt venv in sandbox | `/sandbox/.openclaw-data/cuopt/` | diff --git a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh index 851624a..1e0e07d 100755 --- a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh +++ b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh @@ -17,7 +17,7 @@ # NemoClaw cuOpt sandbox setup # # Subcommands: -# add [NAME] Add cuOpt to a sandbox: policy + install + skill + test. +# add [NAME] Add cuOpt to a sandbox: policy + install + skill + test --smoke. 
# apply-policy [NAME] Add cuOpt network policy to a running sandbox. # install [NAME] Install cuOpt packages in the sandbox venv and # stamp the activation file (see install-activation). @@ -27,7 +27,9 @@ # install-activation [NAME] Re-stamp the cuOpt venv activation file # (/sandbox/.bash_profile). Use after changing # CUOPT_HOST, CUOPT_PORT, or CUOPT_VENV. -# install-skill [NAME] Upload the cuOpt skill into the sandbox. +# install-skill [NAME] Upload the cuOpt skill into the sandbox and refresh +# the tool-search file-access notes in workspace TOOLS.md +# (skipped when tools.toolSearch is disabled). # cache-wheels [NAME] Snapshot a sandbox's already-installed wheels # into $CUOPT_WHEEL_CACHE. NAME must already have # cuOpt installed (run `add` or `install` against @@ -35,7 +37,8 @@ # `add` runs against any sandbox reuse the cache # and install offline. # clear-wheel-cache Remove $CUOPT_WHEEL_CACHE. -# test [NAME] Smoke-test PyPI + cuOpt server reachability. +# test [NAME] Probe REST/gRPC reachability from the sandbox (default). +# test --smoke [NAME] Probe + LP/MILP/VRP solve smokes when installed and reachable. # # Flags: # -y, --yes Skip confirmation prompts (for CI/CD). @@ -89,7 +92,8 @@ # nemoclaw delete cuopt && nemoclaw create cuopt # Recreate sandbox # ./nemoclaw_cuopt_setup.sh add cuopt # Now installs offline (fast) # ./nemoclaw_cuopt_setup.sh apply-policy bob # Just fix network policy -# ./nemoclaw_cuopt_setup.sh test cuopt # Re-run smoke test +# ./nemoclaw_cuopt_setup.sh test cuopt # Connectivity probe only +# ./nemoclaw_cuopt_setup.sh test --smoke cuopt # Probe + solve smokes # # Version compatibility: # The TESTED_NEMOCLAW_VERSION / TESTED_OPENSHELL_VERSION constants below @@ -98,9 +102,11 @@ # the installed tools differ (non-fatal). To install the exact tested # NemoClaw build: # -# NEMOCLAW_INSTALL_TAG=v \ +# export NEMOCLAW_INSTALL_TAG=v0.0.64 # curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash # +# The public installer defaults to the `lkg` ref, which moves. 
+# # Silence the banner with NEMOCLAW_VERSION_CHECK=0. # ============================================================================= set -euo pipefail @@ -172,13 +178,16 @@ CUOPT_TEST_SANDBOX_GRPC="" # a newer release end-to-end. Used by check_versions() to surface a # non-fatal warning banner if the installed tools drift ahead. # -# To install the exact tested NemoClaw build: -# NEMOCLAW_INSTALL_TAG=v${TESTED_NEMOCLAW_VERSION} \ +# To install the exact tested NemoClaw build (openshell is bundled with the +# NemoClaw release this script was verified against): +# export NEMOCLAW_INSTALL_TAG=v0.0.64 # curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash # +# The public installer defaults to the `lkg` ref, which moves. +# # Silence the banner with NEMOCLAW_VERSION_CHECK=0. -TESTED_NEMOCLAW_VERSION="0.0.48" -TESTED_OPENSHELL_VERSION="0.0.39" +TESTED_NEMOCLAW_VERSION="0.0.64" +TESTED_OPENSHELL_VERSION="0.0.44" # ── NemoClaw / OpenShell version compatibility check ───────────── # Non-fatal. Prints a warning banner when the installed tool version @@ -337,6 +346,18 @@ sandbox_exec_root() { docker exec -u root "$container" "$@" } +# sandbox_run_script +# Read a bash script from stdin and run it in the sandbox container. +# Prefer this over piping to `openshell sandbox connect` for batch +# commands — connect echoes the script to the terminal (OpenShell +# 0.0.44+ bracketed-paste / line-echo behavior). +sandbox_run_script() { + local sandbox="$1" + local container + container=$(find_sandbox_container "$sandbox") || return $? + docker exec -i -u sandbox -e HOME=/sandbox "$container" bash +} + # upload_wheel_cache # Copy the CONTENTS of into , flat # (no wrapping directory). 
Replaces `openshell sandbox upload`, which @@ -1087,7 +1108,7 @@ cmd_install() { "exit" ) - printf '%s\n' "${commands[@]}" | openshell sandbox connect "$sandbox" + printf '%s\n' "${commands[@]}" | sandbox_run_script "$sandbox" local cuopt_ip="host.openshell.internal" [[ -n "$CUOPT_HOST" ]] && cuopt_ip="$CUOPT_HOST" @@ -1244,8 +1265,15 @@ INNER_EOF } # ── test ────────────────────────────────────────────────────────── +# Modes: +# probe (default) — pip check + probe_cuopt.py only +# smoke — probe + LP/MILP/VRP solve scripts when installed and reachable cmd_test() { local sandbox="${1:-$CUOPT_SANDBOX}" + local mode="${2:-probe}" + local run_solves=false + [[ "$mode" == smoke || "$mode" == --smoke ]] && run_solves=true + local venv="/sandbox/${CUOPT_VENV}" local grpc_host="host.openshell.internal" local cuopt_url="http://host.openshell.internal:${CUOPT_PORT}" @@ -1281,14 +1309,18 @@ cmd_test() { fi echo "Host services: REST=$(if $has_rest; then echo UP; else echo DOWN; fi) gRPC=$(if $has_grpc; then echo UP; else echo DOWN; fi)" - echo "Smoke-testing sandbox: $sandbox (venv: $venv) ..." - - # probe_cuopt.py reports REST and gRPC reachability in one call. We pass - # CUOPT_SERVER_HOST/PORT (REST) and CUOPT_REMOTE_HOST/PORT (gRPC) so the - # probe checks the same endpoints we just verified are listening on the - # host. The probe's exit code is non-zero only when *both* are unreachable - # from inside the sandbox — `|| true` prevents that from breaking the - # heredoc's overall exit status. + if $run_solves; then + echo "Testing sandbox: $sandbox (venv: $venv) — probe + solve smokes ..." + else + echo "Testing sandbox: $sandbox (venv: $venv) — connectivity probe only ..." + fi + + local solves_flag=false + $run_solves && solves_flag=true + + # probe_cuopt.py reports REST and gRPC reachability. 
Solve smokes run only + # In test --smoke mode, only when scripts exist, and only when the probe's + # `available:` line shows the matching service (REST for VRP, gRPC for LP/MILP). local sandbox_cmds=" source ${venv}/bin/activate echo '--- pip check ---' @@ -1296,20 +1328,68 @@ python3 -c \"import cuopt_sh_client; print('cuopt_sh_client', cuopt_sh_client.__ echo '' echo '--- cuOpt endpoint probe (REST=${cuopt_url}, gRPC=${grpc_host}:${CUOPT_GRPC_PORT}) ---' -CUOPT_SERVER_HOST=${grpc_host} CUOPT_SERVER_PORT=${CUOPT_PORT} \\ -CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ -python3 /sandbox/probe_cuopt.py || true +PROBE_OUT=\$(CUOPT_SERVER_HOST=${grpc_host} CUOPT_SERVER_PORT=${CUOPT_PORT} \\ + CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ + python3 /sandbox/probe_cuopt.py 2>&1) || true +echo \"\$PROBE_OUT\" + +if [[ ${solves_flag} == true ]]; then + echo '' + echo '--- cuOpt solve smokes (test --smoke) ---' + GRPC_OK=false + REST_OK=false + echo \"\$PROBE_OUT\" | grep -qE '^available:.*grpc' && GRPC_OK=true + echo \"\$PROBE_OUT\" | grep -qE '^available:.*rest' && REST_OK=true + + if [[ \$GRPC_OK == true ]]; then + if [[ -f /sandbox/smoke_lp.py ]]; then + echo '--- remote LP smoke (smoke_lp.py) ---' + CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ + python3 /sandbox/smoke_lp.py || true + echo '' + else + echo 'LP smoke skipped (/sandbox/smoke_lp.py missing; run install-skill)' + echo '' + fi + if [[ -f /sandbox/smoke_milp.py ]]; then + echo '--- remote MILP smoke (smoke_milp.py) ---' + CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} \\ + python3 /sandbox/smoke_milp.py || true + echo '' + fi + else + echo 'LP/MILP smokes skipped (gRPC not reachable from sandbox — see probe above)' + echo '' + fi + + if [[ \$REST_OK == true ]]; then + if [[ -f /sandbox/smoke_vrp.py ]]; then + echo '--- REST VRP smoke (smoke_vrp.py) ---' + CUOPT_SERVER_HOST=${grpc_host} 
CUOPT_SERVER_PORT=${CUOPT_PORT} \\ + python3 /sandbox/smoke_vrp.py || true + echo '' + else + echo 'VRP smoke skipped (/sandbox/smoke_vrp.py missing; run install-skill)' + echo '' + fi + else + echo 'VRP smoke skipped (REST not reachable from sandbox — see probe above)' + echo '' + fi +fi -echo '' -exit " # Capture the sandbox output so we can both display it AND parse it for # reachability ('unreachable' literal from probe_cuopt.py). `tee` keeps # the live UX intact; mktemp avoids clobbering anything else in /tmp. local probe_log probe_log="$(mktemp /tmp/cuopt-probe-XXXXXX.log)" - echo "$sandbox_cmds" | openshell sandbox connect "$sandbox" 2>&1 \ - | tee "$probe_log" + if ! printf '%s' "$sandbox_cmds" | sandbox_run_script "$sandbox" 2>&1 \ + | tee "$probe_log"; then + rm -f "$probe_log" + echo "error: sandbox test script failed (is sandbox '${sandbox}' running?)" >&2 + return 1 + fi echo "Test complete." # Detect probe failures per service. Only treat as a failure if the @@ -1423,6 +1503,176 @@ skill_is_skipped() { return 1 } +# Upload a single file to /sandbox/. openshell upload treats DEST +# as a directory; passing a file path creates a wrongly named directory. +upload_sandbox_file() { + local sandbox="$1" + local src="$2" + local base + base="$(basename "$src")" + local dest="/sandbox/${base}" + + if [[ ! -f "$src" ]]; then + echo " warning: ${base} not found at $src — skipping" >&2 + return 1 + fi + + sandbox_exec "$sandbox" rm -rf "$dest" 2>/dev/null || true + + echo " Uploading ${base} -> ${dest}" + if ! 
openshell sandbox upload "$sandbox" "$src" "/sandbox/" 2>&1; then + echo " Upload failed — falling back to inline base64 copy via sandbox_exec" + local file_b64 + file_b64="$(base64 -w 0 < "$src")" + if sandbox_exec "$sandbox" \ + bash -c "echo '${file_b64}' | base64 -d > '${dest}'" 2>/dev/null; then + echo " ${base} written via fallback" + else + echo " warning: failed to write ${base} into sandbox" >&2 + return 1 + fi + fi + return 0 +} + +# ── install_workspace_tools_md (helper) ─────────────────────────── +# Append a managed block to /sandbox/.openclaw/workspace/TOOLS.md when OpenClaw +# compact tool-search mode is active (tools.toolSearch not false). Skips — and +# strips any existing block — when direct tools are enabled. OpenClaw injects +# TOOLS.md on every turn via Project Context. +install_workspace_tools_md() { + local sandbox="$1" + local container + if ! container=$(find_sandbox_container "$sandbox"); then + echo " warning: cannot update TOOLS.md (sandbox container not running)" >&2 + return 1 + fi + + local inner_script + inner_script=$(cat <<'INNER_EOF' +set -eu +tools_md=/sandbox/.openclaw/workspace/TOOLS.md +begin='# >> cuopt tool help' +end='# << cuopt tool help' + +tool_mode=$(python3 - <<'PY' +import json + +cfg_path = "/sandbox/.openclaw/openclaw.json" +try: + with open(cfg_path) as f: + cfg = json.load(f) +except (FileNotFoundError, json.JSONDecodeError, OSError): + print("compact") + raise SystemExit(0) + +ts = (cfg.get("tools") or {}).get("toolSearch") +if ts is False: + print("direct") +elif isinstance(ts, dict) and ts.get("enabled") is False: + print("direct") +else: + print("compact") +PY +) + +write_managed_block() { + cat <<'BLOCK_EOF' +# >> cuopt tool help +With only `tool_search_code` exposed, `read` and `exec` still exist - reach them via `openclaw.tools.search`, then `describe`, then `call` inside a `tool_search_code` run. Try that before asking the user to paste file contents. +`read` requires an exact path (no globs). 
Use the path the user gave, or run `find` through `exec` if you need to discover files under `/sandbox`. +# << cuopt tool help +BLOCK_EOF +} + +# Drop trailing blank lines so strip+re-append does not accumulate spacing. +trim_trailing_blank_lines() { + local file="$1" + [ -f "$file" ] || return 0 + tmp="${file}.trim.$$" + awk ' + { lines[NR] = $0 } + END { + n = NR + while (n > 0 && lines[n] == "") { + n-- + } + for (i = 1; i <= n; i++) { + print lines[i] + } + } + ' "$file" > "$tmp" + mv "$tmp" "$file" +} + +append_managed_block() { + trim_trailing_blank_lines "$tools_md" + if [ -s "$tools_md" ]; then + printf '\n' >> "$tools_md" + fi + write_managed_block >> "$tools_md" +} + +mkdir -p /sandbox/.openclaw/workspace +if [ -f "$tools_md" ]; then + tmp="${tools_md}.tmp.$$" + awk -v begin="$begin" -v end="$end" ' + $0 == begin { skip=1; next } + skip && $0 == end { skip=0; next } + !skip { print } + ' "$tools_md" > "$tmp" + mv "$tmp" "$tools_md" + trim_trailing_blank_lines "$tools_md" +fi + +if [ "$tool_mode" = "direct" ]; then + echo skipped-direct + exit 0 +fi + +if [ ! 
-f "$tools_md" ]; then + cat > "$tools_md" <<'HEADER_EOF' +# TOOLS.md - Local Notes + +HEADER_EOF +fi + +append_managed_block +echo updated +INNER_EOF +) + + local inner_b64 + inner_b64=$(printf '%s' "$inner_script" | base64 -w 0) + + local err_log result + err_log=$(mktemp) + result=$(sandbox_exec "$sandbox" \ + sh -c "echo '$inner_b64' | base64 -d | sh" 2>"$err_log") || { + echo " warning: could not update TOOLS.md in container '$container'" >&2 + if [[ -s "$err_log" ]]; then + sed 's/^/ /' "$err_log" >&2 + fi + rm -f "$err_log" + return 1 + } + rm -f "$err_log" + + case "$result" in + updated) + echo " TOOLS.md cuOpt tool help block updated" + ;; + skipped-direct) + echo " TOOLS.md cuOpt tool help skipped (tools.toolSearch is false)" + ;; + *) + echo " warning: unexpected TOOLS.md update result: $result" >&2 + return 1 + ;; + esac + return 0 +} + # ── install-skill ───────────────────────────────────────────────── cmd_install_skill() { local sandbox="${1:-$CUOPT_SANDBOX}" @@ -1565,32 +1815,33 @@ cmd_install_skill() { guardrail_content="$(cat <<'GUARDRAIL' --- name: cuopt-setup -description: "Read this FIRST when working with cuOpt. If you cannot read the cuopt-sandbox skill at ~/.openclaw/skills/cuopt-sandbox/SKILL.md, this tells you where the real file lives." +description: "NemoClaw cuOpt sandbox entry — probe/smoke before schedule output; absolute skill paths under /sandbox/.openclaw/skills/." 
--- -# cuOpt sandbox skill — path resolution +# cuOpt sandbox — skill paths + +## Schedule / assignment workflow -The cuopt-sandbox skill (sandbox-specific setup, networking, venv) is -installed at an absolute path that always works: +Read (in order): + /sandbox/.openclaw/skills/optimization-from-data-orchestrator/SKILL.md /sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md -Read that file FIRST for sandbox-specific cuOpt setup, then consult the -per-task sibling skills it points at (cuopt-user-rules, -cuopt-numerical-optimization-api-python, cuopt-routing-api-python, -numerical-optimization-formulation, etc.) which live in the same -`/sandbox/.openclaw/skills/` directory. +Routing + cuOpt-first rules: +`/sandbox/.openclaw/skills/cuopt-sandbox/references/activation.md` -## Why this guardrail exists +## cuOpt skills -OpenClaw compacts skill paths to `~/…` in the system prompt. When you try -to read `~/.openclaw/skills/cuopt-sandbox/SKILL.md`, the `~` may expand to -`/root/` or another directory that is not readable. If that happens: + /sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md + +**Python MILP/LP imports:** +`/sandbox/.openclaw/skills/cuopt-sandbox/references/python-imports.md` +(use `cuopt.linear_programming.problem`, not `from cuopt import milp`). + +## Why this guardrail exists -1. Do NOT give up on using cuOpt. -2. Read the skill from the absolute path above. -3. Also check `/sandbox/.openclaw/skills/cuopt-sandbox/SKILL.md` (symlink - to the same file). +OpenClaw compacts skill paths to `~/…` in the system prompt. Use absolute +paths under `/sandbox/.openclaw/skills/` when `~` paths fail. GUARDRAIL )" @@ -1645,6 +1896,9 @@ if extra_dir not in existing: load["extraDirs"] = existing load.setdefault("watch", True) load.setdefault("watchDebounceMs", 250) +# skills.priority is NOT valid on OpenClaw 2026.5.x (added in a later PR). +# Remove it if a prior install-skill run wrote one — it breaks config validate. 
+skills.pop("priority", None) # Drop the obsolete sentinel from the prior mechanism if present so the # config stays clean. The new loader ignores skills.entries.X.config # for discovery purposes. @@ -1678,42 +1932,17 @@ print(" skills.load.extraDirs=" + json.dumps(existing)) echo " until skills.load.extraDirs includes /sandbox/.openclaw/skills" >&2 fi + install_workspace_tools_md "$sandbox" \ + || echo " warning: could not update workspace TOOLS.md (non-fatal)" >&2 + echo "Skills installed." - # Upload the combined REST/gRPC probe directly to /sandbox/. The probe is - # not a skill (it's run by `cmd_test`), so it doesn't need to live under - # the skills tree. Direct upload is preferred when policy allows it. - # - # IMPORTANT: `openshell sandbox upload` treats DEST as a *directory* and - # lands the file at DEST/. Passing a file path (e.g. - # `/sandbox/probe_cuopt.py`) creates a directory with that name containing - # the real file inside — Python then errors with "can't find '__main__' - # module" when invoked against the directory. So we pass `/sandbox/` and - # let the basename come from SRC. - # - # We also defensively `rm -rf` any prior file or directory at the - # destination before uploading, and fall back to an inline base64 copy - # via sandbox_exec if the upload fails outright. - local probe="$SCRIPT_DIR/probe_cuopt.py" - if [[ -f "$probe" ]]; then - sandbox_exec "$sandbox" \ - rm -rf /sandbox/probe_cuopt.py 2>/dev/null || true - - echo " Uploading probe_cuopt.py -> /sandbox/probe_cuopt.py" - if ! 
openshell sandbox upload "$sandbox" "$probe" "/sandbox/" 2>&1; then - echo " Upload failed — falling back to inline base64 copy via sandbox_exec" - local probe_b64 - probe_b64="$(base64 -w 0 < "$probe")" - if sandbox_exec "$sandbox" \ - bash -c "echo '${probe_b64}' | base64 -d > /sandbox/probe_cuopt.py" 2>/dev/null; then - echo " probe_cuopt.py written via fallback" - else - echo " warning: failed to write probe_cuopt.py into sandbox" >&2 - fi - fi - else - echo " warning: probe_cuopt.py not found at $probe — skipping" >&2 - fi + # Sandbox helper scripts (not skills): probe + smoke tests for agents and + # cmd_test. Uploaded to /sandbox/ directly when policy allows. Best-effort: upload_sandbox_file prints its own warning, so `|| true` keeps a missing or failed helper from aborting under `set -e`. + local helper + for helper in probe_cuopt.py smoke_lp.py smoke_milp.py smoke_vrp.py; do + upload_sandbox_file "$sandbox" "$SCRIPT_DIR/$helper" || true + done } @@ -1730,7 +1959,7 @@ cmd_add() { # something needs attention, a compact post-mortem of what cmd_test # actually saw. local test_rc=0 - cmd_test "$sandbox" || test_rc=$? + cmd_test "$sandbox" smoke || test_rc=$? 
print_activation_banner "$sandbox" if [[ $test_rc -ne 0 ]]; then print_service_status_summary "$sandbox" "$test_rc" @@ -1771,7 +2000,21 @@ main() { install-skill) cmd_install_skill "${1:-}" ;; cache-wheels) cmd_cache_wheels "${1:-}" ;; clear-wheel-cache) cmd_clear_wheel_cache ;; - test) cmd_test "${1:-}" ;; + test) + local t_sandbox="" t_smoke=false + while [[ $# -gt 0 ]]; do + case "$1" in + --smoke) t_smoke=true; shift ;; + -*) echo "unknown test flag: $1" >&2; exit 1 ;; + *) t_sandbox="$1"; shift ;; + esac + done + if $t_smoke; then + cmd_test "${t_sandbox:-$CUOPT_SANDBOX}" smoke + else + cmd_test "${t_sandbox:-$CUOPT_SANDBOX}" probe + fi + ;; add) cmd_add "${1:-}" ;; help|-h|--help) usage ;; *) diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md index 888689e..cd9f9ee 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-model-mapper/SKILL.md @@ -1,7 +1,14 @@ --- name: cuopt-model-mapper -summary: Convert an interpreted optimization problem directly into cuOpt-native model construction for the fast path, asking only the minimum clarifying questions needed for a valid solve. -description: Use after optimization intent and basic data interpretation are established, when the goal is to solve quickly by mapping data directly into cuOpt rather than building a replayable intermediate artifact. +version: "26.06.01" +description: Map interpreted optimization problems into cuOpt-native models for the fast path with minimal clarifying questions. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- @@ -41,6 +48,11 @@ If those conditions are not met, first use: For the fast path, map directly from the interpreted data into cuOpt structures. 
Do not introduce a replayable intermediate artifact unless the user asks for replayability, auditability, export, or reuse. +**In NemoClaw sandbox:** before building the cuOpt model, confirm +`cuopt-sandbox` gates completed (probe → `CUOPT_REMOTE_*` → smoke test). +Do not build a parallel heuristic assigner "first" — cuOpt is the first +and only solver for assignments/schedules. + ## Workflow ### 1. Confirm the minimum viable formulation @@ -55,6 +67,12 @@ Use the unresolved blocker list from ingestion as the starting point; do not reo If one non-retrievable modeling choice would change the meaning of the solve, ask exactly one concise blocking question. +**Scheduling with no stated objective:** Feasibility (no double-booking, +respect unavailability, assign every item) belongs in **hard constraints**. +Do not deliver a greedy feasible schedule first. Ask one objective +question if needed, or state a default secondary objective (e.g. balance +slot times, minimize penalty slacks), then solve with cuOpt. 
+ Examples: - **"Do these production quantities need to be whole numbers?"** - **"Must all demand be met, or can unmet demand be allowed with a penalty?"** @@ -190,19 +208,22 @@ If relevant, also mention which constraint or resource appears to be most limiti - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) + before any gRPC Python solve - For QP: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) + before any gRPC Python solve - For routing: - use `routing-formulation` - then use `cuopt-routing-api-python` - follow `cuopt-user-rules` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) + before any remote (REST) routing solve ## Success criterion diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md index 41a3a96..0b50fd3 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/SKILL.md @@ -1,482 +1,78 @@ --- name: cuopt-sandbox -description: Read FIRST whenever a request involves linear programming (LP), mixed-integer linear programming (MILP), quadratic programming (QP), or vehicle routing (VRP, TSP, PDP) — including small / pedagogical / made-up problems ("invent an LP example", "show me a small MILP", "find optimal X under constraints", "minimize cost", "plan deliveries", "schedule with constraints"). 
In this sandbox, cuOpt is the default solver and must be used whenever it is available; this skill points at the per-task skills (cuopt-user-rules, cuopt-numerical-optimization-api-python, cuopt-routing-api-python, numerical-optimization-formulation, etc.) and covers sandbox-specific setup (venv at /sandbox/.openclaw-data/cuopt, host.openshell.internal networking, REST 5000 / gRPC 5001). +version: "26.06.01" +description: Run cuOpt in the NemoClaw sandbox — probe/smoke gates, remote gRPC env, then vendored cuOpt skills. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - sandbox --- -# NVIDIA cuOpt (Sandbox) +# cuOpt in the NemoClaw sandbox -Solve optimization problems using NVIDIA cuOpt from inside the sandbox. -This skill covers **sandbox-specific** setup — networking, venv, and connectivity. +Infrastructure for solving with cuOpt inside NemoClaw: probe/smoke gates, +remote env vars, and handoff to vendored formulation/API skills. -> **In this sandbox, LP/MILP must be solved through the remote cuOpt service -> on the host, not the local CUDA runtime.** There is no GPU inside the -> sandbox. If you see `cudaErrorInsufficientDriver` or similar CUDA errors, -> you accidentally invoked a local solve — set `CUOPT_REMOTE_HOST` and -> `CUOPT_REMOTE_PORT` to use the remote service instead. +## When to use -## Finding the shell, file, and editing tools (NemoClaw catalog) +- Constructive planning from uploaded constraint data (schedule, assign, + route, roster — any wording). See `references/intent-and-triggers.md`. +- CSV upload + plan → `optimization-from-data-orchestrator` + `references/activation.md`. +- `ImportError` / `cudaErrorInsufficientDriver`. -This sandbox runs under NemoClaw, which by default exposes only three -meta-tools to the model — `tool_search`, `tool_describe`, `tool_call` — -and hides every real tool (`exec`, `read`, `write`, `edit`, `process`, -…) behind that catalog. 
If your tool list shows only those three, the -real tools are not missing; they are reachable via the catalog. +## Mandatory order -Use them in this order: +Complete before any assignment output, feasibility verdict, or custom +solver code: -1. `tool_search` with `{query: ""}` and `{limit: 20}` lists the catalog; - `{query: "shell"}` or `{query: "file"}` narrows by topic. -2. `tool_describe` with `{name: ""}` returns the parameter schema. - Call this once before the first invocation of any new tool. -3. `tool_call` with `{name: "", arguments: {…}}` runs it. - -The capabilities the rest of this skill assumes: - -| Capability | Search term | Tool name | Typical args | -|---|---|---|---| -| Run any shell command (ls, source, python3, …) | `shell` / `exec` | `exec` | `{command: "ls -1 …"}` | -| Read a known file path | `read` | `read` | `{path: "/sandbox/…"}` | -| Write / edit a file | `write` / `edit` | `write` / `edit` | see `tool_describe` | - -Every shell example below — `ls -1 /sandbox/.openclaw/skills/`, sourcing -the venv, `python3 /sandbox/probe_cuopt.py`, the LP smoke test — is -dispatched as `tool_call` with `name: "exec"` and -`arguments: {command: ""}`. - -If `tool_search` with an empty query returns only the three catalog -tools themselves, no real tools were registered for this session — that -is a gateway setup problem, not a discovery problem; report it to the -operator. If your tool list already includes `exec` / `read` / etc. -directly, the compact catalog is disabled; call those tools directly and -ignore this section. - -## Always use cuOpt when it is available - -If the user's task is an LP, MILP, QP, or vehicle-routing problem **and** -the capability check below shows the required cuOpt service is reachable, -you MUST solve it with cuOpt. This applies even when: - -- The problem has only 2–3 variables and could be solved by hand or by - inspecting corner points of the feasible region. 
-- The user asked you to "make up", "invent", or "come up with" an example. -- A textbook approach (graphing, exhaustive enumeration, simplex by hand) - would also yield the answer. -- The problem appears pedagogical or "obvious". - -The sandbox exists to demonstrate cuOpt; bypassing it for size or -pedagogy defeats the purpose. Always: - -1. Formulate the problem (variables, constraints, objective) — see the - `*-formulation` sibling skills for the relevant problem class. -2. Build and submit it through the matching `cuopt-*-api-*` skill. -3. Report cuOpt's status, objective value, and variable values. -4. Only then, if helpful, walk through the math/intuition. - -**Narrow exceptions** (do not invent others): - -- The capability check shows the required service is unavailable → follow - the decline rule in the next section. Do not silently fall back to a - by-hand solve. -- The user **explicitly** asks for a by-hand walkthrough, "without using - cuOpt", "show the math", "explain the algorithm", or similar — comply, - but state once that cuOpt would be the production answer. - -When in doubt, run cuOpt. - -## Remote-first — never try a local solve before the remote service - -There is no GPU in this sandbox. Every cuOpt Python entry point that -touches CUDA (`from cuopt import routing`, `cuopt.linear_programming` -local solves, anything that initializes `rmm` or `cudf`) **will fail at -import or first-use** with `cudaErrorInsufficientDriver`, -`RMM`/`CUDA driver` errors, or similar. This is expected, not a bug to -work around — the sandbox image deliberately omits the driver because -solves are routed to a host-side cuOpt service over the wire. - -**Mandatory order of attempts for any cuOpt-supported task:** - -1. Run the capability probe (`probe_cuopt.py` — see "Capability check" - below). Read `available:` line. 
**If the most recent probe in this - session did NOT return `rest grpc`, you MUST re-run the probe before - this task — the operator may have started a service since the last - check, and a stale "REST only" or "gRPC only" reading will pin you to - a suboptimal path. Only the full `rest grpc` result is durable enough - to reuse for the rest of the session.** -2. Pick the remote interface from the table (rest, gRPC, or both) that - matches your problem class. **Use it first.** - - Routing (VRP, TSP, PDP) → REST. Open `cuopt-server-api-python` and - reuse a starter from its `assets/vrp_*/` cookbook. - - LP / MILP / QP → **prefer gRPC** via Python SDK or `cuopt_cli` - whenever the probe shows gRPC available. Fall back to REST via - `cuopt_sh` / `cuopt_sh_client` only when gRPC is not. Both route to - the same host service, but gRPC is the native path for these - problem classes (binary protocol, lower per-call overhead, better - streaming behavior). A previous session decision to use REST does - not justify reusing it after a re-probe reveals gRPC. -3. The **only** legitimate evidence that cuOpt is unavailable for your - task is a fresh `probe_cuopt.py` result whose `available:` line is - `none`, *or* the matching column in the capability table marks the - required interface as "Decline". The following do **not** count and - never permit skipping cuOpt: - - a failed `import cuopt` / `from cuopt import routing` / any - `ModuleNotFoundError` in the current interpreter - - the problem being small, toy-sized, pedagogical, or "obvious" - - a probe result from earlier in the session that wasn't `rest grpc` - (re-probe — the operator may have started a service since) - - a guess that "cuOpt won't help here" - - a hand solution being faster to type - If you have any of these and no fresh `none` probe, you are still - required to use cuOpt. 
The sandbox has no GPU, so once you do reach - the "local cuOpt is the only candidate" branch (a real `none` - probe), it will almost certainly fail anyway — proceed to step 4. -4. **If every cuOpt path fails**, stop. Explain to the user exactly - which probe / interface / payload failed and what's needed (operator - action, network policy, etc.). **Do not** silently fall back to - brute force, hand calculation, exhaustive search, a non-cuOpt - solver, or "I solved it another way" — those are all violations of - "always use cuOpt when it's available". Returning a correct answer - from a non-cuOpt method is still a failure of this skill. - -A 422 / 400 from the REST server is **not a fall-back trigger** — it -means your payload was wrong. Read the response, fix the named field -(see `cuopt-server-api-python`'s "On a 422" recipe and `assets/` -cookbook for known-good shapes), and retry. Two consecutive failures -on the same field → re-read the cookbook entry that uses that field. - -For **how to use cuOpt** (formulation, Python API, CLI, MPS format, routing, etc.), -read the sibling skills installed alongside this one in -`/sandbox/.openclaw/skills/`. Names follow stable suffix patterns -upstream, so prefer pattern-based discovery over memorizing exact names: - -- `cuopt-user-rules` — Read FIRST: behavior rules, clarify before coding, verify results -- Any `*-formulation` skill — How to go from problem text to formulation - (LP / MILP / QP, vehicle routing, etc.) 
-- Any `cuopt-*-api-python` skill — Solve through the Python SDK - (numerical optimization / LP / MILP / QP, routing, server client) -- Any `cuopt-*-api-cli` skill — Solve via `cuopt_cli` with MPS files -- `cuopt-server-common` and `cuopt-server-api-python` — REST/gRPC server - concepts and Python client (server skills are not pattern-merged) -- `skill-evolution` — Detect generalizable learnings during a long-running session - -Concrete formulation skill currently installed upstream: -`numerical-optimization-formulation` (LP, MILP, and QP concepts in one -skill). Reachable through the `*-formulation` pattern above. List the -directory to see what's actually installed: - -```bash -ls -1 /sandbox/.openclaw/skills/ -``` - -These are vendored from at -sandbox-setup time so the agent can read them locally — the sandbox cannot -reach `github.com` directly. To refresh, ask the operator to re-run -`./nemoclaw_cuopt_setup.sh install-skill ` on the host. - -## Environment - -The cuOpt client and SDK are installed in a Python virtual environment at -`/sandbox/.openclaw-data/cuopt` (the default NemoClaw filesystem policy -marks `/sandbox` itself as read-only, so the venv lives in the writable -subtree under `/sandbox/.openclaw-data/`). - -The sandbox's `/sandbox/.bash_profile` auto-activates the venv and sets -`CUOPT_SERVER`. It fires for **login shells only** — `bash -l`, -`bash -lc '…'`. Non-login interactive shells (the default behind -`openshell sandbox connect` / `nemoclaw connect`) and non-login -non-interactive shells (`bash -c '…'`, `sh -c '…'`, the default behind -many `tool_call exec` paths) do **not** source `.bash_profile`, so the -venv will *not* be active there. 
- -This is a NemoClaw constraint, not a cuOpt choice: `/sandbox/.bashrc` -(the file non-login interactive bash would normally source) is sealed -root-owned mode 444 *and* Landlock-protected (see -`04-landlock-readonly.sh` check 2 — even root processes can't write to -it after the sandbox starts), so we can't put activation there. - -Three ways to get a venv-active shell: - -```bash -# After `nemoclaw connect ` (non-login), inside the sandbox shell, -# either source .bash_profile in place: -source /sandbox/.bash_profile -# or replace the current shell with a login shell: -exec bash -l - -# From the host: one-shot login-shell command for any single task. -openshell sandbox exec --name -- bash -lc 'python3 …' -``` - -Prefer the `bash -lc '…'` wrapper for anything dispatched through -`tool_call exec` — it picks up `CUOPT_SERVER`, the `cuopt_sh` alias, and -the venv `PATH` in one shot. - -If the venv doesn't exist, ask the operator to run the host-side setup -script (`./nemoclaw_cuopt_setup.sh add `); the sandbox user -cannot recreate it directly because the packages live under the -`openclaw-sandbox` network policy and the venv path must match the -operator's configuration. - -## Networking — CRITICAL - -> **Always use `host.openshell.internal` as the server address.** -> Do NOT use `localhost`, `127.0.0.1`, or `0.0.0.0` — these resolve inside -> the sandbox container and will be **blocked** (403 Forbidden or timeout). - -Two server interfaces are available on the host: - -| Interface | Port | Protocol | Use for | -|-----------|------|----------|---------| -| REST | 5000 | HTTP | `cuopt_sh` CLI, `cuopt_sh_client` Python client, health checks | -| gRPC | 5001 | HTTP/2 | `cuopt_cli` remote execution, Python SDK remote solves | - -The `CUOPT_SERVER` environment variable (set in `.bash_profile` for login -shells) contains the REST `host:port` value. 
- -## Capability check — run this FIRST - -**Do not substitute `import cuopt` for the probe.** In this sandbox a -failed `import cuopt` (or `from cuopt import routing`, or -`from cuopt.linear_programming...`) only tells you the *local* runtime -can't initialize — almost always because there is no GPU here, and the -service runs on the host. It says **nothing** about whether the -host-side cuOpt service is reachable. The only authoritative -capability signal is what `probe_cuopt.py` prints on its `available:` -line. If the probe says `rest`, `grpc`, or `rest grpc`, cuOpt is -available and you must use it — regardless of what a local import -does. If you catch yourself reasoning "I tried `import cuopt`, it -failed, so I'll solve this by hand", stop and run the probe. - -Before doing any cuOpt work, probe what the host is actually serving. -**The probe needs the cuOpt venv** for `grpcio`; non-login shells -(`bash -c '…'`, plain `sh -c '…'`) do not source `.bash_profile`, so -either wrap the call in `bash -lc '…'` or source the venv explicitly: - -```bash -source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 /sandbox/probe_cuopt.py -``` - -The last line tells you what's available. Map it to the request you were -asked to handle: - -| `available:` line | You may use | Decline (politely, with reason) | +| Step | Action | Reference | |---|---|---| -| `rest grpc` | everything below | nothing | -| `rest` only | LP / MILP via Python SDK or `cuopt_sh` / `cuopt_sh_client`; vehicle routing (VRP, TSP, PDP) | LP / MILP via `cuopt_cli`; QP | -| `grpc` only | LP / MILP via Python SDK or `cuopt_cli`; QP | vehicle routing (VRP, TSP, PDP); `cuopt_sh*` tools | -| `none` | nothing — refuse | every cuOpt task | - -When a request lands in the "Decline" column, do **not** open the matching -sibling skill and try anyway. Tell the user which service is needed and -point at `cuopt-examples/cuopt_on_nemoclaw/SETUP.md` ("Starting the cuOpt -server"). 
Example: - -> The cuOpt REST server (port 5000) isn't reachable, so I can't solve -> vehicle-routing problems in this sandbox. Ask the operator to start it -> (see SETUP.md, "Starting the cuOpt server"), then try again. - -The probe also prints the exact endpoint reached, e.g. -`grpc: host.openshell.internal:5001`. Use that endpoint for the -session — set `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT` for gRPC, or pass -`ip=` / `port=` to `CuOptServiceSelfHostClient` for REST. - -For machine-parseable output use `--json`: - -```bash -source /sandbox/.openclaw-data/cuopt/bin/activate && \ - python3 /sandbox/probe_cuopt.py --json -``` - -## How to invoke each interface — sandbox-specific delta - -For complete API docs, modeling patterns, and examples, read the upstream -sibling skills listed at the top of this file. Below is only what's -*different* about this sandbox. - -### gRPC path (Python SDK and `cuopt_cli`) - -The Python SDK and `cuopt_cli` solve through the gRPC server. Set: +| 0 | Probe → remote env → smoke | `references/remote-env-and-smoke.md` | +| 1 | Formulate | vendored `*-formulation` skills | +| 2 | Solve (one job, terminal status) | `references/long-running-jobs.md` | -```bash -export CUOPT_REMOTE_HOST=host.openshell.internal -export CUOPT_REMOTE_PORT=5001 -``` - -before the Python or CLI process starts. If you see `Using remote GPU -backend` in the solver output, the remote path engaged. If you see -`cudaErrorInsufficientDriver` instead, the env vars didn't take effect and -the client tried to solve locally — there is no GPU here, so it fails. - -For modeling, status checking, and examples → the matching upstream -skill in `/sandbox/.openclaw/skills/` — typically a `cuopt-*-api-python` -skill (LP / MILP / QP), `cuopt-routing-api-python`, or a `cuopt-*-api-cli` -skill. +Inspecting uploaded data for columns and constraints is fine; emit a +completed plan only after smoke succeeds. 
-### REST path (`cuopt_sh`, `cuopt_sh_client`) +## Quick reference -REST runs at `host.openshell.internal:5000`. Pass `ip` and `port` (string) -explicitly when constructing the client; the constructor's defaults assume -`localhost`, which is blocked from the sandbox. +**Imports (LP/MILP/QP):** ```python -from cuopt_sh_client import CuOptServiceSelfHostClient -client = CuOptServiceSelfHostClient(ip="host.openshell.internal", port="5000") -``` - -Or with `cuopt_sh`: - -```bash -cuopt_sh -t LP /path/to/problem.mps -i host.openshell.internal -p 5000 -``` - -For request shape, polling, and routing examples → -`cuopt-server-api-python`, `cuopt-server-common`, and `cuopt-routing-api-python` -in `/sandbox/.openclaw/skills/`. - -### Vehicle routing (VRP, TSP, PDP) — REST only in this sandbox - -Routing **must** go through the REST path. The `cuopt.routing` Python -module initializes CUDA/RMM at import time and there is no GPU in this -sandbox, so `from cuopt import routing` fails. This is by design — see -"Remote-first" above. - -Concrete steps: - -1. Open `cuopt-server-api-python` and read its "VRP payload cookbook" - table. -2. Pick the cookbook entry whose feature set is closest to the user's - data — e.g. `vrp_time_windows/` if the user gave time windows, - `vrp_capacities/` for demand+capacity, `vrp_pickup_delivery/` for - paired pickups/deliveries. Each entry is at - `/sandbox/.openclaw/skills/cuopt-server-api-python/assets//` - and contains a runnable `payload.json`, `README.md`, and `run.sh`. -3. Adapt the `payload.json` to the user's data, keeping the field - shapes intact. -4. Submit with `cuopt_sh` (CLI, easiest) or `cuopt_sh_client` (Python). - Both honor `CUOPT_SERVER` (already set by `.bashrc` to - `host.openshell.internal:5000`). -5. On a 422, follow the cookbook's "On a 422" recipe — read the `loc` - path from the response and fix that field. Do not retry blindly; - do not bail to brute force. 
- -#### Default routing-data assumptions (do not ask) - -When user-supplied routing data is incomplete in any of the ways below, -apply these defaults silently rather than asking. Symmetric costs and -zero diagonals are the conventional defaults for VRP/TSP/PDP; asking the -user to re-state them every time is friction without value. - -- **Cost and time matrices are symmetric by default.** If the user - provides a cost or time for one direction of a location pair (A→B) - but not the reverse (B→A), assume the reverse equals the forward - value. Mirror sparse one-direction entries into a full square matrix - before submitting the payload. -- **Diagonal entries are zero.** Cost and time from a location to itself - is 0. Do not ask whether to include the diagonal or what its value - should be. -- **Explicit asymmetric values always win.** If the user provides both - A→B = 10 and B→A = 12, use both as-is. Symmetry is only the default - for *missing* entries; it is never an override for entries the user - actually gave. - -Only ask for clarification when the gap is genuinely ambiguous in a way -these defaults can't cover, e.g.: - -- No cost or time data of any kind was provided — need a source - (user-supplied matrix? straight-line distance from coordinates? - haversine on lat/lon? external distance API?). -- Multi-modal cost (e.g. distance vs travel time vs toll) where the - formulation needs one but the user supplied another. -- Costs/times for some pairs only, with neither direction provided for - others — explicitly confirm whether the missing pairs are unreachable - or simply unmeasured. - -The `cuopt-routing-api-python` skill describes the GPU-backed Python API -and is **not** the right reference inside this sandbox — use the REST path -instead. - -## Script execution hygiene - -For any solver script longer than a one-liner, write it to a file first -and run that file. 
Inline heredocs and `python3 -c "..."` strings interact -badly with the `tool_call → exec → shell → Python` quoting chain — quotes -collapse across layer boundaries, and each broken inline script costs a -full sandbox round-trip before the failure is even visible. - -Recommended pattern: - -```bash -cat > /sandbox/solve.py <<'PY' -# … solver code … -PY -bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && python3 /sandbox/solve.py' +from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings ``` -Use `bash -lc` (not bare `sh`) for any command that calls `source`; the -default shell behind `tool_call exec` can be `dash`, which doesn't have -`source`. The same applies to anything that relies on bash-only syntax -(arrays, `[[ ... ]]`, `<<<`, etc.). - -Failure symptoms that mean script construction is broken — **not** cuOpt. -If you see any of these, stop debugging the solver and switch to the -file pattern above: - -- `source: not found` → wrap with `bash -lc '...'`. -- `SyntaxError` on a Python line containing an unquoted URL, path, or - shell metacharacter → quoting collapsed somewhere across the layers. -- `NameError` on a token that should obviously be a string literal - (e.g. `Path(/sandbox)` missing the quotes around `/sandbox`) → same - root cause; the outer layer ate your Python quotes. - -If you see `STATUS None` / `OBJECTIVE None` from a solve that otherwise -ran to completion, that's a **different** failure mode — a response-shape -mismatch in your parser. 
Open the matching cookbook entry under -`/sandbox/.openclaw/skills/cuopt-server-api-python/assets/` and copy its -extraction code rather than extrapolating from a different problem class: - -| Problem class | Cookbook entry | Response shape | -|---|---|---| -| LP | `lp_basic/client.py` | `result['response'].get('primal_solution')` — direct | -| MILP | `milp_basic/client.py` | `result['response'].get('primal_solution')` — direct | -| Routing (VRP/TSP/PDP) | `vrp_*/client.py` | `result['response']['solver_response']['status']` — nested under `solver_response` | +**Interfaces:** LP/MILP/QP → gRPC `:5001` + `CUOPT_REMOTE_*`; routing → REST +`:5000`. See `references/interfaces.md`, `references/routing-rest-only.md`. -The LP/MILP and routing shapes are different. Do not assume one based on -having read the other. +## Reference index -## Quick connectivity smoke test (LP) +| Topic | File | +|---|---| +| Activation / skill order | `references/activation.md` | +| Intent / paraphrases | `references/intent-and-triggers.md` | +| Gates / common mistakes | `references/gates-and-first-actions.md` | +| Env vars + smoke | `references/remote-env-and-smoke.md` | +| Python imports | `references/python-imports.md` | +| gRPC vs REST | `references/interfaces.md` | +| Routing REST | `references/routing-rest-only.md` | +| Paths + probe | `references/environment-and-networking.md` | +| Long-running jobs | `references/long-running-jobs.md` | +| Troubleshooting | `references/troubleshooting.md` | -After the connectivity probes pass, run this minimal LP to verify the full -remote-solve path works end to end. Expected: `Optimal`, objective `10`, -`x = 2`, `y = 2`, with `Using remote GPU backend` in the solver log. 
- -```python -from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -p = Problem("smoke") -x = p.addVariable(lb=0, vtype=CONTINUOUS, name="x") -y = p.addVariable(lb=0, vtype=CONTINUOUS, name="y") -p.addConstraint(x + y <= 4) -p.addConstraint(x <= 2) -p.addConstraint(y <= 3) -p.setObjective(3*x + 2*y, sense=MAXIMIZE) -p.solve(SolverSettings()) -print(p.Status.name, p.ObjValue, x.getValue(), y.getValue()) -``` +## Orchestration skills (local) -If this fails, do not move on to a real problem — fix connectivity first -(see Troubleshooting below). +After gates: `optimization-from-data-orchestrator` → `optimization-intent-router` +→ `tabular-optimization-ingestion` → `cuopt-model-mapper` (and +`optimization-mode-router` when replay/audit signals appear). -## Troubleshooting +## Vendored upstream skills -| Symptom | Cause | Fix | -|---------|-------|-----| -| `cudaErrorInsufficientDriver` or CUDA errors | Accidentally invoked local solve instead of remote service | Set `CUOPT_REMOTE_HOST=host.openshell.internal` and `CUOPT_REMOTE_PORT=5001` before solving | -| `from cuopt import routing` fails with CUDA / RMM init error | There is no GPU in this sandbox; routing has no remote-aware Python wrapper | Use REST instead: see "Vehicle routing (VRP, TSP, PDP) — REST only in this sandbox" above and `cuopt-server-api-python`'s `assets/vrp_*/` cookbook. Do **not** fall back to brute force or non-cuOpt methods | -| `403 Forbidden` | Wrong address or sandbox policy missing port | Use `host.openshell.internal`, not `localhost`. If address is correct, ask operator to run `nemoclaw_cuopt_setup.sh apply-policy` | -| `Connection refused` on `:5000` | REST service not running or host firewall blocking the port | Check if REST is needed; gRPC alone (5001) is sufficient for LP/MILP. 
If REST is needed, ask operator to start it | -| `available: none` from `probe_cuopt.py` | No cuOpt service running on host, ports not in sandbox policy, or host firewall | Ask operator to start a cuOpt server (`SETUP.md` > Starting the cuOpt server) and re-run `nemoclaw_cuopt_setup.sh apply-policy`; verify host firewall opens 5000 / 5001 | -| Connection timeout / hang | Server not running or host firewall blocking Docker | Ask operator to verify from host: `ss -tlnp \| grep 500` | -| Timeout through `10.200.0.1:3128` | Sandbox proxy cannot reach the destination | Ask operator to verify sandbox network policy includes the cuOpt ports | -| `ModuleNotFoundError` | Venv not activated — common in non-login shells (`bash -c '…'`) because `.bash_profile` only fires for login shells | Wrap the call in `bash -lc '…'` (preferred) or `source /sandbox/.openclaw-data/cuopt/bin/activate` before the python invocation | -| No `Using remote GPU backend` in output | Remote env vars not set or not picked up | Ensure `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` are exported before the Python process starts | +Installed under `/sandbox/.openclaw/skills/` by `install-skill`: +`numerical-optimization-formulation`, `cuopt-numerical-optimization-api-python`, +`routing-formulation`, `cuopt-routing-api-python`, `cuopt-server-api-python`, +`cuopt-user-rules`, etc. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md new file mode 100644 index 0000000..593d4af --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/activation.md @@ -0,0 +1,33 @@ +# Skill activation and routing (NemoClaw sandbox) + +OpenClaw matches skills from **`name`** and frontmatter **`description`** +in `<available_skills>`. Behavioral rules live here and in sibling skills +— not stuffed into `description`.
+ +## Skill order for CSV upload + plan request + +When the user uploads tabular files and asks for a schedule, assignment, +roster, allocation, or route (any wording): + +1. **`optimization-from-data-orchestrator`** — workflow sequence +2. **`cuopt-sandbox`** — probe/smoke gates, remote env +3. Downstream: intent-router → ingestion → model-mapper → vendored API skills + +Also loaded every session: bundled **`cuopt-setup`** guardrail (absolute paths). + +## cuOpt before custom Python + +The first code path that **emits** a schedule, roster, or assignment must +be cuOpt after probe/smoke — not a greedy, backtracking, or hand-rolled +scheduler. + +Only bypass cuOpt when: + +1. User explicitly wants a manual/heuristic algorithm **instead of** cuOpt +2. Analytics only (summarize/chart — no new plan) +3. Probe shows host cuOpt unreachable — report; no greedy substitute + +## Intent (not exact phrases) + +See `references/intent-and-triggers.md` — constructive task + constraint +data → cuOpt; feasible/minimize/optimal share one solver path. 
diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md new file mode 100644 index 0000000..7d7881e --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/environment-and-networking.md @@ -0,0 +1,54 @@ +# Environment and networking + +## Sandbox layout + +| Path | Purpose | +|---|---| +| `/sandbox/` | Workspace root — scripts, some uploads | +| `/sandbox/.openclaw/workspace/` | **Common** chat/workspace file uploads | +| `/sandbox/workspace/` | Alternative upload target (openshell) | +| `/sandbox/probe_cuopt.py` | Connectivity probe (no env side effects) | +| `/sandbox/smoke_*.py` | Gate 3 smoke tests | +| `/sandbox/.openclaw-data/cuopt/bin/activate` | cuOpt Python venv | +| `/sandbox/.openclaw/skills/` | Installed skills (upstream + local) | + +## Host endpoints + +| Service | Host:port | Notes | +|---|---|---| +| gRPC (LP/MILP/QP) | `host.openshell.internal:5001` | Requires `CUOPT_REMOTE_*` | +| REST (VRP) | `host.openshell.internal:5000` | No remote env vars | + +From inside the sandbox container, `localhost` points at the sandbox — +not the host cuOpt services. + +## Capability check (probe) + +```bash +bash -lc 'python3 /sandbox/probe_cuopt.py' +``` + +Read `available:` — typical values: `grpc`, `rest`, `rest grpc`, or `none` / empty +if host services are down. + +| `available:` | Implication | +|---|---| +| `grpc` | LP/MILP/QP path viable after env + smoke | +| `rest` | VRP REST viable | +| `rest grpc` | Both paths | +| `none` / empty / errors | Report to user; do not invent heuristics as substitute | + +Probe success ≠ ready to solve — still run env + smoke for gRPC. + +## Remote-first workflow + +1. Probe → note `available:` +2. For gRPC: export vars → smoke_lp (→ smoke_milp if scheduling) +3. For routing only: smoke_vrp if `rest` present +4. Read formulation + API skills → build model +5.
Solve once; poll until terminal — `references/long-running-jobs.md` + +## Path quirks + +Tilde paths (`~/file.csv`) may fail in some tool contexts — prefer +`/sandbox/...` absolute paths. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md new file mode 100644 index 0000000..fb91552 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/gates-and-first-actions.md @@ -0,0 +1,77 @@ +# Gates and first actions (NemoClaw sandbox) + +**This sandbox exists to solve with cuOpt.** For schedule, assign, route, +minimize, or allocate-under-constraints tasks, cuOpt is turn one — not +turn two after the user corrects you. + +## Before smoke passes (Gate 3) + +| Ready now | Wait until cuOpt smoke succeeds | +|---|---| +| `probe_cuopt.py` | Timed assignment output (schedule, roster, shift plan) | +| Set `CUOPT_REMOTE_*`, run smoke scripts | Greedy / heuristic / backtracking code | +| Inspect uploaded CSVs for **columns and constraints** | "Draft plan while cuOpt sets up" | +| Ask **one** blocking clarification | `ortools`, `pulp`, hand-rolled search | +| Write `/sandbox/solve.py` for cuOpt | Feasibility verdict without cuOpt status | + +Inspecting uploaded data for structure is fine; emit a completed assignment +plan only after cuOpt returns a solver status. + +## Mandatory order (every optimization task) + +1. **Probe** — `bash -lc 'python3 /sandbox/probe_cuopt.py'` +2. **Remote env** — export `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT` in + the same shell as Python. See `references/remote-env-and-smoke.md`. +3. **Smoke** — run `/sandbox/smoke_lp.py` (+ `smoke_milp.py` for discrete + scheduling MILP). Confirm `Using remote GPU backend`. +4. **Formulation skills** — read vendored `*-formulation` + `cuopt-*-api-*` +5. 
**Build and solve** — real model via cuOpt; report `Problem.Status.name` + +Start cuOpt gates on the first optimization turn — the user does not need +to say "use cuOpt" first. + +## Scheduling and assignment over time + +Uploaded tables + language like **"build me a schedule"**, "assign shifts", +"fill time slots", or "minimize conflicts/cost under capacity" → **MILP +via cuOpt gRPC**, not a custom Python scheduler. + +The user **does not** need to say minimize, optimal, or best. Feasibility +under capacity, unavailability, and no-double-booking rules is expressed +as **hard constraints** in a MILP; cuOpt satisfies them (and can optimize +a secondary objective when you define one). + +Examples: shift/roster planning, timetabling, resource–slot assignment, +league or event scheduling (e.g. games to courts and slots). + +After gates: `numerical-optimization-formulation` + +`cuopt-numerical-optimization-api-python` → cuOpt `Problem` with INTEGER +vars → report status, objective, and assignments. + +## "Feasibility only" / "fastest path" (wrong in this sandbox) + +Agents sometimes skip cuOpt with reasoning like *"no explicit objective"* +or *"greedy is enough for a valid schedule."* That violates this skill. + +| Wrong rationalization | Correct action | +|---|---| +| "User didn't ask to minimize" | Constructive + constraint data → cuOpt anyway | +| "cuOpt is for optimization, not feasibility" | Feasibility = constraints; cuOpt is the solver here | +| "Greedy is faster for a first result" | Run probe → smoke → cuOpt; no greedy first deliverable | +| "I'll optimize later if they want" | First schedule output must come from cuOpt after gates | +| "Data looks simple" | Simple data still gets cuOpt MILP in this sandbox | + +If the secondary objective is unclear, ask one focused question or state +a default (e.g. minimize penalty slacks, balance slot usage) — then solve +with cuOpt. 
+ +## Common mistakes (and the fix) + +| Mistake | Fix | +|---|---| +| Heuristic assignment plan first, cuOpt after user correction | Run gates 1–3 before any assigner code | +| `from cuopt import milp` then pivot to heuristics | Use `references/python-imports.md`; import errors mean fix the path | +| Greedy solver to "explore structure" | Use formulation skills; cuOpt is the feasibility engine | +| "No minimize in prompt → feasibility greedy OK" | Constructive + CSVs → cuOpt; see intent-and-triggers.md | +| "Valid schedule first, cuOpt later" | First assignment output must be cuOpt after gates | +| Orchestration steps treated as permission to skip gates | Ingestion is interpretation only — gates still apply | diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md new file mode 100644 index 0000000..c54a2ce --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/intent-and-triggers.md @@ -0,0 +1,71 @@ +# When to use cuOpt (intent, not exact wording) + +Skills match **meaning**, not exact phrases. Do not require the user to +say "minimize", "optimal", or "build a schedule." + +## Sandbox default + +In this environment, **cuOpt is the solver for constructive planning +under constraints** — producing an assignment, schedule, roster, route, +or allocation that satisfies rules from uploaded data. + +Use cuOpt when **both** are true: + +1. **Constructive task** — the user wants you to **produce** a plan + (assign, schedule, route, allocate, slot, place, match, fill a + calendar, line up games/shifts/jobs, etc.) +2. **Constraint-bearing data** — CSVs or tables with capacities, slots, + unavailability, demands, limits, pairing rules, or similar + +If (1) and (2) hold → read **`optimization-from-data-orchestrator`** +and **`cuopt-sandbox`**, run gates, then formulate and solve. Wording +varies; the pattern does not. 
+ +## Language clusters (examples only — not exhaustive) + +Any paraphrase in these families counts: + +| Intent family | Example phrasings (same intent) | +|---|---| +| Schedule / timetable | "build a season schedule", "plan the season", "set up game times", "put these on the calendar", "when should each game happen" | +| Assign / allocate | "assign games to slots", "allocate shifts", "place jobs on machines", "who works when" | +| Route / visit | "plan deliveries", "best routes for trucks", "visit all stops" | +| Optimize explicitly | "minimize cost", "maximize profit", "best plan", "optimal mix" | +| Feasible / valid plan | "a valid schedule", "feasible assignment", "make it work under these rules", "respect all constraints" | + +**Paraphrase rule:** If a reasonable planner would read the request as +"turn this data into a constraint-respecting plan," treat it as cuOpt — +even without optimize/minimize/best. + +## Feasibility, minimize, and optimal (same solver here) + +In this sandbox, these are **not different tiers**: + +| User framing | Meaning | Action | +|---|---|---| +| Feasible / valid / make it work | Hard constraints must hold | MILP/LP/QP/routing with constraints; cuOpt finds a satisfying solution | +| Minimize / maximize / best / optimal | Hard constraints + objective | Same path; add or emphasize objective | +| No objective stated | Constraints only (+ optional default objective) | Model constraints; ask **one** objective question or state a default; still cuOpt | + +**Wrong split:** "feasibility → greedy Python, optimization → cuOpt." +Feasibility under discrete rules **is** a MILP (or routing) problem; +cuOpt handles it. + +## When cuOpt does **not** apply + +Skip cuOpt (read/summarize/analyze only) when the user wants: + +- column summaries, counts, charts, filters +- "what does this data contain?" 
+- explanation of an existing plan they already have +- forecasting or analytics without choosing a new plan + +**Clarifier when unsure:** "Do you want a summary of the data, or a new +plan that satisfies these constraints?" — one question, not a questionnaire. + +## Infrastructure triggers (always this skill) + +Regardless of task wording, also read `cuopt-sandbox` when you see: + +- `ImportError` / wrong cuOpt import path +- `cudaErrorInsufficientDriver` during solve diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md new file mode 100644 index 0000000..abbf8c5 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/interfaces.md @@ -0,0 +1,38 @@ +# gRPC and REST invocation + +## gRPC (LP / MILP / QP) + +Python API with remote backend: + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/solve.py' +``` + +Skill: `cuopt-numerical-optimization-api-python` (vendored upstream). + +MPS files: `cuopt-numerical-optimization-api-cli` or host CLI if exposed. + +## REST (VRP) + +Port 5000, JSON payloads. Skill: `cuopt-server-api-python`. + +Smoke reference: `/sandbox/smoke_vrp.py`. + +## Choosing an interface + +| Problem | Interface | Skill chain | +|---|---|---| +| LP, MILP, QP | gRPC + Python | `numerical-optimization-formulation` → `cuopt-numerical-optimization-api-python` | +| VRP, TSP, PDP | REST | `routing-formulation` → `cuopt-routing-api-python` → `cuopt-server-api-python` | + +Default for MILP scheduling in this sandbox: **gRPC Python** on port 5001. +Use REST only when the user explicitly wants the server JSON workflow. 
+ +## Evidence to report + +- Probe `available:` line +- Smoke: `Using remote GPU backend` + status +- Solve: `Problem.Status.name`, objective, key assignment vars diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md new file mode 100644 index 0000000..06ca2e4 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/long-running-jobs.md @@ -0,0 +1,47 @@ +# Long-running cuOpt jobs + +**One cuOpt job at a time.** Wait for a terminal status before starting +the next solve. + +## Why + +Each solve holds a GPU slot on the host. Overlapping jobs queue or fail +with confusing errors. The user asked for one optimization — finish it. + +## Rules + +1. **Single in-flight job** — wait for terminal status before another + `Problem.solve()` or REST submit. +2. **Set `time_limit`** — default can be long; cap for interactive work + (e.g. 600–1800 s for MILP). +3. **Poll sequentially** — if async, poll status until terminal; one + solve at a time is enough for comparison. +4. **Report status** — always paste `Problem.Status.name` (or REST + equivalent) and objective when available. + +## Python (gRPC) + +```python +from cuopt.linear_programming.solver_settings import SolverSettings + +settings = SolverSettings() +settings.set_parameter("time_limit", 600) +p.solve(settings) +print(p.Status.name, p.ObjValue) +``` + +If status is non-terminal after `time_limit`, report what you have and +suggest tightening the model or raising the limit — stay on cuOpt. + +## REST / VRP + +Submit one job; poll until completed, failed, or timeout. Wait for that +job to finish before submitting another. + +## When the user wants faster iteration + +- Reduce problem size for a smoke iteration, or tighten `time_limit`, rather + than starting a parallel second job. +- Report best-so-far if the API exposes it. 
+- Keep using cuOpt for the real solve — slowness is a tuning problem, + separate from infrastructure gate failures. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md new file mode 100644 index 0000000..aa9d4b9 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/python-imports.md @@ -0,0 +1,48 @@ +# Python imports (sandbox) + +**Use the canonical import path.** LP, MILP, and QP share one entrypoint +(there is no separate `cuopt.milp` package): + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings +``` + +| Task | Variable type | +|---|---| +| LP | `vtype=CONTINUOUS` | +| MILP (schedule, assign) | `vtype=INTEGER` (binary: `lb=0, ub=1`) | +| QP | same `Problem` + quadratic objective | + +Full examples: vendored skill `cuopt-numerical-optimization-api-python`. + +## Verify before declaring import broken + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 -c "from cuopt.linear_programming.problem import Problem, INTEGER; print(\"api_ok\")"' +``` + +If this prints `api_ok`, the SDK works — adjust your import path and +continue with cuOpt. + +## Scheduling skeleton + +```python +from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +p = Problem("assign") +assign = {} +for e in entities: + for s in slots: + assign[e, s] = p.addVariable(vtype=INTEGER, lb=0, ub=1, name=f"x_{e}_{s}") +# ... constraints, objective ... +settings = SolverSettings() +settings.set_parameter("time_limit", 600) +p.solve(settings) +print(p.Status.name, p.ObjValue) +``` + +Remote env vars must be set in the same shell — see +`references/remote-env-and-smoke.md`. 
diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md new file mode 100644 index 0000000..49178de --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/remote-env-and-smoke.md @@ -0,0 +1,71 @@ +# Remote env vars and smoke tests + +There is **no GPU in this sandbox.** `Problem.solve()` defaults to local +CUDA, which fails with `cudaErrorInsufficientDriver` unless remote env vars +are set **before Python starts**. + +The probe (`probe_cuopt.py`) does **not** set env vars — it only checks +reachability. + +## Mandatory exports (gRPC LP / MILP / QP) + +```bash +export CUOPT_REMOTE_HOST=host.openshell.internal +export CUOPT_REMOTE_PORT=5001 +``` + +| Variable | Use this value | Wrong for gRPC | +|---|---|---| +| `CUOPT_REMOTE_HOST` | `host.openshell.internal` | `localhost`, `127.0.0.1` | +| `CUOPT_REMOTE_PORT` | `5001` | `5000` (REST) | + +Inline in every solve command (exports do not carry across separate +`tool_call exec` invocations): + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + export CUOPT_REMOTE_HOST=host.openshell.internal && \ + export CUOPT_REMOTE_PORT=5001 && \ + python3 /sandbox/smoke_lp.py' +``` + +Use `bash -lc` so the venv activates. + +## Pre-installed smoke scripts (run as-is for gate checks) + +| Script | When | +|---|---| +| `/sandbox/smoke_lp.py` | Gate 3 — all gRPC LP/MILP/QP | +| `/sandbox/smoke_milp.py` | Extra check for scheduling / INTEGER | +| `/sandbox/smoke_vrp.py` | Routing only — REST, no `CUOPT_REMOTE_*` | + +Expected LP/MILP: log line `Using remote GPU backend`, then +`status=Optimal …`. 
+ +## Gate checklist + +| Step | Evidence | +|---|---| +| Probe returned `grpc` or `rest grpc` | `available:` from probe | +| Solver script in a **file** | `/sandbox/solve.py` or smoke script | +| `bash -lc` + venv + `export CUOPT_REMOTE_*` | in same command as `python3` | +| Log contains `Using remote GPU backend` | paste the line | +| Smoke returned terminal status | e.g. `status=Optimal` | + +## Error → action + +| Symptom | Meaning | Fix | +|---|---|---| +| `cudaErrorInsufficientDriver` without `Using remote GPU backend` | Local solve — env vars missing | Set `CUOPT_REMOTE_*`, `bash -lc`, retry | +| No remote backend log, no CUDA error | Env vars not in same shell | Inline exports in `bash -lc` | +| `Using remote GPU backend` + `Optimal` | Remote path works | Build real model | +| `Using remote GPU backend` + `cudaErrorNoDevice` | Host GPU broken | Operator action | +| Probe `available: grpc` only | Port reachable | Still need env + smoke | + +## Common mistakes (and the fix) + +| Mistake | Fix | +|---|---| +| Treat `cudaErrorInsufficientDriver` (no remote log) as "cuOpt blocked" | Set `CUOPT_REMOTE_*` in the same `bash -lc` command and retry smoke | +| Plan to "set env vars later" | Export before the first smoke test | +| Use REST env vars for LP/MILP | gRPC uses `CUOPT_REMOTE_*` on port 5001; routing uses REST on 5000 — see `references/routing-rest-only.md` | diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md new file mode 100644 index 0000000..b8d3d22 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/routing-rest-only.md @@ -0,0 +1,37 @@ +# Routing (REST only in sandbox) + +Vehicle routing (VRP, TSP, PDP) uses the **REST API** on port **5000**, +not gRPC `CUOPT_REMOTE_*`. 
+ +| Interface | Port | Env vars | +|---|---|---| +| LP / MILP / QP (Python) | 5001 gRPC | `CUOPT_REMOTE_HOST`, `CUOPT_REMOTE_PORT` | +| VRP / routing | 5000 REST | none — use REST client | + +Host: `host.openshell.internal` (not `localhost`). + +## Probe + +If `probe_cuopt.py` shows `rest` in `available:`, REST is reachable. +If only `grpc`, skip VRP smoke — LP/MILP may still work. + +## Smoke + +```bash +bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && \ + python3 /sandbox/smoke_vrp.py' +``` + +Uses pre-installed `cuopt_sh_client` patterns; run as-is for gate checks. + +## Skills after gates + +1. `routing-formulation` +2. `cuopt-routing-api-python` +3. `cuopt-server-api-python` (REST payload shape) + +## Defaults + +- Minimal fleet + cost matrix for first solve; expand after status is + terminal. +- One REST job at a time — see `references/long-running-jobs.md`. diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md new file mode 100644 index 0000000..478b383 --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt-sandbox/references/troubleshooting.md @@ -0,0 +1,37 @@ +# Troubleshooting + +## Symptom → fix + +| Symptom | Likely cause | Action | +|---|---|---| +| `cudaErrorInsufficientDriver`, no remote log | Missing `CUOPT_REMOTE_*` | `references/remote-env-and-smoke.md` | +| `ModuleNotFoundError: cuopt.milp` | Wrong import | `references/python-imports.md` | +| Probe empty `available:` | Host cuOpt down | Report to user; retry cuOpt when service is up | +| VRP fails, LP works | Used gRPC for routing | `references/routing-rest-only.md` | +| Second solve hangs / errors | Overlapping jobs | `references/long-running-jobs.md` | +| `~` path not found | Sandbox path resolution | Use `/sandbox/...` | + +## Script hygiene + +- Put solve logic in `/sandbox/solve.py` (or named script), not inline + one-liners for real models. 
+- Use `bash -lc` with venv + exports in one command. +- For gate checks, run pre-installed `/sandbox/smoke_*.py` unchanged. + +## When cuOpt returns infeasible / timeout + +Report the solver status honestly. You may suggest model relaxations or +clarifying questions — keep the answer grounded in cuOpt status. + +## Operator vs agent + +| Agent fixes | Operator / host | +|---|---| +| Env vars, imports, script layout | GPU driver on host | +| Wrong port (5000 vs 5001) | cuOpt services not running | +| Formulation errors | Network to `host.openshell.internal` | + +## Guardrail skill + +Bundled `cuopt-setup` in `nemoclaw_cuopt_setup.sh` points here for +session start — read this skill (`cuopt-sandbox`) for full gate order. diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md index e57e45b..7c24692 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-from-data-orchestrator/SKILL.md @@ -1,40 +1,53 @@ --- name: optimization-from-data-orchestrator -summary: Coordinate the fast-path workflow for turning uploaded data and a natural-language question into the right optimization interpretation, clarification, cuOpt solve, and user-facing answer. -description: Use when a user uploads or provides data and asks a question that may be answered by optimization. This skill sequences optimization-intent-router, optimization-mode-router, tabular-optimization-ingestion, formulation skills, and cuOpt model-building skills. +version: "26.06.01" +description: Coordinate uploaded data plus a natural-language question into interpretation, clarification, cuOpt solve, and a user-facing answer. 
+license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- # Optimization From Data Orchestrator -Top-level coordinator for the fast path when a user provides data and asks a question that may be optimization. Sequences the supporting skills so the agent does not jump straight from uploaded data into a solver call. +Top-level coordinator when a user provides tabular data and wants a +constructive plan (schedule, assign, allocate, route — any wording). + +**NemoClaw:** read `cuopt-sandbox/references/activation.md` for skill +order and cuOpt-before-heuristic rules. ## When to use -All three must hold: -- the user has provided or is expected to provide data -- the question may be asking for the best / optimal / minimum / maximum decision under constraints -- the request is not yet so fully specified that you can call the solver directly +**Both** must hold: + +- tabular data provided or expected (CSV, etc.) +- user wants a **plan from that data** (any phrasing; minimize/optimal not required) -Skip this skill when the user is clearly asking for non-optimization analytics, the optimization problem is already fully specified mathematically, or the user has already chosen a dedicated replayable/auditable path. +Skip for **analytics-only** requests (summarize, chart, filter), fully +pre-specified math outside this flow, or explicit replayable/auditable path. ## Sequence -Run these in order, but skip any step already settled from context. Default to fast mode; surface replayable/auditable mode only on a real signal (reruns, audit, export, recurring planning). +**Step 0 (NemoClaw — do not skip):** See `cuopt-sandbox` — probe → env → +smoke. No schedule/heuristic output before smoke passes. + +1. **`optimization-intent-router`** — optimization family (LP/MILP/QP/routing) +2. **`optimization-mode-router`** — only if replay/audit/export signals +3. 
**`tabular-optimization-ingestion`** — table roles (interpretation only) +4. **`cuopt-model-mapper`** — clarify if needed, map to cuOpt, solve -1. **`optimization-intent-router`** — decide whether this is optimization at all and which family (LP / MILP / QP / routing). If non-optimization, stop the optimization flow. -2. **`optimization-mode-router`** — *only if* there is a signal that replayability, audit, export, or recurring runs may matter. Otherwise stay in fast mode silently. -3. **`tabular-optimization-ingestion`** — identify row grain and table roles, infer likely objective and constraint fields, refine the family classification if the data clearly supports a different one, and surface any blockers. -4. **`cuopt-model-mapper`** — ask at most the final blocking clarification, then map directly into cuOpt and solve. +Handoffs after step 4: -Family-specific handoffs after step 4: -- LP / MILP / QP → `numerical-optimization-formulation` then `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) -- Routing → `routing-formulation` then `cuopt-routing-api-python` +- LP / MILP / QP → `numerical-optimization-formulation` → `cuopt-numerical-optimization-api-python` +- Routing → `routing-formulation` → `cuopt-routing-api-python` ## Guardrails -- Do not skip intent classification and jump directly to cuOpt from raw data. -- Do not ask a long questionnaire before inspecting the uploaded data. -- Do not trigger replayable/auditable mode by default — only when the user signals reuse, audit, export, or recurring runs. -- Do not let ingestion become solver construction; the steps stay distinct. -- Do not use cuOpt for descriptive analytics tasks. 
+- First solver that emits assignments/schedules must be **cuOpt** after step 0 +- Ingestion steps do not authorize heuristic or greedy stand-ins +- Do not skip intent classification; do not use cuOpt for pure analytics +- One focused clarification beats a long questionnaire diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md index 101564a..f137e58 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-intent-router/SKILL.md @@ -1,7 +1,14 @@ --- name: optimization-intent-router -summary: Detect when a user question should be treated as an optimization problem and route it toward LP, MILP, QP, routing, or non-optimization handling. -description: Use when a user provides data and asks a natural-language business or planning question that may require optimization rather than simple analytics. +version: "26.06.01" +description: Classify whether a data-backed request is LP, MILP, QP, routing, or non-optimization analytics. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- @@ -35,7 +42,29 @@ Your job is to recognize when the request is really asking for an optimization m ## Core rule -Route to optimization only when the user is asking for the **best / optimal / minimum / maximum / least-cost / highest-profit** plan, allocation, assignment, schedule, route, or mix **subject to constraints or tradeoffs**. +Route to optimization when the user wants a **plan produced under +constraints** — assignment, schedule, allocation, route, or mix of +resources — whether they say **feasible/valid**, **minimize/maximize**, or +**best/optimal**. + +Use **structure**, not exact keywords: + +| Signal | Route to cuOpt?
| +|---|---| +| Constructive + constraint tables (uploaded or referenced) | Yes — family TBD (MILP/LP/QP/routing) | +| Minimize / maximize / best / optimal / least cost | Yes | +| Feasible / valid / make it work / respect rules (constructive) | Yes — feasibility = hard constraints | +| Summarize / chart / filter / explain data only | No | + +**NemoClaw sandbox:** When constructive + constraint data → +**`cuopt-sandbox` gates immediately** before any schedule, roster, +assignment, or route output — regardless of whether the user said +minimize or optimal. Do not use a greedy Python builder as a first +deliverable. + +Paraphrase examples (non-exhaustive): "plan the season", "set game times", +"fill the calendar", "assign shifts", "put jobs on machines", "build a +schedule" — same intent. See `cuopt-sandbox/references/intent-and-triggers.md`. If the user is only asking for: - descriptive statistics @@ -52,6 +81,8 @@ then do **not** force the request into optimization. Strong signals: - words like **optimize**, **optimal**, **best**, **maximize**, **minimize** +- **build / create / assign** a schedule, roster, shift plan, or calendar + from structured data (sandbox: treat as MILP even without minimize) - resource tradeoffs: capacity, budget, time, labor, inventory, demand, hours, materials - decision language: how much, how many, which, assign, allocate, route, schedule, choose - explicit constraints: at most, at least, must, cannot exceed, within budget, limited by @@ -63,7 +94,7 @@ Weaker but meaningful signals: - "How should we allocate this?" - "How can we reduce cost while meeting demand?" -When weaker signals appear, inspect whether there are real constraints and decisions. If yes, treat it as optimization. +When weaker signals appear, inspect whether there are real constraints and decisions. If yes, treat it as optimization — and in the NemoClaw sandbox, **`cuopt-sandbox` gates apply immediately** (probe before any schedule/heuristic output). 
## Route classification @@ -98,6 +129,8 @@ Common examples: - workforce scheduling with headcounts - assignment with binary decisions - product counts that must be whole +- **slot/resource scheduling** (games, shifts, appointments → time slots + and resources) — including when the user only says "build a schedule" ### Route to QP Use QP when: diff --git a/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md index 648457e..df33c89 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/optimization-mode-router/SKILL.md @@ -1,7 +1,15 @@ --- name: optimization-mode-router -summary: Decide whether to default to fast direct-to-cuOpt mode or ask whether the user wants replayable/auditable mode for reruns, review, export, or audit. -description: Use when a user asks a question that may be answered by solving an optimization problem from uploaded or provided data, and you need to decide whether to proceed directly to cuOpt or preserve a structured reusable model artifact. +version: "26.06.01" +description: Choose fast direct-to-cuOpt solve versus replayable or auditable model artifact mode. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration +origin: skill-evolution --- # Optimization Mode Router @@ -32,8 +40,14 @@ Read this skill when all of the following are true: ## Default behavior - Default to **Fast mode**. +- Default to **direct cuOpt solve** for one-off requests from uploaded CSVs + (schedule, assignment, allocation, routing) — proceed to + `cuopt-model-mapper` without asking fast vs replayable unless the user + signals audit/export/rerun. - Do **not** ask about replayability/auditability unless there is a real signal that it matters. - Avoid turning a straightforward optimization request into a heavy upfront questionnaire. 
+- **NemoClaw sandbox:** Fast mode means cuOpt after `cuopt-sandbox` gates — + never a custom greedy/heuristic builder as the solve path. ## Two modes @@ -120,17 +134,20 @@ After selecting a mode, hand off based on problem type: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` (or `cuopt-numerical-optimization-api-cli` for MPS inputs) - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) + before any gRPC Python solve - If the request is QP: - use `numerical-optimization-formulation` - then use `cuopt-numerical-optimization-api-python` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` (gates + remote env) + before any gRPC Python solve - If the request is routing (VRP / TSP / PDP): - use `routing-formulation` - then use `cuopt-routing-api-python` - - in sandbox contexts, follow `cuopt-sandbox` first when required + - in sandbox contexts, follow `cuopt-sandbox` (gates + REST probe) + before any REST routing solve - If the user is asking about server usage or deployment rather than solving a model directly: - use `cuopt-server-common` or `cuopt-server-api-python` as appropriate diff --git a/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md index c1e14c3..b285bec 100644 --- a/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md +++ b/cuopt_on_nemoclaw/openclaw-skills/tabular-optimization-ingestion/SKILL.md @@ -1,7 +1,14 @@ --- name: tabular-optimization-ingestion -summary: Inspect uploaded or provided tabular data, infer likely optimization structure, and identify the smallest set of clarifications needed before building a cuOpt model.
-description: Use when a user provides CSV, Excel, JSON-like tables, or similar structured data and asks a question that may become an LP, MILP, QP, or routing problem. +version: "26.06.01" +description: Infer optimization structure from uploaded tables and identify minimal clarifications before cuOpt modeling. +license: Apache-2.0 +metadata: + author: NVIDIA cuOpt Team + tags: + - cuopt + - nemoclaw + - orchestration origin: skill-evolution --- @@ -13,6 +20,12 @@ The purpose of this skill is to bridge the gap between messy uploaded data and s This skill does **not** solve the optimization problem itself. It inspects the data, infers likely modeling roles, and identifies what still needs clarification. +**It does not authorize heuristic, greedy, or backtracking schedules as +answers.** In the NemoClaw sandbox, read `cuopt-sandbox`: the first solver +that produces assignments or a schedule must be cuOpt after probe → env → +smoke gates pass. Ingestion output is a modeling interpretation (entities, +objective fields, constraints) — never a completed plan. + This skill refines the optimization interpretation using the uploaded data; it does not replace the earlier intent decision unless the data clearly contradicts it. ## Purpose @@ -144,7 +157,6 @@ Examples: - Are demands mandatory or forecast-only? - Must decisions be integers? - Are these time windows hard constraints? -- Is this travel matrix symmetric? - Can unmet demand be allowed with penalty? - Is profit net profit or revenue only? 
@@ -232,7 +244,22 @@ Likely interpretation: - travel table defines movement cost/time - likely problem family = routing -### Example 3: historical transaction table +### Example 3: time-slot / resource assignment (scheduling MILP) + +Files include patterns such as: +- `games.csv` or `jobs.csv` — items to place (events, tasks, orders) +- `time_slots.csv` or `shifts.csv` — when placement can occur +- `courts.csv`, `machines.csv`, or `rooms.csv` — resources +- `teams.csv` or `workers.csv` — entities tied to shared agents (coaches, operators) +- `*_unavailability.csv` — blocked (resource, slot) or (agent, slot) pairs + +Likely interpretation: +- decision = assign each item to a (slot, resource) or similar binary/integer placement +- hard constraints = no double-booking, unavailability, capacity, one game per team per slot +- likely problem family = **MILP** (even if user only says "build a schedule" or "valid plan") +- **NemoClaw:** read `optimization-from-data-orchestrator` + `cuopt-sandbox` before any custom scheduler code + +### Example 4: historical transaction table File includes: - `sales_history.csv` with `order_id`, `date`, `region`, `revenue`, `units_sold` diff --git a/cuopt_on_nemoclaw/smoke_lp.py b/cuopt_on_nemoclaw/smoke_lp.py new file mode 100644 index 0000000..504ee47 --- /dev/null +++ b/cuopt_on_nemoclaw/smoke_lp.py @@ -0,0 +1,73 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Remote LP smoke test for the NemoClaw cuOpt sandbox. + +Verifies gRPC remote execution for LP (not MILP, not routing, not local CUDA). + +Requires CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT in the environment when +Python starts (set them in the same ``bash -lc`` line as this script). + +Success markers in combined stdout/stderr: + Using remote GPU backend + status=Optimal objective=10.0 + +Exit code: 0 on success, 1 on failure.
+""" + +from __future__ import annotations + +import sys +from os import environ + +DEFAULT_HOST = "host.openshell.internal" +DEFAULT_PORT = "5001" + +OK_STATUSES = frozenset({"Optimal", "PrimalFeasible"}) + + +def _require_remote_env() -> None: + host = environ.get("CUOPT_REMOTE_HOST") + port = environ.get("CUOPT_REMOTE_PORT") + if not host or not port: + print( + "error: CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT must be set " + "before Python starts.\n" + "example:\n" + " bash -lc 'source /sandbox/.openclaw-data/cuopt/bin/activate && " + f"export CUOPT_REMOTE_HOST={DEFAULT_HOST} && " + f"export CUOPT_REMOTE_PORT={DEFAULT_PORT} && " + "python3 /sandbox/smoke_lp.py'", + file=sys.stderr, + ) + sys.exit(1) + + +def main() -> int: + _require_remote_env() + + from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE + from cuopt.linear_programming.solver_settings import SolverSettings + + p = Problem("smoke_lp") + x = p.addVariable(lb=0, vtype=CONTINUOUS, name="x") + y = p.addVariable(lb=0, vtype=CONTINUOUS, name="y") + p.addConstraint(x + y <= 4) + p.addConstraint(x <= 2) + p.addConstraint(y <= 3) + p.setObjective(3 * x + 2 * y, sense=MAXIMIZE) + p.solve(SolverSettings()) + + status = p.Status.name + if status not in OK_STATUSES: + print(f"status={status} FAIL", file=sys.stderr) + return 1 + + print( + f"status={status} objective={p.ObjValue} " + f"x={x.getValue()} y={y.getValue()}" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/cuopt_on_nemoclaw/smoke_milp.py b/cuopt_on_nemoclaw/smoke_milp.py new file mode 100644 index 0000000..1c85b68 --- /dev/null +++ b/cuopt_on_nemoclaw/smoke_milp.py @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Remote MILP smoke test for the NemoClaw cuOpt sandbox. 
+ +MILP uses the same ``cuopt.linear_programming.problem.Problem`` class as LP +with ``vtype=INTEGER`` — there is no ``from cuopt import milp``. + +Requires CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT when Python starts. + +Success markers: + Using remote GPU backend + status=Optimal (or FeasibleFound) + +Exit code: 0 on success, 1 on failure. +""" + +from __future__ import annotations + +import sys +from os import environ + +OK_STATUSES = frozenset({"Optimal", "FeasibleFound", "PrimalFeasible"}) + + +def _require_remote_env() -> None: + if not environ.get("CUOPT_REMOTE_HOST") or not environ.get("CUOPT_REMOTE_PORT"): + print( + "error: export CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT before " + "running (see /sandbox/.openclaw/skills/cuopt-sandbox/references/remote-env-and-smoke.md)", + file=sys.stderr, + ) + sys.exit(1) + + +def main() -> int: + _require_remote_env() + + from cuopt.linear_programming.problem import Problem, INTEGER, MAXIMIZE + from cuopt.linear_programming.solver_settings import SolverSettings + + # Need at least one constraint so the CSR matrix (A_offsets) is built. + # Tiny 2-variable integer problem (same shape as milp_basic, smaller nums). 
+ p = Problem("smoke_milp") + x = p.addVariable(vtype=INTEGER, lb=0, ub=10, name="x") + y = p.addVariable(vtype=INTEGER, lb=0, ub=10, name="y") + p.addConstraint(x + y <= 4) + p.addConstraint(x <= 2) + p.setObjective(x + 2 * y, sense=MAXIMIZE) + settings = SolverSettings() + settings.set_parameter("time_limit", 60) + p.solve(settings) + + status = p.Status.name + if status not in OK_STATUSES: + print(f"status={status} FAIL", file=sys.stderr) + return 1 + + print(f"status={status} objective={p.ObjValue} x={x.getValue()} y={y.getValue()}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/cuopt_on_nemoclaw/smoke_vrp.py b/cuopt_on_nemoclaw/smoke_vrp.py new file mode 100644 index 0000000..7d7fdb2 --- /dev/null +++ b/cuopt_on_nemoclaw/smoke_vrp.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""REST VRP smoke test for the NemoClaw cuOpt sandbox. + +Routing uses REST on port 5000 (not CUOPT_REMOTE_* gRPC vars). +Do not use ``from cuopt import routing`` — there is no GPU in the sandbox. + +Env (defaults shown): + CUOPT_SERVER_HOST=host.openshell.internal + CUOPT_SERVER_PORT=5000 + +Success markers: + status=0 + solution_cost present in solver_response + +Exit code: 0 on success, 1 on failure. +""" + +from __future__ import annotations + +import json +import sys +import time +from os import environ +from typing import Any + +DEFAULT_HOST = "host.openshell.internal" +DEFAULT_PORT = "5000" + +# Minimal valid payload (same shape as vrp_minimal cookbook). 
+PAYLOAD: dict[str, Any] = { + "cost_matrix_data": { + "data": { + "0": [ + [0, 1, 1], + [1, 0, 1], + [1, 1, 0], + ] + } + }, + "task_data": {"task_locations": [1, 2]}, + "fleet_data": {"vehicle_locations": [[0, 0]]}, + "solver_config": {"time_limit": 30}, +} + + +def _repoll(client: Any, solution: dict[str, Any], tries: int = 120) -> dict[str, Any]: + if "reqId" not in solution or "response" in solution: + return solution + req_id = solution["reqId"] + for _ in range(tries): + solution = client.repoll(req_id, response_type="dict") + if "response" in solution: + return solution + time.sleep(1) + return solution + + +def main() -> int: + host = environ.get("CUOPT_SERVER_HOST", DEFAULT_HOST) + port = environ.get("CUOPT_SERVER_PORT", DEFAULT_PORT) + + from cuopt_sh_client import CuOptServiceSelfHostClient + + client = CuOptServiceSelfHostClient( + ip=host, + port=str(port), + polling_timeout=60, + timeout_exception=False, + ) + solution = client.get_optimized_routes(PAYLOAD) + solution = _repoll(client, solution) + + if "response" not in solution: + print( + "error: no response from REST VRP (still polling or server error)", + file=sys.stderr, + ) + print(json.dumps(solution, indent=2), file=sys.stderr) + return 1 + + sr = solution["response"].get("solver_response", {}) + status = sr.get("status") + cost = sr.get("solution_cost") + if status != 0: + print(f"status={status} FAIL", file=sys.stderr) + print(json.dumps(solution, indent=2), file=sys.stderr) + return 1 + + print(f"status={status} solution_cost={cost} host={host} port={port}") + return 0 + + +if __name__ == "__main__": + sys.exit(main())