From 94821d7037301a73cd43e039d42f1669084a7aa6 Mon Sep 17 00:00:00 2001 From: Aleksandr Dremov Date: Fri, 6 Mar 2026 15:44:03 +0100 Subject: [PATCH 1/4] Change node-type argument to accept multiple values --- csub.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csub.py b/csub.py index db3625d..9e360b2 100644 --- a/csub.py +++ b/csub.py @@ -56,7 +56,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--secret-name", type=str, help="Override RUNAI_SECRET_NAME from the env file") parser.add_argument("--pvc", type=str, help="Override SCRATCH_PVC from the env file") parser.add_argument("--backofflimit", type=int, default=0, help="Retries before marking a training job as failed") - parser.add_argument("--node-type", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool to target") + parser.add_argument("--node-type", nargs="*", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool to target") parser.add_argument("--host-ipc", action="store_true", help="Share the host IPC namespace") parser.add_argument("--large-shm", action="store_true", help="Request a larger /dev/shm") return parser @@ -172,8 +172,8 @@ def build_runai_command( cmd.append("--large-shm") if args.node_type: - cmd.extend(["--node-pools", args.node_type]) - if args.node_type in {"h200", "h100"} and not args.train: + cmd.extend(["--node-pools", ','.join(args.node_type)]) + if any(i in {"h200", "h100"} for i in args.node_type) and not args.train: cmd.append("--preemptible") if distributed: From 1595583a785755316c8f51069f5729dbea9375b1 Mon Sep 17 00:00:00 2001 From: Aleksandr Dremov Date: Fri, 6 Mar 2026 15:47:10 +0100 Subject: [PATCH 2/4] Clarify help text for --node-type argument Updated help text for --node-type argument to clarify that multiple values are accepted and scheduling will occur based on the first fitting pool. --- csub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csub.py b/csub.py index 9e360b2..18ff8e9 100644 --- a/csub.py +++ b/csub.py @@ -56,7 +56,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--secret-name", type=str, help="Override RUNAI_SECRET_NAME from the env file") parser.add_argument("--pvc", type=str, help="Override SCRATCH_PVC from the env file") parser.add_argument("--backofflimit", type=int, default=0, help="Retries before marking a training job as failed") - parser.add_argument("--node-type", nargs="*", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool to target") + parser.add_argument("--node-type", nargs="*", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool to target. Multiple values are accepted, will schedule to the first pool where fits") parser.add_argument("--host-ipc", action="store_true", help="Share the host IPC namespace") parser.add_argument("--large-shm", action="store_true", help="Request a larger /dev/shm") return parser From a87ca447358c4e7a5d80d1aed03e1974b80d0c3f Mon Sep 17 00:00:00 2001 From: Aleksandr Dremov Date: Fri, 6 Mar 2026 15:49:12 +0100 Subject: [PATCH 3/4] Update csub.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- csub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csub.py b/csub.py index 18ff8e9..d2fb95e 100644 --- a/csub.py +++ b/csub.py @@ -56,7 +56,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--secret-name", type=str, help="Override RUNAI_SECRET_NAME from the env file") parser.add_argument("--pvc", type=str, help="Override SCRATCH_PVC from the env file") parser.add_argument("--backofflimit", type=int, default=0, help="Retries before marking a training job as failed") - parser.add_argument("--node-type", nargs="*", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool to target. Multiple values are accepted, will schedule to the first pool where fits") + parser.add_argument("--node-type", nargs="*", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool(s) to target. Multiple values are accepted; the job will be scheduled on the first pool where it fits") parser.add_argument("--host-ipc", action="store_true", help="Share the host IPC namespace") parser.add_argument("--large-shm", action="store_true", help="Request a larger /dev/shm") return parser From 19c81f7c529cec38262c1d4f734a222801fe823b Mon Sep 17 00:00:00 2001 From: Aleksandr Dremov Date: Fri, 6 Mar 2026 15:49:33 +0100 Subject: [PATCH 4/4] Update csub.py --- csub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csub.py b/csub.py index d2fb95e..ac1f668 100644 --- a/csub.py +++ b/csub.py @@ -56,7 +56,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--secret-name", type=str, help="Override RUNAI_SECRET_NAME from the env file") parser.add_argument("--pvc", type=str, help="Override SCRATCH_PVC from the env file") parser.add_argument("--backofflimit", type=int, default=0, help="Retries before marking a training job as failed") - parser.add_argument("--node-type", nargs="*", type=str, choices=["", "v100", "h100", "h200", "default", "a100-40g"], default="", help="GPU node pool(s) to target. Multiple values are accepted; the job will be scheduled on the first pool where it fits") + parser.add_argument("--node-type", nargs="*", type=str, choices=["v100", "h100", "h200", "default", "a100-40g"], default=[], help="GPU node pool(s) to target. Multiple values are accepted; the job will be scheduled on the first pool where it fits") parser.add_argument("--host-ipc", action="store_true", help="Share the host IPC namespace") parser.add_argument("--large-shm", action="store_true", help="Request a larger /dev/shm") return parser