From bfeb3e4c24985c64f9aca4c65e40d49195f4f88c Mon Sep 17 00:00:00 2001
From: jichuanh <jichuanh@nvidia.com>
Date: Wed, 27 May 2026 16:14:42 +0000
Subject: [PATCH 1/2] [Diag] Revert app_launcher torch-defer (#5633) to
 reintroduce atfork bug

Surgical hunk-level revert of commit a5eb9add4c3 ("Fixes OmniHub startup
in Docker tests", #5633) applied only to
source/isaaclab/isaaclab/app/app_launcher.py.

Removes the defer-torch mechanism so that `import torch` (and
transitively `import numpy`) happens in AppLauncher.__init__ BEFORE
SimulationApp's fork() through libomni.platforminfo. If the resolved
numpy is 2.3.5, its bundled OpenBLAS pthread_atfork handler will SIGSEGV
the canary jobs.

Unlike the prior whole-file revert, this preserves PR #5449's
`--deterministic` CLI flag and RTX-determinism logic, which landed after
#5633 and was wiped as collateral damage in the previous attempt.

Companion to the relocated diagnostic conftest in the next commit:
the repo-root conftest.py prints the resolved numpy version + bundled
OpenBLAS filename so we can confirm which numpy actually landed.

Refs PR #5656 (numpy!=2.3.5 exclusion fix being validated).
---
 source/isaaclab/isaaclab/app/app_launcher.py | 21 ++++++--------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/source/isaaclab/isaaclab/app/app_launcher.py b/source/isaaclab/isaaclab/app/app_launcher.py
index 2bdb8a08932d..dbfac672daf7 100644
--- a/source/isaaclab/isaaclab/app/app_launcher.py
+++ b/source/isaaclab/isaaclab/app/app_launcher.py
@@ -242,7 +242,6 @@ def __init__(self, launcher_args: argparse.Namespace | dict | None = None, **kwa
         # Exposed to train scripts
         self.device_id: int  # device ID for GPU simulation (defaults to 0)
         self.device: str  # resolved device string (e.g. "cuda:0" or "cpu")
-        self._deferred_cuda_device_id: int | None = None
         self.local_rank: int  # local rank of GPUs in the current node
         self.global_rank: int  # global rank for multi-node training
 
@@ -251,7 +250,6 @@ def __init__(self, launcher_args: argparse.Namespace | dict | None = None, **kwa
 
         # Create SimulationApp, passing the resolved self._config to it for initialization
         self._create_app()
-        self._set_deferred_cuda_device()
         # Load IsaacSim extensions
         self._load_extensions()
 
@@ -1007,26 +1005,19 @@ def _resolve_device_settings(self, launcher_args: dict):
         launcher_args["physics_gpu"] = self.device_id
         launcher_args["active_gpu"] = self.device_id
 
-        # Defer importing torch until after SimulationApp starts.  Importing
-        # torch can import NumPy/OpenBLAS, whose at-fork handlers can crash
-        # Kit's platform-info fork during startup.
+        # Set the current CUDA device early so that physics backends (e.g. Newton/Warp)
+        # that allocate on the "current" device during initialization get the correct GPU.
+        # Without this, all ranks may default to cuda:0 for early allocations.
         if "cuda" in device:
-            self._deferred_cuda_device_id = self.device_id
+            import torch
+
+            torch.cuda.set_device(self.device_id)
 
         # Store the resolved device string for downstream consumers (e.g. sim_launcher)
         self.device = device
 
         logger.info("Using device: %s", device)
 
-    def _set_deferred_cuda_device(self) -> None:
-        """Set the current torch CUDA device after Kit startup."""
-        if self._deferred_cuda_device_id is None:
-            return
-
-        import torch
-
-        torch.cuda.set_device(self._deferred_cuda_device_id)
-
     def _resolve_experience_file(self, launcher_args: dict):
         """Resolve experience file related settings."""
         # Check if input keywords contain an 'experience' file setting

From a6332aaf59fb5e75109604466db363b51d140fde Mon Sep 17 00:00:00 2001
From: jichuanh <jichuanh@nvidia.com>
Date: Wed, 27 May 2026 06:51:04 +0000
Subject: [PATCH 2/2] [Diag] Add numpy/OpenBLAS dep-manifest conftest at repo
 root
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prints the resolved numpy version + bundled OpenBLAS .so filename at pytest
session start. Located at the repo root so every subprocess pytest spawned
by tools/conftest.py discovers and loads it, regardless of which package's
tests are running.

The repo root has no isaaclab_* subdirectories, so pytest's
--import-mode=prepend placing the repo root on sys.path does NOT shadow the
real pip-installed packages — unlike source/conftest.py, where
source/<pkg>/ (no __init__.py) gets promoted to a namespace package and
breaks `from isaaclab_teleop import IsaacTeleopCfg`-style imports.

Companion to the previous commit (app_launcher torch-defer revert).
---
 conftest.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 conftest.py

diff --git a/conftest.py b/conftest.py
new file mode 100644
index 000000000000..553e668cbb0b
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Dep-manifest diagnostic: prints numpy version + bundled OpenBLAS hash at pytest session start.
+
+Located at the repo root so every subprocess pytest (driven by
+``tools/conftest.py``) discovers and loads it, regardless of which package's
+tests are running. The repo root has no ``isaaclab_*`` subdirectories, so
+``importmode=prepend`` placing the repo root on ``sys.path`` does NOT shadow
+the real pip-installed IsaacLab packages — unlike ``source/conftest.py``,
+where ``source/<pkg>/`` (no ``__init__.py``) would be promoted to a namespace
+package and break ``from isaaclab_teleop import IsaacTeleopCfg`` style imports.
+
+Importing numpy here registers its vendored OpenBLAS ``pthread_atfork``
+handler in the same process that later calls ``fork()`` via
+``SimulationApp()``. The print output identifies which numpy + OpenBLAS bundle
+actually landed in each CI test container.
+"""
+
+import os
+
+import numpy
+
+print(f"\n[dep-manifest] numpy {numpy.__version__}", flush=True)
+_libs_dir = os.path.join(os.path.dirname(numpy.__file__), os.pardir, "numpy.libs")
+if os.path.isdir(_libs_dir):
+    for _f in sorted(os.listdir(_libs_dir)):
+        if "openblas" in _f.lower():
+            print(f"[dep-manifest] bundled openblas: {_f}", flush=True)