From 6c07dd4b7154a7b7bb4d6131b3ce71c906c04a58 Mon Sep 17 00:00:00 2001
From: wooway777 <wooway777@gmail.com>
Date: Tue, 30 Jun 2026 21:18:08 +0800
Subject: [PATCH] fix: optimize preload and cuda path

---
 python/infinicore/_preload.py | 19 ++++++++-----------
 xmake.lua                     | 19 +++++++++++++++----
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/python/infinicore/_preload.py b/python/infinicore/_preload.py
index b012409d4..fda7fbb46 100644
--- a/python/infinicore/_preload.py
+++ b/python/infinicore/_preload.py
@@ -167,6 +167,10 @@ def _should_preload_device(device_type: str) -> bool:
     for env_var in env_vars:
         if os.getenv(env_var):
             return True
+    if device_type == "HYGON":
+        dtk_root = os.getenv("DTK_ROOT") or "/opt/dtk"
+        if os.path.isdir(dtk_root):
+            return True
     return False
 
 
@@ -181,6 +185,7 @@ def preload_device(device_type: str) -> None:
         preload_hpcc()
     elif device_type == "HYGON":
         preload_torch_hip()
+        preload_flash_attn()
     # Add other device preload functions here as needed:
     # elif device_type == "ASCEND":
     #     preload_ascend()
@@ -194,17 +199,9 @@ def preload() -> None:
     This function detects available device types and preloads their runtime libraries
     if the environment indicates they are needed.
     """
-    # Always try torch HIP preload first (best-effort, no-op if torch/HIP is absent).
-    try:
-        preload_torch_hip()
-    except Exception:
-        pass
-    try:
-        preload_flash_attn()
-    except Exception:
-        pass
-
-    # Device types that may require preload
+    # Device types that may require preload. Keep Hygon-only preloads gated by
+    # Hygon environment markers so other CUDA-compatible platforms do not load
+    # unrelated torch/flash-attn libraries during package import.
     device_types = [
         "METAX",  # HPCC/METAX
         "HYGON",
diff --git a/xmake.lua b/xmake.lua
index 0316680da..fcbcf423d 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -515,11 +515,22 @@ target("infiniop")
 
     local public_cuda_root = get_config("cuda") or os.getenv("CUDA_HOME") or os.getenv("CUDA_PATH")
     if public_cuda_root and public_cuda_root ~= "" then
-        add_includedirs(path.join(public_cuda_root, "include"))
-        add_linkdirs(
+        for _, include_dir in ipairs({
+            path.join(public_cuda_root, "include"),
+            path.join(public_cuda_root, "targets", "x86_64-linux", "include"),
+        }) do
+            if os.isdir(include_dir) then
+                add_includedirs(include_dir)
+            end
+        end
+        for _, link_dir in ipairs({
             path.join(public_cuda_root, "lib64"),
-            path.join(public_cuda_root, "targets", "x86_64-linux", "lib")
-        )
+            path.join(public_cuda_root, "targets", "x86_64-linux", "lib"),
+        }) do
+            if os.isdir(link_dir) then
+                add_linkdirs(link_dir)
+            end
+        end
     elseif has_config("nv-gpu") then
         add_includedirs(path.join("/usr/local/cuda", "include"))
     end