Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ book
/target
**/.vscode
.devcontainer
.codex
rustc-ice-*.txt
.nix-driver-libs
.claude
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 30 additions & 6 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,21 @@ pub struct CudaBuilder {
pub final_module_path: Option<PathBuf>,
}

/// Picks the architecture a freshly constructed `CudaBuilder` targets.
///
/// The decision is driven by the `LLVM_CONFIG_19` environment variable, the same signal
/// `rustc_codegen_nvvm`'s build script uses to select LLVM 19 support:
///
/// * set (to any value): the lowest Blackwell compute capability, `Compute100`, is chosen.
///   Pre-Blackwell archs use the legacy LLVM 7 NVVM dialect, so pairing them with an
///   LLVM 19 backend is never the right choice.
/// * unset: the regular `NvvmArch::default()` is used.
///
/// Callers can still override the result via [`CudaBuilder::arch`].
fn default_arch() -> NvvmArch {
    match env::var_os("LLVM_CONFIG_19") {
        // Only the presence of the variable matters; its value is not inspected.
        Some(_) => NvvmArch::Compute100,
        None => NvvmArch::default(),
    }
}

impl CudaBuilder {
pub fn new(path_to_crate_root: impl AsRef<Path>) -> Self {
Self {
Expand All @@ -204,7 +219,7 @@ impl CudaBuilder {
ptx_file_copy_path: None,
generate_line_info: true,
nvvm_opts: true,
arch: NvvmArch::default(),
arch: default_arch(),
ftz: false,
fast_sqrt: false,
fast_div: false,
Expand Down Expand Up @@ -355,6 +370,7 @@ impl CudaBuilder {
/// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
pub fn build(self) -> Result<PathBuf, CudaBuilderError> {
println!("cargo:rerun-if-changed={}", self.path_to_crate.display());
println!("cargo:rerun-if-env-changed=LLVM_CONFIG_19");
let path = invoke_rustc(&self)?;
if let Some(copy_path) = self.ptx_file_copy_path {
std::fs::copy(path, &copy_path).map_err(CudaBuilderError::FailedToCopyPtxFile)?;
Expand Down Expand Up @@ -550,13 +566,21 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> {

let target_dir = workspace_dir.join("target").join("cuda-builder-codegen");

let status = Command::new("cargo")
.args(["build", "-p", "rustc_codegen_nvvm"])
let mut cmd = Command::new("cargo");
cmd.args(["build", "-p", "rustc_codegen_nvvm"])
.arg("--target-dir")
.arg(&target_dir)
.current_dir(&workspace_dir)
.status()
.ok()?;
.current_dir(&workspace_dir);

// Propagate the `llvm19` cargo feature to the nested build when the surrounding
// shell is configured for LLVM 19 (signalled by `LLVM_CONFIG_19`). Without this,
// `rustc_codegen_nvvm`'s build.rs defaults to the LLVM 7 path and falls through
// to the prebuilt LLVM 7 download, which fails on Linux.
if env::var_os("LLVM_CONFIG_19").is_some() {
cmd.args(["--features", "llvm19"]);
}

let status = cmd.status().ok()?;

if !status.success() {
return None;
Expand Down
8 changes: 8 additions & 0 deletions crates/cust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,12 @@ fn main() {
println!("cargo::rustc-cfg=cuGraphGetEdges_v2");
println!("cargo::rustc-cfg=cuCtxCreate_v4");
}

// In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union.
// Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a
// direct `id` field. This cfg gates the struct initialization syntax accordingly.
println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)");
if driver_version >= 13020 {
println!("cargo::rustc-cfg=cuMemLocation_anon_id");
}
}
15 changes: 15 additions & 0 deletions crates/cust/src/memory/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -693,6 +696,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -735,6 +741,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down Expand Up @@ -777,6 +786,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand All @@ -801,6 +813,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down
1 change: 1 addition & 0 deletions crates/cust_raw/build/cuda_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ impl CudaSdk {
vec![
cuda_root.join("nvvm").join("bin"),
cuda_root.join("nvvm").join("lib64"),
cuda_root.join("nvvm").join("lib"),
]
};
let library_dirs = Self::normalize_dirpaths(search_dirs);
Expand Down
8 changes: 6 additions & 2 deletions crates/cust_raw/build/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,12 @@ fn main() {
println!("cargo::rustc-link-search=native={}", libdir.display());
}
println!("cargo::rustc-link-lib=dylib=nvvm");
// Handle libdevice support.
fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc"))
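// `fs::copy` copies the source file's permission bits to the destination.
// When libdevice.10.bc comes from the Nix store (mode 0444), the copy in
// OUT_DIR ends up read-only as well, so re-running this build can't
// overwrite it. Remove the previous copy first (ignoring a missing file).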
let dest = outdir.join("libdevice.bc");
let _ = fs::remove_file(&dest);
fs::copy(sdk.libdevice_bitcode_path(), &dest)
.expect("Cannot copy libdevice bitcode file.");
}
}
Expand Down
32 changes: 30 additions & 2 deletions crates/nvvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{
ffi::{CStr, CString},
fmt::Display,
mem::MaybeUninit,
ptr::null_mut,
str::FromStr,
};

Expand Down Expand Up @@ -325,6 +324,10 @@ pub enum NvvmArch {
Compute89,
Compute90,
Compute90a,
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. This is also the default arch
/// `cuda_builder` picks when the backend is built with `LLVM_CONFIG_19` set.
Compute100,
Compute100f,
Compute100a,
Expand Down Expand Up @@ -448,6 +451,14 @@ impl NvvmArch {
self.capability_value() % 10
}

/// Reports whether this target is compiled through NVVM's modern IR dialect instead of
/// the legacy LLVM 7 dialect.
///
/// The cutoff is `compute_100`: CUDA 13.2 documents the modern dialect as
/// Blackwell-and-later only, and Blackwell begins at that capability. Every arch whose
/// capability value is 100 or higher therefore returns `true`.
pub fn uses_modern_ir_dialect(&self) -> bool {
    let capability = self.capability_value();
    capability >= 100
}

/// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
/// `Compute90a`).
pub fn target_feature(&self) -> &'static str {
Expand Down Expand Up @@ -739,7 +750,24 @@ impl NvvmProgram {
/// Verify the program without actually compiling it. In the case of invalid IR, you can find
/// more detailed error info by calling [`compiler_log`](Self::compiler_log).
pub fn verify(&self) -> Result<(), NvvmError> {
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
self.verify_with_options(&[])
}

/// Verifies the program like [`verify`](Self::verify), but hands the verifier the same
/// `NvvmOption`s that will later be passed to [`compile`](Self::compile).
///
/// Forwarding the user-selected `-arch=compute_XXX` matters in particular for
/// CUDA 12.9+ / LLVM 19 bitcode: without it the verifier can fall back to the legacy
/// LLVM 7 parser and reject modern-dialect bitcode that would otherwise compile fine.
///
/// As with `verify`, more detailed diagnostics for invalid IR are available through
/// [`compiler_log`](Self::compiler_log).
pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
    // Render every option as a NUL-terminated string. `nul_terminated` owns the bytes
    // that `raw_options` points into, so it has to stay alive until the call returns.
    let nul_terminated: Vec<String> = options.iter().map(|opt| format!("{opt}\0")).collect();
    let mut raw_options: Vec<_> = nul_terminated
        .iter()
        .map(|text| text.as_ptr().cast())
        .collect();
    let option_count = nul_terminated.len() as i32;

    // SAFETY: each pointer in `raw_options` refers to a NUL-terminated buffer owned by
    // `nul_terminated`, and both vectors outlive the call. `self.raw` is presumably a
    // valid program handle maintained by `NvvmProgram` — not visible here, confirm.
    unsafe {
        nvvm_sys::nvvmVerifyProgram(self.raw, option_count, raw_options.as_mut_ptr())
            .to_result()
    }
}
}

Expand Down
4 changes: 4 additions & 0 deletions crates/rustc_codegen_nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ readme = "../../README.md"
[lib]
crate-type = ["dylib"]

[features]
default = []
llvm19 = []

[dependencies]
nvvm = { version = "0.1", path = "../nvvm" }
rustc-demangle = "0.1.24"
Expand Down
Loading
Loading