diff --git a/.gitignore b/.gitignore index cf9e503c..641bcefd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ book /target **/.vscode .devcontainer +.codex +rustc-ice-*.txt +.nix-driver-libs +.claude diff --git a/Cargo.lock b/Cargo.lock index 7a9496b6..1afa046a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3591,6 +3591,7 @@ version = "0.1.0" dependencies = [ "cuda_builder", "cust", + "cust_raw", "nanorand", ] diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index 7e33ab13..b0c4780b 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -196,6 +196,21 @@ pub struct CudaBuilder { pub final_module_path: Option<PathBuf>, } +/// Default arch for new `CudaBuilder`s. +/// +/// When the backend is being built with LLVM 19 support (detected via the `LLVM_CONFIG_19` +/// env var — the same signal `rustc_codegen_nvvm`'s build script uses), default to the +/// lowest Blackwell compute capability (`Compute100`). Pre-Blackwell archs use the legacy +/// LLVM 7 NVVM dialect, so pairing them with an LLVM 19 backend is never the right choice. +/// Callers can still override via [`CudaBuilder::arch`]. +fn default_arch() -> NvvmArch { + if env::var_os("LLVM_CONFIG_19").is_some() { + NvvmArch::Compute100 + } else { + NvvmArch::default() + } +} + impl CudaBuilder { pub fn new(path_to_crate_root: impl AsRef<Path>) -> Self { Self { @@ -204,7 +219,7 @@ impl CudaBuilder { ptx_file_copy_path: None, generate_line_info: true, nvvm_opts: true, - arch: NvvmArch::default(), + arch: default_arch(), ftz: false, fast_sqrt: false, fast_div: false, @@ -355,6 +370,7 @@ impl CudaBuilder { /// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
pub fn build(self) -> Result<PathBuf, CudaBuilderError> { println!("cargo:rerun-if-changed={}", self.path_to_crate.display()); + println!("cargo:rerun-if-env-changed=LLVM_CONFIG_19"); let path = invoke_rustc(&self)?; if let Some(copy_path) = self.ptx_file_copy_path { std::fs::copy(path, &copy_path).map_err(CudaBuilderError::FailedToCopyPtxFile)?; @@ -550,13 +566,21 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> { let target_dir = workspace_dir.join("target").join("cuda-builder-codegen"); - let status = Command::new("cargo") - .args(["build", "-p", "rustc_codegen_nvvm"]) + let mut cmd = Command::new("cargo"); + cmd.args(["build", "-p", "rustc_codegen_nvvm"]) .arg("--target-dir") .arg(&target_dir) - .current_dir(&workspace_dir) - .status() - .ok()?; + .current_dir(&workspace_dir); + + // Propagate the llvm19 cargo feature to the nested build when the surrounding + // shell is configured for LLVM 19 (signalled by LLVM_CONFIG_19). Without this + // rustc_codegen_nvvm's build.rs defaults to the LLVM 7 path and falls through + // to the prebuilt LLVM 7 download, which fails on Linux. + if env::var_os("LLVM_CONFIG_19").is_some() { + cmd.args(["--features", "llvm19"]); + } + + let status = cmd.status().ok()?; if !status.success() { return None; diff --git a/crates/cust/build.rs b/crates/cust/build.rs index 1b1f8674..d002cdc6 100644 --- a/crates/cust/build.rs +++ b/crates/cust/build.rs @@ -40,4 +40,12 @@ fn main() { println!("cargo::rustc-cfg=cuGraphGetEdges_v2"); println!("cargo::rustc-cfg=cuCtxCreate_v4"); } + + // In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union. + // Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a + // direct `id` field. This cfg gates the struct initialization syntax accordingly.
+ println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)"); + if driver_version >= 13020 { + println!("cargo::rustc-cfg=cuMemLocation_anon_id"); + } } diff --git a/crates/cust/src/memory/unified.rs b/crates/cust/src/memory/unified.rs index c45f5ce3..86829a38 100644 --- a/crates/cust/src/memory/unified.rs +++ b/crates/cust/src/memory/unified.rs @@ -647,6 +647,9 @@ pub trait MemoryAdvise: private::Sealed { #[cfg(cuMemPrefetchAsync_v2)] driver_sys::CUmemLocation { type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, + #[cfg(cuMemLocation_anon_id)] + __bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id }, + #[cfg(not(cuMemLocation_anon_id))] id, }, #[cfg(not(cuMemPrefetchAsync_v2))] @@ -693,6 +696,9 @@ pub trait MemoryAdvise: private::Sealed { #[cfg(cuMemPrefetchAsync_v2)] driver_sys::CUmemLocation { type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, + #[cfg(cuMemLocation_anon_id)] + __bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id }, + #[cfg(not(cuMemLocation_anon_id))] id, }, #[cfg(not(cuMemPrefetchAsync_v2))] @@ -735,6 +741,9 @@ pub trait MemoryAdvise: private::Sealed { #[cfg(cuMemAdvise_v2)] driver_sys::CUmemLocation { type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, + #[cfg(cuMemLocation_anon_id)] + __bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id }, + #[cfg(not(cuMemLocation_anon_id))] id, }, #[cfg(not(cuMemAdvise_v2))] @@ -777,6 +786,9 @@ pub trait MemoryAdvise: private::Sealed { #[cfg(cuMemAdvise_v2)] driver_sys::CUmemLocation { type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, + #[cfg(cuMemLocation_anon_id)] + __bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id }, + #[cfg(not(cuMemLocation_anon_id))] id, }, #[cfg(not(cuMemAdvise_v2))] @@ -801,6 +813,9 @@ pub trait MemoryAdvise: private::Sealed { #[cfg(cuMemAdvise_v2)] driver_sys::CUmemLocation { type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, + 
#[cfg(cuMemLocation_anon_id)] + __bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id }, + #[cfg(not(cuMemLocation_anon_id))] id, }, #[cfg(not(cuMemAdvise_v2))] diff --git a/crates/cust_raw/build/cuda_sdk.rs b/crates/cust_raw/build/cuda_sdk.rs index e49c134f..f51681ed 100644 --- a/crates/cust_raw/build/cuda_sdk.rs +++ b/crates/cust_raw/build/cuda_sdk.rs @@ -215,6 +215,7 @@ impl CudaSdk { vec![ cuda_root.join("nvvm").join("bin"), cuda_root.join("nvvm").join("lib64"), + cuda_root.join("nvvm").join("lib"), ] }; let library_dirs = Self::normalize_dirpaths(search_dirs); diff --git a/crates/cust_raw/build/main.rs b/crates/cust_raw/build/main.rs index 0c0d1175..aa10096f 100644 --- a/crates/cust_raw/build/main.rs +++ b/crates/cust_raw/build/main.rs @@ -99,8 +99,12 @@ fn main() { println!("cargo::rustc-link-search=native={}", libdir.display()); } println!("cargo::rustc-link-lib=dylib=nvvm"); - // Handle libdevice support. - fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc")) + // `fs::copy` preserves source mode. When libdevice.10.bc comes from + // the Nix store (0444), re-running this build can't overwrite the + // previous copy in OUT_DIR. Drop it first. + let dest = outdir.join("libdevice.bc"); + let _ = fs::remove_file(&dest); + fs::copy(sdk.libdevice_bitcode_path(), &dest) .expect("Cannot copy libdevice bitcode file."); } } diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 9a460c6c..d31bf1d7 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -4,7 +4,6 @@ use std::{ ffi::{CStr, CString}, fmt::Display, mem::MaybeUninit, - ptr::null_mut, str::FromStr, }; @@ -325,6 +324,10 @@ pub enum NvvmArch { Compute89, Compute90, Compute90a, + /// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at + /// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+ + /// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. 
This is also the default arch + /// `cuda_builder` picks when the backend is built with `LLVM_CONFIG_19` set. Compute100, Compute100f, Compute100a, @@ -448,6 +451,14 @@ impl NvvmArch { self.capability_value() % 10 } + /// Whether this target uses NVVM's modern IR dialect rather than the legacy LLVM 7 dialect. + /// + /// CUDA 13.2 documents the modern dialect as Blackwell-and-later only, which begins at + /// `compute_100`. + pub fn uses_modern_ir_dialect(&self) -> bool { + self.capability_value() >= 100 + } + /// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for /// `Compute90a`). pub fn target_feature(&self) -> &'static str { @@ -739,7 +750,24 @@ impl NvvmProgram { /// Verify the program without actually compiling it. In the case of invalid IR, you can find /// more detailed error info by calling [`compiler_log`](Self::compiler_log). pub fn verify(&self) -> Result<(), NvvmError> { - unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() } + self.verify_with_options(&[]) + } + + /// Like [`verify`](Self::verify), but runs the verifier with the same `NvvmOption`s that will + /// be passed to [`compile`](Self::compile). Passing the user-selected `-arch=compute_XXX` in + /// particular matters for CUDA 12.9+ / LLVM 19 bitcode: without it the verifier can fall back + /// to the legacy LLVM 7 parser and reject modern-dialect bitcode that would otherwise compile + /// fine. 
+ pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> { + unsafe { + let options = options.iter().map(|x| format!("{x}\0")).collect::<Vec<_>>(); + let mut options_ptr = options + .iter() + .map(|x| x.as_ptr().cast()) + .collect::<Vec<_>>(); + nvvm_sys::nvvmVerifyProgram(self.raw, options.len() as i32, options_ptr.as_mut_ptr()) + .to_result() + } } } diff --git a/crates/rustc_codegen_nvvm/Cargo.toml b/crates/rustc_codegen_nvvm/Cargo.toml index f0c4dcfd..bab4c499 100644 --- a/crates/rustc_codegen_nvvm/Cargo.toml +++ b/crates/rustc_codegen_nvvm/Cargo.toml @@ -14,6 +14,10 @@ readme = "../../README.md" [lib] crate-type = ["dylib"] +[features] +default = [] +llvm19 = [] + [dependencies] nvvm = { version = "0.1", path = "../nvvm" } rustc-demangle = "0.1.24" diff --git a/crates/rustc_codegen_nvvm/build.rs b/crates/rustc_codegen_nvvm/build.rs index 8bb25990..475f684d 100644 --- a/crates/rustc_codegen_nvvm/build.rs +++ b/crates/rustc_codegen_nvvm/build.rs @@ -10,13 +10,11 @@ use curl::easy::Easy; use tar::Archive; use xz::read::XzDecoder; -static PREBUILT_LLVM_URL: &str = +static PREBUILT_LLVM_URL_LLVM7: &str = "https://github.com/rust-gpu/rustc_codegen_nvvm-llvm/releases/download/LLVM-7.1.0/"; -static REQUIRED_MAJOR_LLVM_VERSION: u8 = 7; - fn main() { - rustc_llvm_build(); + rustc_llvm_build(llvm19_enabled()); } fn fail(s: &str) -> ! { @@ -41,6 +39,41 @@ pub fn output(cmd: &mut Command) -> String { String::from_utf8(output.stdout).unwrap() } +fn llvm19_enabled() -> bool { + tracked_env_var_os("CARGO_FEATURE_LLVM19").is_some() +} + +fn required_major_llvm_version(llvm19_enabled: bool) -> u8 { + if llvm19_enabled { 19 } else { 7 } +} + +fn command_version(path: &Path) -> Option<String> { + let output = Command::new(path).arg("--version").output().ok()?; + if !output.status.success() { + return None; + } + + Some(String::from_utf8(output.stdout).ok()?.trim().to_string()) +} + +fn llvm_major_version(path: &Path) -> Option<u8> { + command_version(path)?
+ .split(|ch: char| !ch.is_ascii_digit()) + .find(|segment| !segment.is_empty())? + .parse() + .ok() +} + +fn llvm_version_matches(path: &Path, required_major: u8) -> bool { + llvm_major_version(path) == Some(required_major) +} + +fn sibling_llvm_tool(llvm_config: &Path, tool_prefix: &str) -> Option<PathBuf> { + let file_name = llvm_config.file_name()?.to_str()?; + let suffix = file_name.strip_prefix("llvm-config")?; + Some(llvm_config.with_file_name(format!("{tool_prefix}{suffix}"))) +} + fn target_to_llvm_prebuilt(target: &str) -> String { let base = match target { "x86_64-pc-windows-msvc" => "windows-x86_64", @@ -53,7 +86,51 @@ fn target_to_llvm_prebuilt(target: &str) -> String { format!("{base}.tar.xz") } -fn find_llvm_config(target: &str) -> PathBuf { +fn find_llvm_config(target: &str, llvm19_enabled: bool) -> PathBuf { + if llvm19_enabled { + return find_llvm_config_llvm19(); + } + + find_llvm_config_llvm7(target) +} + +fn find_llvm_config_llvm19() -> PathBuf { + let required_major = required_major_llvm_version(true); + let mut candidates = Vec::new(); + + if let Some(path) = tracked_env_var_os("LLVM_CONFIG_19") { + candidates.push(PathBuf::from(path)); + } + + candidates.push(PathBuf::from("llvm-config-19")); + + if let Some(cuda_home) = tracked_env_var_os("CUDA_HOME") { + let cuda_home = PathBuf::from(cuda_home); + candidates.push(cuda_home.join("nvvm").join("bin").join("llvm-config")); + candidates.push(cuda_home.join("bin").join("llvm-config")); + } + + for candidate in &candidates { + if llvm_version_matches(candidate, required_major) { + return candidate.clone(); + } + } + + let tried = candidates + .iter() + .map(|candidate| format!(" - {}", candidate.display())) + .collect::<Vec<_>>() + .join("\n"); + + fail(&format!( + "LLVM 19 support is enabled, but no LLVM 19 toolchain was found.\n\ + Tried:\n{tried}\n\n\ + Set LLVM_CONFIG_19=/path/to/llvm-config from an LLVM 19 installation."
+ )); +} + +fn find_llvm_config_llvm7(target: &str) -> PathBuf { + let required_major = required_major_llvm_version(false); // first, if LLVM_CONFIG is set then see if its llvm version if 7.x, if so, use that. let config_env = tracked_env_var_os("LLVM_CONFIG"); // if LLVM_CONFIG is not set, try using llvm-config as a normal app in PATH. @@ -65,11 +142,11 @@ fn find_llvm_config(target: &str) -> PathBuf { if let Ok(out) = cmd { let version = String::from_utf8(out.stdout).unwrap(); - if version.starts_with(&REQUIRED_MAJOR_LLVM_VERSION.to_string()) { + if version.starts_with(&required_major.to_string()) { return PathBuf::from(path_to_try); } println!( - "cargo:warning=Prebuilt llvm-config version does not start with {REQUIRED_MAJOR_LLVM_VERSION}" + "cargo:warning=Prebuilt llvm-config version does not start with {required_major}" ); } else { println!("cargo:warning=Failed to run prebuilt llvm-config"); @@ -80,7 +157,7 @@ fn find_llvm_config(target: &str) -> PathBuf { println!("cargo:warning=Downloading prebuilt LLVM"); let mut url = tracked_env_var_os("PREBUILT_LLVM_URL") .map(|x| x.to_string_lossy().to_string()) - .unwrap_or_else(|| PREBUILT_LLVM_URL.to_string()); + .unwrap_or_else(|| PREBUILT_LLVM_URL_LLVM7.to_string()); let prebuilt_name = target_to_llvm_prebuilt(target); url = format!("{url}{prebuilt_name}"); @@ -117,6 +194,35 @@ fn find_llvm_config(target: &str) -> PathBuf { .join(format!("llvm-config{}", std::env::consts::EXE_SUFFIX)) } +fn find_llvm_as_llvm19(llvm_config: &Path) -> PathBuf { + let required_major = required_major_llvm_version(true); + let mut candidates = Vec::new(); + + if let Some(path) = sibling_llvm_tool(llvm_config, "llvm-as") { + candidates.push(path); + } + + candidates.push(PathBuf::from("llvm-as-19")); + candidates.push(PathBuf::from("llvm-as")); + + for candidate in &candidates { + if llvm_version_matches(candidate, required_major) { + return candidate.clone(); + } + } + + let tried = candidates + .iter() + .map(|candidate| format!(" - 
{}", candidate.display())) + .collect::<Vec<_>>() + .join("\n"); + + fail(&format!( + "LLVM 19 support is enabled, but llvm-as 19 was not found.\n\ + Tried:\n{tried}" + )); +} + fn detect_llvm_link() -> (&'static str, &'static str) { // Force the link mode we want, preferring static by default, but // possibly overridden by `configure --enable-llvm-link-shared`. @@ -132,9 +238,60 @@ pub fn tracked_env_var_os<K: AsRef<OsStr> + Display>(key: K) -> Option<OsString> env::var_os(key) } -fn rustc_llvm_build() { +fn configure_libintrinsics(llvm_config: &Path, llvm19_enabled: bool) { + let manifest_dir = + PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR was not set")); + + // Both paths share `libintrinsics.ll`. The LLVM 7 build consumes the checked-in + // `libintrinsics.bc` (regenerate manually with `llvm-as-7` when the .ll changes). + // The LLVM 19 build assembles the same .ll on the fly with `llvm-as-19`. + build_helper::rerun_if_changed(Path::new("libintrinsics.ll")); + + if llvm19_enabled { + let input = manifest_dir.join("libintrinsics.ll"); + let output = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR was not set")) + .join("libintrinsics_v19.bc"); + let llvm_as = find_llvm_as_llvm19(llvm_config); + + let status = Command::new(&llvm_as) + .arg(&input) + .arg("-o") + .arg(&output) + .stderr(Stdio::inherit()) + .stdout(Stdio::inherit()) + .status() + .unwrap_or_else(|err| { + fail(&format!( + "failed to execute llvm-as for LLVM 19: {llvm_as:?}\nerror: {err}" + )) + }); + + if !status.success() { + fail(&format!( + "llvm-as did not assemble {} successfully", + input.display() + )); + } + + println!( + "cargo:rustc-env=NVVM_LIBINTRINSICS_BC_PATH={}", + output.display() + ); + } else { + build_helper::rerun_if_changed(Path::new("libintrinsics.bc")); + println!( + "cargo:rustc-env=NVVM_LIBINTRINSICS_BC_PATH={}", + manifest_dir.join("libintrinsics.bc").display() + ); + } +} + +fn rustc_llvm_build(llvm19_enabled: bool) { let target = env::var("TARGET").expect("TARGET was not
set"); - let llvm_config = find_llvm_config(&target); + let llvm_config = find_llvm_config(&target, llvm19_enabled); + let required_major = required_major_llvm_version(llvm19_enabled); + + configure_libintrinsics(&llvm_config, llvm19_enabled); let required_components = &["ipo", "bitreader", "bitwriter", "lto", "nvptx"]; @@ -183,6 +340,9 @@ fn rustc_llvm_build() { cfg.define(&flag, None); } + let llvm_version_major = required_major.to_string(); + cfg.define("LLVM_VERSION_MAJOR", Some(llvm_version_major.as_str())); + if tracked_env_var_os("LLVM_RUSTLLVM").is_some() { cfg.define("LLVM_RUSTLLVM", None); } diff --git a/crates/rustc_codegen_nvvm/libintrinsics.ll b/crates/rustc_codegen_nvvm/libintrinsics.ll index f70e84ad..6ddb53db 100644 --- a/crates/rustc_codegen_nvvm/libintrinsics.ll +++ b/crates/rustc_codegen_nvvm/libintrinsics.ll @@ -1,9 +1,13 @@ ; This is a hand-written llvm ir module which contains extra functions -; that are easier to write. They mostly contain nvvm intrinsics that are wrapped in new +; that are easier to write. They mostly contain nvvm intrinsics that are wrapped in new ; functions so that rustc does not think they are llvm intrinsics and so you don't need to always use nightly for that. ; -; if you update this make sure to update libintrinsics.bc by running llvm-as (make sure you are using llvm-7 or it won't work when -; loaded into libnvvm). +; The LLVM 7 path uses the checked-in `libintrinsics.bc`. If you edit this file for the +; LLVM 7 path, regenerate the .bc with `llvm-as-7` (older or newer llvm-as will emit a +; bitcode format libnvvm rejects). +; +; The LLVM 19 path assembles this same source at build time with `llvm-as-19`; no +; regeneration required, just edit and rebuild. source_filename = "libintrinsics" ; This data layout must match `DATA_LAYOUT` in `crates/rustc_codegen_nvvm/src/target.rs`. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" diff --git a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/LLVMWrapper.h b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/LLVMWrapper.h new file mode 100644 index 00000000..127b1756 --- /dev/null +++ b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/LLVMWrapper.h @@ -0,0 +1,51 @@ +#ifndef INCLUDED_RUSTC_LLVM_LLVMWRAPPER_H +#define INCLUDED_RUSTC_LLVM_LLVMWRAPPER_H + +#include "SuppressLLVMWarnings.h" + +#include "llvm/Config/llvm-config.h" // LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR +#include "llvm/Support/raw_ostream.h" // llvm::raw_ostream +#include // size_t etc +#include // uint64_t etc + +// Keep these version helpers close to the shared wrapper declarations so the +// surrounding shim can branch on LLVM major/minor moves in one place. +#define LLVM_VERSION_GE(major, minor) \ + (LLVM_VERSION_MAJOR > (major) || \ + LLVM_VERSION_MAJOR == (major) && LLVM_VERSION_MINOR >= (minor)) + +#define LLVM_VERSION_LT(major, minor) (!LLVM_VERSION_GE((major), (minor))) + +extern "C" void LLVMRustSetLastError(const char *); + +enum class LLVMRustResult { Success, Failure }; + +typedef struct OpaqueRustString *RustStringRef; +typedef struct LLVMOpaqueTwine *LLVMTwineRef; +typedef struct LLVMOpaqueSMDiagnostic *LLVMSMDiagnosticRef; + +extern "C" void LLVMRustStringWriteImpl(RustStringRef buf, + const char *slice_ptr, + size_t slice_len); + +class RawRustStringOstream : public llvm::raw_ostream { + RustStringRef Str; + uint64_t Pos; + + void write_impl(const char *Ptr, size_t Size) override { + LLVMRustStringWriteImpl(Str, Ptr, Size); + Pos += Size; + } + + uint64_t current_pos() const override { return Pos; } + +public: + explicit RawRustStringOstream(RustStringRef Str) : Str(Str), Pos(0) {} + + ~RawRustStringOstream() { + // LLVM requires this. 
+ flush(); + } +}; + +#endif // INCLUDED_RUSTC_LLVM_LLVMWRAPPER_H diff --git a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/PassWrapper.cpp b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/PassWrapper.cpp index 5c0f5e83..e03bdaa6 100644 --- a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/PassWrapper.cpp +++ b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/PassWrapper.cpp @@ -12,6 +12,8 @@ #include #include +#include +#include #include "rustllvm.h" @@ -21,9 +23,18 @@ #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/FileSystem.h" +#if LLVM_VERSION_MAJOR >= 19 +#include "llvm/Transforms/IPO/Internalize.h" +#endif +#if LLVM_VERSION_MAJOR >= 19 +#include "llvm/TargetParser/Host.h" +#else #include "llvm/Support/Host.h" +#endif #include "llvm/Target/TargetMachine.h" +#if LLVM_VERSION_MAJOR < 19 #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#endif #if LLVM_VERSION_GE(6, 0) #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -42,7 +53,11 @@ #endif #endif +#if LLVM_VERSION_MAJOR < 19 #include "llvm-c/Transforms/PassManagerBuilder.h" +#else +typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef; +#endif #if LLVM_VERSION_GE(4, 0) #define PGO_AVAILABLE @@ -58,11 +73,105 @@ typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef; DEFINE_STDCXX_CONVERSION_FUNCTIONS(Pass, LLVMPassRef) DEFINE_STDCXX_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) +#if LLVM_VERSION_MAJOR < 19 DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBuilder, LLVMPassManagerBuilderRef) +#else +struct LLVMOpaquePassManagerBuilder +{ + unsigned OptLevel = 0; + unsigned SizeLevel = 0; + bool MergeFunctions = false; + bool SLPVectorize = false; + bool LoopVectorize = false; + bool PrepareForThinLTO = false; + bool DisableUnrollLoops = false; + bool DisableSimplifyLibCalls = false; + bool UseAlwaysInline = false; + bool AddLifetimes = false; + int InlinerThreshold = -1; + std::string TargetTriple; +}; + +static 
LLVMOpaquePassManagerBuilder *unwrap(LLVMPassManagerBuilderRef PMB) +{ + return PMB; +} + +extern "C" LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() +{ + return new LLVMOpaquePassManagerBuilder(); +} + +extern "C" void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) +{ + delete unwrap(PMB); +} + +extern "C" void LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned Value) +{ + unwrap(PMB)->SizeLevel = Value; +} + +extern "C" void LLVMPassManagerBuilderSetDisableUnrollLoops( + LLVMPassManagerBuilderRef PMB, + LLVMBool Value) +{ + unwrap(PMB)->DisableUnrollLoops = Value; +} + +extern "C" void LLVMPassManagerBuilderUseInlinerWithThreshold( + LLVMPassManagerBuilderRef PMB, + unsigned Threshold) +{ + auto *Builder = unwrap(PMB); + Builder->UseAlwaysInline = false; + Builder->AddLifetimes = false; + Builder->InlinerThreshold = Threshold; +} + +extern "C" void LLVMPassManagerBuilderPopulateModulePassManager( + LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PMR) +{ + auto *Builder = unwrap(PMB); + PassManagerBase *PM = unwrap(PMR); + + if (!Builder->TargetTriple.empty()) + { + Triple TargetTriple(Builder->TargetTriple); + TargetLibraryInfoImpl TLII(TargetTriple); + if (Builder->DisableSimplifyLibCalls) + TLII.disableAllFunctions(); + PM->add(new TargetLibraryInfoWrapperPass(TLII)); + } + + if (Builder->UseAlwaysInline) + PM->add(createAlwaysInlinerLegacyPass(Builder->AddLifetimes)); +} + +extern "C" void LLVMPassManagerBuilderPopulateFunctionPassManager( + LLVMPassManagerBuilderRef, + LLVMPassManagerRef) +{ +} + +extern "C" void LLVMPassManagerBuilderPopulateLTOPassManager( + LLVMPassManagerBuilderRef, + LLVMPassManagerRef, + LLVMBool, + LLVMBool) +{ +} +#endif extern "C" void LLVMInitializePasses() { +#if LLVM_VERSION_MAJOR >= 19 + // LLVM 19's pass pipeline is driven through PassBuilder, so the legacy + // registry initialization hooks are not needed here. 
+#else PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeCore(Registry); initializeCodeGen(Registry); @@ -74,6 +183,7 @@ extern "C" void LLVMInitializePasses() initializeInstCombine(Registry); initializeInstrumentation(Registry); initializeTarget(Registry); +#endif } enum class LLVMRustPassKind @@ -128,7 +238,11 @@ extern "C" bool LLVMRustPassManagerBuilderPopulateThinLTOPassManager( LLVMPassManagerBuilderRef PMBR, LLVMPassManagerRef PMR) { -#if LLVM_VERSION_GE(4, 0) +#if LLVM_VERSION_MAJOR >= 19 + (void)PMBR; + (void)PMR; + return false; +#elif LLVM_VERSION_GE(4, 0) unwrap(PMBR)->populateThinLTOPassManager(*unwrap(PMR)); return true; #else @@ -259,18 +373,24 @@ enum class LLVMRustCodeGenOptLevel Aggressive, }; -static CodeGenOpt::Level fromRust(LLVMRustCodeGenOptLevel Level) +#if LLVM_VERSION_MAJOR >= 19 +using CodeGenOptLevelEnum = llvm::CodeGenOptLevel; +#else +using CodeGenOptLevelEnum = CodeGenOpt::Level; +#endif + +static CodeGenOptLevelEnum fromRust(LLVMRustCodeGenOptLevel Level) { switch (Level) { case LLVMRustCodeGenOptLevel::None: - return CodeGenOpt::None; + return CodeGenOptLevelEnum::None; case LLVMRustCodeGenOptLevel::Less: - return CodeGenOpt::Less; + return CodeGenOptLevelEnum::Less; case LLVMRustCodeGenOptLevel::Default: - return CodeGenOpt::Default; + return CodeGenOptLevelEnum::Default; case LLVMRustCodeGenOptLevel::Aggressive: - return CodeGenOpt::Aggressive; + return CodeGenOptLevelEnum::Aggressive; default: report_fatal_error("Bad CodeGenOptLevel."); } @@ -287,12 +407,20 @@ enum class LLVMRustRelocMode ROPIRWPI, }; +#if LLVM_VERSION_MAJOR >= 19 +static std::optional fromRust(LLVMRustRelocMode RustReloc) +#else static Optional fromRust(LLVMRustRelocMode RustReloc) +#endif { switch (RustReloc) { case LLVMRustRelocMode::Default: +#if LLVM_VERSION_MAJOR >= 19 + return std::nullopt; +#else return None; +#endif case LLVMRustRelocMode::Static: return Reloc::Static; case LLVMRustRelocMode::PIC: @@ -431,7 +559,9 @@ extern "C" 
LLVMTargetMachineRef LLVMRustCreateTargetMachine( Options.ThreadModel = ThreadModel::Single; } -#if LLVM_VERSION_GE(6, 0) +#if LLVM_VERSION_MAJOR >= 19 + std::optional CM; +#elif LLVM_VERSION_GE(6, 0) Optional CM; #else CodeModel::Model CM = CodeModel::Model::Default; @@ -465,6 +595,16 @@ extern "C" void LLVMRustConfigurePassManagerBuilder( bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO, const char *PGOGenPath, const char *PGOUsePath) { +#if LLVM_VERSION_MAJOR >= 19 + auto *Builder = unwrap(PMBR); + Builder->MergeFunctions = MergeFunctions; + Builder->SLPVectorize = SLPVectorize; + Builder->OptLevel = static_cast(OptLevel); + Builder->LoopVectorize = LoopVectorize; + Builder->PrepareForThinLTO = PrepareForThinLTO; + (void)PGOGenPath; + (void)PGOUsePath; +#else #if LLVM_RUSTLLVM unwrap(PMBR)->MergeFunctions = MergeFunctions; #endif @@ -490,6 +630,7 @@ extern "C" void LLVMRustConfigurePassManagerBuilder( #else assert(!PGOGenPath && !PGOUsePath && "Should've caught earlier"); #endif +#endif } // Unfortunately, the LLVM C API doesn't provide a way to set the `LibraryInfo` @@ -498,11 +639,17 @@ extern "C" void LLVMRustAddBuilderLibraryInfo(LLVMPassManagerBuilderRef PMBR, LLVMModuleRef M, bool DisableSimplifyLibCalls) { +#if LLVM_VERSION_MAJOR >= 19 + auto *Builder = unwrap(PMBR); + Builder->TargetTriple = unwrap(M)->getTargetTriple(); + Builder->DisableSimplifyLibCalls = DisableSimplifyLibCalls; +#else Triple TargetTriple(unwrap(M)->getTargetTriple()); TargetLibraryInfoImpl *TLI = new TargetLibraryInfoImpl(TargetTriple); if (DisableSimplifyLibCalls) TLI->disableAllFunctions(); unwrap(PMBR)->LibraryInfo = TLI; +#endif } // Unfortunately, the LLVM C API doesn't provide a way to create the @@ -559,14 +706,28 @@ enum class LLVMRustFileType ObjectFile, }; +#if LLVM_VERSION_MAJOR >= 19 +static CodeGenFileType fromRust(LLVMRustFileType Type) +#else static TargetMachine::CodeGenFileType fromRust(LLVMRustFileType Type) +#endif { switch (Type) 
{ case LLVMRustFileType::AssemblyFile: - return TargetMachine::CGFT_AssemblyFile; + return +#if LLVM_VERSION_MAJOR >= 19 + CodeGenFileType::AssemblyFile; +#else + TargetMachine::CGFT_AssemblyFile; +#endif case LLVMRustFileType::ObjectFile: - return TargetMachine::CGFT_ObjectFile; + return +#if LLVM_VERSION_MAJOR >= 19 + CodeGenFileType::ObjectFile; +#else + TargetMachine::CGFT_ObjectFile; +#endif default: report_fatal_error("Bad FileType."); } @@ -582,7 +743,13 @@ LLVMRustWriteOutputFile(LLVMTargetMachineRef Target, LLVMPassManagerRef PMR, std::string ErrorInfo; std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::F_None); + raw_fd_ostream OS(Path, EC, +#if LLVM_VERSION_MAJOR >= 19 + sys::fs::OF_None +#else + sys::fs::F_None +#endif + ); if (EC) ErrorInfo = EC.message(); if (ErrorInfo != "") @@ -678,12 +845,22 @@ namespace if (const CallInst *CI = dyn_cast(I)) { Name = "call"; - Value = CI->getCalledValue(); + Value = +#if LLVM_VERSION_MAJOR >= 19 + CI->getCalledOperand(); +#else + CI->getCalledValue(); +#endif } else if (const InvokeInst *II = dyn_cast(I)) { Name = "invoke"; - Value = II->getCalledValue(); + Value = +#if LLVM_VERSION_MAJOR >= 19 + II->getCalledOperand(); +#else + II->getCalledValue(); +#endif } else { @@ -819,18 +996,23 @@ extern "C" void LLVMRustPrintPasses() extern "C" void LLVMRustAddAlwaysInlinePass(LLVMPassManagerBuilderRef PMBR, bool AddLifetimes) { +#if LLVM_VERSION_MAJOR >= 19 + auto *Builder = unwrap(PMBR); + Builder->UseAlwaysInline = true; + Builder->AddLifetimes = AddLifetimes; + Builder->InlinerThreshold = -1; +#else #if LLVM_VERSION_GE(4, 0) unwrap(PMBR)->Inliner = llvm::createAlwaysInlinerLegacyPass(AddLifetimes); #else unwrap(PMBR)->Inliner = createAlwaysInlinerPass(AddLifetimes); #endif +#endif } extern "C" void LLVMRustRunRestrictionPass(LLVMModuleRef M, char **Symbols, size_t Len) { - llvm::legacy::PassManager passes; - auto PreserveFunctions = [=](const GlobalValue &GV) { for (size_t I = 0; I < Len; I++) @@ -843,9 
+1025,13 @@ extern "C" void LLVMRustRunRestrictionPass(LLVMModuleRef M, char **Symbols, return false; }; +#if LLVM_VERSION_MAJOR >= 19 + llvm::internalizeModule(*unwrap(M), PreserveFunctions); +#else + llvm::legacy::PassManager passes; passes.add(llvm::createInternalizePass(PreserveFunctions)); - passes.run(*unwrap(M)); +#endif } extern "C" void LLVMRustMarkAllFunctionsNounwind(LLVMModuleRef M) @@ -888,7 +1074,9 @@ extern "C" void LLVMRustSetModulePIELevel(LLVMModuleRef M) extern "C" bool LLVMRustThinLTOAvailable() { -#if LLVM_VERSION_GE(4, 0) +#if LLVM_VERSION_MAJOR >= 19 + return false; +#elif LLVM_VERSION_GE(4, 0) return true; #else return false; @@ -935,6 +1123,119 @@ LLVMRustPGOAvailable() // and various online resources about ThinLTO to make heads or tails of all // this. +#if LLVM_VERSION_MAJOR >= 19 +extern "C" bool +LLVMRustWriteThinBitcodeToFile(LLVMPassManagerRef PMR, + LLVMModuleRef M, + const char *BcFile, + size_t BcFileLen) +{ + (void)PMR; + (void)M; + (void)BcFile; + (void)BcFileLen; + LLVMRustSetLastError("ThinLTO bitcode writing is not implemented for LLVM 19 yet"); + return false; +} + +struct LLVMRustThinLTOData +{ +}; + +struct LLVMRustThinLTOModule +{ + const char *identifier; + const char *data; + size_t len; +}; + +extern "C" LLVMRustThinLTOData * +LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, + int num_modules, + const char **preserved_symbols, + int num_symbols) +{ + (void)modules; + (void)num_modules; + (void)preserved_symbols; + (void)num_symbols; + LLVMRustSetLastError("ThinLTO indexing is not implemented for LLVM 19 yet"); + return nullptr; +} + +extern "C" void +LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) +{ + delete Data; +} + +extern "C" bool +LLVMRustPrepareThinLTORename(const LLVMRustThinLTOData *Data, LLVMModuleRef M) +{ + (void)Data; + (void)M; + LLVMRustSetLastError("ThinLTO rename is not implemented for LLVM 19 yet"); + return false; +} + +extern "C" bool +LLVMRustPrepareThinLTOResolveWeak(const 
LLVMRustThinLTOData *Data, LLVMModuleRef M) +{ + (void)Data; + (void)M; + LLVMRustSetLastError("ThinLTO weak resolution is not implemented for LLVM 19 yet"); + return false; +} + +extern "C" bool +LLVMRustPrepareThinLTOInternalize(const LLVMRustThinLTOData *Data, LLVMModuleRef M) +{ + (void)Data; + (void)M; + LLVMRustSetLastError("ThinLTO internalization is not implemented for LLVM 19 yet"); + return false; +} + +extern "C" bool +LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) +{ + (void)Data; + (void)M; + LLVMRustSetLastError("ThinLTO importing is not implemented for LLVM 19 yet"); + return false; +} + +struct LLVMRustThinLTOBuffer +{ + std::string data; +}; + +extern "C" LLVMRustThinLTOBuffer * +LLVMRustThinLTOBufferCreate(LLVMModuleRef M) +{ + (void)M; + auto Ret = new LLVMRustThinLTOBuffer(); + return Ret; +} + +extern "C" void +LLVMRustThinLTOBufferFree(LLVMRustThinLTOBuffer *Buffer) +{ + delete Buffer; +} + +extern "C" const void * +LLVMRustThinLTOBufferPtr(const LLVMRustThinLTOBuffer *Buffer) +{ + return Buffer->data.data(); +} + +extern "C" size_t +LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) +{ + return Buffer->data.length(); +} +#else extern "C" bool LLVMRustWriteThinBitcodeToFile(LLVMPassManagerRef PMR, LLVMModuleRef M, @@ -1280,6 +1581,7 @@ LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) { return Buffer->data.length(); } +#endif // This is what we used to parse upstream bitcode for actual ThinLTO // processing. 
We'll call this once per module optimized through ThinLTO, and @@ -1356,12 +1658,16 @@ LLVMRustThinLTOPatchDICompileUnit(LLVMModuleRef Mod, DICompileUnit *Unit) { for (Instruction &BI : FI) { +#if LLVM_VERSION_MAJOR >= 19 + Finder.processInstruction(*M, BI); +#else if (auto Loc = BI.getDebugLoc()) Finder.processLocation(*M, Loc); if (auto DVI = dyn_cast(&BI)) Finder.processValue(*M, DVI); if (auto DDI = dyn_cast(&BI)) Finder.processDeclare(*M, DDI); +#endif } } } @@ -1507,4 +1813,4 @@ LLVMRustThinLTORemoveAvailableExternally(LLVMModuleRef Mod) report_fatal_error("ThinLTO not available"); } -#endif // LLVM_VERSION_GE(4, 0) \ No newline at end of file +#endif // LLVM_VERSION_GE(4, 0) diff --git a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/RustWrapper.cpp b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/RustWrapper.cpp index 8d90dc9e..5abe7900 100644 --- a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/RustWrapper.cpp +++ b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/RustWrapper.cpp @@ -9,7 +9,9 @@ // except according to those terms. 
#include "rustllvm.h" +#if LLVM_VERSION_MAJOR < 19 #include "llvm/IR/CallSite.h" +#endif #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticInfo.h" @@ -20,10 +22,9 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" -#if LLVM_VERSION_GE(5, 0) -#include "llvm/ADT/Optional.h" -#else +#if LLVM_VERSION_LT(5, 0) #include #endif @@ -113,10 +114,17 @@ extern "C" LLVMValueRef LLVMRustBuildMemCpy(LLVMBuilderRef B, LLVMValueRef Src, unsigned SrcAlign, LLVMValueRef Size, bool IsVolatile) { +#if LLVM_VERSION_MAJOR >= 19 + return wrap(unwrap(B)->CreateMemCpy( + unwrap(Dst), MaybeAlign(DstAlign), + unwrap(Src), MaybeAlign(SrcAlign), + unwrap(Size), IsVolatile)); +#else return wrap(unwrap(B)->CreateMemCpy( unwrap(Dst), DstAlign, unwrap(Src), SrcAlign, unwrap(Size), IsVolatile)); +#endif } extern "C" LLVMValueRef LLVMRustBuildMemMove(LLVMBuilderRef B, @@ -124,10 +132,17 @@ extern "C" LLVMValueRef LLVMRustBuildMemMove(LLVMBuilderRef B, LLVMValueRef Src, unsigned SrcAlign, LLVMValueRef Size, bool IsVolatile) { +#if LLVM_VERSION_MAJOR >= 19 + return wrap(unwrap(B)->CreateMemMove( + unwrap(Dst), MaybeAlign(DstAlign), + unwrap(Src), MaybeAlign(SrcAlign), + unwrap(Size), IsVolatile)); +#else return wrap(unwrap(B)->CreateMemMove( unwrap(Dst), DstAlign, unwrap(Src), SrcAlign, unwrap(Size), IsVolatile)); +#endif } extern "C" LLVMValueRef LLVMRustBuildMemSet(LLVMBuilderRef B, @@ -135,8 +150,13 @@ extern "C" LLVMValueRef LLVMRustBuildMemSet(LLVMBuilderRef B, LLVMValueRef Val, LLVMValueRef Size, bool IsVolatile) { +#if LLVM_VERSION_MAJOR >= 19 + return wrap(unwrap(B)->CreateMemSet( + unwrap(Dst), unwrap(Val), unwrap(Size), MaybeAlign(DstAlign), IsVolatile)); +#else return wrap(unwrap(B)->CreateMemSet( unwrap(Dst), unwrap(Val), unwrap(Size), DstAlign, IsVolatile)); +#endif } static LLVM_THREAD_LOCAL char *LastError; @@ -195,8 +215,12 @@ extern "C" 
LLVMValueRef LLVMRustGetOrInsertFunction(LLVMModuleRef M, const char *Name, LLVMTypeRef FunctionTy) { +#if LLVM_VERSION_MAJOR >= 19 + return wrap(unwrap(M)->getOrInsertFunction(StringRef(Name), unwrap(FunctionTy)).getCallee()); +#else return wrap( unwrap(M)->getOrInsertFunction(Name, unwrap(FunctionTy))); +#endif } extern "C" LLVMValueRef @@ -260,7 +284,11 @@ static Attribute::AttrKind fromRust(LLVMRustAttribute Kind) case SExt: return Attribute::SExt; case StructRet: +#if LLVM_VERSION_MAJOR >= 19 + report_fatal_error("StructRet not supported without a type on LLVM 19+"); +#else return Attribute::StructRet; +#endif case UWTable: return Attribute::UWTable; case ZExt: @@ -282,6 +310,13 @@ static Attribute::AttrKind fromRust(LLVMRustAttribute Kind) extern "C" void LLVMRustAddCallSiteAttribute(LLVMValueRef Instr, unsigned Index, LLVMRustAttribute RustAttr) { +#if LLVM_VERSION_MAJOR >= 19 + CallBase *Call = unwrap(Instr); + LLVMContext &Ctx = Call->getContext(); + AttrBuilder B(Ctx); + B.addAttribute(Attribute::get(Ctx, fromRust(RustAttr))); + Call->setAttributes(Call->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else CallSite Call = CallSite(unwrap(Instr)); Attribute Attr = Attribute::get(Call->getContext(), fromRust(RustAttr)); #if LLVM_VERSION_GE(5, 0) @@ -292,12 +327,20 @@ extern "C" void LLVMRustAddCallSiteAttribute(LLVMValueRef Instr, unsigned Index, Call->getContext(), Index, AttributeSet::get(Call->getContext(), Index, B))); #endif +#endif } extern "C" void LLVMRustAddAlignmentCallSiteAttr(LLVMValueRef Instr, unsigned Index, uint32_t Bytes) { +#if LLVM_VERSION_MAJOR >= 19 + CallBase *Call = unwrap(Instr); + LLVMContext &Ctx = Call->getContext(); + AttrBuilder B(Ctx); + B.addAlignmentAttr(Bytes); + Call->setAttributes(Call->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B; B.addAlignmentAttr(Bytes); @@ -309,12 +352,20 @@ extern "C" void LLVMRustAddAlignmentCallSiteAttr(LLVMValueRef 
Instr, Call->getContext(), Index, AttributeSet::get(Call->getContext(), Index, B))); #endif +#endif } extern "C" void LLVMRustAddDereferenceableCallSiteAttr(LLVMValueRef Instr, unsigned Index, uint64_t Bytes) { +#if LLVM_VERSION_MAJOR >= 19 + CallBase *Call = unwrap(Instr); + LLVMContext &Ctx = Call->getContext(); + AttrBuilder B(Ctx); + B.addDereferenceableAttr(Bytes); + Call->setAttributes(Call->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B; B.addDereferenceableAttr(Bytes); @@ -326,12 +377,20 @@ extern "C" void LLVMRustAddDereferenceableCallSiteAttr(LLVMValueRef Instr, Call->getContext(), Index, AttributeSet::get(Call->getContext(), Index, B))); #endif +#endif } extern "C" void LLVMRustAddDereferenceableOrNullCallSiteAttr(LLVMValueRef Instr, unsigned Index, uint64_t Bytes) { +#if LLVM_VERSION_MAJOR >= 19 + CallBase *Call = unwrap(Instr); + LLVMContext &Ctx = Call->getContext(); + AttrBuilder B(Ctx); + B.addDereferenceableOrNullAttr(Bytes); + Call->setAttributes(Call->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B; B.addDereferenceableOrNullAttr(Bytes); @@ -343,11 +402,19 @@ extern "C" void LLVMRustAddDereferenceableOrNullCallSiteAttr(LLVMValueRef Instr, Call->getContext(), Index, AttributeSet::get(Call->getContext(), Index, B))); #endif +#endif } extern "C" void LLVMRustAddFunctionAttribute(LLVMValueRef Fn, unsigned Index, LLVMRustAttribute RustAttr) { +#if LLVM_VERSION_MAJOR >= 19 + Function *A = unwrap(Fn); + LLVMContext &Ctx = A->getContext(); + AttrBuilder B(Ctx); + B.addAttribute(Attribute::get(Ctx, fromRust(RustAttr))); + A->setAttributes(A->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else Function *A = unwrap(Fn); Attribute Attr = Attribute::get(A->getContext(), fromRust(RustAttr)); AttrBuilder B(Attr); @@ -356,12 +423,52 @@ extern "C" void LLVMRustAddFunctionAttribute(LLVMValueRef Fn, unsigned Index, #else 
A->addAttributes(Index, AttributeSet::get(A->getContext(), Index, B)); #endif +#endif +} + +extern "C" void LLVMRustAddFunctionAttributeWithType(LLVMValueRef Fn, unsigned Index, + LLVMRustAttribute RustAttr, + LLVMTypeRef Ty) +{ +#if LLVM_VERSION_MAJOR >= 19 + Function *A = unwrap(Fn); + LLVMContext &Ctx = A->getContext(); + AttrBuilder B(Ctx); + if (RustAttr == StructRet) { + B.addStructRetAttr(unwrap(Ty)); + } else if (RustAttr == ByVal) { + B.addByValAttr(unwrap(Ty)); + } else { + B.addAttribute(Attribute::get(Ctx, fromRust(RustAttr))); + } + A->setAttributes(A->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else + // LLVM 7's StructRet/ByVal are plain attribute kinds with no type payload, + // so the Ty argument is only meaningful on the LLVM 19 path above. Fall through + // to the kind-only add on legacy LLVM. + (void)Ty; + Function *A = unwrap(Fn); + Attribute Attr = Attribute::get(A->getContext(), fromRust(RustAttr)); + AttrBuilder B(Attr); +#if LLVM_VERSION_GE(5, 0) + A->addAttributes(Index, B); +#else + A->addAttributes(Index, AttributeSet::get(A->getContext(), Index, B)); +#endif +#endif } extern "C" void LLVMRustAddAlignmentAttr(LLVMValueRef Fn, unsigned Index, uint32_t Bytes) { +#if LLVM_VERSION_MAJOR >= 19 + Function *A = unwrap(Fn); + LLVMContext &Ctx = A->getContext(); + AttrBuilder B(Ctx); + B.addAlignmentAttr(Bytes); + A->setAttributes(A->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else Function *A = unwrap(Fn); AttrBuilder B; B.addAlignmentAttr(Bytes); @@ -370,11 +477,19 @@ extern "C" void LLVMRustAddAlignmentAttr(LLVMValueRef Fn, #else A->addAttributes(Index, AttributeSet::get(A->getContext(), Index, B)); #endif +#endif } extern "C" void LLVMRustAddDereferenceableAttr(LLVMValueRef Fn, unsigned Index, uint64_t Bytes) { +#if LLVM_VERSION_MAJOR >= 19 + Function *A = unwrap(Fn); + LLVMContext &Ctx = A->getContext(); + AttrBuilder B(Ctx); + B.addDereferenceableAttr(Bytes); + 
A->setAttributes(A->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else Function *A = unwrap(Fn); AttrBuilder B; B.addDereferenceableAttr(Bytes); @@ -383,12 +498,20 @@ extern "C" void LLVMRustAddDereferenceableAttr(LLVMValueRef Fn, unsigned Index, #else A->addAttributes(Index, AttributeSet::get(A->getContext(), Index, B)); #endif +#endif } extern "C" void LLVMRustAddDereferenceableOrNullAttr(LLVMValueRef Fn, unsigned Index, uint64_t Bytes) { +#if LLVM_VERSION_MAJOR >= 19 + Function *A = unwrap(Fn); + LLVMContext &Ctx = A->getContext(); + AttrBuilder B(Ctx); + B.addDereferenceableOrNullAttr(Bytes); + A->setAttributes(A->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else Function *A = unwrap(Fn); AttrBuilder B; B.addDereferenceableOrNullAttr(Bytes); @@ -397,6 +520,7 @@ extern "C" void LLVMRustAddDereferenceableOrNullAttr(LLVMValueRef Fn, #else A->addAttributes(Index, AttributeSet::get(A->getContext(), Index, B)); #endif +#endif } extern "C" void LLVMRustAddFunctionAttrStringValue(LLVMValueRef Fn, @@ -404,6 +528,13 @@ extern "C" void LLVMRustAddFunctionAttrStringValue(LLVMValueRef Fn, const char *Name, size_t NameLen, const char *Value, size_t ValueLen) { +#if LLVM_VERSION_MAJOR >= 19 + Function *F = unwrap(Fn); + LLVMContext &Ctx = F->getContext(); + AttrBuilder B(Ctx); + B.addAttribute(StringRef(Name, NameLen), StringRef(Value, ValueLen)); + F->setAttributes(F->getAttributes().addAttributesAtIndex(Ctx, Index, B)); +#else Function *F = unwrap(Fn); AttrBuilder B; B.addAttribute(StringRef(Name, NameLen), StringRef(Value, ValueLen)); @@ -412,12 +543,19 @@ extern "C" void LLVMRustAddFunctionAttrStringValue(LLVMValueRef Fn, #else F->addAttributes(Index, AttributeSet::get(F->getContext(), Index, B)); #endif +#endif } extern "C" void LLVMRustRemoveFunctionAttributes(LLVMValueRef Fn, unsigned Index, LLVMRustAttribute RustAttr) { +#if LLVM_VERSION_MAJOR >= 19 + Function *F = unwrap(Fn); + auto PALNew = + 
F->getAttributes().removeAttributeAtIndex(F->getContext(), Index, fromRust(RustAttr)); + F->setAttributes(PALNew); +#else Function *F = unwrap(Fn); Attribute Attr = Attribute::get(F->getContext(), fromRust(RustAttr)); AttrBuilder B(Attr); @@ -429,6 +567,7 @@ extern "C" void LLVMRustRemoveFunctionAttributes(LLVMValueRef Fn, F->getContext(), Index, AttributeSet::get(F->getContext(), Index, B)); #endif F->setAttributes(PALNew); +#endif } // enable fpmath flag UnsafeAlgebra @@ -448,9 +587,13 @@ extern "C" LLVMValueRef LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMValueRef Source, const char *Name, LLVMAtomicOrdering Order) { +#if LLVM_VERSION_MAJOR >= 19 + report_fatal_error("LLVMRustBuildAtomicLoad requires a type-aware LLVM 19 wrapper"); +#else LoadInst *LI = new LoadInst(unwrap(Source), 0); LI->setAtomic(fromRust(Order)); return wrap(unwrap(B)->Insert(LI, Name)); +#endif } extern "C" LLVMValueRef LLVMRustBuildAtomicStore(LLVMBuilderRef B, @@ -458,9 +601,15 @@ extern "C" LLVMValueRef LLVMRustBuildAtomicStore(LLVMBuilderRef B, LLVMValueRef Target, LLVMAtomicOrdering Order) { +#if LLVM_VERSION_MAJOR >= 19 + StoreInst *SI = unwrap(B)->CreateStore(unwrap(V), unwrap(Target)); + SI->setAtomic(fromRust(Order)); + return wrap(SI); +#else StoreInst *SI = new StoreInst(unwrap(V), unwrap(Target)); SI->setAtomic(fromRust(Order)); return wrap(unwrap(B)->Insert(SI)); +#endif } extern "C" LLVMValueRef @@ -469,9 +618,15 @@ LLVMRustBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Target, LLVMAtomicOrdering Order, LLVMAtomicOrdering FailureOrder, LLVMBool Weak) { +#if LLVM_VERSION_MAJOR >= 19 + AtomicCmpXchgInst *ACXI = unwrap(B)->CreateAtomicCmpXchg( + unwrap(Target), unwrap(Old), unwrap(Source), MaybeAlign(), fromRust(Order), + fromRust(FailureOrder)); +#else AtomicCmpXchgInst *ACXI = unwrap(B)->CreateAtomicCmpXchg( unwrap(Target), unwrap(Old), unwrap(Source), fromRust(Order), fromRust(FailureOrder)); +#endif ACXI->setWeak(Weak); return wrap(ACXI); } @@ -554,8 +709,13 @@ 
LLVMRustInlineAsm(LLVMTypeRef Ty, char *AsmString, size_t AsmStringLen, extern "C" bool LLVMRustInlineAsmVerify(LLVMTypeRef Ty, char *Constraints, size_t ConstraintsLen) { +#if LLVM_VERSION_MAJOR >= 19 + return !llvm::errorToBool(InlineAsm::verify( + unwrap(Ty), StringRef(Constraints, ConstraintsLen))); +#else return InlineAsm::Verify(unwrap(Ty), StringRef(Constraints, ConstraintsLen)); +#endif } extern "C" void LLVMRustAppendModuleInlineAsm(LLVMModuleRef M, const char *Asm, size_t AsmLen) @@ -672,10 +832,12 @@ static DINode::DIFlags fromRust(LLVMRustDIFlags Flags) { Result |= DINode::DIFlags::FlagAppleBlock; } +#if LLVM_VERSION_MAJOR < 19 if (isSet(Flags & LLVMRustDIFlags::FlagBlockByrefStruct)) { Result |= DINode::DIFlags::FlagBlockByrefStruct; } +#endif if (isSet(Flags & LLVMRustDIFlags::FlagVirtual)) { Result |= DINode::DIFlags::FlagVirtual; @@ -735,10 +897,12 @@ static DINode::DIFlags fromRust(LLVMRustDIFlags Flags) { Result |= DINode::DIFlags::FlagNoReturn; } +#if LLVM_VERSION_MAJOR < 19 if (isSet(Flags & LLVMRustDIFlags::FlagMainSubprogram)) { Result |= DINode::DIFlags::FlagMainSubprogram; } +#endif #endif return Result; @@ -865,11 +1029,29 @@ extern "C" LLVMMetadataRef LLVMRustDIBuilderCreateFunction( { DITemplateParameterArray TParams = DITemplateParameterArray(unwrap(TParam)); +#if LLVM_VERSION_MAJOR >= 19 + DISubprogram::DISPFlags SPFlags = DISubprogram::DISPFlags::SPFlagZero; + if (IsLocalToUnit) + SPFlags |= DISubprogram::DISPFlags::SPFlagLocalToUnit; + if (IsDefinition) + SPFlags |= DISubprogram::DISPFlags::SPFlagDefinition; + if (IsOptimized) + SPFlags |= DISubprogram::DISPFlags::SPFlagOptimized; + if (isSet(Flags & LLVMRustDIFlags::FlagMainSubprogram)) + SPFlags |= DISubprogram::DISPFlags::SPFlagMainSubprogram; + + DISubprogram *Sub = Builder->createFunction( + unwrapDI(Scope), StringRef(Name ? Name : ""), + StringRef(LinkageName ? 
LinkageName : ""), unwrapDI(File), + LineNo, unwrapDI(Ty), ScopeLine, fromRust(Flags), + SPFlags, TParams, unwrapDIPtr(Decl)); +#else DISubprogram *Sub = Builder->createFunction( unwrapDI(Scope), Name, LinkageName, unwrapDI(File), LineNo, unwrapDI(Ty), IsLocalToUnit, IsDefinition, ScopeLine, fromRust(Flags), IsOptimized, TParams, unwrapDIPtr(Decl)); +#endif if (MaybeFn) unwrap(MaybeFn)->setSubprogram(Sub); return wrap(Sub); @@ -898,12 +1080,19 @@ extern "C" LLVMMetadataRef LLVMRustDIBuilderCreatePointerType( LLVMRustDIBuilderRef Builder, LLVMMetadataRef PointeeTy, uint64_t SizeInBits, uint32_t AlignInBits, const char *Name) { +#if LLVM_VERSION_MAJOR >= 19 + return wrap(Builder->createPointerType(unwrapDI(PointeeTy), + SizeInBits, AlignInBits, + std::nullopt, + StringRef(Name ? Name : ""))); +#else return wrap(Builder->createPointerType(unwrapDI(PointeeTy), SizeInBits, AlignInBits, #if LLVM_VERSION_GE(5, 0) /* DWARFAddressSpace */ None, #endif Name)); +#endif } extern "C" LLVMMetadataRef LLVMRustDIBuilderCreateStructType( @@ -1004,11 +1193,20 @@ extern "C" LLVMMetadataRef LLVMRustDIBuilderCreateStaticVariable( FPVal->getValueAPF().bitcastToAPInt().getZExtValue()); } +#if LLVM_VERSION_MAJOR >= 19 + llvm::DIGlobalVariableExpression *VarExpr = Builder->createGlobalVariableExpression( + unwrapDI(Context), StringRef(Name, NameLen), + StringRef(LinkageName, LinkageNameLen), + unwrapDI(File), LineNo, unwrapDI(Ty), IsLocalToUnit, + /* isDefined */ true, InitExpr, unwrapDIPtr(Decl), + /* templateParams */ nullptr, AlignInBits); +#else llvm::DIGlobalVariableExpression *VarExpr = Builder->createGlobalVariableExpression( unwrapDI(Context), StringRef(Name, NameLen), StringRef(LinkageName, LinkageNameLen), unwrapDI(File), LineNo, unwrapDI(Ty), IsLocalToUnit, InitExpr, unwrapDIPtr(Decl), AlignInBits); +#endif InitVal->setMetadata("dbg", VarExpr); @@ -1076,11 +1274,27 @@ extern "C" LLVMValueRef LLVMRustDIBuilderInsertDeclareAtEnd( int64_t *AddrOps, unsigned AddrOpsCount, 
LLVMMetadataRef DL, LLVMBasicBlockRef InsertAtEnd) { +#if LLVM_VERSION_MAJOR >= 19 + SmallVector AddrOpsStorage; + AddrOpsStorage.reserve(AddrOpsCount); + for (unsigned I = 0; I < AddrOpsCount; ++I) + AddrOpsStorage.push_back(static_cast(AddrOps[I])); + + DbgInstPtr Declare = Builder->insertDeclare( + unwrap(V), unwrap(VarInfo), + Builder->createExpression(ArrayRef(AddrOpsStorage)), + DebugLoc(cast(unwrap(DL))), + unwrap(InsertAtEnd)); + if (Instruction *Inst = Declare.dyn_cast()) + return wrap(Inst); + return nullptr; +#else return wrap(Builder->insertDeclare( unwrap(V), unwrap(VarInfo), Builder->createExpression(llvm::ArrayRef(AddrOps, AddrOpsCount)), DebugLoc(cast(unwrap(DL))), unwrap(InsertAtEnd))); +#endif } extern "C" LLVMMetadataRef @@ -1118,8 +1332,14 @@ extern "C" LLVMMetadataRef LLVMRustDIBuilderCreateTemplateTypeParameter( LLVMRustDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, size_t NameLen, LLVMMetadataRef Ty) { +#if LLVM_VERSION_MAJOR >= 19 + return wrap(Builder->createTemplateTypeParameter( + unwrapDI(Scope), StringRef(Name, NameLen), + unwrapDI(Ty), false)); +#else return wrap(Builder->createTemplateTypeParameter( unwrapDI(Scope), StringRef(Name, NameLen), unwrapDI(Ty))); +#endif } extern "C" LLVMMetadataRef @@ -1160,10 +1380,16 @@ LLVMRustDIBuilderCreateDebugLocation(unsigned Line, unsigned Column, LLVMMetadataRef Scope, LLVMMetadataRef InlinedAt) { +#if LLVM_VERSION_MAJOR >= 19 + MDNode *ScopeNode = unwrapDIPtr(Scope); + return wrap(DILocation::get(ScopeNode->getContext(), Line, Column, ScopeNode, + unwrapDIPtr(InlinedAt))); +#else DebugLoc debug_loc = DebugLoc::get(Line, Column, unwrapDIPtr(Scope), unwrapDIPtr(InlinedAt)); return wrap(debug_loc.getAsMDNode()); +#endif } extern "C" LLVMMetadataRef @@ -1221,9 +1447,22 @@ inline section_iterator *unwrap(LLVMSectionIteratorRef SI) extern "C" size_t LLVMRustGetSectionName(LLVMSectionIteratorRef SI, const char **Ptr) { +#if LLVM_VERSION_MAJOR >= 19 + Expected NameOrErr = 
(*unwrap(SI))->getName(); + if (!NameOrErr) + { + std::string Err = toString(NameOrErr.takeError()); + report_fatal_error(Err.c_str()); + } + StringRef Ret = *NameOrErr; +#else StringRef Ret; if (std::error_code EC = (*unwrap(SI))->getName(Ret)) - report_fatal_error(EC.message()); + { + std::string Err = EC.message(); + report_fatal_error(Err.c_str()); + } +#endif *Ptr = Ret.data(); return Ret.size(); } @@ -1263,7 +1502,11 @@ extern "C" void LLVMRustUnpackOptimizationDiagnostic( { *Line = loc.getLine(); *Column = loc.getColumn(); +#if LLVM_VERSION_MAJOR >= 19 + FilenameOS << loc.getAbsolutePath(); +#else FilenameOS << loc.getFilename(); +#endif } #else const DebugLoc &loc = Opt->getDebugLoc(); @@ -1389,8 +1632,17 @@ extern "C" LLVMTypeKind LLVMRustGetTypeKind(LLVMTypeRef Ty) return LLVMArrayTypeKind; case Type::PointerTyID: return LLVMPointerTypeKind; +#if LLVM_VERSION_MAJOR >= 19 + case Type::FixedVectorTyID: + return LLVMVectorTypeKind; + case Type::ScalableVectorTyID: + return LLVMScalableVectorTypeKind; + case Type::BFloatTyID: + return LLVMBFloatTypeKind; +#else case Type::VectorTyID: return LLVMVectorTypeKind; +#endif case Type::X86_MMXTyID: return LLVMX86_MMXTypeKind; case Type::TokenTyID: @@ -1401,11 +1653,19 @@ extern "C" LLVMTypeKind LLVMRustGetTypeKind(LLVMTypeRef Ty) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SMDiagnostic, LLVMSMDiagnosticRef) +#if LLVM_VERSION_MAJOR >= 19 +extern "C" void LLVMRustSetInlineAsmDiagnosticHandler(LLVMContextRef, + void *, + void *) +{ +} +#else extern "C" void LLVMRustSetInlineAsmDiagnosticHandler( LLVMContextRef C, LLVMContext::InlineAsmDiagHandlerTy H, void *CX) { unwrap(C)->setInlineAsmDiagnosticHandler(H, CX); } +#endif extern "C" void LLVMRustWriteSMDiagnosticToString(LLVMSMDiagnosticRef D, RustStringRef Str) @@ -1477,11 +1737,25 @@ extern "C" void LLVMRustAddHandler(LLVMValueRef CatchSwitchRef, cast(CatchSwitch)->addHandler(unwrap(Handler)); } +#if LLVM_VERSION_MAJOR >= 19 +static FunctionType 
*LLVMRustGetFunctionTypeForCallee(Value *Callee) +{ + if (Function *Fn = dyn_cast(Callee->stripPointerCasts())) + return Fn->getFunctionType(); + + report_fatal_error("LLVMRustBuildCall requires an explicit callee type on LLVM 19"); +} +#endif + extern "C" OperandBundleDef *LLVMRustBuildOperandBundleDef(const char *Name, LLVMValueRef *Inputs, unsigned NumInputs) { +#if LLVM_VERSION_MAJOR >= 19 + return new OperandBundleDef(Name, ArrayRef(unwrap(Inputs), NumInputs)); +#else return new OperandBundleDef(Name, makeArrayRef(unwrap(Inputs), NumInputs)); +#endif } extern "C" void LLVMRustFreeOperandBundleDef(OperandBundleDef *Bundle) @@ -1495,9 +1769,19 @@ extern "C" LLVMValueRef LLVMRustBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, const char *Name) { unsigned Len = Bundle ? 1 : 0; +#if LLVM_VERSION_MAJOR >= 19 + Value *Callee = unwrap(Fn); + FunctionType *FnTy = LLVMRustGetFunctionTypeForCallee(Callee); + ArrayRef Bundles = + Bundle ? ArrayRef(Bundle, Len) + : ArrayRef(); + return wrap(unwrap(B)->CreateCall( + FnTy, Callee, ArrayRef(unwrap(Args), NumArgs), Bundles, Name)); +#else ArrayRef Bundles = makeArrayRef(Bundle, Len); return wrap(unwrap(B)->CreateCall( unwrap(Fn), makeArrayRef(unwrap(Args), NumArgs), Bundles, Name)); +#endif } extern "C" LLVMValueRef LLVMRustBuildCall2(LLVMBuilderRef B, LLVMTypeRef FnTy, @@ -1507,9 +1791,15 @@ extern "C" LLVMValueRef LLVMRustBuildCall2(LLVMBuilderRef B, LLVMTypeRef FnTy, const char *Name) { assert(Bundle == nullptr && "LLVM 7 lacks CreateCall(FunctionType, ..., Bundles)"); +#if LLVM_VERSION_MAJOR >= 19 + return wrap(unwrap(B)->CreateCall( + unwrap(FnTy), unwrap(Fn), + ArrayRef(unwrap(Args), NumArgs), Name)); +#else return wrap(unwrap(B)->CreateCall( unwrap(FnTy), unwrap(Fn), makeArrayRef(unwrap(Args), NumArgs), Name)); +#endif } extern "C" LLVMValueRef @@ -1519,10 +1809,21 @@ LLVMRustBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, const char *Name) { unsigned Len = Bundle ? 
1 : 0; +#if LLVM_VERSION_MAJOR >= 19 + Value *Callee = unwrap(Fn); + FunctionType *FnTy = LLVMRustGetFunctionTypeForCallee(Callee); + ArrayRef Bundles = + Bundle ? ArrayRef(Bundle, Len) + : ArrayRef(); + return wrap(unwrap(B)->CreateInvoke(FnTy, Callee, unwrap(Then), unwrap(Catch), + ArrayRef(unwrap(Args), NumArgs), + Bundles, Name)); +#else ArrayRef Bundles = makeArrayRef(Bundle, Len); return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch), makeArrayRef(unwrap(Args), NumArgs), Bundles, Name)); +#endif } extern "C" void LLVMRustPositionBuilderAtStart(LLVMBuilderRef B, @@ -1648,11 +1949,19 @@ extern "C" bool LLVMRustConstInt128Get(LLVMValueRef CV, bool sext, uint64_t *hig APInt AP; if (sext) { +#if LLVM_VERSION_MAJOR >= 19 + AP = C->getValue().sext(128); +#else AP = C->getValue().sextOrSelf(128); +#endif } else { +#if LLVM_VERSION_MAJOR >= 19 + AP = C->getValue().zext(128); +#else AP = C->getValue().zextOrSelf(128); +#endif } *low = AP.getLoBits(64).getZExtValue(); *high = AP.getHiBits(64).getZExtValue(); @@ -1720,14 +2029,20 @@ struct LLVMRustModuleBuffer extern "C" LLVMRustModuleBuffer * LLVMRustModuleBufferCreate(LLVMModuleRef M) { - auto Ret = llvm::make_unique(); + // Longhand form avoids std::make_unique (C++14) so this compiles under + // LLVM 7's `-std=c++11` llvm-config cxxflags as well as LLVM 19's C++17. 
+ auto Ret = std::unique_ptr(new LLVMRustModuleBuffer()); { raw_string_ostream OS(Ret->data); +#if LLVM_VERSION_MAJOR >= 19 + WriteBitcodeToFile(*unwrap(M), OS); +#else { legacy::PassManager PM; PM.add(createBitcodeWriterPass(OS)); PM.run(*unwrap(M)); } +#endif } return Ret.release(); } @@ -1807,12 +2122,22 @@ LLVMRustBuildVectorReduceMax(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) extern "C" LLVMValueRef LLVMRustBuildVectorReduceFMin(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) { +#if LLVM_VERSION_MAJOR >= 19 + (void)NoNaN; + return wrap(unwrap(B)->CreateFPMinReduce(unwrap(Src))); +#else return wrap(unwrap(B)->CreateFPMinReduce(unwrap(Src), NoNaN)); +#endif } extern "C" LLVMValueRef LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) { +#if LLVM_VERSION_MAJOR >= 19 + (void)NoNaN; + return wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src))); +#else return wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN)); +#endif } #else diff --git a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/SuppressLLVMWarnings.h b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/SuppressLLVMWarnings.h new file mode 100644 index 00000000..09ecfd54 --- /dev/null +++ b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/SuppressLLVMWarnings.h @@ -0,0 +1,17 @@ +#ifndef _rustc_llvm_SuppressLLVMWarnings_h +#define _rustc_llvm_SuppressLLVMWarnings_h + +// LLVM currently generates many warnings when compiled using MSVC. These +// warnings make it difficult to diagnose real problems when working on C++ +// code, so we suppress them. + +#ifdef _MSC_VER +#pragma warning(disable : 4530) // C++ exception handler used, but unwind + // semantics are not enabled. 
+#pragma warning( \ + disable : 4624) // 'xxx': destructor was implicitly defined as deleted +#pragma warning( \ + disable : 4244) // conversion from 'xxx' to 'yyy', possible loss of data +#endif + +#endif // _rustc_llvm_SuppressLLVMWarnings_h diff --git a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/rustllvm.h b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/rustllvm.h index e334ac45..653c7818 100644 --- a/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/rustllvm.h +++ b/crates/rustc_codegen_nvvm/rustc_llvm_wrapper/rustllvm.h @@ -8,13 +8,19 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +#include "LLVMWrapper.h" + #include "llvm-c/BitReader.h" #include "llvm-c/Core.h" #include "llvm-c/ExecutionEngine.h" #include "llvm-c/Object.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" +#if LLVM_VERSION_MAJOR >= 19 +#include "llvm/TargetParser/Triple.h" +#else #include "llvm/ADT/Triple.h" +#endif #include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" @@ -26,23 +32,33 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/FormattedStream.h" +#if LLVM_VERSION_MAJOR >= 19 +#include "llvm/TargetParser/Host.h" +#else #include "llvm/Support/Host.h" +#endif #include "llvm/Support/Memory.h" #include "llvm/Support/SourceMgr.h" +#if LLVM_VERSION_MAJOR >= 19 +#include "llvm/MC/TargetRegistry.h" +#else #include "llvm/Support/TargetRegistry.h" +#endif #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" +#if LLVM_VERSION_MAJOR < 19 #include "llvm/Transforms/Vectorize.h" - -#define LLVM_VERSION_GE(major, minor) \ - (LLVM_VERSION_MAJOR > (major) || \ - LLVM_VERSION_MAJOR == 
(major) && LLVM_VERSION_MINOR >= (minor)) +#include "llvm/ADT/Optional.h" +#else +#include <optional> +template <typename T> +using Optional = std::optional<T>; +#endif #define LLVM_VERSION_EQ(major, minor) \ (LLVM_VERSION_MAJOR == (major) && LLVM_VERSION_MINOR == (minor)) @@ -51,8 +67,6 @@ (LLVM_VERSION_MAJOR < (major) || \ LLVM_VERSION_MAJOR == (major) && LLVM_VERSION_MINOR <= (minor)) -#define LLVM_VERSION_LT(major, minor) (!LLVM_VERSION_GE((major), (minor))) - #include "llvm/IR/LegacyPassManager.h" #if LLVM_VERSION_GE(4, 0) @@ -66,15 +80,6 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/Linker/Linker.h" - -extern "C" void LLVMRustSetLastError(const char *); - -enum class LLVMRustResult -{ - Success, - Failure -}; - enum LLVMRustAttribute { AlwaysInline = 0, @@ -103,33 +108,3 @@ enum LLVMRustAttribute SanitizeMemory = 23, ReadNone = 24 }; - -typedef struct OpaqueRustString *RustStringRef; -typedef struct LLVMOpaqueTwine *LLVMTwineRef; -typedef struct LLVMOpaqueSMDiagnostic *LLVMSMDiagnosticRef; - -extern "C" void LLVMRustStringWriteImpl(RustStringRef Str, const char *Ptr, - size_t Size); - -class RawRustStringOstream : public llvm::raw_ostream -{ - RustStringRef Str; - uint64_t Pos; - - void write_impl(const char *Ptr, size_t Size) override - { - LLVMRustStringWriteImpl(Str, Ptr, Size); - Pos += Size; - } - - uint64_t current_pos() const override { return Pos; } - -public: - explicit RawRustStringOstream(RustStringRef Str) : Str(Str), Pos(0) {} - - ~RawRustStringOstream() - { - // LLVM requires this.
- flush(); - } -}; \ No newline at end of file diff --git a/crates/rustc_codegen_nvvm/src/abi.rs b/crates/rustc_codegen_nvvm/src/abi.rs index 712b9a55..b24cc543 100644 --- a/crates/rustc_codegen_nvvm/src/abi.rs +++ b/crates/rustc_codegen_nvvm/src/abi.rs @@ -443,6 +443,13 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { } => { assert!(!on_stack); let i = apply(attrs); + #[cfg(feature = "llvm19")] + llvm::Attribute::StructRet.apply_llfn_with_type( + llvm::AttributePlace::Argument(i), + llfn, + self.ret.memory_ty(cx), + ); + #[cfg(not(feature = "llvm19"))] llvm::Attribute::StructRet.apply_llfn(llvm::AttributePlace::Argument(i), llfn); } _ => {} diff --git a/crates/rustc_codegen_nvvm/src/allocator.rs b/crates/rustc_codegen_nvvm/src/allocator.rs index c7a34973..38ee1bb9 100644 --- a/crates/rustc_codegen_nvvm/src/allocator.rs +++ b/crates/rustc_codegen_nvvm/src/allocator.rs @@ -57,7 +57,7 @@ pub(crate) fn codegen( False, ); let from_name = mangle_internal_symbol(tcx, &global_fn_name(method.name)); - let llfn = llvm::LLVMRustGetOrInsertFunction( + let llfn = llvm::get_or_insert_function( llmod, from_name.as_ptr().cast(), from_name.len(), @@ -69,7 +69,7 @@ pub(crate) fn codegen( } let to_name = mangle_internal_symbol(tcx, &default_fn_name(method.name)); - let callee = llvm::LLVMRustGetOrInsertFunction( + let callee = llvm::get_or_insert_function( llmod, to_name.as_ptr().cast(), to_name.len(), @@ -89,7 +89,7 @@ pub(crate) fn codegen( .enumerate() .map(|(i, _)| llvm::LLVMGetParam(llfn, i as c_uint)) .collect::<Vec<_>>(); - let ret = llvm::LLVMRustBuildCall( + let ret = llvm::build_call( llbuilder, callee, args.as_ptr(), @@ -107,7 +107,7 @@ pub(crate) fn codegen( let shim_ty = llvm::LLVMFunctionType(void, std::ptr::null(), 0, False); let shim_name = mangle_internal_symbol(tcx, NO_ALLOC_SHIM_IS_UNSTABLE); - let shim = llvm::LLVMRustGetOrInsertFunction( + let shim = llvm::get_or_insert_function( llmod, shim_name.as_ptr().cast(), shim_name.len(), diff --git
a/crates/rustc_codegen_nvvm/src/back.rs b/crates/rustc_codegen_nvvm/src/back.rs index c3112164..23548063 100644 --- a/crates/rustc_codegen_nvvm/src/back.rs +++ b/crates/rustc_codegen_nvvm/src/back.rs @@ -104,23 +104,20 @@ pub fn target_machine_factory( Arc::new(move |dcx, _config: TargetMachineFactoryConfig| { let tm = unsafe { - llvm::LLVMRustCreateTargetMachine( - triple.as_c_char_ptr(), - triple.len(), - std::ptr::null(), - 0, - features.as_c_char_ptr(), - features.len(), + llvm::create_target_machine(&llvm::TargetMachineConfig { + triple: &triple, + cpu: None, + features, code_model, - reloc_model, + reloc_mode: reloc_model, opt_level, - false, - use_softfp, - ffunction_sections, - fdata_sections, + use_soft_fp: use_softfp, + position_independent_executable: false, + function_sections: ffunction_sections, + data_sections: fdata_sections, trap_unreachable, - false, - ) + singlethread: false, + }) }; tm.unwrap_or_else(|| { dcx.fatal(format!( @@ -217,6 +214,13 @@ pub(crate) unsafe fn codegen( let _bc_timer = prof.generic_activity_with_arg("NVVM_module_codegen_make_bitcode", &module.name[..]); + #[cfg(feature = "llvm19")] + if let Err(err) = llvm::verify_module(llmod) { + return Err(dcx.fatal(format!( + "LLVM module verification failed for {module_name}: {err}" + ))); + } + let thin = ModuleBuffer::new(llmod, false); let data = thin.data(); diff --git a/crates/rustc_codegen_nvvm/src/builder.rs b/crates/rustc_codegen_nvvm/src/builder.rs index 7997504d..b864e0ab 100644 --- a/crates/rustc_codegen_nvvm/src/builder.rs +++ b/crates/rustc_codegen_nvvm/src/builder.rs @@ -230,23 +230,28 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { if changed { v = transmute_llval(self.llbuilder, self.cx, v, new_ty); } - // Get the return type. - let sig = llvm::LLVMGetElementType(self.val_ty(self.llfn())); - let return_ty = llvm::LLVMGetReturnType(sig); - // Check if new_ty & return_ty are different pointers. 
- // FIXME: get rid of this nonsense once we are past LLVM 7 and don't have - // to suffer from typed pointers. - if return_ty != new_ty - && llvm::LLVMRustGetTypeKind(return_ty) == llvm::TypeKind::Pointer - && llvm::LLVMRustGetTypeKind(new_ty) == llvm::TypeKind::Pointer + + #[cfg(not(feature = "llvm19"))] { - v = llvm::LLVMBuildBitCast( - self.llbuilder, - v, - return_ty, - c"return pointer adjust".as_ptr(), - ); + // Get the return type. + let sig = llvm::LLVMGetElementType(self.val_ty(self.llfn())); + let return_ty = llvm::LLVMGetReturnType(sig); + // Check if new_ty & return_ty are different pointers. + // FIXME: get rid of this nonsense once we are past LLVM 7 and don't have + // to suffer from typed pointers. + if return_ty != new_ty + && llvm::LLVMRustGetTypeKind(return_ty) == llvm::TypeKind::Pointer + && llvm::LLVMRustGetTypeKind(new_ty) == llvm::TypeKind::Pointer + { + v = llvm::LLVMBuildBitCast( + self.llbuilder, + v, + return_ty, + c"return pointer adjust".as_ptr(), + ); + } } + llvm::LLVMBuildRet(self.llbuilder, v); } } @@ -506,6 +511,9 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { trace!("Load {ty:?} {:?}", ptr); let ptr = self.pointercast(ptr, self.cx.type_ptr_to(ty)); unsafe { + #[cfg(feature = "llvm19")] + let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED); + #[cfg(not(feature = "llvm19"))] let load = llvm::LLVMBuildLoad(self.llbuilder, ptr, UNNAMED); llvm::LLVMSetAlignment(load, align.bytes() as c_uint); load @@ -516,6 +524,9 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { trace!("Volatile load `{:?}`", ptr); let ptr = self.pointercast(ptr, self.cx.type_ptr_to(ty)); unsafe { + #[cfg(feature = "llvm19")] + let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED); + #[cfg(not(feature = "llvm19"))] let load = llvm::LLVMBuildLoad(self.llbuilder, ptr, UNNAMED); llvm::LLVMSetVolatile(load, llvm::True); load @@ -1218,8 +1229,16 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 
'tcx> for Builder<'a, 'll, 'tcx> { |builder, dst| { // Local space is only accessible to the current thread. // So, there are no synchronization issues, and we can emulate it using a simple load / compare / store. - let load: &'ll Value = - unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) }; + let load: &'ll Value = unsafe { + #[cfg(feature = "llvm19")] + { + llvm::LLVMBuildLoad2(builder.llbuilder, builder.val_ty(cmp), dst, UNNAMED) + } + #[cfg(not(feature = "llvm19"))] + { + llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) + } + }; let compare = builder.icmp(IntPredicate::IntEQ, load, cmp); // We can do something smart & branchless here: // We select either the current value(if the comparison fails), or a new value. @@ -1270,8 +1289,16 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { |builder, dst| { // Local space is only accessible to the current thread. So, there are no // synchronization issues, and we can emulate it using a simple load/compare/store. 
- let load: &'ll Value = - unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) }; + let load: &'ll Value = unsafe { + #[cfg(feature = "llvm19")] + { + llvm::LLVMBuildLoad2(builder.llbuilder, builder.val_ty(src), dst, UNNAMED) + } + #[cfg(not(feature = "llvm19"))] + { + llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) + } + }; let next_val = match op { AtomicRmwBinOp::AtomicXchg => src, AtomicRmwBinOp::AtomicAdd => builder.add(load, src), @@ -1335,10 +1362,16 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } fn lifetime_start(&mut self, ptr: &'ll Value, size: Size) { + #[cfg(feature = "llvm19")] + self.call_lifetime_intrinsic("llvm.lifetime.start.p0", ptr, size); + #[cfg(not(feature = "llvm19"))] self.call_lifetime_intrinsic("llvm.lifetime.start.p0i8", ptr, size); } fn lifetime_end(&mut self, ptr: &'ll Value, size: Size) { + #[cfg(feature = "llvm19")] + self.call_lifetime_intrinsic("llvm.lifetime.end.p0", ptr, size); + #[cfg(not(feature = "llvm19"))] self.call_lifetime_intrinsic("llvm.lifetime.end.p0i8", ptr, size); } @@ -1355,15 +1388,21 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { trace!("Calling fn {:?} with args {:?}", llfn, args); self.cx.last_call_llfn.set(None); let args = self.check_call("call", llty, llfn, args); - let llfn_ptr_ty = unsafe { llvm::LLVMPointerType(llty, 0) }; - let llfn = if self.val_ty(llfn) == llfn_ptr_ty { - llfn - } else { - self.pointercast(llfn, llfn_ptr_ty) + + // On LLVM 7 we must ensure the callee has a pointer-to-FnTy type; LLVM 19's + // opaque pointers make this a no-op, so skip the cast on that path entirely. 
+ #[cfg(not(feature = "llvm19"))] + let llfn = unsafe { + let llfn_ptr_ty = llvm::LLVMPointerType(llty, 0); + if self.val_ty(llfn) == llfn_ptr_ty { + llfn + } else { + self.pointercast(llfn, llfn_ptr_ty) + } }; let mut call = unsafe { - llvm::LLVMRustBuildCall2( + llvm::build_call2( self.llbuilder, llty, llfn, @@ -1378,7 +1417,11 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { // bitcast return type if the type was remapped let map = self.cx.remapped_integer_args.borrow(); + #[cfg(feature = "llvm19")] + let fn_ty = llty; + #[cfg(not(feature = "llvm19"))] let mut fn_ty = self.val_ty(llfn); + #[cfg(not(feature = "llvm19"))] while self.cx.type_kind(fn_ty) == TypeKind::Pointer { fn_ty = self.cx.element_type(fn_ty); } diff --git a/crates/rustc_codegen_nvvm/src/consts.rs b/crates/rustc_codegen_nvvm/src/consts.rs index d6a7987c..b21a3b88 100644 --- a/crates/rustc_codegen_nvvm/src/consts.rs +++ b/crates/rustc_codegen_nvvm/src/consts.rs @@ -354,7 +354,16 @@ impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> { let mut val_llty = self.val_ty(v); let v = if val_llty == self.type_i1() { val_llty = self.type_i8(); - llvm::LLVMConstZExt(v, val_llty) + #[cfg(feature = "llvm19")] + { + let const_int = v as *const llvm::Value as *const llvm::ConstantInt; + let const_val = llvm::LLVMConstIntGetZExtValue(&*const_int); + llvm::LLVMConstInt(val_llty, const_val, 0) + } + #[cfg(not(feature = "llvm19"))] + { + llvm::LLVMConstZExt(v, val_llty) + } } else { v }; @@ -379,7 +388,7 @@ impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> { let visibility = llvm::LLVMRustGetVisibility(g); let addrspace = self.static_addrspace(instance); - let new_g = llvm::LLVMRustGetOrInsertGlobal( + let new_g = llvm::get_or_insert_global( self.llmod, name.as_ptr().cast(), name.len(), diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 41481808..4087f7a7 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ 
b/crates/rustc_codegen_nvvm/src/context.rs @@ -125,7 +125,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { let void = llvm::LLVMVoidTypeInContext(llcx); let llfnty = llvm::LLVMFunctionType(void, null(), 0, llvm::False); let name = "__rust_eh_personality"; - llvm::LLVMRustGetOrInsertFunction(llmod, name.as_ptr().cast(), name.len(), llfnty) + llvm::get_or_insert_function(llmod, name.as_ptr().cast(), name.len(), llfnty) }; let dbg_cx = if tcx.sess.opts.debuginfo != DebugInfo::None { @@ -393,7 +393,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { let name = sanitize_global_ident(name); trace!("Declaring global `{}`", name); unsafe { - llvm::LLVMRustGetOrInsertGlobal( + llvm::get_or_insert_global( self.llmod, name.as_ptr().cast(), name.len(), @@ -413,7 +413,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, ) -> &'ll Value { let llfn = unsafe { - llvm::LLVMRustGetOrInsertFunction(self.llmod, name.as_ptr().cast(), name.len(), ty) + llvm::get_or_insert_function(self.llmod, name.as_ptr().cast(), name.len(), ty) }; trace!("Declaring function `{}` with ty `{:?}`", name, ty); diff --git a/crates/rustc_codegen_nvvm/src/ctx_intrinsics.rs b/crates/rustc_codegen_nvvm/src/ctx_intrinsics.rs index 553b1195..3da09599 100644 --- a/crates/rustc_codegen_nvvm/src/ctx_intrinsics.rs +++ b/crates/rustc_codegen_nvvm/src/ctx_intrinsics.rs @@ -137,7 +137,13 @@ impl<'ll> CodegenCx<'ll, '_> { ifn!(map, "llvm.cttz.i32", fn(t_i32, i1) -> t_i32); ifn!(map, "llvm.cttz.i64", fn(t_i64, i1) -> t_i64); + #[cfg(feature = "llvm19")] + ifn!(map, "llvm.lifetime.start.p0", fn(t_i64, i8p) -> void); + #[cfg(not(feature = "llvm19"))] ifn!(map, "llvm.lifetime.start.p0i8", fn(t_i64, i8p) -> void); + #[cfg(feature = "llvm19")] + ifn!(map, "llvm.lifetime.end.p0", fn(t_i64, i8p) -> void); + #[cfg(not(feature = "llvm19"))] ifn!(map, "llvm.lifetime.end.p0i8", fn(t_i64, i8p) -> void); ifn!(map, "llvm.expect.i1", fn(i1, i1) -> i1); diff --git a/crates/rustc_codegen_nvvm/src/debug_info/metadata.rs 
b/crates/rustc_codegen_nvvm/src/debug_info/metadata.rs index 549d5102..ee3221e0 100644 --- a/crates/rustc_codegen_nvvm/src/debug_info/metadata.rs +++ b/crates/rustc_codegen_nvvm/src/debug_info/metadata.rs @@ -792,24 +792,23 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>( 0, ); - llvm::LLVMRustDIBuilderCreateCompileUnit( - debug_context.builder, - dwarf_const::DW_LANG_Rust, - compile_unit_file, - producer.as_c_char_ptr(), - producer.len(), - tcx.sess.opts.optimize != config::OptLevel::No, - c"".as_ptr(), - 0, + llvm::di_builder_create_compile_unit(debug_context.builder, llvm::DICompileUnitOptions { + lang: dwarf_const::DW_LANG_Rust, + file: compile_unit_file, + producer: producer.as_c_char_ptr(), + producer_len: producer.len(), + is_optimized: tcx.sess.opts.optimize != config::OptLevel::No, + flags: c"".as_ptr(), + runtime_ver: 0, // NB: this doesn't actually have any perceptible effect, it seems. LLVM will instead // put the path supplied to `MCSplitDwarfFile` into the debug info of the final // output(s). 
- split_name.as_c_char_ptr(), - split_name.len(), - kind, - 0, - tcx.sess.opts.unstable_opts.split_dwarf_inlining, - ) + split_name: split_name.as_c_char_ptr(), + split_name_len: split_name.len(), + emission_kind: kind, + dwo_id: 0, + split_debug_inlining: tcx.sess.opts.unstable_opts.split_dwarf_inlining, + }) } } diff --git a/crates/rustc_codegen_nvvm/src/debug_info/mod.rs b/crates/rustc_codegen_nvvm/src/debug_info/mod.rs index f9e7ade1..147eaf2f 100644 --- a/crates/rustc_codegen_nvvm/src/debug_info/mod.rs +++ b/crates/rustc_codegen_nvvm/src/debug_info/mod.rs @@ -154,8 +154,7 @@ impl<'ll, 'tcx> DebugInfoBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { fn set_dbg_loc(&mut self, dbg_loc: &'ll DILocation) { unsafe { - let dbg_loc_as_llval = llvm::LLVMRustMetadataAsValue(self.cx().llcx, dbg_loc); - llvm::LLVMSetCurrentDebugLocation(self.llbuilder, Some(dbg_loc_as_llval)); + llvm::set_current_debug_location(self.llbuilder, self.cx().llcx, Some(dbg_loc)); } } @@ -172,7 +171,7 @@ impl<'ll, 'tcx> DebugInfoBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { fn clear_dbg_loc(&mut self) { unsafe { - llvm::LLVMSetCurrentDebugLocation(self.llbuilder, None); + llvm::set_current_debug_location(self.llbuilder, self.cx().llcx, None); } } @@ -313,23 +312,22 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { } unsafe { - return llvm::LLVMRustDIBuilderCreateFunction( - DIB(self), - containing_scope.0, - name.as_ptr(), - linkage_name.as_ptr(), - file_metadata, - loc.line, - function_type_metadata, - is_node_local_to_unit(self, def_id), - true, + return llvm::di_builder_create_function(DIB(self), llvm::DIFunctionOptions { + scope: containing_scope.0, + name: name.as_ptr(), + linkage_name: linkage_name.as_ptr(), + file: file_metadata, + line_no: loc.line, + ty: function_type_metadata, + is_local_to_unit: is_node_local_to_unit(self, def_id), + is_definition: true, scope_line, flags, - self.sess().opts.optimize != config::OptLevel::No, - maybe_definition_llfn, - 
template_parameters, - None, - ); + is_optimized: self.sess().opts.optimize != config::OptLevel::No, + maybe_fn: maybe_definition_llfn, + template_params: template_parameters, + decl: None, + }); } fn get_function_signature<'ll, 'tcx>( @@ -502,19 +500,18 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { let name = CString::new(variable_name.as_str()).unwrap(); unsafe { - llvm::LLVMRustDIBuilderCreateVariable( - DIB(self), - dwarf_tag, - scope_metadata, - name.as_ptr().cast(), - file_metadata, - loc.line, - type_metadata, - true, - DIFlags::FlagZero, - argument_index, - align.bytes() as u32, - ) + llvm::di_builder_create_variable(DIB(self), llvm::DIVariableOptions { + tag: dwarf_tag, + scope: scope_metadata, + name: name.as_ptr().cast(), + file: file_metadata, + line_no: loc.line, + ty: type_metadata, + always_preserve: true, + flags: DIFlags::FlagZero, + arg_no: argument_index, + align_in_bits: align.bytes() as u32, + }) } } } diff --git a/crates/rustc_codegen_nvvm/src/init.rs b/crates/rustc_codegen_nvvm/src/init.rs index 3b13b8a3..60fef46f 100644 --- a/crates/rustc_codegen_nvvm/src/init.rs +++ b/crates/rustc_codegen_nvvm/src/init.rs @@ -104,7 +104,9 @@ unsafe fn configure_llvm(sess: &Session) { false, ); - // Use non-zero `import-instr-limit` multiplier for cold callsites. + // This tuning flag isn't guaranteed to be registered in the dylib-loaded + // LLVM19 backend configuration, and it is not required for correctness. 
+ #[cfg(not(feature = "llvm19"))] add("-import-cold-multiplier=0.1", false); // for arg in sess_args { diff --git a/crates/rustc_codegen_nvvm/src/intrinsic.rs b/crates/rustc_codegen_nvvm/src/intrinsic.rs index f6e9d161..f68dcc87 100644 --- a/crates/rustc_codegen_nvvm/src/intrinsic.rs +++ b/crates/rustc_codegen_nvvm/src/intrinsic.rs @@ -871,7 +871,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { ); let args = self.check_call("call", fn_ty, fn_ptr, &llargs); let llret = unsafe { - llvm::LLVMRustBuildCall2( + llvm::build_call2( self.llbuilder, fn_ty, fn_ptr, diff --git a/crates/rustc_codegen_nvvm/src/llvm.rs b/crates/rustc_codegen_nvvm/src/llvm.rs index 2fea8e12..9f315cc3 100644 --- a/crates/rustc_codegen_nvvm/src/llvm.rs +++ b/crates/rustc_codegen_nvvm/src/llvm.rs @@ -25,7 +25,7 @@ use std::hash::{Hash, Hasher}; use std::marker::PhantomData; use std::ptr::{self}; -use crate::builder::unnamed; +use crate::{builder::unnamed, common::AsCCharPtr}; pub use debuginfo::*; impl PartialEq for Value { @@ -97,6 +97,11 @@ impl Attribute { unsafe { LLVMRustAddFunctionAttribute(llfn, idx.as_uint(), *self) } } + #[cfg(feature = "llvm19")] + pub fn apply_llfn_with_type(&self, idx: AttributePlace, llfn: &Value, ty: &Type) { + unsafe { LLVMRustAddFunctionAttributeWithType(llfn, idx.as_uint(), *self, ty) } + } + pub fn apply_callsite(&self, idx: AttributePlace, callsite: &Value) { unsafe { LLVMRustAddCallSiteAttribute(callsite, idx.as_uint(), *self) } } @@ -161,6 +166,29 @@ pub fn last_error() -> Option { } } +pub(crate) fn verify_module(module: &Module) -> Result<(), String> { + unsafe { + let mut message = ptr::null_mut(); + let failed = LLVMVerifyModule( + module, + LLVMVerifierFailureAction::LLVMReturnStatusAction, + &mut message, + ); + if failed == False { + return Ok(()); + } + + let err = if message.is_null() { + "LLVM module verification failed".to_string() + } else { + let err = CStr::from_ptr(message).to_string_lossy().into_owned(); + 
LLVMDisposeMessage(message); + err + }; + Err(err) + } +} + pub(crate) fn SetUnnamedAddress(global: &'_ Value, unnamed: UnnamedAddr) { unsafe { LLVMSetUnnamedAddress(global, unnamed); @@ -598,11 +626,124 @@ pub mod debuginfo { } } -// These functions are kind of a hack for the future. They wrap LLVM 7 rust shim functions -// and turn them into the API that the llvm 12 shim has. This way, if nvidia ever updates their -// dinosaur llvm version, switching for us should be extremely easy. `Name` is assumed to be -// a utf8 string -pub(crate) unsafe fn LLVMRustGetOrInsertFunction<'a>( +// Keep the LLVM-version-specific symbol shims behind a small Rust helper +// surface so callers do not have to care which ABI details the C++ layer is +// preserving for us. +pub(crate) struct DICompileUnitOptions<'a> { + pub lang: c_uint, + pub file: &'a DIFile, + pub producer: *const c_char, + pub producer_len: size_t, + pub is_optimized: bool, + pub flags: *const c_char, + pub runtime_ver: c_uint, + pub split_name: *const c_char, + pub split_name_len: size_t, + pub emission_kind: DebugEmissionKind, + pub dwo_id: u64, + pub split_debug_inlining: bool, +} + +pub(crate) unsafe fn di_builder_create_compile_unit<'a>( + builder: &DIBuilder<'a>, + options: DICompileUnitOptions<'a>, +) -> &'a DIDescriptor { + unsafe { + LLVMRustDIBuilderCreateCompileUnit( + builder, + options.lang, + options.file, + options.producer, + options.producer_len, + options.is_optimized, + options.flags, + options.runtime_ver, + options.split_name, + options.split_name_len, + options.emission_kind, + options.dwo_id, + options.split_debug_inlining, + ) + } +} + +pub(crate) struct DIFunctionOptions<'a> { + pub scope: &'a DIDescriptor, + pub name: *const c_char, + pub linkage_name: *const c_char, + pub file: &'a DIFile, + pub line_no: c_uint, + pub ty: &'a DIType, + pub is_local_to_unit: bool, + pub is_definition: bool, + pub scope_line: c_uint, + pub flags: DIFlags, + pub is_optimized: bool, + pub maybe_fn: Option<&'a 
Value>, + pub template_params: &'a DIArray, + pub decl: Option<&'a DIDescriptor>, +} + +pub(crate) unsafe fn di_builder_create_function<'a>( + builder: &DIBuilder<'a>, + options: DIFunctionOptions<'a>, +) -> &'a DISubprogram { + unsafe { + LLVMRustDIBuilderCreateFunction( + builder, + options.scope, + options.name, + options.linkage_name, + options.file, + options.line_no, + options.ty, + options.is_local_to_unit, + options.is_definition, + options.scope_line, + options.flags, + options.is_optimized, + options.maybe_fn, + options.template_params, + options.decl, + ) + } +} + +pub(crate) struct DIVariableOptions<'a> { + pub tag: c_uint, + pub scope: &'a DIDescriptor, + pub name: *const c_char, + pub file: &'a DIFile, + pub line_no: c_uint, + pub ty: &'a DIType, + pub always_preserve: bool, + pub flags: DIFlags, + pub arg_no: c_uint, + pub align_in_bits: u32, +} + +pub(crate) unsafe fn di_builder_create_variable<'a>( + builder: &DIBuilder<'a>, + options: DIVariableOptions<'a>, +) -> &'a DIVariable { + unsafe { + LLVMRustDIBuilderCreateVariable( + builder, + options.tag, + options.scope, + options.name, + options.file, + options.line_no, + options.ty, + options.always_preserve, + options.flags, + options.arg_no, + options.align_in_bits, + ) + } +} + +pub(crate) unsafe fn get_or_insert_function<'a>( M: &'a Module, Name: *const c_char, NameLen: usize, @@ -615,7 +756,36 @@ pub(crate) unsafe fn LLVMRustGetOrInsertFunction<'a>( } } -pub(crate) unsafe fn LLVMRustBuildCall<'a>( +pub(crate) unsafe fn LLVMRustGetOrInsertFunction<'a>( + M: &'a Module, + Name: *const c_char, + NameLen: usize, + FunctionTy: &'a Type, +) -> &'a Value { + unsafe { get_or_insert_function(M, Name, NameLen, FunctionTy) } +} + +pub(crate) unsafe fn get_or_insert_global<'a>( + M: &'a Module, + Name: *const c_char, + NameLen: usize, + T: &'a Type, + AddressSpace: c_uint, +) -> &'a Value { + unsafe { __LLVMRustGetOrInsertGlobal(M, Name, NameLen, T, AddressSpace) } +} + +pub(crate) unsafe fn 
LLVMRustGetOrInsertGlobal<'a>( + M: &'a Module, + Name: *const c_char, + NameLen: usize, + T: &'a Type, + AddressSpace: c_uint, +) -> &'a Value { + unsafe { get_or_insert_global(M, Name, NameLen, T, AddressSpace) } +} + +pub(crate) unsafe fn build_call<'a>( B: &Builder<'a>, Fn: &'a Value, Args: *const &'a Value, @@ -625,7 +795,17 @@ pub(crate) unsafe fn LLVMRustBuildCall<'a>( unsafe { __LLVMRustBuildCall(B, Fn, Args, NumArgs, Bundle, unnamed()) } } -pub(crate) unsafe fn LLVMRustBuildCall2<'a>( +pub(crate) unsafe fn LLVMRustBuildCall<'a>( + B: &Builder<'a>, + Fn: &'a Value, + Args: *const &'a Value, + NumArgs: c_uint, + Bundle: Option<&OperandBundleDef<'a>>, +) -> &'a Value { + unsafe { build_call(B, Fn, Args, NumArgs, Bundle) } +} + +pub(crate) unsafe fn build_call2<'a>( B: &Builder<'a>, FnTy: &'a Type, Fn: &'a Value, @@ -636,6 +816,28 @@ pub(crate) unsafe fn LLVMRustBuildCall2<'a>( unsafe { __LLVMRustBuildCall2(B, FnTy, Fn, Args, NumArgs, Bundle, unnamed()) } } +pub(crate) unsafe fn LLVMRustBuildCall2<'a>( + B: &Builder<'a>, + FnTy: &'a Type, + Fn: &'a Value, + Args: *const &'a Value, + NumArgs: c_uint, + Bundle: Option<&OperandBundleDef<'a>>, +) -> &'a Value { + unsafe { build_call2(B, FnTy, Fn, Args, NumArgs, Bundle) } +} + +pub(crate) unsafe fn set_current_debug_location<'a>( + Builder: &Builder<'a>, + Context: &'a Context, + Location: Option<&'a DILocation>, +) { + unsafe { + let location = Location.map(|location| LLVMRustMetadataAsValue(Context, location)); + LLVMSetCurrentDebugLocation(Builder, location) + } +} + /// LLVMRustCodeGenOptLevel #[derive(Copy, Clone, PartialEq)] #[repr(C)] @@ -673,6 +875,46 @@ pub enum CodeModel { None, } +pub(crate) struct TargetMachineConfig<'a> { + pub triple: &'a str, + pub cpu: Option<&'a str>, + pub features: &'a str, + pub code_model: CodeModel, + pub reloc_mode: RelocMode, + pub opt_level: CodeGenOptLevel, + pub use_soft_fp: bool, + pub position_independent_executable: bool, + pub function_sections: bool, + pub 
data_sections: bool, + pub trap_unreachable: bool, + pub singlethread: bool, +} + +pub(crate) unsafe fn create_target_machine( + config: &TargetMachineConfig<'_>, +) -> Option<&'static mut TargetMachine> { + let cpu = config.cpu.unwrap_or(""); + unsafe { + LLVMRustCreateTargetMachine( + config.triple.as_c_char_ptr(), + config.triple.len(), + cpu.as_c_char_ptr(), + cpu.len(), + config.features.as_c_char_ptr(), + config.features.len(), + config.code_model, + config.reloc_mode, + config.opt_level, + config.use_soft_fp, + config.position_independent_executable, + config.function_sections, + config.data_sections, + config.trap_unreachable, + config.singlethread, + ) + } +} + unsafe extern "C" { #[link_name = "LLVMRustBuildCall"] pub(crate) fn __LLVMRustBuildCall<'a>( @@ -723,7 +965,8 @@ unsafe extern "C" { DestTy: &'a Type, Name: *const c_char, ) -> &'a Value; - pub(crate) fn LLVMRustGetOrInsertGlobal<'a>( + #[link_name = "LLVMRustGetOrInsertGlobal"] + fn __LLVMRustGetOrInsertGlobal<'a>( M: &'a Module, Name: *const c_char, NameLen: usize, @@ -1257,6 +1500,8 @@ unsafe extern "C" { // Operations on array, pointer, and vector types (sequence types) pub(crate) fn LLVMRustArrayType(ElementType: &Type, ElementCount: u64) -> &Type; + #[cfg(feature = "llvm19")] + pub(crate) fn LLVMPointerTypeInContext(C: &Context, AddressSpace: c_uint) -> &Type; pub(crate) fn LLVMPointerType(ElementType: &Type, AddressSpace: c_uint) -> &Type; pub(crate) fn LLVMVectorType(ElementType: &Type, ElementCount: c_uint) -> &Type; @@ -1384,6 +1629,13 @@ unsafe extern "C" { pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint); pub(crate) fn LLVMRustAddAlignmentAttr(Fn: &Value, index: c_uint, bytes: u32); pub(crate) fn LLVMRustAddFunctionAttribute(Fn: &Value, index: c_uint, attr: Attribute); + #[cfg(feature = "llvm19")] + pub(crate) fn LLVMRustAddFunctionAttributeWithType( + Fn: &Value, + index: c_uint, + attr: Attribute, + ty: &Type, + ); pub(crate) fn LLVMRustAddFunctionAttrStringValue( Fn: 
&Value, index: c_uint, @@ -1642,6 +1894,13 @@ unsafe extern "C" { Val: &'a Value, Name: *const c_char, ) -> &'a Value; + #[cfg(feature = "llvm19")] + pub(crate) fn LLVMBuildLoad2<'a>( + B: &Builder<'a>, + Ty: &'a Type, + PointerVal: &'a Value, + Name: *const c_char, + ) -> &'a Value; pub(crate) fn LLVMBuildLoad<'a>( B: &Builder<'a>, PointerVal: &'a Value, @@ -1894,6 +2153,11 @@ unsafe extern "C" { ) -> &'a Value; pub(crate) fn LLVMDisposeMessage(message: *mut c_char); + pub(crate) fn LLVMVerifyModule( + M: &Module, + Action: LLVMVerifierFailureAction, + OutMessage: *mut *mut c_char, + ) -> Bool; /// Returns a string describing the last error caused by an LLVMRust* call. pub(crate) fn LLVMRustGetLastError() -> *const c_char; diff --git a/crates/rustc_codegen_nvvm/src/lto.rs b/crates/rustc_codegen_nvvm/src/lto.rs index bed126a8..b75b0ab4 100644 --- a/crates/rustc_codegen_nvvm/src/lto.rs +++ b/crates/rustc_codegen_nvvm/src/lto.rs @@ -24,7 +24,12 @@ unsafe impl Send for ModuleBuffer {} unsafe impl Sync for ModuleBuffer {} impl ModuleBuffer { - pub(crate) fn new(m: &llvm::Module, _is_thin: bool) -> ModuleBuffer { + pub(crate) fn new(m: &llvm::Module, is_thin: bool) -> ModuleBuffer { + if is_thin { + trace!( + "serializing thin-LTO input via full-module bitcode; ThinLTO-specific shim APIs remain unwired on the Rust side" + ); + } ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) }) } } diff --git a/crates/rustc_codegen_nvvm/src/nvvm.rs b/crates/rustc_codegen_nvvm/src/nvvm.rs index 735a2973..a6012c5c 100644 --- a/crates/rustc_codegen_nvvm/src/nvvm.rs +++ b/crates/rustc_codegen_nvvm/src/nvvm.rs @@ -15,7 +15,7 @@ use std::ptr; use tracing::debug; // see libintrinsics.ll on what this is. 
-const LIBINTRINSICS: &[u8] = include_bytes!("../libintrinsics.bc"); +const LIBINTRINSICS: &[u8] = include_bytes!(env!("NVVM_LIBINTRINSICS_BC_PATH")); pub enum CodegenErr { Nvvm(NvvmError), @@ -43,6 +43,23 @@ impl Display for CodegenErr { } } +#[cfg(feature = "llvm19")] +fn is_known_nvvm_verify_false_negative(log: &str) -> bool { + log.contains("Producer: 'LLVM19") + && log.contains("Reader: 'LLVM 7.0.1'") + && log.contains("parse Invalid value") +} + +fn selected_arch(args: &CodegenArgs) -> NvvmArch { + args.nvvm_options + .iter() + .find_map(|opt| match opt { + NvvmOption::Arch(arch) => Some(*arch), + _ => None, + }) + .unwrap_or_default() +} + /// Take a list of bitcode module bytes and their names and codegen it /// into PTX bytes. The final PTX *should* be utf8, but just to be on the safe side /// it returns a vector of bytes. @@ -56,6 +73,12 @@ pub fn codegen_bitcode_modules( llcx: &Context, ) -> Result, CodegenErr> { debug!("Codegenning bitcode to PTX"); + let target_arch = selected_arch(args); + debug!( + "selected NVVM target arch: {} (modern dialect: {})", + target_arch, + target_arch.uses_modern_ir_dialect() + ); // Make sure the nvvm version is high enough so users don't get confusing compilation errors. let (major, minor) = nvvm::ir_version(); @@ -113,22 +136,43 @@ pub fn codegen_bitcode_modules( // giving it to libnvvm. Then to debug codegen failures, we can just ask the user to provide the corresponding llvm ir // file with --emit=llvm-ir + // On the llvm19 path, pass the same options we'll hand to `compile` so the verifier uses + // the same arch-specific parser. Without this libnvvm can default to the legacy LLVM 7 + // reader and reject LLVM 19 dialect bitcode that would otherwise compile fine (see + // `is_known_nvvm_verify_false_negative` for the resulting log signature). On the LLVM 7 + // path we keep the original option-less verify to avoid drift from the pre-llvm19 baseline. 
+ #[cfg(feature = "llvm19")] + let verification_res = prog.verify_with_options(&args.nvvm_options); + #[cfg(not(feature = "llvm19"))] let verification_res = prog.verify(); if verification_res.is_err() { let log = prog.compiler_log().unwrap().unwrap_or_default(); - let footer = "If you plan to submit a bug report please re-run the codegen with `RUSTFLAGS=\"--emit=llvm-ir\" and include the .ll file corresponding to the .o file mentioned in the log"; - panic!( - "Malformed NVVM IR program rejected by libnvvm, dumping verifier log:\n\n{log}\n\n{footer}" - ); + #[cfg(feature = "llvm19")] + if target_arch.uses_modern_ir_dialect() && is_known_nvvm_verify_false_negative(&log) { + sess.dcx().warn( + "libnvvm verification rejected LLVM 19 bitcode with the known legacy-reader message; proceeding to compilation anyway on the llvm19 path" + ); + } else { + let footer = "If you plan to submit a bug report please re-run the codegen with `RUSTFLAGS=\"--emit=llvm-ir\" and include the .ll file corresponding to the .o file mentioned in the log"; + panic!( + "Malformed NVVM IR program rejected by libnvvm, dumping verifier log:\n\n{log}\n\n{footer}" + ); + } + #[cfg(not(feature = "llvm19"))] + { + let footer = "If you plan to submit a bug report please re-run the codegen with `RUSTFLAGS=\"--emit=llvm-ir\" and include the .ll file corresponding to the .o file mentioned in the log"; + panic!( + "Malformed NVVM IR program rejected by libnvvm, dumping verifier log:\n\n{log}\n\n{footer}" + ); + } } let res = match prog.compile(&args.nvvm_options) { Ok(b) => b, Err(error) => { let log = prog.compiler_log().unwrap().unwrap_or_default(); - // this should never happen, if it does, something went really bad or its a bug on libnvvm's end panic!( - "libnvvm returned an error that was not previously caught by the verifier: {error:?} {log:?}" + "libnvvm compilation failed: {error:?}\n\n{log}" ); } }; @@ -310,6 +354,17 @@ unsafe fn internalize_pass(module: &Module, cx: &Context) { } unsafe fn 
dce_pass(module: &Module) { + #[cfg(feature = "llvm19")] + { + // The legacy C API entrypoint used below (`LLVMAddGlobalDCEPass`) is not + // available on our current LLVM 19 runtime path. Keep the backend loadable + // by skipping this cleanup for now; revisit if LLVM 19 smoke tests show we + // need an explicit replacement pass. + let _ = module; + return; + } + + #[cfg(not(feature = "llvm19"))] unsafe { let pass_manager = LLVMCreatePassManager(); diff --git a/crates/rustc_codegen_nvvm/src/target.rs b/crates/rustc_codegen_nvvm/src/target.rs index 48249ed8..562cd412 100644 --- a/crates/rustc_codegen_nvvm/src/target.rs +++ b/crates/rustc_codegen_nvvm/src/target.rs @@ -1,7 +1,10 @@ use crate::llvm::{self, Type}; -use rustc_target::spec::{Target, TargetTuple}; +use rustc_target::spec::{MergeFunctions, Target, TargetTuple}; // This data layout must match `datalayout` in `crates/rustc_codegen_nvvm/libintrinsics.ll`. +// Both LLVM 7 and LLVM 19 accept this string; the explicit specs are equivalent to the +// defaults LLVM would fill in from the shorter `clang-19 -target nvptx64-nvidia-cuda` +// output. pub const DATA_LAYOUT: &str = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"; pub const TARGET_TRIPLE: &str = "nvptx64-nvidia-cuda"; pub const POINTER_WIDTH: u32 = 64; @@ -15,5 +18,8 @@ pub fn target() -> Target { let mut target = Target::expect_builtin(&TargetTuple::TargetTuple(TARGET_TRIPLE.into())); target.data_layout = DATA_LAYOUT.into(); target.pointer_width = POINTER_WIDTH as u16; + // Disable MergeFunctions LLVM optimisation pass because it can produce kernel + // functions that call other kernel functions, which is not supported by PTX ISA. 
+ target.options.merge_functions = MergeFunctions::Disabled; target } diff --git a/crates/rustc_codegen_nvvm/src/ty.rs b/crates/rustc_codegen_nvvm/src/ty.rs index 5633c71b..b1e74365 100644 --- a/crates/rustc_codegen_nvvm/src/ty.rs +++ b/crates/rustc_codegen_nvvm/src/ty.rs @@ -56,8 +56,16 @@ impl Type { impl<'ll> CodegenCx<'ll, '_> { pub(crate) fn voidp(&self) -> &'ll Type { // llvm uses i8* for void ptrs, void* is invalid - let i8_ty = self.type_i8(); - self.type_ptr_to_ext(i8_ty, AddressSpace::ZERO) + #[cfg(feature = "llvm19")] + { + self.type_ptr_ext(AddressSpace::ZERO) + } + + #[cfg(not(feature = "llvm19"))] + { + let i8_ty = self.type_i8(); + self.type_ptr_to_ext(i8_ty, AddressSpace::ZERO) + } } pub(crate) fn type_named_struct(&self, name: &str) -> &'ll Type { @@ -97,7 +105,15 @@ impl<'ll> CodegenCx<'ll, '_> { } pub(crate) fn type_i8p_ext(&self, address_space: AddressSpace) -> &'ll Type { - self.type_ptr_to_ext(self.type_i8(), address_space) + #[cfg(feature = "llvm19")] + { + self.type_ptr_ext(address_space) + } + + #[cfg(not(feature = "llvm19"))] + { + self.type_ptr_to_ext(self.type_i8(), address_space) + } } ///x Creates an integer type with the given number of bits, e.g., i24 @@ -110,17 +126,35 @@ impl<'ll> CodegenCx<'ll, '_> { } pub(crate) fn type_ptr_to(&self, ty: &'ll Type) -> &'ll Type { - assert_ne!( - self.type_kind(ty), - TypeKind::Function, - "don't call ptr_to on function types, use ptr_to_llvm_type on FnAbi instead or explicitly specify an address space if it makes sense" - ); + #[cfg(feature = "llvm19")] + { + let _ = ty; + self.type_ptr_ext(AddressSpace::ZERO) + } + + #[cfg(not(feature = "llvm19"))] + { + assert_ne!( + self.type_kind(ty), + TypeKind::Function, + "don't call ptr_to on function types, use ptr_to_llvm_type on FnAbi instead or explicitly specify an address space if it makes sense" + ); - unsafe { llvm::LLVMPointerType(ty, AddressSpace::ZERO.0) } + unsafe { llvm::LLVMPointerType(ty, AddressSpace::ZERO.0) } + } } pub(crate) fn 
type_ptr_to_ext(&self, ty: &'ll Type, address_space: AddressSpace) -> &'ll Type { - unsafe { llvm::LLVMPointerType(ty, address_space.0) } + #[cfg(feature = "llvm19")] + { + let _ = ty; + self.type_ptr_ext(address_space) + } + + #[cfg(not(feature = "llvm19"))] + { + unsafe { llvm::LLVMPointerType(ty, address_space.0) } + } } pub(crate) fn func_params_types(&self, ty: &'ll Type) -> Vec<&'ll Type> { @@ -211,11 +245,27 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods for CodegenCx<'ll, 'tcx> { } fn type_ptr(&self) -> Self::Type { - self.type_ptr_ext(AddressSpace::ZERO) + #[cfg(feature = "llvm19")] + unsafe { + return llvm::LLVMPointerTypeInContext(self.llcx, AddressSpace::ZERO.0); + } + + #[cfg(not(feature = "llvm19"))] + { + self.type_ptr_ext(AddressSpace::ZERO) + } } fn type_ptr_ext(&self, address_space: AddressSpace) -> Self::Type { - self.type_ptr_to_ext(self.type_i8(), address_space) + #[cfg(feature = "llvm19")] + unsafe { + return llvm::LLVMPointerTypeInContext(self.llcx, address_space.0); + } + + #[cfg(not(feature = "llvm19"))] + { + self.type_ptr_to_ext(self.type_i8(), address_space) + } } fn element_type(&self, ty: &'ll Type) -> &'ll Type { diff --git a/examples/vecadd/Cargo.toml b/examples/vecadd/Cargo.toml index aca2f476..4de99985 100644 --- a/examples/vecadd/Cargo.toml +++ b/examples/vecadd/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] cust = { path = "../../crates/cust" } +cust_raw = { path = "../../crates/cust_raw", default-features = false, features = ["driver"] } nanorand = "0.7" [build-dependencies] diff --git a/examples/vecadd/build.rs b/examples/vecadd/build.rs index ea054c7d..564de15e 100644 --- a/examples/vecadd/build.rs +++ b/examples/vecadd/build.rs @@ -12,12 +12,25 @@ fn main() { println!("cargo::rerun-if-changed=build.rs"); println!("cargo::rerun-if-changed=kernels"); + println!("cargo::rerun-if-env-changed=RUST_CUDA_DUMP_FINAL_MODULE"); + println!("cargo::rerun-if-env-changed=RUST_CUDA_EMIT_LLVM_IR"); let out_path = 
path::PathBuf::from(env::var("OUT_DIR").unwrap()); let manifest_dir = path::PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - CudaBuilder::new(manifest_dir.join("kernels")) - .copy_to(out_path.join("kernels.ptx")) - .build() - .unwrap(); + let dump_final_module = env::var_os("RUST_CUDA_DUMP_FINAL_MODULE").is_some(); + let emit_llvm_ir = env::var_os("RUST_CUDA_EMIT_LLVM_IR").is_some(); + + let mut builder = CudaBuilder::new(manifest_dir.join("kernels")); + builder = builder.copy_to(out_path.join("kernels.ptx")); + + if dump_final_module { + builder = builder.final_module_path(out_path.join("final-module.ll")); + } + + if emit_llvm_ir { + builder = builder.emit_llvm_ir(true); + } + + builder.build().unwrap(); } diff --git a/examples/vecadd/src/main.rs b/examples/vecadd/src/main.rs index cf804d0c..e2240c22 100644 --- a/examples/vecadd/src/main.rs +++ b/examples/vecadd/src/main.rs @@ -1,12 +1,114 @@ use cust::prelude::*; +use cust_raw::driver_sys; use nanorand::{Rng, WyRand}; use std::error::Error; +use std::ffi::{CStr, CString, c_void}; +use std::io::Write; +use std::os::raw::c_uint; +use std::ptr; /// How many numbers to generate and add together. const NUMBERS_LEN: usize = 100_000; static PTX: &str = include_str!(concat!(env!("OUT_DIR"), "/kernels.ptx")); +fn load_ptx_with_log(ptx: &str) -> Result<Module, Box<dyn Error>> { + let cstr = CString::new(ptx).expect("PTX contains nul bytes"); + + // Pre-allocate log buffers so the driver can write its real complaint there. + const LOG_CAP: usize = 16 * 1024; + let mut info_log = vec![0u8; LOG_CAP]; + let mut error_log = vec![0u8; LOG_CAP]; + + // Driver packs values directly into the *mut c_void slot when the payload fits.
+ // LOG_VERBOSE = request detailed log + // INFO/ERROR_LOG_BUFFER = pointer to buffer + // *_LOG_BUFFER_SIZE_BYTES = capacity (in), bytes written (out) + let mut options = [ + driver_sys::CUjit_option::CU_JIT_LOG_VERBOSE, + driver_sys::CUjit_option::CU_JIT_INFO_LOG_BUFFER, + driver_sys::CUjit_option::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, + driver_sys::CUjit_option::CU_JIT_ERROR_LOG_BUFFER, + driver_sys::CUjit_option::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, + ]; + let mut option_values: [*mut c_void; 5] = [ + 1_usize as *mut c_void, + info_log.as_mut_ptr() as *mut c_void, + LOG_CAP as *mut c_void, + error_log.as_mut_ptr() as *mut c_void, + LOG_CAP as *mut c_void, + ]; + + let mut module_ptr: driver_sys::CUmodule = ptr::null_mut(); + let res = unsafe { + driver_sys::cuModuleLoadDataEx( + &mut module_ptr, + cstr.as_ptr() as *const c_void, + options.len() as c_uint, + options.as_mut_ptr(), + option_values.as_mut_ptr(), + ) + }; + + let info_len = option_values[2] as usize; + let error_len = option_values[4] as usize; + let info_str = String::from_utf8_lossy(&info_log[..info_len.min(LOG_CAP)]); + let error_str = String::from_utf8_lossy(&error_log[..error_len.min(LOG_CAP)]); + + if !info_str.trim().is_empty() { + eprintln!("[vecadd] JIT info log ({info_len} bytes):\n{info_str}"); + } + if !error_str.trim().is_empty() { + eprintln!("[vecadd] JIT error log ({error_len} bytes):\n{error_str}"); + } + eprintln!("[vecadd] cuModuleLoadDataEx raw result code: {:?}", res); + + if res != driver_sys::cudaError_enum::CUDA_SUCCESS { + unsafe { + let mut err_cstr: *const std::os::raw::c_char = ptr::null(); + if driver_sys::cuGetErrorString(res, &mut err_cstr) + == driver_sys::cudaError_enum::CUDA_SUCCESS + && !err_cstr.is_null() + { + let msg = CStr::from_ptr(err_cstr).to_string_lossy(); + eprintln!("[vecadd] cuGetErrorString: {msg}"); + } + } + return Err(format!("cuModuleLoadDataEx failed: {:?}", res).into()); + } + + // The driver accepted the PTX; drop our raw handle and re-load via 
cust so the + // caller gets a typed Module with cust's lifetime/drop machinery. + let _ = unsafe { driver_sys::cuModuleUnload(module_ptr) }; + Module::from_ptx(ptx, &[]).map_err(|e| e.into()) +} + +// Flush stdout after every println so it stays ordered against our eprintln +// traces when the two streams get muxed (e.g. over SSH, where stdout would +// otherwise be block-buffered and dump out-of-order). +macro_rules! sayln { + ($($arg:tt)*) => {{ + println!($($arg)*); + let _ = std::io::stdout().flush(); + }}; +} + +macro_rules! step { + ($label:expr, $expr:expr) => {{ + eprintln!("[vecadd] {} ...", $label); + match $expr { + Ok(v) => { + eprintln!("[vecadd] {} ok", $label); + v + } + Err(e) => { + eprintln!("[vecadd] {} FAILED: {:?}", $label, e); + return Err(e.into()); + } + } + }}; +} + fn main() -> Result<(), Box<dyn Error>> { // generate our random vectors. let mut wyrand = WyRand::new(); @@ -15,45 +117,68 @@ fn main() -> Result<(), Box<dyn Error>> { let mut rhs = vec![0.0f32; NUMBERS_LEN]; wyrand.fill(&mut rhs); - // initialize CUDA, this will pick the first available device and will - // make a CUDA context from it. - // We don't need the context for anything but it must be kept alive. - let _ctx = cust::quick_init()?; - - // Make the CUDA module, modules just house the GPU code for the kernels we created. - // they can be made from PTX code, cubins, or fatbins. - let module = Module::from_ptx(PTX, &[])?; - - // make a CUDA stream to issue calls to. You can think of this as an OS thread but for dispatching - // GPU calls. - let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; - - // allocate the GPU memory needed to house our numbers and copy them over.
- let lhs_gpu = lhs.as_slice().as_dbuf()?; - let rhs_gpu = rhs.as_slice().as_dbuf()?; + let _ctx = step!("cust::quick_init", cust::quick_init()); + + let (driver_major, driver_minor) = step!( + "CudaApiVersion::get", + cust::CudaApiVersion::get().map(|v| (v.major(), v.minor())) + ); + eprintln!("[vecadd] CUDA driver API version: {driver_major}.{driver_minor}"); + + let device = step!("Device::get_device(0)", cust::device::Device::get_device(0)); + let cc_major = step!( + "Device::get_attribute(ComputeCapabilityMajor)", + device.get_attribute(cust::device::DeviceAttribute::ComputeCapabilityMajor) + ); + let cc_minor = step!( + "Device::get_attribute(ComputeCapabilityMinor)", + device.get_attribute(cust::device::DeviceAttribute::ComputeCapabilityMinor) + ); + let name = step!("Device::name", device.name()); + eprintln!("[vecadd] GPU: {name} (compute {cc_major}.{cc_minor})"); + + eprintln!("[vecadd] PTX size: {} bytes", PTX.len()); + eprintln!( + "[vecadd] PTX header: {}", + PTX.lines().take(10).collect::<Vec<_>>().join(" | ") + ); + + // Load PTX via raw cuModuleLoadDataEx so we can capture the JIT error/info log + // buffers; cust's ModuleJitOption doesn't surface those yet, and on UnknownError + // the log is the only way to see the driver's real complaint. + let module = step!( + "cuModuleLoadDataEx (with JIT log buffers)", + load_ptx_with_log(PTX) + ); + + let stream = step!( + "Stream::new", + Stream::new(StreamFlags::NON_BLOCKING, None) + ); + + let lhs_gpu = step!("DeviceBuffer::from lhs", lhs.as_slice().as_dbuf()); + let rhs_gpu = step!("DeviceBuffer::from rhs", rhs.as_slice().as_dbuf()); - // allocate our output buffer. You could also use DeviceBuffer::uninitialized() to avoid the - // cost of the copy, but you need to be careful not to read from the buffer.
let mut out = vec![0.0f32; NUMBERS_LEN]; - let out_buf = out.as_slice().as_dbuf()?; + let out_buf = step!("DeviceBuffer::from out", out.as_slice().as_dbuf()); - // retrieve the `vecadd` kernel from the module so we can calculate the right launch config. - let vecadd = module.get_function("vecadd")?; + let vecadd = step!( + "Module::get_function(\"vecadd\")", + module.get_function("vecadd") + ); - // use the CUDA occupancy API to find an optimal launch configuration for the grid and block size. - // This will try to maximize how much of the GPU is used by finding the best launch configuration for the - // current CUDA device/architecture. - let (_, block_size) = vecadd.suggested_launch_configuration(0, 0.into())?; + let (_, block_size) = step!( + "suggested_launch_configuration", + vecadd.suggested_launch_configuration(0, 0.into()) + ); let grid_size = (NUMBERS_LEN as u32).div_ceil(block_size); - println!("using {grid_size} blocks and {block_size} threads per block"); + sayln!("using {grid_size} blocks and {block_size} threads per block"); - // Actually launch the GPU kernel. This will queue up the launch on the stream, it will - // not block the thread until the kernel is finished. + eprintln!("[vecadd] launching kernel ..."); unsafe { launch!( - // slices are passed as two parameters, the pointer and the length. vecadd<<<grid_size, block_size, 0, stream>>>( lhs_gpu.as_device_ptr(), lhs_gpu.len(), @@ -61,15 +186,19 @@ fn main() -> Result<(), Box<dyn Error>> { rhs_gpu.len(), out_buf.as_device_ptr(), ) - )?; + ) + .map_err(|e| { + eprintln!("[vecadd] launch FAILED: {e:?}"); + e + })?; } + eprintln!("[vecadd] launch queued ok"); - stream.synchronize()?; + step!("stream.synchronize", stream.synchronize()); - // copy back the data from the GPU.
- out_buf.copy_to(&mut out)?; + step!("copy_to", out_buf.copy_to(&mut out)); - println!("{} + {} = {}", lhs[0], rhs[0], out[0]); + sayln!("{} + {} = {}", lhs[0], rhs[0], out[0]); Ok(()) } diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..7eaca8a8 --- /dev/null +++ b/flake.lock @@ -0,0 +1,65 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1775888245, + "narHash": "sha256-nwASzrRDD1JBEu/o8ekKYEXm/oJW6EMCzCRdrwcLe90=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "13043924aaa7375ce482ebe2494338e058282925", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-llvm7": { + "locked": { + "lastModified": 1704290814, + "narHash": "sha256-LWvKHp7kGxk/GEtlrGYV68qIvPHkU9iToomNFGagixU=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "70bdadeb94ffc8806c0570eb5c2695ad29f0e421", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs", + "nixpkgs-llvm7": "nixpkgs-llvm7", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1776136407, + "narHash": "sha256-Cp8XrVLGruSDBTRs8L4LmvaEcd76tHHU9esLk7Ysa4E=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "753568957a87312ed599cba5699e67126eded6c0", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..2bbf0563 --- /dev/null +++ b/flake.nix @@ -0,0 +1,176 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + # LLVM 7 is no longer carried by nixpkgs-unstable. Pin a second nixpkgs just + # for `llvmPackages_7` so someone else's compat patches do the hard work. 
+ nixpkgs-llvm7.url = "github:NixOS/nixpkgs/nixos-23.05"; + rust-overlay.url = "github:oxalica/rust-overlay"; + rust-overlay.inputs.nixpkgs.follows = "nixpkgs"; + }; + + outputs = { nixpkgs, nixpkgs-llvm7, rust-overlay, ... }: + let + system = "x86_64-linux"; + # allowUnfree is required because CUDA is unfree. + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + overlays = [ rust-overlay.overlays.default ]; + }; + pkgsLlvm7 = import nixpkgs-llvm7 { inherit system; }; + lib = pkgs.lib; + + # ---- CUDA toolkit (Nix-managed) ---- + # The NVIDIA **driver** (libcuda.so.1, libnvidia-*) still comes from the + # host — apt on Debian, hardware.nvidia on NixOS. Nix only provides the + # **toolkit** (nvcc, libnvvm, cudart, headers). + # + # Toolkit pin chooses what PTX version NVVM emits, which then dictates + # the minimum host driver version at runtime: + # CUDA 13.2 → NVVM 22.0 → PTX 9.2 → needs driver 580.x+ (CUDA 13) + # CUDA 12.9 → NVVM 21.x → PTX 8.x → runs on CUDA 12.x drivers + # `cudatoolkit` is the kitchen-sink symlinkJoin maintained by nixpkgs — + # every header path and lib layout is already wired correctly. 
+ cuda19Root = pkgs.cudaPackages_13_2.cudatoolkit; + cuda7Root = pkgs.cudaPackages_12_9.cudatoolkit; + + driverLibDir = "/usr/lib/x86_64-linux-gnu"; + + toolchain = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml; + + # ---- LLVM 19 (from current nixpkgs) ---- + llvm19 = pkgs.llvmPackages_19; + llvm19Bin = lib.getBin llvm19.llvm; + llvm19Dev = lib.getDev llvm19.llvm; + llvm19CompatTools = pkgs.symlinkJoin { + name = "llvm19-compat-tools"; + paths = [ + (pkgs.writeShellScriptBin "opt-19" ''exec ${llvm19Bin}/bin/opt "$@"'') + (pkgs.writeShellScriptBin "llvm-as-19" ''exec ${llvm19Bin}/bin/llvm-as "$@"'') + (pkgs.writeShellScriptBin "llvm-dis-19" ''exec ${llvm19Bin}/bin/llvm-dis "$@"'') + (pkgs.writeShellScriptBin "llc-19" ''exec ${llvm19Bin}/bin/llc "$@"'') + ]; + }; + + # ---- LLVM 7.1.0 (from pinned nixos-23.05 nixpkgs) ---- + llvm7Pkg = pkgsLlvm7.llvmPackages_7.llvm; + llvm7Bin = pkgsLlvm7.lib.getBin llvm7Pkg; + llvm7Dev = pkgsLlvm7.lib.getDev llvm7Pkg; + llvm7CompatTools = pkgs.symlinkJoin { + name = "llvm7-compat-tools"; + paths = [ + (pkgs.writeShellScriptBin "llvm-config-7" ''exec ${llvm7Dev}/bin/llvm-config "$@"'') + (pkgs.writeShellScriptBin "llvm-as-7" ''exec ${llvm7Bin}/bin/llvm-as "$@"'') + (pkgs.writeShellScriptBin "llvm-dis-7" ''exec ${llvm7Bin}/bin/llvm-dis "$@"'') + (pkgs.writeShellScriptBin "llc-7" ''exec ${llvm7Bin}/bin/llc "$@"'') + (pkgs.writeShellScriptBin "opt-7" ''exec ${llvm7Bin}/bin/opt "$@"'') + ]; + }; + + # ---- Shared bits across both shells ---- + commonNativeInputs = [ + toolchain + pkgs.pkg-config + pkgs.cmake + pkgs.ninja + ]; + # The v19 shell uses unstable's runtime libs (modern glibc). The v7 shell has + # to match LLVM 7's glibc generation (23.05), otherwise ncurses/libstdc++ from + # unstable demand GLIBC_2.38+ symbols LLVM 7's linked glibc 2.37 doesn't have. 
+ v19BuildInputs = [ + pkgs.openssl + pkgs.libxml2 + pkgs.zlib + pkgs.ncurses + pkgs.stdenv.cc.cc.lib + ]; + v7BuildInputs = [ + pkgsLlvm7.openssl + pkgsLlvm7.libxml2 + pkgsLlvm7.zlib + pkgsLlvm7.ncurses + pkgsLlvm7.stdenv.cc.cc.lib + ]; + mkCudaEnv = root: { + CUDA_HOME = "${root}"; + CUDA_ROOT = "${root}"; + CUDA_PATH = "${root}"; + CUDA_TOOLKIT_ROOT_DIR = "${root}"; + # Cover both lib/ (nix-style) and lib64/ (FHS-style) so downstream + # build.rs scripts that probe either layout resolve libcudart + stubs. + CUDA_LIBRARY_PATH = + "${root}/lib:${root}/lib64:${root}/lib/stubs:${root}/lib64/stubs"; + }; + # Symlink every NVIDIA-shipped driver library (libcuda, libnvidia-*) into a + # single shim dir that we then stick on LD_LIBRARY_PATH. libcuda alone is not + # enough: the driver will dlopen companions like libnvidia-ptxjitcompiler.so.1 + # when JITing PTX, and failing to find them surfaces as + # CUDA_ERROR_JIT_COMPILER_NOT_FOUND from cuModuleLoadDataEx. + driverShimHook = '' + driver_shim_dir="$PWD/.nix-driver-libs" + mkdir -p "$driver_shim_dir" + for src in "${driverLibDir}"/libcuda.so* "${driverLibDir}"/libnvidia-*.so*; do + [ -e "$src" ] || continue + ln -sf "$src" "$driver_shim_dir/$(basename "$src")" + done + ''; + + # ---- LLVM 7-only shell (CUDA 12.9 toolkit) ---- + v7Shell = pkgs.mkShell ((mkCudaEnv cuda7Root) // { + nativeBuildInputs = commonNativeInputs ++ [ + cuda7Root + llvm7Bin + llvm7Dev + llvm7CompatTools + pkgsLlvm7.llvmPackages_7.clang + pkgsLlvm7.llvmPackages_7.libclang + ]; + buildInputs = v7BuildInputs; + LLVM_CONFIG = "${llvm7Dev}/bin/llvm-config"; + # Give bindgen an explicit libclang (matched to 23.05's glibc) so it doesn't + # fall back to scanning system paths and pick up an apt-installed LLVM 19 + # with deps the v7 shell's LD_LIBRARY_PATH doesn't satisfy. 
+ LIBCLANG_PATH = "${pkgsLlvm7.lib.getLib pkgsLlvm7.llvmPackages_7.libclang}/lib"; + shellHook = driverShimHook + '' + export PATH="${llvm7CompatTools}/bin:${llvm7Bin}/bin:${llvm7Dev}/bin:${cuda7Root}/bin:${cuda7Root}/nvvm/bin:$PATH" + export LD_LIBRARY_PATH="$driver_shim_dir:${cuda7Root}/nvvm/lib:${cuda7Root}/nvvm/lib64:${cuda7Root}/lib64:${cuda7Root}/lib:${pkgsLlvm7.ncurses.out}/lib:${pkgsLlvm7.libxml2.out}/lib:${pkgsLlvm7.zlib.out}/lib:${pkgsLlvm7.stdenv.cc.cc.lib}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" + + echo "rust-cuda llvm7 shell" + echo " CUDA_HOME=$CUDA_HOME" + echo " LLVM_CONFIG=$LLVM_CONFIG" + echo " NVIDIA_DRIVER_LIB=$driver_shim_dir/libcuda.so.1" + ''; + }); + + # ---- LLVM 19-only shell (CUDA 13.2 toolkit, the active-work shell) ---- + v19Shell = pkgs.mkShell ((mkCudaEnv cuda19Root) // { + nativeBuildInputs = commonNativeInputs ++ [ + cuda19Root + llvm19.clang + llvm19.libclang + llvm19Bin + llvm19Dev + llvm19CompatTools + ]; + buildInputs = v19BuildInputs; + LLVM_CONFIG_19 = "${llvm19Dev}/bin/llvm-config"; + LIBCLANG_PATH = "${lib.getLib llvm19.libclang}/lib"; + shellHook = driverShimHook + '' + export PATH="${llvm19CompatTools}/bin:${llvm19Bin}/bin:${llvm19Dev}/bin:${cuda19Root}/bin:${cuda19Root}/nvvm/bin:$PATH" + export LD_LIBRARY_PATH="$driver_shim_dir:${cuda19Root}/nvvm/lib:${cuda19Root}/nvvm/lib64:${cuda19Root}/lib64:${cuda19Root}/lib:${pkgs.ncurses.out}/lib:${pkgs.libxml2.out}/lib:${pkgs.zlib.out}/lib:${pkgs.stdenv.cc.cc.lib}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" + + echo "rust-cuda llvm19 shell" + echo " CUDA_HOME=$CUDA_HOME" + echo " LLVM_CONFIG_19=$LLVM_CONFIG_19" + echo " NVIDIA_DRIVER_LIB=$driver_shim_dir/libcuda.so.1" + ''; + }); + in + { + devShells.${system} = { + default = v19Shell; + v7 = v7Shell; + v19 = v19Shell; + }; + }; +} diff --git a/llvm-19 b/llvm-19 new file mode 160000 index 00000000..2123f5cd --- /dev/null +++ b/llvm-19 @@ -0,0 +1 @@ +Subproject commit 2123f5cd336f2bed449e8d8d6612c4224553f2ba diff --git 
a/scripts/vast-ai.sh b/scripts/vast-ai.sh new file mode 100755 index 00000000..951c7ddd --- /dev/null +++ b/scripts/vast-ai.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Build vecadd on the Nix-equipped build host, then push it to a vast.ai +# Blackwell box with CUDA 13.2+ and run it there. + +set -euo pipefail + +BUILD_HOST="${BUILD_HOST:-brandon@asusrogstrix.local}" +BUILD_DIR="${BUILD_DIR:-/home/brandon/Rust-CUDA}" +BUILD_BIN="${BUILD_BIN:-target/debug/vecadd}" + +VAST_HOST="${VAST_HOST:-root@ssh6.vast.ai}" +VAST_PORT="${VAST_PORT:-34929}" +VAST_DEST="${VAST_DEST:-/workspace/vecadd}" + +# Vast.ai hands us a new container (→ new host key) on every rental, so skip +# the TOFU prompt and keep the churn out of ~/.ssh/known_hosts. +VAST_SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR) + +# Nix-built binaries bake /nix/store/...-glibc/ld-linux-x86-64.so.2 as their ELF +# interpreter, which doesn't exist on the vast.ai container. Rewrite it to the +# standard FHS path at build time and strip the rpath so the loader only pulls +# in the container's glibc + the CUDA driver's libcuda.so.1. +echo ">> Building on $BUILD_HOST" +ssh "$BUILD_HOST" "cd '$BUILD_DIR' \ + && nix develop .#v19 --command cargo build -p vecadd \ + && nix shell nixpkgs#patchelf --command patchelf \ + --set-interpreter /lib64/ld-linux-x86-64.so.2 \ + --remove-rpath '$BUILD_BIN'" + +echo ">> Staging binary locally" +local_bin="$(mktemp -d)/vecadd" +scp "$BUILD_HOST:$BUILD_DIR/$BUILD_BIN" "$local_bin" + +echo ">> Uploading to $VAST_HOST:$VAST_PORT" +scp "${VAST_SSH_OPTS[@]}" -P "$VAST_PORT" "$local_bin" "$VAST_HOST:$VAST_DEST" + +echo ">> Running on vast.ai" +ssh "${VAST_SSH_OPTS[@]}" -p "$VAST_PORT" "$VAST_HOST" "chmod +x '$VAST_DEST' && nvidia-smi -L && '$VAST_DEST'"