Skip to content

Commit 72feb1c

Browse files
committed
cleanup
1 parent 1225768 commit 72feb1c

1 file changed

Lines changed: 10 additions & 8 deletions

File tree

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ impl KernelArgsTy {
288288
pub(crate) struct OffloadKernelGlobals<'ll> {
289289
pub offload_sizes: &'ll llvm::Value,
290290
pub memtransfer_types: &'ll llvm::Value,
291-
pub region_id: &'ll llvm::Value,
291+
pub region_id: Option<&'ll llvm::Value>,
292292
pub offload_entry: Option<&'ll llvm::Value>,
293293
}
294294

@@ -391,11 +391,11 @@ pub(crate) fn gen_define_handling<'ll>(
391391
// Next: for each function, generate these three entries: a weak constant,
392392
// the llvm.rodata entry name, and the llvm_offload_entries value.
393393

394-
let name = format!(".{symbol}.region_id");
395-
let initializer = cx.get_const_i8(0);
396-
let region_id = add_global(&cx, &name, initializer, WeakAnyLinkage);
394+
let (offload_entry, region_id) = if !host {
395+
let name = format!(".{symbol}.region_id");
396+
let initializer = cx.get_const_i8(0);
397+
let region_id = add_global(&cx, &name, initializer, WeakAnyLinkage);
397398

398-
let offload_entry = if !host {
399399
let c_entry_name = CString::new(symbol.clone()).unwrap();
400400
let c_val = c_entry_name.as_bytes_with_nul();
401401
let offload_entry_name = format!(".offloading.entry_name.{symbol}");
@@ -419,9 +419,9 @@ pub(crate) fn gen_define_handling<'ll>(
419419
llvm::set_alignment(offload_entry, Align::EIGHT);
420420
let c_section_name = CString::new("llvm_offload_entries").unwrap();
421421
llvm::set_section(offload_entry, &c_section_name);
422-
Some(offload_entry)
422+
(Some(offload_entry), Some(region_id))
423423
} else {
424-
None
424+
(None, None)
425425
};
426426

427427
let result =
@@ -472,6 +472,8 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
472472
args: &[&'ll Value],
473473
types: &[&Type],
474474
metadata: &[OffloadMetadata],
475+
// TODO: fuse the device-global part of offload_globals with offload_dims, so that both are set
476+
// if and only if `host` is false.
475477
offload_globals: &OffloadGlobals<'ll>,
476478
offload_dims: Option<&OffloadKernelDims<'ll>>,
477479
host: bool,
@@ -663,7 +665,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
663665
cx.get_const_i64(u64::MAX), // MAX == -1.
664666
num_workgroups,
665667
threads_per_block,
666-
region_id,
668+
region_id.unwrap(),
667669
a5,
668670
];
669671
// %41 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args)

0 commit comments

Comments
 (0)