rust-lang · shua · Feb 26, 2026 · May 5, 2026 · Walnut356 · May 17, 2026
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -152,7 +152,20 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
         cx.size_and_align_of(Ty::new_mut_ptr(cx.tcx, pointee_type))
     );
 
-    let pointee_type_di_node = type_di_node(cx, pointee_type);
+    let pointee_type_di_node = match pointee_type.kind() {
+        // `&[T]` will look like `{ data_ptr: *const T, length: usize }`
+        ty::Slice(element_type) => type_di_node(cx, *element_type),
+        // `&str` will look like `{ data_ptr: *const u8, length: usize }`
+        ty::Str => type_di_node(cx, cx.tcx.types.u8),
+
+        // `&dyn K` will look like `{ pointer: _, vtable: _}`
+        // `&Foo` for any Adt `Foo` with an unsized slice tail (eg `[_]` or `str`)
+        //   will look like `{ data_ptr: *const Foo, length: usize }`
+        // and thin pointers `&Foo` will just look like `*const Foo`.
+        //
+        // in all those cases, we just use the pointee_type
+        _ => type_di_node(cx, pointee_type),
+    };
 
     return_if_di_node_created_in_meantime!(cx, unique_type_id);
 
@@ -389,26 +402,11 @@ fn build_dyn_type_di_node<'ll, 'tcx>(
 }
 
 /// Create debuginfo for `[T]` and `str`. These are unsized.
-///
-/// NOTE: We currently emit just emit the debuginfo for the element type here
-/// (i.e. `T` for slices and `u8` for `str`), so that we end up with
-/// `*const T` for the `data_ptr` field of the corresponding wide-pointer
-/// debuginfo of `&[T]`.
-///
-/// It would be preferable and more accurate if we emitted a DIArray of T
-/// without an upper bound instead. That is, LLVM already supports emitting
-/// debuginfo of arrays of unknown size. But GDB currently seems to end up
-/// in an infinite loop when confronted with such a type.
-///
-/// As a side effect of the current encoding every instance of a type like
-/// `struct Foo { unsized_field: [u8] }` will look like
-/// `struct Foo { unsized_field: u8 }` in debuginfo. If the length of the
-/// slice is zero, then accessing `unsized_field` in the debugger would
-/// result in an out-of-bounds access.
 fn build_slice_type_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     slice_type: Ty<'tcx>,
     unique_type_id: UniqueTypeId<'tcx>,
+    span: Span,
 ) -> DINodeCreationResult<'ll> {
     let element_type = match slice_type.kind() {
         ty::Slice(element_type) => *element_type,
@@ -423,7 +421,20 @@ fn build_slice_type_di_node<'ll, 'tcx>(
 
     let element_type_di_node = type_di_node(cx, element_type);
     return_if_di_node_created_in_meantime!(cx, unique_type_id);
-    DINodeCreationResult { di_node: element_type_di_node, already_stored_in_typemap: false }
+    let (size, align) = cx.spanned_size_and_align_of(slice_type, span);
+    let subrange = unsafe { llvm::LLVMDIBuilderGetOrCreateSubrange(DIB(cx), 0, -1) };
+    let subscripts = &[subrange];
+    let di_node = unsafe {
+        llvm::LLVMDIBuilderCreateArrayType(
+            DIB(cx),
+            size.bits(),
+            align.bits() as u32,
+            element_type_di_node,
+            subscripts.as_ptr(),
+            subscripts.len() as c_uint,
+        )
+    };
+    DINodeCreationResult { di_node, already_stored_in_typemap: false }
 }
 
 /// Get the debuginfo node for the given type.
@@ -454,7 +465,7 @@ pub(crate) fn spanned_type_di_node<'ll, 'tcx>(
         }
         ty::Tuple(elements) if elements.is_empty() => build_basic_type_di_node(cx, t),
         ty::Array(..) => build_fixed_size_array_di_node(cx, unique_type_id, t, span),
-        ty::Slice(_) | ty::Str => build_slice_type_di_node(cx, t, unique_type_id),
+        ty::Slice(_) | ty::Str => build_slice_type_di_node(cx, t, unique_type_id, span),
         ty::Dynamic(..) => build_dyn_type_di_node(cx, t, unique_type_id),
         ty::Foreign(..) => build_foreign_type_di_node(cx, t, unique_type_id),
         ty::RawPtr(pointee_type, _) | ty::Ref(_, pointee_type, _) => {

diff --git a/src/etc/gdb_lookup.py b/src/etc/gdb_lookup.py
@@ -103,6 +103,7 @@ def __call__(self, valobj):
 printer.add(RustType.StdString, StdStringProvider)
 printer.add(RustType.StdOsString, StdOsStringProvider)
 printer.add(RustType.StdStr, StdStrProvider)
+printer.add(RustType.StdBoxStr, StdBoxStrProvider)
 printer.add(RustType.StdSlice, StdSliceProvider)
 printer.add(RustType.StdVec, StdVecProvider)
 printer.add(RustType.StdVecDeque, StdVecDequeProvider)

diff --git a/src/etc/gdb_providers.py b/src/etc/gdb_providers.py
@@ -142,6 +142,20 @@ def display_hint():
         return "array"
 
 
+class StdBoxStrProvider(printer_base):
+    def __init__(self, valobj):
+        self._valobj = valobj
+        self._length = int(valobj["length"])
+        self._data_ptr = valobj["data_ptr"]
+
+    def to_string(self):
+        return self._data_ptr.lazy_string(encoding="utf-8", length=self._length)
+
+    @staticmethod
+    def display_hint():
+        return "string"
+
+
 class StdVecProvider(printer_base):
     def __init__(self, valobj):
         self._valobj = valobj
@@ -203,6 +217,12 @@ def __init__(self, valobj, is_atomic=False):
         self._is_atomic = is_atomic
         self._ptr = unwrap_unique_or_non_null(valobj["ptr"])
         self._value = self._ptr["data" if is_atomic else "value"]
+        # FIXME(shua): the debuginfo template type should be 'str' not 'u8'
+        if self._ptr.type.target().name == "alloc::rc::RcInner<str>":
+            length = self._valobj["ptr"]["pointer"]["length"]
+            u8_ptr_ty = gdb.Type.pointer(gdb.lookup_type("u8"))
+            ptr = self._value.address.reinterpret_cast(u8_ptr_ty)
+            self._value = ptr.lazy_string(encoding="utf-8", length=length)
         self._strong = unwrap_scalar_wrappers(self._ptr["strong"])
         self._weak = unwrap_scalar_wrappers(self._ptr["weak"]) - 1
 

diff --git a/src/etc/rust_types.py b/src/etc/rust_types.py
@@ -37,12 +37,14 @@ class RustType(Enum):
     StdNonZeroNumber = 29
     StdPath = 30
     StdPathBuf = 31
+    StdBoxStr = 32
 
 
 STD_STRING_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)String$")
 STD_STR_REGEX = re.compile(r"^&(mut )?str$")
 STD_SLICE_REGEX = re.compile(r"^&(mut )?\[.+\]$")
 STD_OS_STRING_REGEX = re.compile(r"^(std::ffi::([a-z_]+::)+)OsString$")
+STD_BOX_STR_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)Box<str,.+>$")
 STD_VEC_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)Vec<.+>$")
 STD_VEC_DEQUE_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)VecDeque<.+>$")
 STD_BTREE_SET_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)BTreeSet<.+>$")
@@ -67,6 +69,7 @@ class RustType(Enum):
     RustType.StdString: STD_STRING_REGEX,
     RustType.StdOsString: STD_OS_STRING_REGEX,
     RustType.StdStr: STD_STR_REGEX,
+    RustType.StdBoxStr: STD_BOX_STR_REGEX,
     RustType.StdSlice: STD_SLICE_REGEX,
     RustType.StdVec: STD_VEC_REGEX,
     RustType.StdVecDeque: STD_VEC_DEQUE_REGEX,

diff --git a/tests/codegen-llvm/debuginfo-unsize-field.rs b/tests/codegen-llvm/debuginfo-unsize-field.rs
@@ -0,0 +1,66 @@
+//@ compile-flags:-g -Copt-level=0 -C panic=abort
+
+// Check that debug information for structs with embedded str and [u8] slices is distinct from
+// structs with embedded u8
+
+#![crate_type = "lib"]
+
+// NOTE: regex for the CHECK directives,
+// depending on the target, u8/usize are basic types or typedefs
+//   linux:  !1 = !DIBasicType(name: "u8",
+//   win:    !1 = !DIDerivedType(tag: .*, name: "u8",
+// and reference types are
+//   linux:  name: "&debuginfo_unsize_field::Foo"
+//   win:    name: "ref$<debuginfo_unsize_field::Foo>"
+
+// CHECK: ![[U8:[0-9]+]] = !DI{{Basic|Derived}}Type({{.*}}name: "u8",
+
+pub struct Foo {
+    a: u32,
+    b: str,
+}
+// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "{{&|ref\$<}}{{[^"]+}}::Foo{{>?}}", {{.*}}elements: ![[FOO_REF_ELEMS:[0-9]+]]
+// CHECK: ![[FOO_REF_ELEMS]] = !{![[FOO_REF_PTR:[0-9]+]], ![[FOO_REF_LEN:[0-9]+]]}
+// CHECK: ![[FOO_REF_PTR]] = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", {{.*}}baseType: ![[FOO_PTR:[0-9]+]]
+// CHECK: ![[FOO_PTR]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[FOO:[0-9]+]]
+// CHECK: ![[FOO]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", {{.*}}elements: ![[FOO_ELEMS:[0-9]+]]
+// CHECK: ![[FOO_ELEMS]] = !{![[FOO_A:[0-9]+]], ![[FOO_B:[0-9]+]]}
+// CHECK: ![[FOO_A]] = !DIDerivedType(tag: DW_TAG_member, name: "a"
+// CHECK: ![[FOO_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b", {{.*}}baseType: ![[U8_SLICE:[0-9]+]]
+//
+// CHECK: ![[U8_SLICE]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[U8]], {{.*}}elements: ![[U8_SLICE_ELEMS:[0-9]+]]
+// CHECK: ![[U8_SLICE_ELEMS]] = !{![[U8_SLICE_RANGE:[0-9]+]]}
+// this is special to embedded slices: there is no upper bound on the number of elements;
+// that info is stored in the length metadata of a reference to the parent struct
+// CHECK: ![[U8_SLICE_RANGE]] = !DISubrange(count: -1, lowerBound: 0)
+//
+// CHECK: ![[FOO_REF_LEN]] = !DIDerivedType(tag: DW_TAG_member, name: "length", {{.*}}baseType: ![[USIZE:[0-9]+]]
+// CHECK: ![[USIZE]] = !DI{{Basic|Derived}}Type({{.*}}name: "usize"
+pub struct Bar {
+    a: u32,
+    b: [u8],
+}
+// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "{{&|ref\$<}}{{[^"]+}}::Bar{{>?}}", {{.*}}elements: ![[BAR_REF_ELEMS:[0-9]+]]
+// CHECK: ![[BAR_REF_ELEMS]] = !{![[BAR_REF_PTR:[0-9]+]], ![[BAR_REF_LEN:[0-9]+]]}
+// CHECK: ![[BAR_REF_PTR]] = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", {{.*}}baseType: ![[BAR_PTR:[0-9]+]]
+// CHECK: ![[BAR_PTR]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[BAR:[0-9]+]]
+// CHECK: ![[BAR]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Bar", {{.*}}elements: ![[BAR_ELEMS:[0-9]+]]
+// CHECK: ![[BAR_ELEMS]] = !{![[BAR_A:[0-9]+]], ![[BAR_B:[0-9]+]]}
+// CHECK: ![[BAR_A]] = !DIDerivedType(tag: DW_TAG_member, name: "a"
+// CHECK: ![[BAR_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b", {{.*}}baseType: ![[U8_SLICE]]
+// CHECK: ![[BAR_REF_LEN]] = !DIDerivedType(tag: DW_TAG_member, name: "length", {{.*}}baseType: ![[USIZE:[0-9]+]]
+pub struct Baz {
+    a: u32,
+    b: u8,
+}
+// CHECK: !DIDerivedType(tag: DW_TAG_pointer_type, name: "{{&|ref\$<}}{{[^"]+}}::Baz{{>?}}", {{.*}}baseType: ![[BAZ:[0-9]+]]
+// CHECK: ![[BAZ]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Baz", {{.*}}elements: ![[BAZ_ELEMS:[0-9]+]]
+// CHECK: ![[BAZ_ELEMS]] = !{![[BAZ_A:[0-9]+]], ![[BAZ_B:[0-9]+]]}
+// CHECK: ![[BAZ_A]] = !DIDerivedType(tag: DW_TAG_member, name: "a"
+// CHECK: ![[BAZ_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b", {{.*}}baseType: ![[U8]]
+
+#[no_mangle]
+pub fn test<'a>(a: &'a Foo, b: &'a Bar, c: &'a Baz) -> &'a u8 {
+    // just use this somehow so the debuginfo isn't removed
+    &a.b.as_bytes()[0]
+}
diff --git a/tests/debuginfo/strings-and-strs.rs b/tests/debuginfo/strings-and-strs.rs
@@ -1,6 +1,7 @@
 //@ min-gdb-version: 14.0
 // LLDB 1800+ tests were not tested in CI, broke, and now are disabled
 //@ ignore-lldb
+//@ ignore-x86_64-pc-windows-gnu #153238 this target in CI doesn't print Rc<str> correctly
 
 //@ compile-flags:-g
 //@ disable-gdb-pretty-printers
@@ -23,6 +24,12 @@
 //@ gdb-command:print str_in_rc
 //@ gdb-check:$5 = alloc::rc::Rc<&str, alloc::alloc::Global> {ptr: core::ptr::non_null::NonNull<alloc::rc::RcInner<&str>> {pointer: 0x[...]}, phantom: core::marker::PhantomData<alloc::rc::RcInner<&str>>, alloc: alloc::alloc::Global}
 
+//@ gdb-command:print box_str
+//@ gdb-check:$6 = alloc::boxed::Box<str, alloc::alloc::Global> [87, 111, 114, 108, 100]
+
+//@ gdb-command:print rc_str
+//@ gdb-check:$7 = alloc::rc::Rc<str, alloc::alloc::Global> {ptr: core::ptr::non_null::NonNull<alloc::rc::RcInner<str>> {pointer: alloc::rc::RcInner<str> {strong: core::cell::Cell<usize> {value: core::cell::UnsafeCell<usize> {value: 1}}, weak: core::cell::Cell<usize> {value: core::cell::UnsafeCell<usize> {value: 1}}, value: 0x[...]}}, phantom: core::marker::PhantomData<alloc::rc::RcInner<str>>, alloc: alloc::alloc::Global}
+
 // === LLDB TESTS ==================================================================================
 //@ lldb-command:run
 //@ lldb-command:v plain_string
@@ -40,6 +47,12 @@
 //@ lldb-command:v str_in_rc
 //@ lldb-check:(alloc::rc::Rc<&str, alloc::alloc::Global>) str_in_rc = strong=1, weak=0 { value = "Hello" { [0] = 'H' [1] = 'e' [2] = 'l' [3] = 'l' [4] = 'o' } }
 
+//@ lldb-command:v box_str
+//@ lldb-check:(alloc::boxed::Box<unsigned char[], alloc::alloc::Global>) box_str = { __0 = { pointer = { pointer = { data_ptr = 0x[...] "World" length = 5 } } _marker = } __1 = }
+
+//@ lldb-command:v rc_str
+//@ lldb-check:(alloc::rc::Rc<unsigned char[], alloc::alloc::Global>) rc_str = strong=1, weak=0 { value = "World" }
+
 #![allow(unused_variables)]
 
 pub struct Foo<'a> {
@@ -53,6 +66,8 @@ fn main() {
     let str_in_tuple = ("Hello", "World");
 
     let str_in_rc = std::rc::Rc::new("Hello");
+    let box_str: Box<str> = "World".into();
+    let rc_str: std::rc::Rc<str> = "World".into();
     zzz(); // #break
 }