Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 81 additions & 21 deletions libc/libc.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,67 @@ import (
"fmt"
"regexp"

"go.opentelemetry.io/ebpf-profiler/internal/log"

"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
"go.opentelemetry.io/ebpf-profiler/support"
)

type TSDInfo = support.TSDInfo
type DTVInfo = support.DTVInfo

// LibcInfo contains introspection information extracted from the C-library.
// Both fields are comparable values; an all-zero field means that piece of
// information has not been extracted.
type LibcInfo struct {
// TSDInfo is the thread specific data (TSD) introspection data extracted
// for this C-library
TSDInfo TSDInfo
// DTVInfo is the DTV introspection data (used to access TLS variables)
// extracted for this C-library. It stays at its zero value when
// extractDTVInfo cannot read the `__tls_get_addr` symbol.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO?

DTVInfo DTVInfo
}

// IsEqual reports whether l and other carry identical TSDInfo and DTVInfo
// values.
func (l LibcInfo) IsEqual(other LibcInfo) bool {
	if l.TSDInfo != other.TSDInfo {
		return false
	}
	return l.DTVInfo == other.DTVInfo
}

// Merge fills in fields of the receiver that are still empty (zero value)
// with the corresponding values from other.
// Values already present in the receiver are kept; other never overrides them.
func (l *LibcInfo) Merge(other LibcInfo) {
// Take TSDInfo from other only if this instance has none yet. If other has
// none either, the assignment leaves the field at its zero value.
if l.TSDInfo == (TSDInfo{}) {
l.TSDInfo = other.TSDInfo
}

// If other has DTVInfo and this instance does not, take it
if other.DTVInfo != (DTVInfo{}) && l.DTVInfo == (DTVInfo{}) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same thing as the earlier hunk

Suggested change
if other.DTVInfo != (DTVInfo{}) && l.DTVInfo == (DTVInfo{}) {
if l.DTVInfo == (DTVInfo{}) {

l.DTVInfo = other.DTVInfo
}
}

// libcRegex matches paths whose file name looks like a C-library related DSO:
// the musl or glibc dynamic loader (ld-musl, ld-linux), libc or libpthread,
// optionally followed by a "-" or "." separated suffix before ".so".
var libcRegex = regexp.MustCompile(`.*/(ld-musl|ld-linux|libc|libpthread)([-.].*)?\.so`)

// IsPotentialLibcDSO reports whether the DSO filename potentially contains
// libc code, judged by its path only.
func IsPotentialLibcDSO(filename string) bool {
	matched := libcRegex.MatchString(filename)
	return matched
}

// ExtractLibcInfo extracts the C-library introspection data from ef.
//
// The TSD data is obtained via extractTSDInfo and the DTV data via
// extractDTVInfo. If either extraction reports an error, a nil *LibcInfo is
// returned together with that error; no partially populated result is handed
// back.
func ExtractLibcInfo(ef *pfelf.File) (*LibcInfo, error) {
	tsdinfo, err := extractTSDInfo(ef)
	if err != nil {
		return nil, err
	}

	dtvinfo, err := extractDTVInfo(ef)
	if err != nil {
		// Same contract as the TSD failure above: nil result on error.
		return nil, err
	}

	return &LibcInfo{
		TSDInfo: tsdinfo,
		DTVInfo: dtvinfo,
	}, nil
}

// This code analyzes the C-library provided POSIX defined function which is used
Expand Down Expand Up @@ -65,27 +116,6 @@ type LibcInfo struct {
//
// Reading the value is basically "return self->specific_1stblock[key].data;"

// libcRegex matches paths whose file name looks like the musl loader
// (ld-musl), libc or libpthread, optionally followed by a "-" or "."
// separated suffix before ".so".
var libcRegex = regexp.MustCompile(`.*/(ld-musl|libc|libpthread)([-.].*)?\.so`)

// IsPotentialTSDDSO reports whether the DSO filename potentially contains
// pthread code, judged by its path only.
func IsPotentialTSDDSO(filename string) bool {
	matched := libcRegex.MatchString(filename)
	return matched
}

// ExtractLibcInfo extracts the C-library introspection data from ef.
// Only the TSD data is gathered (via extractTSDInfo); on failure a nil
// *LibcInfo is returned together with the error.
func ExtractLibcInfo(ef *pfelf.File) (*LibcInfo, error) {
	tsd, err := extractTSDInfo(ef)
	if err != nil {
		return nil, err
	}

	result := &LibcInfo{TSDInfo: tsd}
	return result, nil
}

// extractTSDInfo extracts the introspection data for pthread thread specific data.
func extractTSDInfo(ef *pfelf.File) (TSDInfo, error) {
_, code, err := ef.SymbolData("__pthread_getspecific", 2048)
Expand Down Expand Up @@ -113,3 +143,33 @@ func extractTSDInfo(ef *pfelf.File) (TSDInfo, error) {
}
return info, nil
}

// extractDTVInfo extracts the introspection data for the DTV to access TLS vars
//
// It reads the code of the `__tls_get_addr` symbol from ef and hands it to the
// architecture specific extractor (AArch64 or x86-64, selected by ef.Machine).
//
// Returns:
//   - an empty DTVInfo and a nil error when the symbol data cannot be read
//     (a warning is logged instead),
//   - an error when fewer than 8 bytes of code were read, when the machine
//     type is not supported, or when the architecture specific extractor fails.
func extractDTVInfo(ef *pfelf.File) (DTVInfo, error) {
var info DTVInfo
// The second argument is presumably the maximum number of bytes to read
// for the symbol -- TODO confirm against pfelf.File.SymbolData.
_, code, err := ef.SymbolData("__tls_get_addr", 2048)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the test coredumps don't actually have this symbol, so it is necessary to not error out if the symbol isn't present. Hence why i added the logging package, and we now just log the error if the symbol is missing.

We return an empty DTVInfo struct, it is up to users of DTV info to check that it is valid before using it. This can easily be done by verifying that "EntryWidth" is not 0.

In the cases where we DO have the symbol, but fail to extract info from it, we legitimately error out.

Copy link
Contributor

@fabled fabled Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems this is defined in the ld-linux-x86-64.so.2 in (some versions of) glibc. So it means that the libc information may need to be collected from two DSOs in case of glibc.

You should add this to the regexp pattern in IsPotentialTSDDSO. Perhaps rename that to IsLibcDSO?

This also means that ProcessManager.assignLibcInfo should be updated to merge the information from these two different DSOs. Probably add a helper libc.MergeLibcInfo or add a struct method for LibcInfo.Merge?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the ability to check for equality and merge, and the values are "accumulated" when we call assignLibcInfo.

Also added unit tests for the associated LibcInfo.IsEqual and LibcInfo.Merge, and to verify the accumulation behaviour in assignLibcInfo.

if err != nil {
// Only error out reading DTV if we have the symbol, but fail to parse it
// if the symbol is not exported, failing to read it is not a critical error
// and empty DTV introspection data is returned
// NOTE(review): every SymbolData error takes this path, not only a
// missing symbol -- confirm that is acceptable.
log.Warnf("unable to read '__tls_get_addr': %s, libc DTV introspection data is unavailable", err)
return info, nil
}

// Reject code too short to analyze; info is still the zero value here.
if len(code) < 8 {
return info, fmt.Errorf("__tls_get_addr function size is %d", len(code))
}

// Dispatch to the extractor matching the ELF machine type.
switch ef.Machine {
case elf.EM_AARCH64:
info, err = extractDTVInfoARM(code)
case elf.EM_X86_64:
info, err = extractDTVInfoX86(code)
default:
return info, fmt.Errorf("unsupported arch %s", ef.Machine.String())
}
if err != nil {
// NOTE(review): the cause is formatted with %s, so it is not wrapped
// and cannot be inspected with errors.Is / errors.As.
return info, fmt.Errorf("failed to extract DTV data: %s", err)
}
return info, nil
}
143 changes: 143 additions & 0 deletions libc/libc_aarch64.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,146 @@ func extractTSDInfoARM(code []byte) (TSDInfo, error) {
Indirect: indirect,
}, nil
}

// extractDTVInfoARM derives the DTV introspection data from AArch64 machine
// code; its caller extractDTVInfo passes the code of `__tls_get_addr`.
//
// The code is decoded one 4-byte instruction at a time until a RET is seen or
// the slice ends. Per-register state is tracked to recognize:
//   - the thread pointer being read (MRS from S3_3_C13_C0_2, i.e. tpidr_el0),
//   - a load relative to the thread pointer, whose immediate becomes Offset,
//   - a scaled access (LDR with a shifted register offset on the DTV pointer,
//     or an LSL by an immediate), whose shift amount gives the entry width
//     as a power of two.
//
// The returned DTVInfo always has Indirect set to 1 and the returned error is
// always nil; Offset and Multiplier stay 0 when no matching instruction is
// found.
func extractDTVInfoARM(code []byte) (DTVInfo, error) {
// Track register states similar to extractTSDInfoARM
Comment on lines +272 to +273
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm. we really should implement the arm.NewInterpreter similar to the amd.NewInterpreter that creates similar expressions to match to avoid all this state tracking duplication. but i suppose that's outside the scope of this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes i found myself wishing for that several times while working on this, but yeah as it stands this is the status quo for arm. The amd api is quite nice. Something for future work - i can cut an issue for that?

var regs [32]regState

dtvOffset := int16(0)
entryWidth := uint32(0)
// resetReg is the register whose tracked state must be invalidated at the
// start of the next iteration; -1 means none.
resetReg := int(-1)

// Scan entire function
for offs := 0; offs < len(code); offs += 4 {
// Stop when fewer than 4 bytes remain.
if offs+4 > len(code) {
break
}

// resetReg is still set here when an earlier iteration left via `continue`
// after a destination register had been selected.
if resetReg >= 0 {
// Reset register state if something unsupported happens on it
regs[resetReg] = regState{status: Unspec}
}

inst, err := aa.Decode(code[offs:])
if err != nil {
continue
}
if inst.Op == aa.RET {
break
}

destReg, ok := ah.Xreg2num(inst.Args[0])
if !ok {
continue
}

// Mark the destination for reset up front; the mark is cleared at the end
// of the iteration when the switch below completes without `continue`.
resetReg = destReg
switch inst.Op {
case aa.MOV:
// Track register moves
srcReg, ok := ah.Xreg2num(inst.Args[1])
if !ok {
continue
}
regs[destReg] = regs[srcReg]

case aa.MRS:
// MRS X1, S3_3_C13_C0_2 (tpidr_el0)
if inst.Args[1].String() == "S3_3_C13_C0_2" {
regs[destReg] = regState{
status: TSDBase, // Reuse TSDBase to mean thread pointer
multiplier: 1,
}
}

case aa.LDUR:
// LDUR X1, [X1,#-8]
m, ok := inst.Args[1].(aa.MemImmediate)
if !ok {
continue
}
srcReg, ok := ah.Xreg2num(m.Base)
if !ok {
continue
}
if regs[srcReg].status == TSDBase {
imm, ok := ah.DecodeImmediate(m)
if !ok {
continue
}
// This is loading the DTV pointer from thread pointer
dtvOffset = int16(imm & 0xFFFF)
regs[destReg] = regState{
status: TSDElementBase, // DTV pointer
offset: imm,
multiplier: 1,
}
} else {
continue
}

case aa.LDR:
if len(inst.Args) < 2 {
continue
}
switch m := inst.Args[1].(type) {
case aa.MemImmediate:
// ldr x1, [x1, #0] or ldr x1, [x1]
srcReg, ok := ah.Xreg2num(m.Base)
if !ok {
continue
}
if regs[srcReg].status == TSDBase {
// Loading DTV pointer from thread pointer
imm, ok := ah.DecodeImmediate(m)
if !ok {
// Unlike the LDUR case, an undecodable immediate is treated as 0.
imm = 0
}
dtvOffset = int16(imm & 0xFFFF)
regs[destReg] = regState{
status: TSDElementBase, // DTV pointer
offset: imm,
multiplier: 1,
}
} else {
continue
}

case aa.MemExtend:
// ldr x1, [x1, x2, lsl #3]
srcReg, ok := ah.Xreg2num(m.Base)
if !ok {
continue
}
if regs[srcReg].status == TSDElementBase {
// This is indexing into the DTV array
if m.Amount > 0 {
entryWidth = uint32(1 << m.Amount)
}
}
}

case aa.LSL:
// lsl x3, x3, #4
// NOTE(review): any LSL by an immediate updates entryWidth, regardless
// of the tracked state of the registers involved -- confirm intended.
if len(inst.Args) >= 3 {
if imm, ok := inst.Args[2].(aa.Imm); ok {
entryWidth = uint32(1 << imm.Imm)
}
}

case aa.CMP, aa.CBZ, aa.CMN:
// Opcode with no effect on first argument.
// Noop to exit switch without default continue.

default:
continue
}
// The instruction was handled; keep the destination register's state.
resetReg = -1
}

// NOTE(review): entryWidth is narrowed to uint8 here, so a width of 256 or
// more (shift amount >= 8) would not be represented -- confirm it cannot occur.
return DTVInfo{
Offset: dtvOffset,
Multiplier: uint8(entryWidth),
Indirect: 1,
}, nil
}
Loading