diff --git a/interpreter/beam/beam.go b/interpreter/beam/beam.go index 91fe04d92..bb531156b 100644 --- a/interpreter/beam/beam.go +++ b/interpreter/beam/beam.go @@ -9,16 +9,22 @@ package beam // import "go.opentelemetry.io/ebpf-profiler/interpreter/beam" // that share the same bytecode, such as Elixir and Gleam. import ( + "encoding/binary" "fmt" + "hash/crc32" "regexp" + "strings" "unsafe" + "github.com/elastic/go-freelru" "go.opentelemetry.io/ebpf-profiler/internal/log" + "go.opentelemetry.io/ebpf-profiler/libpf/hash" "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/lpm" + "go.opentelemetry.io/ebpf-profiler/nopanicslicereader" "go.opentelemetry.io/ebpf-profiler/process" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/reporter" @@ -33,13 +39,16 @@ var ( ) type beamData struct { - otpRelease string - ertsVersion string - theActiveCodeIndex libpf.Address - r libpf.Address - beamNormalExit libpf.Address - ertsFrameLayout uint64 - + otpRelease string + ertsVersion string + theActiveCodeIndex libpf.Address + r libpf.Address + beamNormalExit libpf.Address + ertsFrameLayout uint64 + ertsAtomTable uint64 + etpPtrMask uint64 + etpHeaderSubtagMask uint64 + etpHeapBitsSubtag uint64 // Sizes and offsets BEAM internal structs we need to traverse vmStructs struct { // ranges @@ -47,15 +56,73 @@ type beamData struct { ranges struct { sizeOf uint8 } + + // BeamCodeHeader + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/beam_code.h#L56-L125 + beamCodeHeader struct { + sizeOf, numFunctions, lineTable, functions uint8 + } + + // ErtsCodeInfo + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/code_ix.h#L104-L123 + ertsCodeInfo struct { + sizeOf, mfa uint8 + } + + // ErtsCodeMFA + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/code_ix.h#L87-L95 + ertsCodeMfa struct { + sizeOf, module, function, arity uint8 + } + + // BeamCodeLineTab + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/beam_code.h#L130-L138 + beamCodeLineTab struct { + sizeOf, fnamePtr, locSize, locTab, funcTab uint8 + } + + // IndexTable + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/index.h#L39-L47 + indexTable struct { + segTable uint8 + } + + // Atom + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/atom.h#L48-L54 + // In OTP 28, we need to look it up as a binary: + // https://github.com/erlang/otp/blob/OTP-28.0.2/erts/emulator/beam/atom.h#L50-L59 + atom struct { + len, name uint8 + u struct { + bin uint8 + } + } + + // ErlHeapBits + // https://github.com/erlang/otp/blob/OTP-28.0.2/erts/emulator/beam/erl_bits.h#L149-L154 + erlHeapBits struct { + data uint8 + } } } +type beamMfa struct { + module uint32 + function uint32 + arity uint32 +} + type beamInstance struct { interpreter.InstanceStubs - pid libpf.PID - data *beamData - rm remotememory.RemoteMemory + pid libpf.PID + data *beamData + rm remotememory.RemoteMemory + rangesPtr libpf.Address + atomTable libpf.Address + atomCache *freelru.LRU[uint32, libpf.String] + mfaNameCache *freelru.LRU[beamMfa, libpf.String] + stringCache *freelru.LRU[libpf.Address, libpf.String] // prefixes is indexed by the prefix added to ebpf maps (to be cleaned up) to its generation prefixes map[lpm.Prefix]uint32 @@ -109,6 +176,34 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr return nil, fmt.Errorf("symbol 'the_active_code_index' not found: %v", err) } + // "erts_atom_table" symbol is from: + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/atom.c#L35 + atomTable, _, err := ef.SymbolData("erts_atom_table", 128) + if err != nil { + return nil, fmt.Errorf("symbol 'erts_atom_table' not found: %v", err) + } + + // "etp_ptr_mask" symbol is from: + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/erl_etp.c#L82-L85 + _, etpPtrMask, err := ef.SymbolData("etp_ptr_mask", 8) + if err != nil { + return nil, fmt.Errorf("symbol 'etp_ptr_mask' not found: %v", err) + } + + // "etp_header_subtag_mask" is from: + // https://github.com/erlang/otp/blob/OTP-28.0.2/erts/emulator/beam/erl_etp.c#L132 + _, etpHeaderSubtagMask, err := ef.SymbolData("etp_header_subtag_mask", 8) + if err != nil { + return nil, fmt.Errorf("symbol 'etp_header_subtag_mask' not found: %v", err) + } + + // "etp_heap_bits_subtag" is from: + // https://github.com/erlang/otp/blob/OTP-28.0.2/erts/emulator/beam/erl_etp.c#L108 + _, etpHeapBitsSubtag, err := ef.SymbolData("etp_heap_bits_subtag", 8) + if err != nil { + return nil, fmt.Errorf("symbol 'etp_heap_bits_subtag' not found: %v", err) + } + // "beam_normal_exit" symbol is from: // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/emulator/beam/jit/beam_jit_main.cpp#L54 beamNormalExit, _, err := ef.SymbolData("beam_normal_exit", 8) @@ -117,11 +212,15 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr } d := &beamData{ - otpRelease: string(otpRelease[:len(otpRelease)-1]), - ertsVersion: string(ertsVersion[:len(ertsVersion)-1]), - theActiveCodeIndex: libpf.Address(codeIndex.Address), - r: libpf.Address(r.Address), - beamNormalExit: libpf.Address(beamNormalExit.Address), + otpRelease: string(otpRelease[:len(otpRelease)-1]), + ertsVersion: string(ertsVersion[:len(ertsVersion)-1]), + theActiveCodeIndex: libpf.Address(codeIndex.Address), + r: libpf.Address(r.Address), + beamNormalExit: libpf.Address(beamNormalExit.Address), + ertsAtomTable: uint64(atomTable.Address), + etpPtrMask: nopanicslicereader.Uint64(etpPtrMask, 0), + etpHeaderSubtagMask: nopanicslicereader.Uint64(etpHeaderSubtagMask, 0), + etpHeapBitsSubtag: nopanicslicereader.Uint64(etpHeapBitsSubtag, 0), } // If erts_frame_layout is not defined, it means that frame pointers are not supported, @@ -135,10 +234,35 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr vms := &d.vmStructs - // This is the same on OTP releases 27.2.4 and 28.0.2. + // These values are the same on OTP releases 27.2.4 and 28.0.2. vms.ranges.sizeOf = 32 - - if d.otpRelease != "27" && d.otpRelease != "28" { + vms.beamCodeHeader.numFunctions = 0 + vms.beamCodeHeader.lineTable = 72 + vms.ertsCodeInfo.sizeOf = 40 + vms.ertsCodeInfo.mfa = 16 + vms.ertsCodeMfa.sizeOf = 24 + vms.ertsCodeMfa.module = 0 + vms.ertsCodeMfa.function = 8 + vms.ertsCodeMfa.arity = 16 + vms.beamCodeLineTab.sizeOf = 32 + vms.beamCodeLineTab.fnamePtr = 0 + vms.beamCodeLineTab.locSize = 8 + vms.beamCodeLineTab.locTab = 16 + vms.beamCodeLineTab.funcTab = 24 + vms.indexTable.segTable = 120 + vms.atom.len = 24 + vms.erlHeapBits.data = 16 + + switch d.otpRelease { + case "27": + vms.beamCodeHeader.sizeOf = 144 + vms.beamCodeHeader.functions = 136 + vms.atom.name = 32 + case "28": + vms.beamCodeHeader.sizeOf = 160 + vms.beamCodeHeader.functions = 152 + vms.atom.u.bin = 32 + default: return d, fmt.Errorf("unsupported OTP version for BEAM interpreter: %s", d.otpRelease) } @@ -170,11 +294,41 @@ func (d *beamData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libp return nil, err } + atomCache, err := freelru.New[uint32, libpf.String]( + interpreter.LruFunctionCacheSize, hash.Uint32) + if err != nil { + return nil, err + } + + hashMFA := func(key beamMfa) uint32 { + data := make([]byte, 12) + binary.LittleEndian.PutUint32(data[0:4], key.module) + binary.LittleEndian.PutUint32(data[4:8], key.function) + binary.LittleEndian.PutUint32(data[8:12], key.arity) + return crc32.ChecksumIEEE(data) + } + mfaNameCache, err := freelru.New[beamMfa, libpf.String]( + interpreter.LruFunctionCacheSize, hashMFA) + if err != nil { + return nil, err + } + + stringCache, err := freelru.New[libpf.Address, libpf.String]( + interpreter.LruFunctionCacheSize, libpf.Address.Hash32) + if err != nil { + return nil, err + } + return &beamInstance{ - pid: pid, - data: d, - rm: rm, - prefixes: make(map[lpm.Prefix]uint32), + pid: pid, + data: d, + rm: rm, + prefixes: make(map[lpm.Prefix]uint32), + rangesPtr: bias + libpf.Address(d.r), + atomTable: bias + libpf.Address(d.ertsAtomTable), + atomCache: atomCache, + mfaNameCache: mfaNameCache, + stringCache: stringCache, }, nil } @@ -231,11 +385,224 @@ func (i *beamInstance) Symbolize(frame *host.Frame, frames *libpf.Frames) error if !frame.Type.IsInterpType(libpf.BEAM) { return interpreter.ErrMismatchInterpreterType } + codeHeader := libpf.Address(frame.File) + pc := libpf.Address(frame.Lineno) - frames.Append(&libpf.Frame{ - Type: libpf.BEAMFrame, - AddressOrLineno: frame.Lineno, - }) + functionIndex, mfa, err := i.findMFA(pc, codeHeader) + if err != nil { + return err + } + + var mfaName libpf.String + if value, ok := i.mfaNameCache.Get(mfa); ok { + mfaName = value + } else { + moduleName, err := i.lookupAtom(mfa.module) + if err != nil { + return err + } + functionName, err := i.lookupAtom(mfa.function) + if err != nil { + return err + } + + if strings.HasPrefix(moduleName.String(), "Elixir.") { + // This is an Elixir module, so format the function using Elixir syntax (without the "Elixir." prefix) + mfaName = libpf.Intern(fmt.Sprintf("%s.%s/%d", moduleName.String()[7:], functionName, mfa.arity)) + } else { + // Assume it's Erlang and format it using Erlang syntax + mfaName = libpf.Intern(fmt.Sprintf("%s:%s/%d", moduleName, functionName, mfa.arity)) + } + + i.mfaNameCache.Add(mfa, mfaName) + } + + fileName, lineNumber, err := i.findFileLocation(codeHeader, functionIndex, pc) + if err == nil { + log.Debugf("BEAM Found function %s at %s:%d", mfaName, fileName, lineNumber) + frames.Append(&libpf.Frame{ + Type: libpf.BEAMFrame, + FunctionName: mfaName, + SourceFile: fileName, + SourceLine: libpf.SourceLineno(lineNumber), + }) + } else { + log.Debugf("BEAM Found function %s", mfaName) + frames.Append(&libpf.Frame{ + Type: libpf.BEAMFrame, + FunctionName: mfaName, + }) + } return nil } + +func (i *beamInstance) findMFA(pc libpf.Address, codeHeader libpf.Address) (functionIndex uint64, mfa beamMfa, err error) { + vms := i.data.vmStructs + + numFunctions := i.rm.Uint32(codeHeader + libpf.Address(vms.beamCodeHeader.numFunctions)) + functions := codeHeader + libpf.Address(vms.beamCodeHeader.functions) + + midBuffer := make([]byte, 16) + + ertsCodeInfo := libpf.Address(0) + lowIdx := uint64(0) + highIdx := uint64(numFunctions) - 1 + for lowIdx < highIdx { + midIdx := lowIdx + (highIdx-lowIdx)/2 + err := i.rm.Read(functions+libpf.Address(midIdx*8), midBuffer) + if err != nil { + return 0, beamMfa{}, fmt.Errorf("BEAM unable to read codeHeader.functions[%d] for codeHeader 0x%x", midIdx, codeHeader) + } + midStart := nopanicslicereader.Ptr(midBuffer, 0) + midEnd := nopanicslicereader.Ptr(midBuffer, 8) + if pc < midStart { + highIdx = midIdx + } else if pc >= midEnd { + lowIdx = midIdx + 1 + } else { + ertsCodeInfo = midStart + functionIndex = midIdx + + data := make([]byte, vms.ertsCodeMfa.sizeOf) + err = i.rm.Read(ertsCodeInfo+libpf.Address(vms.ertsCodeInfo.mfa), data) + if err != nil { + return 0, beamMfa{}, fmt.Errorf("BEAM unable to look up MFA at for ertsCodeInfo 0x%x", ertsCodeInfo) + } + mfa.module = nopanicslicereader.Uint32(data, uint(vms.ertsCodeMfa.module)) + mfa.function = nopanicslicereader.Uint32(data, uint(vms.ertsCodeMfa.function)) + mfa.arity = nopanicslicereader.Uint32(data, uint(vms.ertsCodeMfa.arity)) + + return functionIndex, mfa, nil + } + } + + return 0, beamMfa{}, fmt.Errorf("BEAM unable to find the MFA for PC 0x%x in expected code range", pc) +} + +func (i *beamInstance) findFileLocation(codeHeader libpf.Address, functionIndex uint64, pc libpf.Address) (fileName libpf.String, lineNumber uint64, err error) { + vms := i.data.vmStructs + + lineTable := i.rm.Ptr(codeHeader + libpf.Address(vms.beamCodeHeader.lineTable)) + functionTable := lineTable + libpf.Address(vms.beamCodeLineTab.funcTab) + + lineRange := make([]byte, 16) + err = i.rm.Read(functionTable+libpf.Address(8*functionIndex), lineRange) + if err != nil { + return libpf.NullString, 0, fmt.Errorf("BEAM failed to read function table info") + } + lineLow := nopanicslicereader.Ptr(lineRange, 0) + lineHigh := nopanicslicereader.Ptr(lineRange, 8) + + lineMidBuffer := make([]byte, 16) + // We need to align the lineMid values on 8-byte address boundaries + bitmask := libpf.Address(^(uint64(0xf))) + for lineHigh > lineLow { + lineMid := lineLow + ((lineHigh-lineLow)/2)&bitmask + err := i.rm.Read(lineMid, lineMidBuffer) + if err != nil { + return libpf.NullString, 0, fmt.Errorf("BEAM failed to read line table") + } + if pc < nopanicslicereader.Ptr(lineMidBuffer, 0) { + lineHigh = lineMid + } else if pc < nopanicslicereader.Ptr(lineMidBuffer, 8) { + firstLine := i.rm.Ptr(functionTable) + locIndex := uint32((lineMid - firstLine) / 8) + lineTab := make([]byte, vms.beamCodeLineTab.sizeOf) + err = i.rm.Read(lineTable, lineTab) + if err != nil { + return libpf.NullString, 0, fmt.Errorf("BEAM failed to read line table info") + } + locSize := nopanicslicereader.Uint32(lineTab, uint(vms.beamCodeLineTab.locSize)) + locTab := nopanicslicereader.Ptr(lineTab, uint(vms.beamCodeLineTab.locTab)) + locAddr := locTab + libpf.Address(locSize*locIndex) + loc := uint64(0) + if locSize == 2 { + loc = uint64(i.rm.Uint16(locAddr)) + } else { + loc = uint64(i.rm.Uint32(locAddr)) + } + fnameIndex := loc >> 24 + fileNamePtr := i.rm.Ptr(lineTable) + libpf.Address(8*fnameIndex) + fileName = i.readErlangString(i.rm.Ptr(fileNamePtr), 256) + + return fileName, loc & ((1 << 24) - 1), nil + } else { + lineLow = lineMid + 8 + } + } + + return libpf.NullString, 0, fmt.Errorf("BEAM unable to find file and line number") +} + +func (i *beamInstance) lookupAtom(index uint32) (libpf.String, error) { + if value, ok := i.atomCache.Get(index); ok { + return value, nil + } + + vms := i.data.vmStructs + + segTable := i.rm.Ptr(i.atomTable + libpf.Address(vms.indexTable.segTable)) + segment := i.rm.Ptr(segTable + libpf.Address(8*(index>>16))) + entry := i.rm.Ptr(segment + libpf.Address(8*((index>>6)&0x3FF))) + + len := i.rm.Uint16(entry + libpf.Address(vms.atom.len)) + + name := make([]byte, len) + switch i.data.otpRelease { + case "27": + err := i.rm.Read(i.rm.Ptr(entry+libpf.Address(vms.atom.name)), name) + if err != nil { + return libpf.NullString, fmt.Errorf("BEAM Unable to lookup atom with index %d: %v", index, err) + } + case "28": + // Implementation based on https://github.com/erlang/otp/blob/OTP-28.0.2/erts/etc/unix/etp-commands.in#L657-L674 + unboxed := i.rm.Ptr(entry+libpf.Address(vms.atom.u.bin)) & libpf.Address(i.data.etpPtrMask) + + subtag := i.rm.Uint64(unboxed) & uint64(i.data.etpHeaderSubtagMask) + if subtag == uint64(i.data.etpHeapBitsSubtag) { + err := i.rm.Read(unboxed+libpf.Address(vms.erlHeapBits.data), name) + if err != nil { + return libpf.NullString, fmt.Errorf("BEAM Unable to lookup atom with index %d (ErlHeapBits tag): %v", index, err) + } + } else { + return libpf.NullString, fmt.Errorf("BEAM Unable to lookup atom with index %d: expected boxed value subtag 0x%x, found 0x%x", index, i.data.etpHeapBitsSubtag, subtag) + } + } + + nameString := libpf.Intern(string(name)) + i.atomCache.Add(index, nameString) + return nameString, nil +} + +func (i *beamInstance) readErlangString(eterm libpf.Address, maxLength uint64) libpf.String { + if value, ok := i.stringCache.Get(eterm); ok { + return value + } + + result := strings.Builder{} + length := uint64(0) + + // TODO: Get this exported if possible in erl_etp.c + // https://github.com/erlang/otp/blob/OTP-27.2.4/erts/etc/unix/etp-commands.in#L5326 + etp_nil := libpf.Address(0x3B) + + for eterm != etp_nil && length < maxLength { + charAddr := eterm & libpf.Address(i.data.etpPtrMask) + charValue := i.rm.Uint64(charAddr) + char := uint8(charValue >> 4) + result.WriteByte(char) + length++ + nextAddr := libpf.Address((eterm & libpf.Address(i.data.etpPtrMask)) + 8) + eterm = libpf.Address(i.rm.Uint64(nextAddr)) + } + + if length > maxLength { + result.WriteString("...") + } + + value := libpf.Intern(result.String()) + i.stringCache.Add(eterm, value) + + return value +}