diff --git a/cmd/flamegraph/flamegraph.go b/cmd/flamegraph/flamegraph.go index cf9c9130..68389800 100644 --- a/cmd/flamegraph/flamegraph.go +++ b/cmd/flamegraph/flamegraph.go @@ -52,6 +52,7 @@ var ( flagNoSystemSummary bool flagMaxDepth int flagPerfEvent string + flagAsprofArguments string ) const ( @@ -61,6 +62,7 @@ const ( flagNoSystemSummaryName = "no-summary" flagMaxDepthName = "max-depth" flagPerfEventName = "perf-event" + flagAsprofArgumentsName = "asprof-args" ) func init() { @@ -72,7 +74,7 @@ func init() { Cmd.Flags().BoolVar(&flagNoSystemSummary, flagNoSystemSummaryName, false, "") Cmd.Flags().IntVar(&flagMaxDepth, flagMaxDepthName, 0, "") Cmd.Flags().StringVar(&flagPerfEvent, flagPerfEventName, "cycles:P", "") - + Cmd.Flags().StringVar(&flagAsprofArguments, flagAsprofArgumentsName, "-t -F probesp+vtable", "") workflow.AddTargetFlags(Cmd) Cmd.SetUsageFunc(usageFunc) @@ -122,6 +124,10 @@ func getFlagGroups() []app.FlagGroup { Name: flagPerfEventName, Help: "perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.)", }, + { + Name: flagAsprofArgumentsName, + Help: "arguments to pass to async-profiler, e.g., $ asprof start -i .", + }, { Name: flagMaxDepthName, Help: "maximum render depth of call stack in flamegraph (0 = no limit)", @@ -198,11 +204,12 @@ func runCmd(cmd *cobra.Command, args []string) error { Cmd: cmd, ReportNamePost: "flame", ScriptParams: map[string]string{ - "Frequency": strconv.Itoa(flagFrequency), - "Duration": strconv.Itoa(flagDuration), - "PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","), - "MaxDepth": strconv.Itoa(flagMaxDepth), - "PerfEvent": flagPerfEvent, + "Frequency": strconv.Itoa(flagFrequency), + "Duration": strconv.Itoa(flagDuration), + "PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","), + "MaxDepth": strconv.Itoa(flagMaxDepth), + "PerfEvent": flagPerfEvent, + "AsprofArguments": flagAsprofArguments, }, Tables: tables, Input: flagInput, diff --git a/cmd/flamegraph/flamegraph_renderers.go b/cmd/flamegraph/flamegraph_renderers.go index 5cd2c7fb..ecfa8fb0 100644 --- a/cmd/flamegraph/flamegraph_renderers.go +++ b/cmd/flamegraph/flamegraph_renderers.go @@ -156,6 +156,10 @@ func renderFlameGraph(header string, tableValues table.TableValues, field string return } maxDepth := tableValues.Fields[maxDepthFieldIndex].Values[0] + if maxDepth == "" { + slog.Error("maximum render depth field is empty") + return + } maxStackDepth, err := strconv.Atoi(maxDepth) if err != nil { slog.Error("failed to convert maximum stack depth", slog.String("error", err.Error())) @@ -229,8 +233,27 @@ func callStackFrequencyTableHTMLRenderer(tableValues table.TableValues, targetNa slog.Error("didn't find expected field (Perf Event) in table", slog.String("error", err.Error())) return out } + if len(tableValues.Fields[perfEventFieldIndex].Values) == 0 { + slog.Error("no values for perf event field in table") + return out + } perfEvent := tableValues.Fields[perfEventFieldIndex].Values[0] - out += renderFlameGraph(fmt.Sprintf("Native (%s)", perfEvent), tableValues, "Native Stacks") - out += renderFlameGraph("Java (async-profiler)", tableValues, "Java Stacks") + out += renderFlameGraph(fmt.Sprintf("Native (perf record -e %s)", perfEvent), tableValues, "Native Stacks") + + // get the asprof arguments from the table values + asprofArgumentsFieldIndex, err := table.GetFieldIndex("Asprof Arguments", tableValues) + if err != nil { + slog.Error("didn't find expected field (Asprof Arguments) in table", slog.String("error", err.Error())) + return out + } + if len(tableValues.Fields[asprofArgumentsFieldIndex].Values) == 0 { + slog.Error("no values for asprof arguments field in table") + return out + } + asprofArguments := tableValues.Fields[asprofArgumentsFieldIndex].Values[0] + if asprofArguments != "" { + asprofArguments = " " + asprofArguments + } + out += renderFlameGraph(fmt.Sprintf("Java (asprof start%s)", asprofArguments), tableValues, "Java Stacks") return out } diff --git a/cmd/flamegraph/flamegraph_tables.go b/cmd/flamegraph/flamegraph_tables.go index 60b667da..c3efb9bc 100644 --- a/cmd/flamegraph/flamegraph_tables.go +++ b/cmd/flamegraph/flamegraph_tables.go @@ -38,6 +38,7 @@ func flameGraphTableValues(outputs map[string]script.ScriptOutput) []table.Field {Name: "Java Stacks", Values: []string{javaFoldedFromOutput(outputs)}}, {Name: "Maximum Render Depth", Values: []string{maxRenderDepthFromOutput(outputs)}}, {Name: "Perf Event", Values: []string{perfEventFromOutput(outputs)}}, + {Name: "Asprof Arguments", Values: []string{asprofArgumentsFromOutput(outputs)}}, } return fields } @@ -104,7 +105,6 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } } if dwarfFolded == "" && fpFolded == "" { - slog.Warn("no native folded stacks found") // "event syntax error: 'foo'" indicates that the perf event specified is invalid/unsupported if strings.Contains(outputs[script.FlameGraphScriptName].Stderr, "event syntax error") { slog.Error("unsupported perf event specified", slog.String("error", outputs[script.FlameGraphScriptName].Stderr)) @@ -154,6 +154,24 @@ func perfEventFromOutput(outputs map[string]script.ScriptOutput) string { return "" } +func asprofArgumentsFromOutput(outputs map[string]script.ScriptOutput) string { + if outputs[script.FlameGraphScriptName].Stdout == "" { + slog.Warn("collapsed call stack output is empty") + return "" + } + sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout) + if len(sections) == 0 { + slog.Warn("no sections in collapsed call stack output") + return "" + } + for header, content := range sections { + if header == "asprof_arguments" { + return strings.TrimSpace(content) + } + } + return "" +} + // ProcessStacks ... // [processName][callStack]=count type ProcessStacks map[string]Stacks diff --git a/internal/script/script.go b/internal/script/script.go index 4d8b78c5..b28c7fda 100644 --- a/internal/script/script.go +++ b/internal/script/script.go @@ -319,21 +319,29 @@ kill_script() { local pid="${pids[$s]:-}" [[ -z "$pid" ]] && return 0 if ! ps -p "$pid" > /dev/null 2>&1; then return 0; fi - # Send signal to the process group (negative PID) + # Signal the process group (negative PID) + echo "Sending SIGINT to script '${orig_names[$s]}' with PID $pid" >&2 kill -SIGINT -"$pid" 2>/dev/null || true - # Give it time to clean up so child script can finalize - # Wait up to 5 seconds in 0.5s intervals + # Wait up to 1 minute in 1s intervals local waited=0 - while ps -p "$pid" > /dev/null 2>&1 && [ "$waited" -lt 10 ]; do - sleep 0.5 + echo "Waiting for script '${orig_names[$s]}' with PID $pid to exit gracefully" >&2 + while ps -p "$pid" > /dev/null 2>&1 && [ "$waited" -lt 60 ]; do + echo -n "." >&2 + sleep 1 waited=$((waited + 1)) done + echo "Done waiting for script '${orig_names[$s]}' with PID $pid to exit gracefully" >&2 # Force kill the process group if still alive if ps -p "$pid" > /dev/null 2>&1; then + echo "Force killing script '${orig_names[$s]}' with PID $pid" >&2 kill -SIGKILL -"$pid" 2>/dev/null || true fi wait "$pid" 2>/dev/null || true - if [[ -z "${exitcodes[$s]:-}" ]]; then exitcodes[$s]=130; fi + echo "Script '${orig_names[$s]}' with PID $pid has been killed" >&2 + if [[ -z "${exitcodes[$s]:-}" ]]; then + echo "Setting exit code for script '${orig_names[$s]}' to 130 (terminated by SIGINT)" >&2 + exitcodes[$s]=130 + fi } wait_for_concurrent_scripts() { diff --git a/internal/script/scripts.go b/internal/script/scripts.go index 3e3c689f..168a7e31 100644 --- a/internal/script/scripts.go +++ b/internal/script/scripts.go @@ -1557,8 +1557,12 @@ done duration={{.Duration}} INTERVAL_MS=$((interval * 1000)) +PERF_PID="" cleanup() { + if [[ -n "$PERF_PID" ]]; then + kill -0 "$PERF_PID" 2>/dev/null && kill -INT "$PERF_PID" 2>/dev/null || true + fi exit 0 } trap cleanup INT TERM @@ -1619,7 +1623,9 @@ stdbuf -oL awk -v interval="$interval" -v num_events="$NUM_EVENTS" -v duration=" } } } -' +' & +PERF_PID=$! +wait $PERF_PID `, Superuser: true, Depends: []string{"perf"}, @@ -1633,44 +1639,71 @@ duration={{.Duration}} frequency={{.Frequency}} maxdepth={{.MaxDepth}} perf_event={{.PerfEvent}} +read -r -a asprof_arguments <<< "{{.AsprofArguments}}" ap_interval=0 if [ "$frequency" -ne 0 ]; then ap_interval=$((1000000000 / frequency)) fi -# Function to stop profiling -stop_profiling() { - if [ -n "$perf_fp_pid" ]; then - kill -0 "$perf_fp_pid" 2>/dev/null && kill -INT "$perf_fp_pid" - wait "$perf_fp_pid" || true - fi - if [ -n "$perf_dwarf_pid" ]; then - kill -0 "$perf_dwarf_pid" 2>/dev/null && kill -INT "$perf_dwarf_pid" - wait "$perf_dwarf_pid" || true - fi - for pid in "${java_pids[@]}"; do - async-profiler/bin/asprof stop -o collapsed -f ap_folded_"$pid" "$pid" - done - # Restore original settings +# Adjust perf_event_paranoid and kptr_restrict for profiling, saving original values to restore later +adjust_and_save_settings() { + perf_event_paranoid=$(cat /proc/sys/kernel/perf_event_paranoid) + echo -1 >/proc/sys/kernel/perf_event_paranoid + kptr_restrict=$(cat /proc/sys/kernel/kptr_restrict) + echo 0 >/proc/sys/kernel/kptr_restrict +} + +# Restore original settings +restore_settings() { echo "$perf_event_paranoid" > /proc/sys/kernel/perf_event_paranoid echo "$kptr_restrict" > /proc/sys/kernel/kptr_restrict } +# Function to stop profiling +stop_profiling() { + # stop perf (may already be stopped if stopped by signal) + if [ -n "$perf_fp_pid" ]; then + kill -0 "$perf_fp_pid" 2>/dev/null && kill -INT "$perf_fp_pid" + fi + if [ -n "$perf_dwarf_pid" ]; then + kill -0 "$perf_dwarf_pid" 2>/dev/null && kill -INT "$perf_dwarf_pid" + fi + # stop async-profiler + for pid in "${java_pids[@]}"; do + # this call is synchronous and will wait until the profile is written to file before returning + async-profiler/bin/asprof stop -o collapsed -f ap_folded_"$pid" "$pid" + done + # wait for perf processes to finish and exit + if [ -n "$perf_fp_pid" ]; then + wait "$perf_fp_pid" || true + fi + if [ -n "$perf_dwarf_pid" ]; then + wait "$perf_dwarf_pid" || true + fi + restore_settings +} + # Function to collapse perf data collapse_perf_data() { if [ -f perf_dwarf_data ]; then - perf script -i perf_dwarf_data > perf_dwarf_stacks - stackcollapse-perf perf_dwarf_stacks > perf_dwarf_folded + (perf script -i perf_dwarf_data > perf_dwarf_stacks && stackcollapse-perf perf_dwarf_stacks > perf_dwarf_folded) & + local dwarf_pid=$! else echo "Error: perf_dwarf_data file not found" >&2 fi if [ -f perf_fp_data ]; then - perf script -i perf_fp_data > perf_fp_stacks - stackcollapse-perf perf_fp_stacks > perf_fp_folded + (perf script -i perf_fp_data > perf_fp_stacks && stackcollapse-perf perf_fp_stacks > perf_fp_folded) & + local fp_pid=$! else echo "Error: perf_fp_data file not found" >&2 fi + if [ -n "$dwarf_pid" ]; then + wait "$dwarf_pid" || echo "Error: failed to process perf_dwarf_data (perf script or stackcollapse-perf failed)" >&2 + fi + if [ -n "$fp_pid" ]; then + wait "$fp_pid" || echo "Error: failed to process perf_fp_data (perf script or stackcollapse-perf failed)" >&2 + fi } # Function to print results to stdout @@ -1678,8 +1711,11 @@ print_results() { echo "########## maximum depth ##########" echo "$maxdepth" - echo "########## perf_event ##########" - echo "$perf_event" + echo "########## perf_event ##########" + echo "$perf_event" + + echo "########## asprof_arguments ##########" + printf '%s\n' "${asprof_arguments[*]}" if [ -f perf_dwarf_folded ]; then echo "########## perf_dwarf ##########" @@ -1707,21 +1743,28 @@ _finalize=0 # flag to indicate if finalize has been called # Function to finalize profiling and output finalize() { if [ $_finalize -eq 1 ]; then + echo "Finalize has already been called, skipping..." >&2 return fi _finalize=1 + echo "Finalizing profiling and outputting results..." >&2 stop_profiling collapse_perf_data print_results rm -f controller.pid - exit 0 + #exit 0 } +on_signal() { + echo "Received signal to stop profiling..." >&2 + finalize +} + +# trap signals - set flag instead of calling finalize directly to avoid bash exit race +trap on_signal INT TERM EXIT + # Adjust perf_event_paranoid and kptr_restrict -perf_event_paranoid=$(cat /proc/sys/kernel/perf_event_paranoid) -echo -1 >/proc/sys/kernel/perf_event_paranoid -kptr_restrict=$(cat /proc/sys/kernel/kptr_restrict) -echo 0 >/proc/sys/kernel/kptr_restrict +adjust_and_save_settings # If pids specified, check if at least one of them is running if [ -n "$pids" ]; then @@ -1733,7 +1776,7 @@ if [ -n "$pids" ]; then fi else echo "Error: Process $p is not running." >&2 - stop_profiling + restore_settings exit 1 fi done @@ -1743,49 +1786,58 @@ fi # Start profiling with perf in frame pointer mode if [ -n "$pids" ]; then - perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 & + perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 & else - perf record -e "$perf_event" -F "$frequency" -a -g -o perf_fp_data -m 129 & + perf record -e "$perf_event" -F "$frequency" -a -g -o perf_fp_data -m 129 & fi perf_fp_pid=$! if ! kill -0 $perf_fp_pid 2>/dev/null; then - echo "Failed to start perf record in frame pointer mode" >&2 - stop_profiling - exit 1 + echo "Failed to start perf record in frame pointer mode" >&2 + stop_profiling + exit 1 fi # Start profiling with perf in dwarf mode if [ -n "$pids" ]; then - perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & + perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & else - perf record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & + perf record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & fi perf_dwarf_pid=$! if ! kill -0 $perf_dwarf_pid 2>/dev/null; then - echo "Failed to start perf record in dwarf mode" >&2 - stop_profiling - exit 1 + echo "Failed to start perf record in dwarf mode" >&2 + stop_profiling + exit 1 fi -# Start profiling Java with async-profiler for each Java PID -for pid in "${java_pids[@]}"; do - java_cmds+=("$(tr '\000' ' ' < /proc/"$pid"/cmdline)") - async-profiler/bin/asprof start -i "$ap_interval" -F probesp+vtable "$pid" -done - -# profiling has been started, set up trap to finalize on interrupt -trap finalize INT TERM EXIT +if [ ${#java_pids[@]} -eq 0 ]; then + echo "Warning: Java sampling was requested, but no Java processes were found; skipping Java profiling." >&2 +else + # Start profiling Java with async-profiler for each Java PID + for pid in "${java_pids[@]}"; do + java_cmds+=("$(tr '\000' ' ' < /proc/"$pid"/cmdline)") + # asprof start will return immediately after starting the profiler + if ! async-profiler/bin/asprof start "${asprof_arguments[@]}" -i "$ap_interval" "$pid"; then + echo "Failed to start async-profiler for PID $pid" >&2 + stop_profiling + exit 1 + fi + done +fi # wait if [ "$duration" -gt 0 ]; then - # wait for the specified duration (seconds), then wrap it up by calling finalize + # sleep for the specified duration (seconds) + echo "Profiling for specified duration of $duration seconds..." >&2 sleep "$duration" else - # wait indefinitely until child processes are killed or interrupted + # wait indefinitely until child processes are killed or interrupted] + echo "Profiling indefinitely until interrupted..." >&2 wait fi finalize +exit 0 `, Superuser: true, Sequential: true,