Skip to content

Commit 27dece7

Browse files
committed
core/web: support pprof for loop plugins
1 parent 5c1512f commit 27dece7

25 files changed

Lines changed: 339 additions & 93 deletions

File tree

core/cmd/admin_commands.go

Lines changed: 110 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ package cmd
22

33
import (
44
"bytes"
5+
"context"
56
"encoding/json"
67
"errors"
78
"fmt"
89
"io"
10+
"math"
911
"net/http"
1012
"os"
1113
"path/filepath"
@@ -17,6 +19,7 @@ import (
1719
"github.com/urfave/cli"
1820

1921
cutils "github.com/smartcontractkit/chainlink-common/pkg/utils"
22+
"github.com/smartcontractkit/chainlink/v2/core/web"
2023

2124
"github.com/smartcontractkit/chainlink/v2/core/sessions"
2225
"github.com/smartcontractkit/chainlink/v2/core/utils"
@@ -65,6 +68,10 @@ func initAdminSubCmds(s *Shell) []cli.Command {
6568
Usage: "output directory of the captured profile",
6669
Value: "/tmp/",
6770
},
71+
cli.StringSliceFlag{
72+
Name: "vitals, v",
73+
Usage: "vitals to collect, can be specified multiple times. Options: 'allocs', 'block', 'cmdline', 'goroutine', 'heap', 'mutex', 'profile', 'threadcreate', 'trace'",
74+
},
6875
},
6976
},
7077
{
@@ -324,30 +331,114 @@ func (s *Shell) Profile(c *cli.Context) error {
324331

325332
genDir := filepath.Join(baseDir, "debuginfo-"+time.Now().Format(time.RFC3339))
326333

327-
if err := os.Mkdir(genDir, 0o755); err != nil {
334+
vitals := c.StringSlice("vitals")
335+
if len(vitals) == 0 {
336+
vitals = []string{
337+
"allocs", // A sampling of all past memory allocations
338+
"block", // Stack traces that led to blocking on synchronization primitives
339+
"cmdline", // The command line invocation of the current program
340+
"goroutine", // Stack traces of all current goroutines
341+
"heap", // A sampling of memory allocations of live objects.
342+
"mutex", // Stack traces of holders of contended mutexes
343+
"profile", // CPU profile.
344+
"threadcreate", // Stack traces that led to the creation of new OS threads
345+
"trace", // A trace of execution of the current program.
346+
}
347+
}
348+
349+
plugins, err := s.discoverPlugins(ctx)
350+
if err != nil {
328351
return s.errorOut(err)
329352
}
330-
var wgPprof sync.WaitGroup
331-
vitals := []string{
332-
"allocs", // A sampling of all past memory allocations
333-
"block", // Stack traces that led to blocking on synchronization primitives
334-
"cmdline", // The command line invocation of the current program
335-
"goroutine", // Stack traces of all current goroutines
336-
"heap", // A sampling of memory allocations of live objects.
337-
"mutex", // Stack traces of holders of contended mutexes
338-
"profile", // CPU profile.
339-
"threadcreate", // Stack traces that led to the creation of new OS threads
340-
"trace", // A trace of execution of the current program.
353+
var names []string
354+
for _, group := range plugins {
355+
if name := group.Labels[web.LabelMetaPluginName]; name != "" {
356+
names = append(names, name)
357+
}
358+
}
359+
360+
if len(names) == 0 {
361+
s.Logger.Infof("Collecting profiles: %v", vitals)
362+
} else {
363+
s.Logger.Infof("Collecting profiles from host and %d plugins: %v", len(names), vitals)
341364
}
342-
wgPprof.Add(len(vitals))
343-
s.Logger.Infof("Collecting profiles: %v", vitals)
344365
s.Logger.Infof("writing debug info to %s", genDir)
345366

367+
var wg sync.WaitGroup
368+
errs := make([]error, len(names)+1)
369+
wg.Add(len(names) + 1)
370+
go func() {
371+
defer wg.Done()
372+
errs[0] = s.profile(ctx, genDir, "", vitals, seconds)
373+
}()
374+
for i, name := range names {
375+
go func() {
376+
defer wg.Done()
377+
errs[i] = s.profile(ctx, genDir, name, vitals, seconds)
378+
}()
379+
}
380+
wg.Wait()
381+
382+
err = errors.Join(errs...)
383+
if err != nil {
384+
return s.errorOut(err)
385+
}
386+
return nil
387+
}
388+
func (s *Shell) discoverPlugins(ctx context.Context) (
389+
got []struct {
390+
Targets []string `yaml:"targets"`
391+
Labels map[string]string `yaml:"labels"`
392+
},
393+
err error,
394+
) {
395+
resp, err := s.HTTP.Get(ctx, "/discovery")
396+
if err != nil {
397+
return
398+
}
399+
defer func() {
400+
if resp.Body != nil {
401+
resp.Body.Close()
402+
}
403+
}()
404+
data, err := io.ReadAll(resp.Body)
405+
if err != nil {
406+
return
407+
}
408+
409+
if err = json.Unmarshal(data, &got); err != nil {
410+
s.Logger.Errorf("failed to unmarshal discovery response: %s", string(data))
411+
return
412+
}
413+
return
414+
}
415+
416+
func (s *Shell) profile(ctx context.Context, genDir string, name string, vitals []string, seconds uint) error {
417+
lggr := s.Logger
418+
path := "/v2"
419+
if name != "" {
420+
genDir = filepath.Join(genDir, "plugins", name)
421+
path += "/plugins/" + name
422+
lggr = lggr.With("plugin", name)
423+
}
424+
if err := os.MkdirAll(genDir, 0o755); err != nil {
425+
return fmt.Errorf("failed to create directory: %w", err)
426+
}
427+
428+
timeout := seconds + max(10, seconds>>2) // +25%
429+
if timeout > math.MaxInt64 {
430+
return fmt.Errorf("profile timeout %d seconds overflows int64", seconds, math.MaxInt64)
431+
}
432+
346433
errs := make(chan error, len(vitals))
434+
var wgPprof sync.WaitGroup
435+
wgPprof.Add(len(vitals))
347436
for _, vt := range vitals {
348-
go func(vt string) {
437+
go func(ctx context.Context, vt string) {
349438
defer wgPprof.Done()
350-
uri := fmt.Sprintf("/v2/debug/pprof/%s?seconds=%d", vt, seconds)
439+
ctx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
440+
defer cancel()
441+
uri := fmt.Sprintf(path+"/debug/pprof/%s?seconds=%d", vt, seconds)
351442
resp, err := s.HTTP.Get(ctx, uri)
352443
if err != nil {
353444
errs <- fmt.Errorf("error collecting %s: %w", vt, err)
@@ -403,12 +494,12 @@ func (s *Shell) Profile(c *cli.Context) error {
403494
errs <- fmt.Errorf("error closing file for %s: %w", vt, err)
404495
return
405496
}
406-
}(vt)
497+
}(ctx, vt)
407498
}
408499
wgPprof.Wait()
409500
close(errs)
410-
// Atmost one err is emitted per vital.
411-
s.Logger.Infof("collected %d/%d profiles", len(vitals)-len(errs), len(vitals))
501+
// At most one err is emitted per vital.
502+
lggr.Infof("collected %d/%d profiles", len(vitals)-len(errs), len(vitals))
412503
if len(errs) > 0 {
413504
var merr error
414505
for err := range errs {

core/cmd/shell_local.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ func initLocalSubCmds(s *Shell, safe bool) []cli.Command {
148148
Usage: "output directory of the captured profile",
149149
Value: "/tmp/",
150150
},
151+
cli.StringSliceFlag{
152+
Name: "vitals, v",
153+
Usage: "vitals to collect, can be specified multiple times. Options: 'allocs', 'block', 'cmdline', 'goroutine', 'heap', 'mutex', 'profile', 'threadcreate', 'trace'",
154+
},
151155
},
152156
Hidden: true,
153157
Before: func(_ *cli.Context) error {

core/scripts/go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ require (
4646
github.com/shopspring/decimal v1.4.0
4747
github.com/smartcontractkit/chainlink-automation v0.8.1
4848
github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260203202624-5101f4d33736
49-
github.com/smartcontractkit/chainlink-common v0.10.0
49+
github.com/smartcontractkit/chainlink-common v0.10.1-0.20260217013856-dd1e16fa7183
5050
github.com/smartcontractkit/chainlink-common/keystore v1.0.2-0.20260211140822-b833b412cdd9
5151
github.com/smartcontractkit/chainlink-data-streams v0.1.11
5252
github.com/smartcontractkit/chainlink-deployments-framework v0.80.1-0.20260209182815-b296b7df28a6
@@ -480,7 +480,7 @@ require (
480480
github.com/smartcontractkit/ccip-contract-examples/chains/evm v0.0.0-20260129135848-c86808ba5cb9 // indirect
481481
github.com/smartcontractkit/ccip-owner-contracts v0.1.0 // indirect
482482
github.com/smartcontractkit/chain-selectors v1.0.91 // indirect
483-
github.com/smartcontractkit/chainlink-aptos v0.0.0-20251212131933-e5e85d6fa4d3 // indirect
483+
github.com/smartcontractkit/chainlink-aptos v0.0.0-20260217145444-2c23f991df07 // indirect
484484
github.com/smartcontractkit/chainlink-ccip/ccv/chains/evm v0.0.0-20260206205333-9187f22f0a04 // indirect
485485
github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260121163256-85accaf3d28d // indirect
486486
github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5 // indirect

core/scripts/go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,8 +1596,8 @@ github.com/smartcontractkit/ccip-owner-contracts v0.1.0 h1:GiBDtlx7539o7AKlDV+9L
15961596
github.com/smartcontractkit/ccip-owner-contracts v0.1.0/go.mod h1:NnT6w4Kj42OFFXhSx99LvJZWPpMjmo4+CpDEWfw61xY=
15971597
github.com/smartcontractkit/chain-selectors v1.0.91 h1:Aip7IZTv40RtbHgZ9mTjm5KyhYrpPefG7iVMzLZ27M4=
15981598
github.com/smartcontractkit/chain-selectors v1.0.91/go.mod h1:qy7whtgG5g+7z0jt0nRyii9bLND9m15NZTzuQPkMZ5w=
1599-
github.com/smartcontractkit/chainlink-aptos v0.0.0-20251212131933-e5e85d6fa4d3 h1:bbVSKb++R+rpLkydNvyS4nZPNkcjtolUuFC8YVwtMVk=
1600-
github.com/smartcontractkit/chainlink-aptos v0.0.0-20251212131933-e5e85d6fa4d3/go.mod h1:OywVThRaVXwknATT2B8QAwjOJ1LoYBB9bTsmRpf6RPw=
1599+
github.com/smartcontractkit/chainlink-aptos v0.0.0-20260217145444-2c23f991df07 h1:aOI563MHGo/Al4fFtgKFbZo8DB6FRJh1pf+uo5S87jg=
1600+
github.com/smartcontractkit/chainlink-aptos v0.0.0-20260217145444-2c23f991df07/go.mod h1:dTKyBdwtx1OXzVBwglpB0zRCFW0sG4JZkhMqv4yyFLU=
16011601
github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgBc2xpDKBco/Q4h4ydl6+UUU=
16021602
github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08=
16031603
github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260203202624-5101f4d33736 h1:h2r/UWIJI1zP/I8IwmmJ44aAfPZZcRgfFjHAzehqqGQ=
@@ -1612,8 +1612,8 @@ github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260129103204-4c84
16121612
github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260129103204-4c8453dd8139/go.mod h1:gUbichNQBqk+fBF2aV40ZkzFmAJ8SygH6DEPd3cJkQE=
16131613
github.com/smartcontractkit/chainlink-ccv v0.0.0-20260210123725-95a6e7788856 h1:e/L0oKHTwXjqIf66vw8NWYvXecCq5tybGwHm7YK9wuo=
16141614
github.com/smartcontractkit/chainlink-ccv v0.0.0-20260210123725-95a6e7788856/go.mod h1:wLN3X89JoiWr951CX+lALwmDirEm1KzP0n+gGFblwxw=
1615-
github.com/smartcontractkit/chainlink-common v0.10.0 h1:d90b9UPJecrIryzhl43F1oQwkJQoug3TaANlJ1xLHyI=
1616-
github.com/smartcontractkit/chainlink-common v0.10.0/go.mod h1:13YN2kb3Vqpw2S7d4IwhX/578WPGC0JHN5JrOnAEsOc=
1615+
github.com/smartcontractkit/chainlink-common v0.10.1-0.20260217013856-dd1e16fa7183 h1:DPAgzaz82El15lr7hZgzZw/G+qdgPuFRRKngihi6gW4=
1616+
github.com/smartcontractkit/chainlink-common v0.10.1-0.20260217013856-dd1e16fa7183/go.mod h1:13YN2kb3Vqpw2S7d4IwhX/578WPGC0JHN5JrOnAEsOc=
16171617
github.com/smartcontractkit/chainlink-common/keystore v1.0.2-0.20260211140822-b833b412cdd9 h1:dWqd2lOW3GbwtgZEeDSDI1b1X175MPOlCU6GDQQVBjk=
16181618
github.com/smartcontractkit/chainlink-common/keystore v1.0.2-0.20260211140822-b833b412cdd9/go.mod h1:SMegDBf3KDs2tuKApmTRyO2xQthMu3gV2J+IuHEs0Y0=
16191619
github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.11-0.20251211140724-319861e514c4 h1:NOUsjsMzNecbjiPWUQGlRSRAutEvCFrqqyETDJeh5q4=

core/web/loop_registry.go

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"html"
77
"io"
88
"net/http"
9+
"net/url"
910
"os"
1011
"time"
1112

@@ -40,20 +41,26 @@ func NewLoopRegistryServer(app chainlink.Application) *LoopRegistryServer {
4041
discoveryHostName: discoveryHostName,
4142
loopHostName: loopHostName,
4243
client: &http.Client{Timeout: 1 * time.Second}, // some value much less than the prometheus poll interval will do there
44+
// TODO need longer timeouts for pprof, or none? just control context via seconds param?
4345
}
4446
}
4547

48+
// TODO move
49+
const LabelMetaPluginName = "__meta_plugin_name"
50+
4651
// discoveryHandler implements service discovery of prom endpoints for LOOPs in the registry
4752
func (l *LoopRegistryServer) discoveryHandler(w http.ResponseWriter, req *http.Request) {
4853
w.Header().Set("Content-Type", "application/json")
4954
var groups []*targetgroup.Group
5055

5156
// add node metrics to service discovery
52-
groups = append(groups, metricTarget(l.discoveryHostName, l.exposedPromPort, "/metrics"))
57+
groups = append(groups, pluginGroup(l.discoveryHostName, l.exposedPromPort, "/metrics"))
5358

5459
// add all the plugins
5560
for _, registeredPlugin := range l.registry.List() {
56-
groups = append(groups, metricTarget(l.discoveryHostName, l.exposedPromPort, pluginMetricPath(registeredPlugin.Name)))
61+
group := pluginGroup(l.discoveryHostName, l.exposedPromPort, pluginMetricPath(registeredPlugin.Name))
62+
group.Labels[LabelMetaPluginName] = model.LabelValue(registeredPlugin.Name)
63+
groups = append(groups, group)
5764
}
5865

5966
b, err := l.jsonMarshalFn(groups)
@@ -72,7 +79,7 @@ func (l *LoopRegistryServer) discoveryHandler(w http.ResponseWriter, req *http.R
7279
}
7380
}
7481

75-
func metricTarget(hostName string, port int, path string) *targetgroup.Group {
82+
func pluginGroup(hostName string, port int, path string) *targetgroup.Group {
7683
return &targetgroup.Group{
7784
Targets: []model.LabelSet{
7885
// target address will be called by external prometheus
@@ -114,6 +121,48 @@ func (l *LoopRegistryServer) pluginMetricHandler(gc *gin.Context) {
114121
gc.Data(http.StatusOK, "text/plain", b)
115122
}
116123

124+
func (l *LoopRegistryServer) pluginPPROFHandler(gc *gin.Context) {
125+
pluginName := gc.Param("name")
126+
p, ok := l.registry.Get(pluginName)
127+
if !ok {
128+
gc.Data(http.StatusNotFound, "text/plain", fmt.Appendf(nil, "plugin %q does not exist", html.EscapeString(pluginName)))
129+
return
130+
}
131+
132+
// unlike discovery, this endpoint is internal btw the node and plugin
133+
pluginURL := fmt.Sprintf("http://%s:%d/debug/pprof/"+gc.Param("profile"), l.loopHostName, p.EnvCfg.PrometheusPort)
134+
var urlVals url.Values
135+
if db, ok := gc.GetQuery("debug"); ok {
136+
urlVals.Set("debug", db)
137+
}
138+
if gc, ok := gc.GetQuery("gc"); ok {
139+
urlVals.Set("gc", gc)
140+
}
141+
if sec, ok := gc.GetQuery("seconds"); ok {
142+
urlVals.Set("seconds", sec)
143+
}
144+
if s := urlVals.Encode(); s != "" {
145+
pluginURL += "?" + s
146+
}
147+
res, err := l.client.Get(pluginURL)
148+
if err != nil {
149+
msg := "plugin pprof handler failed to get plugin url " + html.EscapeString(pluginURL)
150+
l.logger.Errorw(msg, "err", err)
151+
gc.Data(http.StatusInternalServerError, "text/plain", fmt.Appendf(nil, "%s: %s", msg, err))
152+
return
153+
}
154+
defer res.Body.Close()
155+
b, err := io.ReadAll(res.Body)
156+
if err != nil {
157+
msg := fmt.Sprintf("error reading plugin %q pprof", html.EscapeString(pluginName))
158+
l.logger.Errorw(msg, "err", err)
159+
gc.Data(http.StatusInternalServerError, "text/plain", fmt.Appendf(nil, "%s: %s", msg, err))
160+
return
161+
}
162+
163+
gc.Data(http.StatusOK, "text/plain", b)
164+
}
165+
117166
func initHostNames() (discoveryHost, loopHost string) {
118167
var exists bool
119168
discoveryHost, exists = env.PrometheusDiscoveryHostName.Lookup()

core/web/loop_registry_test.go

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
"github.com/smartcontractkit/freeport"
1818

1919
"github.com/smartcontractkit/chainlink-common/pkg/loop"
20+
"github.com/smartcontractkit/chainlink-common/pkg/services"
21+
"github.com/smartcontractkit/chainlink-common/pkg/services/servicetest"
2022
"github.com/smartcontractkit/chainlink/v2/core/internal/cltest"
2123
"github.com/smartcontractkit/chainlink/v2/core/internal/testutils"
2224
"github.com/smartcontractkit/chainlink/v2/core/internal/testutils/configtest"
@@ -26,7 +28,7 @@ import (
2628

2729
type mockLoopImpl struct {
2830
t *testing.T
29-
*loop.PromServer
31+
services.Service
3032
}
3133

3234
// test prom var to avoid collision with real chainlink metrics
@@ -45,19 +47,11 @@ func configurePromRegistry() {
4547

4648
func newMockLoopImpl(t *testing.T, port int) *mockLoopImpl {
4749
return &mockLoopImpl{
48-
t: t,
49-
PromServer: loop.PromServerOpts{Handler: testHandler}.New(port, logger.TestLogger(t).Named("mock-loop")),
50+
t: t,
51+
Service: loop.WebServerOpts{Handler: testHandler}.New(logger.TestLogger(t).Named("mock-loop"), port),
5052
}
5153
}
5254

53-
func (m *mockLoopImpl) start() {
54-
require.NoError(m.t, m.Start())
55-
}
56-
57-
func (m *mockLoopImpl) close() {
58-
require.NoError(m.t, m.Close())
59-
}
60-
6155
func (m *mockLoopImpl) run() {
6256
testMetric.Inc()
6357
}
@@ -95,8 +89,7 @@ func TestLoopRegistry(t *testing.T) {
9589
// our mock loop impl and isolated from the default prom register
9690
configurePromRegistry()
9791
mockLoop := newMockLoopImpl(t, loop.EnvCfg.PrometheusPort)
98-
mockLoop.start()
99-
defer mockLoop.close()
92+
servicetest.Run(t, mockLoop)
10093
mockLoop.run()
10194

10295
client := app.NewHTTPClient(nil)

core/web/router.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ func loopRoutes(app chainlink.Application, r *gin.RouterGroup) {
233233
loopRegistry := NewLoopRegistryServer(app)
234234
r.GET("/discovery", ginHandlerFromHTTP(loopRegistry.discoveryHandler))
235235
r.GET("/plugins/:name/metrics", loopRegistry.pluginMetricHandler)
236+
r.GET("/plugins/:name/debug/pprof/*profile", loopRegistry.pluginPPROFHandler)
237+
//TODO post route too? (symbol)
236238
}
237239

238240
func v2Routes(app chainlink.Application, r *gin.RouterGroup) {

deployment/go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@ require (
3838
github.com/smartcontractkit/ccip-contract-examples/chains/evm v0.0.0-20260129135848-c86808ba5cb9
3939
github.com/smartcontractkit/ccip-owner-contracts v0.1.0
4040
github.com/smartcontractkit/chain-selectors v1.0.91
41-
github.com/smartcontractkit/chainlink-aptos v0.0.0-20251212131933-e5e85d6fa4d3
41+
github.com/smartcontractkit/chainlink-aptos v0.0.0-20260217145444-2c23f991df07
4242
github.com/smartcontractkit/chainlink-ccip v0.1.1-solana.0.20260203202624-5101f4d33736
4343
github.com/smartcontractkit/chainlink-ccip/chains/solana v0.0.0-20260121163256-85accaf3d28d
4444
github.com/smartcontractkit/chainlink-ccip/chains/solana/gobindings v0.0.0-20250912190424-fd2e35d7deb5
4545
github.com/smartcontractkit/chainlink-ccip/deployment v0.0.0-20260129103204-4c8453dd8139
46-
github.com/smartcontractkit/chainlink-common v0.10.0
46+
github.com/smartcontractkit/chainlink-common v0.10.1-0.20260217013856-dd1e16fa7183
4747
github.com/smartcontractkit/chainlink-common/keystore v1.0.2-0.20260211140822-b833b412cdd9
4848
github.com/smartcontractkit/chainlink-deployments-framework v0.80.1-0.20260209182815-b296b7df28a6
4949
github.com/smartcontractkit/chainlink-evm v0.3.4-0.20260205183656-836ec9472717

0 commit comments

Comments
 (0)