diff --git a/caddy/caddy_test.go b/caddy/caddy_test.go index b8961b96d8..acabe66156 100644 --- a/caddy/caddy_test.go +++ b/caddy/caddy_test.go @@ -828,19 +828,19 @@ func TestWorkerMetrics(t *testing.T) { # HELP frankenphp_busy_workers Number of busy PHP workers for this worker # TYPE frankenphp_busy_workers gauge - frankenphp_busy_workers{worker="` + workerName + `"} 0 + frankenphp_busy_workers{server="0",worker="` + workerName + `"} 0 # HELP frankenphp_total_workers Total number of PHP workers for this worker # TYPE frankenphp_total_workers gauge - frankenphp_total_workers{worker="` + workerName + `"} 2 + frankenphp_total_workers{server="0",worker="` + workerName + `"} 2 # HELP frankenphp_worker_request_count # TYPE frankenphp_worker_request_count counter - frankenphp_worker_request_count{worker="` + workerName + `"} 10 + frankenphp_worker_request_count{server="0",worker="` + workerName + `"} 10 # HELP frankenphp_ready_workers Running workers that have successfully called frankenphp_handle_request at least once # TYPE frankenphp_ready_workers gauge - frankenphp_ready_workers{worker="` + workerName + `"} 2 + frankenphp_ready_workers{server="0",worker="` + workerName + `"} 2 ` ctx := caddy.ActiveContext() @@ -922,19 +922,19 @@ func TestNamedWorkerMetrics(t *testing.T) { # HELP frankenphp_busy_workers Number of busy PHP workers for this worker # TYPE frankenphp_busy_workers gauge - frankenphp_busy_workers{worker="my_app"} 0 + frankenphp_busy_workers{server="0",worker="my_app"} 0 # HELP frankenphp_total_workers Total number of PHP workers for this worker # TYPE frankenphp_total_workers gauge - frankenphp_total_workers{worker="my_app"} 2 + frankenphp_total_workers{server="0",worker="my_app"} 2 # HELP frankenphp_worker_request_count # TYPE frankenphp_worker_request_count counter - frankenphp_worker_request_count{worker="my_app"} 10 + frankenphp_worker_request_count{server="0",worker="my_app"} 10 # HELP frankenphp_ready_workers Running workers that have successfully called frankenphp_handle_request at least once # TYPE frankenphp_ready_workers gauge - frankenphp_ready_workers{worker="my_app"} 2 + frankenphp_ready_workers{server="0",worker="my_app"} 2 ` ctx := caddy.ActiveContext() @@ -1018,19 +1018,19 @@ func TestAutoWorkerConfig(t *testing.T) { # HELP frankenphp_busy_workers Number of busy PHP workers for this worker # TYPE frankenphp_busy_workers gauge - frankenphp_busy_workers{worker="` + workerName + `"} 0 + frankenphp_busy_workers{server="0",worker="` + workerName + `"} 0 # HELP frankenphp_total_workers Total number of PHP workers for this worker # TYPE frankenphp_total_workers gauge - frankenphp_total_workers{worker="` + workerName + `"} ` + workers + ` + frankenphp_total_workers{server="0",worker="` + workerName + `"} ` + workers + ` # HELP frankenphp_worker_request_count # TYPE frankenphp_worker_request_count counter - frankenphp_worker_request_count{worker="` + workerName + `"} 10 + frankenphp_worker_request_count{server="0",worker="` + workerName + `"} 10 # HELP frankenphp_ready_workers Running workers that have successfully called frankenphp_handle_request at least once # TYPE frankenphp_ready_workers gauge - frankenphp_ready_workers{worker="` + workerName + `"} ` + workers + ` + frankenphp_ready_workers{server="0",worker="` + workerName + `"} ` + workers + ` ` ctx := caddy.ActiveContext() @@ -1284,7 +1284,7 @@ func TestMaxWaitTimeWorker(t *testing.T) { expectedMetrics := ` # TYPE frankenphp_worker_queue_depth gauge - frankenphp_worker_queue_depth{worker="service"} 0 + frankenphp_worker_queue_depth{server="0",worker="service"} 0 ` ctx := caddy.ActiveContext() @@ -1385,21 +1385,21 @@ func TestMultiWorkersMetrics(t *testing.T) { # HELP frankenphp_busy_workers Number of busy PHP workers for this worker # TYPE frankenphp_busy_workers gauge - frankenphp_busy_workers{worker="service1"} 0 + frankenphp_busy_workers{server="0",worker="service1"} 0 # HELP frankenphp_total_workers Total number of PHP workers for this worker # TYPE frankenphp_total_workers gauge - frankenphp_total_workers{worker="service1"} 2 - frankenphp_total_workers{worker="service2"} 3 + frankenphp_total_workers{server="0",worker="service1"} 2 + frankenphp_total_workers{server="0",worker="service2"} 3 # HELP frankenphp_worker_request_count # TYPE frankenphp_worker_request_count counter - frankenphp_worker_request_count{worker="service1"} 10 + frankenphp_worker_request_count{server="0",worker="service1"} 10 # HELP frankenphp_ready_workers Running workers that have successfully called frankenphp_handle_request at least once # TYPE frankenphp_ready_workers gauge - frankenphp_ready_workers{worker="service1"} 2 - frankenphp_ready_workers{worker="service2"} 3 + frankenphp_ready_workers{server="0",worker="service1"} 2 + frankenphp_ready_workers{server="0",worker="service2"} 3 ` ctx := caddy.ActiveContext() @@ -1552,10 +1552,10 @@ func TestWorkerRestart(t *testing.T) { expectedMetrics := ` # HELP frankenphp_ready_workers Running workers that have successfully called frankenphp_handle_request at least once # TYPE frankenphp_ready_workers gauge - frankenphp_ready_workers{worker="service"} 1 + frankenphp_ready_workers{server="0",worker="service"} 1 # HELP frankenphp_total_workers Total number of PHP workers for this worker # TYPE frankenphp_total_workers gauge - frankenphp_total_workers{worker="service"} 1 + frankenphp_total_workers{server="0",worker="service"} 1 ` require.NoError(t, @@ -1580,13 +1580,13 @@ func TestWorkerRestart(t *testing.T) { expectedMetrics = ` # HELP frankenphp_ready_workers Running workers that have successfully called frankenphp_handle_request at least once # TYPE frankenphp_ready_workers gauge - frankenphp_ready_workers{worker="service"} 1 + frankenphp_ready_workers{server="0",worker="service"} 1 # HELP frankenphp_total_workers Total number of PHP workers for this worker # TYPE frankenphp_total_workers gauge - frankenphp_total_workers{worker="service"} 1 + frankenphp_total_workers{server="0",worker="service"} 1 # HELP frankenphp_worker_restarts Number of PHP worker restarts for this worker # TYPE frankenphp_worker_restarts counter - frankenphp_worker_restarts{worker="service"} 3 + frankenphp_worker_restarts{server="0",worker="service"} 3 ` require.NoError(t, diff --git a/caddy/module.go b/caddy/module.go index fe14818105..e22225ff86 100644 --- a/caddy/module.go +++ b/caddy/module.go @@ -11,6 +11,7 @@ import ( "slices" "strconv" "strings" + "sync" "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/caddyconfig" @@ -51,6 +52,8 @@ type FrankenPHPModule struct { preparedEnvNeedsReplacement bool logger *slog.Logger requestOptions []frankenphp.RequestOption + scope frankenphp.Scope + scopeLabelOnce sync.Once } // CaddyModule returns the Caddy module information. @@ -78,6 +81,14 @@ func (f *FrankenPHPModule) Provision(ctx caddy.Context) error { f.assignMercureHub(ctx) + // Each php_server block gets its own scope so its workers' metric + // series stay distinct from any other block's workers (the "server" + // label). Provision can be called more than once for the same module; + // only assign once. + if f.scope == 0 { + f.scope = frankenphp.NextScope() + } + loggerOpt := frankenphp.WithRequestLogger(f.logger) for i, wc := range f.Workers { // make the file path absolute from the public directory @@ -92,6 +103,7 @@ func (f *FrankenPHPModule) Provision(ctx caddy.Context) error { } wc.requestOptions = append(wc.requestOptions, loggerOpt) + wc.options = append(wc.options, frankenphp.WithWorkerScope(f.scope)) f.Workers[i] = wc } @@ -235,6 +247,16 @@ func (f *FrankenPHPModule) ServeHTTP(w http.ResponseWriter, r *http.Request, _ c } } + f.scopeLabelOnce.Do(func() { + srv, _ := ctx.Value(caddyhttp.ServerCtxKey).(*caddyhttp.Server) + if srv == nil { + return + } + if label := f.resolveScopeLabel(srv); label != "" { + frankenphp.SetScopeLabel(f.scope, label) + } + }) + fr, err := frankenphp.NewRequestWithContext( r, append( diff --git a/caddy/scopelabel.go b/caddy/scopelabel.go new file mode 100644 index 0000000000..8df6e6c993 --- /dev/null +++ b/caddy/scopelabel.go @@ -0,0 +1,79 @@ +package caddy + +import ( + "strings" + + "github.com/caddyserver/caddy/v2/modules/caddyhttp" +) + +// resolveScopeLabel picks a stable, human-friendly identifier for this +// module's scope so metric/log emitters can render it (e.g. +// server="api.example.com") instead of the opaque numeric id. +// Cascade: +// 1. First host of the route's host matcher. +// 2. Caddy server name when user-set (i.e. not the auto srvN form). +// 3. First listener address of the server. +func (f *FrankenPHPModule) resolveScopeLabel(srv *caddyhttp.Server) string { + if h := findHostInRoutes(srv.Routes, f); h != "" { + return h + } + if name := srv.Name(); name != "" && !isAutoServerName(name) { + return name + } + if len(srv.Listen) > 0 { + return srv.Listen[0] + } + return "" +} + +// findHostInRoutes walks routes (recursing into Subroute handlers) to +// locate the route that contains target, then returns the first host of +// that route's host matcher. Returns "" if no enclosing route or no host +// matcher is found. +func findHostInRoutes(routes caddyhttp.RouteList, target caddyhttp.MiddlewareHandler) string { + for _, route := range routes { + if !routeContainsHandler(route, target) { + continue + } + for _, mset := range route.MatcherSets { + for _, m := range mset { + hp, ok := m.(*caddyhttp.MatchHost) + if !ok || hp == nil || len(*hp) == 0 { + continue + } + return (*hp)[0] + } + } + } + return "" +} + +func routeContainsHandler(route caddyhttp.Route, target caddyhttp.MiddlewareHandler) bool { + for _, h := range route.Handlers { + if h == target { + return true + } + if sub, ok := h.(*caddyhttp.Subroute); ok { + for _, r := range sub.Routes { + if routeContainsHandler(r, target) { + return true + } + } + } + } + return false +} + +// isAutoServerName reports whether name is one of Caddy's auto-assigned +// server names (srv0, srv1, ...). Anything else is treated as user-set. +func isAutoServerName(name string) bool { + if !strings.HasPrefix(name, "srv") || len(name) <= 3 { + return false + } + for _, c := range name[3:] { + if c < '0' || c > '9' { + return false + } + } + return true +} diff --git a/docs/metrics.md b/docs/metrics.md index a783a5c90e..383efd9298 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -13,13 +13,13 @@ When [Caddy metrics](https://caddyserver.com/docs/metrics) are enabled, FrankenP - `frankenphp_total_threads`: The total number of PHP threads. - `frankenphp_busy_threads`: The number of PHP threads currently processing a request (running workers always consume a thread). - `frankenphp_queue_depth`: The number of regular queued requests -- `frankenphp_total_workers{worker="[worker_name]"}`: The total number of workers. -- `frankenphp_busy_workers{worker="[worker_name]"}`: The number of workers currently processing a request. -- `frankenphp_worker_request_time{worker="[worker_name]"}`: The time spent processing requests by all workers. -- `frankenphp_worker_request_count{worker="[worker_name]"}`: The number of requests processed by all workers. -- `frankenphp_ready_workers{worker="[worker_name]"}`: The number of workers that have called `frankenphp_handle_request` at least once. -- `frankenphp_worker_crashes{worker="[worker_name]"}`: The number of times a worker has unexpectedly terminated. -- `frankenphp_worker_restarts{worker="[worker_name]"}`: The number of times a worker has been deliberately restarted. -- `frankenphp_worker_queue_depth{worker="[worker_name]"}`: The number of queued requests. +- `frankenphp_total_workers{server="[server]",worker="[worker_name]"}`: The total number of workers. +- `frankenphp_busy_workers{server="[server]",worker="[worker_name]"}`: The number of workers currently processing a request. +- `frankenphp_worker_request_time{server="[server]",worker="[worker_name]"}`: The time spent processing requests by all workers. +- `frankenphp_worker_request_count{server="[server]",worker="[worker_name]"}`: The number of requests processed by all workers. +- `frankenphp_ready_workers{server="[server]",worker="[worker_name]"}`: The number of workers that have called `frankenphp_handle_request` at least once. +- `frankenphp_worker_crashes{server="[server]",worker="[worker_name]"}`: The number of times a worker has unexpectedly terminated. +- `frankenphp_worker_restarts{server="[server]",worker="[worker_name]"}`: The number of times a worker has been deliberately restarted. +- `frankenphp_worker_queue_depth{server="[server]",worker="[worker_name]"}`: The number of queued requests. -For worker metrics, the `[worker_name]` placeholder is replaced by the worker name in the Caddyfile, otherwise the absolute path of the worker file will be used. +For worker metrics, the `[worker_name]` placeholder is replaced by the worker name in the Caddyfile, otherwise the absolute path of the worker file will be used. The `[server]` label identifies the `php_server` block that declared the worker; the Caddy module resolves it to the first host of the route's host matcher (e.g. `api.example.com`), falling back to the user-set Caddy server name and finally to the first listener address. Same-named workers in distinct `php_server` blocks therefore stay on distinct series. diff --git a/frankenphp.go b/frankenphp.go index 52246d01c7..f885704211 100644 --- a/frankenphp.go +++ b/frankenphp.go @@ -163,7 +163,7 @@ func calculateMaxThreads(opt *opt) (numWorkers int, _ error) { // https://github.com/php/frankenphp/issues/126 opt.workers[i].num = maxProcs } - metrics.TotalWorkers(w.name, w.num) + metrics.TotalWorkers(ScopeLabel(w.scope), w.name, w.num) numWorkers += opt.workers[i].num diff --git a/metrics.go b/metrics.go index b6b4ca119a..862813b321 100644 --- a/metrics.go +++ b/metrics.go @@ -16,15 +16,20 @@ const ( type StopReason int +// Metrics is the worker-level instrumentation surface. Every method that +// identifies a specific worker takes a (server, name) pair: server is the +// per-php_server label resolved via ScopeLabel, name is the worker name. +// The pair is what disambiguates same-named workers declared in distinct +// php_server blocks. type Metrics interface { // StartWorker collects started workers - StartWorker(name string) + StartWorker(server, name string) // ReadyWorker collects ready workers - ReadyWorker(name string) + ReadyWorker(server, name string) // StopWorker collects stopped workers - StopWorker(name string, reason StopReason) + StopWorker(server, name string, reason StopReason) // TotalWorkers collects expected workers - TotalWorkers(name string, num int) + TotalWorkers(server, name string, num int) // TotalThreads collects total threads TotalThreads(num int) // StartRequest collects started requests @@ -32,28 +37,28 @@ type Metrics interface { // StopRequest collects stopped requests StopRequest() // StopWorkerRequest collects stopped worker requests - StopWorkerRequest(name string, duration time.Duration) + StopWorkerRequest(server, name string, duration time.Duration) // StartWorkerRequest collects started worker requests - StartWorkerRequest(name string) + StartWorkerRequest(server, name string) Shutdown() - QueuedWorkerRequest(name string) - DequeuedWorkerRequest(name string) + QueuedWorkerRequest(server, name string) + DequeuedWorkerRequest(server, name string) QueuedRequest() DequeuedRequest() } type nullMetrics struct{} -func (n nullMetrics) StartWorker(string) { +func (n nullMetrics) StartWorker(string, string) { } -func (n nullMetrics) ReadyWorker(string) { +func (n nullMetrics) ReadyWorker(string, string) { } -func (n nullMetrics) StopWorker(string, StopReason) { +func (n nullMetrics) StopWorker(string, string, StopReason) { } -func (n nullMetrics) TotalWorkers(string, int) { +func (n nullMetrics) TotalWorkers(string, string, int) { } func (n nullMetrics) TotalThreads(int) { @@ -65,18 +70,18 @@ func (n nullMetrics) StartRequest() { func (n nullMetrics) StopRequest() { } -func (n nullMetrics) StopWorkerRequest(string, time.Duration) { +func (n nullMetrics) StopWorkerRequest(string, string, time.Duration) { } -func (n nullMetrics) StartWorkerRequest(string) { +func (n nullMetrics) StartWorkerRequest(string, string) { } func (n nullMetrics) Shutdown() { } -func (n nullMetrics) QueuedWorkerRequest(string) {} +func (n nullMetrics) QueuedWorkerRequest(string, string) {} -func (n nullMetrics) DequeuedWorkerRequest(string) {} +func (n nullMetrics) DequeuedWorkerRequest(string, string) {} func (n nullMetrics) QueuedRequest() {} func (n nullMetrics) DequeuedRequest() {} @@ -97,7 +102,7 @@ type PrometheusMetrics struct { mu sync.Mutex } -func (m *PrometheusMetrics) StartWorker(name string) { +func (m *PrometheusMetrics) StartWorker(server, name string) { m.busyThreads.Inc() // tests do not register workers before starting them @@ -105,18 +110,18 @@ func (m *PrometheusMetrics) StartWorker(name string) { return } - m.totalWorkers.WithLabelValues(name).Inc() + m.totalWorkers.WithLabelValues(server, name).Inc() } -func (m *PrometheusMetrics) ReadyWorker(name string) { +func (m *PrometheusMetrics) ReadyWorker(server, name string) { if m.totalWorkers == nil { return } - m.readyWorkers.WithLabelValues(name).Inc() + m.readyWorkers.WithLabelValues(server, name).Inc() } -func (m *PrometheusMetrics) StopWorker(name string, reason StopReason) { +func (m *PrometheusMetrics) StopWorker(server, name string, reason StopReason) { m.busyThreads.Dec() // tests do not register workers before starting them @@ -124,27 +129,27 @@ func (m *PrometheusMetrics) StopWorker(name string, reason StopReason) { return } - m.totalWorkers.WithLabelValues(name).Dec() + m.totalWorkers.WithLabelValues(server, name).Dec() // only decrement readyWorkers if the worker actually reached frankenphp_handle_request if reason != StopReasonBootFailure { - m.readyWorkers.WithLabelValues(name).Dec() + m.readyWorkers.WithLabelValues(server, name).Dec() } switch reason { case StopReasonCrash, StopReasonBootFailure: - m.workerCrashes.WithLabelValues(name).Inc() + m.workerCrashes.WithLabelValues(server, name).Inc() case StopReasonRestart: - m.workerRestarts.WithLabelValues(name).Inc() + m.workerRestarts.WithLabelValues(server, name).Inc() } } -func (m *PrometheusMetrics) TotalWorkers(string, int) { +func (m *PrometheusMetrics) TotalWorkers(string, string, int) { m.mu.Lock() defer m.mu.Unlock() const ns, sub = "frankenphp", "worker" - basicLabels := []string{"worker"} + basicLabels := []string{"server", "worker"} if m.totalWorkers == nil { m.totalWorkers = prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -257,35 +262,35 @@ func (m *PrometheusMetrics) StopRequest() { m.busyThreads.Dec() } -func (m *PrometheusMetrics) StopWorkerRequest(name string, duration time.Duration) { +func (m *PrometheusMetrics) StopWorkerRequest(server, name string, duration time.Duration) { if m.workerRequestTime == nil { return } - m.workerRequestCount.WithLabelValues(name).Inc() - m.busyWorkers.WithLabelValues(name).Dec() - m.workerRequestTime.WithLabelValues(name).Add(duration.Seconds()) + m.workerRequestCount.WithLabelValues(server, name).Inc() + m.busyWorkers.WithLabelValues(server, name).Dec() + m.workerRequestTime.WithLabelValues(server, name).Add(duration.Seconds()) } -func (m *PrometheusMetrics) StartWorkerRequest(name string) { +func (m *PrometheusMetrics) StartWorkerRequest(server, name string) { if m.busyWorkers == nil { return } - m.busyWorkers.WithLabelValues(name).Inc() + m.busyWorkers.WithLabelValues(server, name).Inc() } -func (m *PrometheusMetrics) QueuedWorkerRequest(name string) { +func (m *PrometheusMetrics) QueuedWorkerRequest(server, name string) { if m.workerQueueDepth == nil { return } - m.workerQueueDepth.WithLabelValues(name).Inc() + m.workerQueueDepth.WithLabelValues(server, name).Inc() } -func (m *PrometheusMetrics) DequeuedWorkerRequest(name string) { +func (m *PrometheusMetrics) DequeuedWorkerRequest(server, name string) { if m.workerQueueDepth == nil { return } - m.workerQueueDepth.WithLabelValues(name).Dec() + m.workerQueueDepth.WithLabelValues(server, name).Dec() } func (m *PrometheusMetrics) QueuedRequest() { diff --git a/metrics_test.go b/metrics_test.go index 41d6518367..c0f6d75203 100644 --- a/metrics_test.go +++ b/metrics_test.go @@ -32,7 +32,7 @@ func TestPrometheusMetrics_TotalWorkers(t *testing.T) { require.Nil(t, m.workerRequestTime) require.Nil(t, m.workerRequestCount) - m.TotalWorkers("test_worker", 2) + m.TotalWorkers("test_server", "test_worker", 2) require.NotNil(t, m.totalWorkers) require.NotNil(t, m.busyWorkers) @@ -45,8 +45,8 @@ func TestPrometheusMetrics_TotalWorkers(t *testing.T) { func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) { m := createPrometheusMetrics() - m.TotalWorkers("test_worker", 2) - m.StopWorkerRequest("test_worker", 2*time.Second) + m.TotalWorkers("test_server", "test_worker", 2) + m.StopWorkerRequest("test_server", "test_worker", 2*time.Second) inputs := []struct { name string @@ -62,7 +62,7 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) { # TYPE frankenphp_worker_request_count counter `, expect: ` - frankenphp_worker_request_count{worker="test_worker"} 1 + frankenphp_worker_request_count{server="test_server",worker="test_worker"} 1 `, }, { @@ -73,7 +73,7 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) { # TYPE frankenphp_busy_workers gauge `, expect: ` - frankenphp_busy_workers{worker="test_worker"} -1 + frankenphp_busy_workers{server="test_server",worker="test_worker"} -1 `, }, { @@ -84,7 +84,7 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) { # TYPE frankenphp_worker_request_time counter `, expect: ` - frankenphp_worker_request_time{worker="test_worker"} 2 + frankenphp_worker_request_time{server="test_server",worker="test_worker"} 2 `, }, } @@ -99,8 +99,8 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) { func TestPrometheusMetrics_StartWorkerRequest(t *testing.T) { m := createPrometheusMetrics() - m.TotalWorkers("test_worker", 2) - m.StartWorkerRequest("test_worker") + m.TotalWorkers("test_server", "test_worker", 2) + m.StartWorkerRequest("test_server", "test_worker") inputs := []struct { name string @@ -116,7 +116,7 @@ func TestPrometheusMetrics_StartWorkerRequest(t *testing.T) { # TYPE frankenphp_busy_workers gauge `, expect: ` - frankenphp_busy_workers{worker="test_worker"} 1 + frankenphp_busy_workers{server="test_server",worker="test_worker"} 1 `, }, } @@ -131,8 +131,8 @@ func TestPrometheusMetrics_StartWorkerRequest(t *testing.T) { func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) { m := createPrometheusMetrics() - m.TotalWorkers("test_worker", 2) - m.StopWorker("test_worker", StopReasonCrash) + m.TotalWorkers("test_server", "test_worker", 2) + m.StopWorker("test_server", "test_worker", StopReasonCrash) inputs := []struct { name string @@ -159,7 +159,7 @@ func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) { # TYPE frankenphp_total_workers gauge `, expect: ` - frankenphp_total_workers{worker="test_worker"} -1 + frankenphp_total_workers{server="test_server",worker="test_worker"} -1 `, }, { @@ -170,7 +170,7 @@ func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) { # TYPE frankenphp_ready_workers gauge `, expect: ` - frankenphp_ready_workers{worker="test_worker"} -1 + frankenphp_ready_workers{server="test_server",worker="test_worker"} -1 `, }, { @@ -181,7 +181,7 @@ func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) { # TYPE frankenphp_worker_crashes counter `, expect: ` - frankenphp_worker_crashes{worker="test_worker"} 1 + frankenphp_worker_crashes{server="test_server",worker="test_worker"} 1 `, }, } diff --git a/options.go b/options.go index a9cd2a2630..b81e07be6b 100644 --- a/options.go +++ b/options.go @@ -50,6 +50,7 @@ type workerOpt struct { onThreadShutdown func(int) onServerStartup func() onServerShutdown func() + scope Scope } // WithContext sets the main context to use. @@ -258,6 +259,18 @@ func WithWorkerOnServerShutdown(f func()) WorkerOption { } } +// EXPERIMENTAL: WithWorkerScope assigns this worker to a given scope. +// The scope is exposed as the "server" label on the worker's Prometheus +// metrics so workers declared in distinct php_server blocks stay on +// distinct series. The zero value is the global/embed scope. +func WithWorkerScope(scope Scope) WorkerOption { + return func(w *workerOpt) error { + w.scope = scope + + return nil + } +} + func withExtensionWorkers(w *extensionWorkers) WorkerOption { return func(wo *workerOpt) error { wo.extensionWorkers = w diff --git a/scope.go b/scope.go new file mode 100644 index 0000000000..803f0aeca8 --- /dev/null +++ b/scope.go @@ -0,0 +1,49 @@ +package frankenphp + +import ( + "strconv" + "sync" + "sync/atomic" +) + +// Scope is an opaque per-php_server identifier. The zero value is the +// global/embed scope. Obtain values via NextScope. +// +// Scopes let metric series carry a "server" label so workers declared +// in distinct php_server blocks stay on distinct series. Future +// per-server features (per-server isolation, dispatching) can reuse the +// same identifier. +type Scope uint64 + +var scopeCounter atomic.Uint64 + +// NextScope returns a fresh scope value. Each php_server block should +// call this once during provisioning. +func NextScope() Scope { + return Scope(scopeCounter.Add(1)) +} + +// scopeLabels maps Scope -> human-readable label registered by the embedder +// (e.g. the Caddy module). Read by ScopeLabel; written by SetScopeLabel. +var scopeLabels sync.Map + +// SetScopeLabel attaches a human-readable label to a scope so metric/log +// emitters can render it as e.g. server="api.example.com" instead of an +// opaque numeric id. Empty labels are ignored. Embedders (Caddy module, +// custom hosts) own the labelling policy. +func SetScopeLabel(s Scope, label string) { + if label == "" { + return + } + scopeLabels.Store(s, label) +} + +// ScopeLabel returns the label registered for s. When none is set +// (including the zero/global scope), it returns the numeric id so +// callers always get a non-empty value. +func ScopeLabel(s Scope) string { + if v, ok := scopeLabels.Load(s); ok { + return v.(string) + } + return strconv.FormatUint(uint64(s), 10) +} diff --git a/threadworker.go b/threadworker.go index 21e1034805..5801db5497 100644 --- a/threadworker.go +++ b/threadworker.go @@ -108,7 +108,7 @@ func (handler *workerThread) name() string { func (handler *workerThread) drain() {} func setupWorkerScript(handler *workerThread, worker *worker) { - metrics.StartWorker(worker.name) + metrics.StartWorker(ScopeLabel(worker.scope), worker.name) // Create a dummy request to set up the worker fc, err := newDummyContext( @@ -147,9 +147,11 @@ func tearDownWorkerScript(handler *workerThread, exitStatus int) { handler.thread.contextMu.Unlock() } + server := ScopeLabel(worker.scope) + // on exit status 0 we just run the worker script again if exitStatus == 0 && !handler.isBootingScript { - metrics.StopWorker(worker.name, StopReasonRestart) + metrics.StopWorker(server, worker.name, StopReasonRestart) if globalLogger.Enabled(globalCtx, slog.LevelDebug) { globalLogger.LogAttrs(globalCtx, slog.LevelDebug, "restarting", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex), slog.Int("exit_status", exitStatus)) @@ -160,9 +162,9 @@ func tearDownWorkerScript(handler *workerThread, exitStatus int) { // worker has thrown a fatal error or has not reached frankenphp_handle_request if handler.isBootingScript { - metrics.StopWorker(worker.name, StopReasonBootFailure) + metrics.StopWorker(server, worker.name, StopReasonBootFailure) } else { - metrics.StopWorker(worker.name, StopReasonCrash) + metrics.StopWorker(server, worker.name, StopReasonCrash) } if !handler.isBootingScript { @@ -220,7 +222,7 @@ func (handler *workerThread) waitForWorkerRequest() (bool, any) { } // worker is truly ready only after reaching frankenphp_handle_request() - metrics.ReadyWorker(handler.worker.name) + metrics.ReadyWorker(ScopeLabel(handler.worker.scope), handler.worker.name) } // max_requests reached: signal reboot for full ZTS cleanup diff --git a/worker.go b/worker.go index fdc3098da6..de4f90256a 100644 --- a/worker.go +++ b/worker.go @@ -33,6 +33,7 @@ type worker struct { onThreadReady func(int) onThreadShutdown func(int) queuedRequests atomic.Int32 + scope Scope } var ( @@ -148,6 +149,7 @@ func newWorker(o workerOpt) (*worker, error) { maxConsecutiveFailures: o.maxConsecutiveFailures, onThreadReady: o.onThreadReady, onThreadShutdown: o.onThreadShutdown, + scope: o.scope, } w.configureMercure(&o) @@ -288,7 +290,8 @@ func (worker *worker) isAtThreadLimit() bool { } func (worker *worker) handleRequest(ch contextHolder) error { - metrics.StartWorkerRequest(worker.name) + server := ScopeLabel(worker.scope) + metrics.StartWorkerRequest(server, worker.name) runtime.Gosched() @@ -300,7 +303,7 @@ func (worker *worker) handleRequest(ch contextHolder) error { case thread.requestChan <- ch: worker.threadMutex.RUnlock() <-ch.frankenPHPContext.done - metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt)) + metrics.StopWorkerRequest(server, worker.name, time.Since(ch.frankenPHPContext.startedAt)) return nil default: @@ -312,7 +315,7 @@ func (worker *worker) handleRequest(ch contextHolder) error { // if no thread was available, mark the request as queued and apply the scaling strategy worker.queuedRequests.Add(1) - metrics.QueuedWorkerRequest(worker.name) + metrics.QueuedWorkerRequest(server, worker.name) for { workerScaleChan := scaleChan @@ -323,9 +326,9 @@ func (worker *worker) handleRequest(ch contextHolder) error { select { case worker.requestChan <- ch: worker.queuedRequests.Add(-1) - metrics.DequeuedWorkerRequest(worker.name) + metrics.DequeuedWorkerRequest(server, worker.name) <-ch.frankenPHPContext.done - metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt)) + metrics.StopWorkerRequest(server, worker.name, time.Since(ch.frankenPHPContext.startedAt)) return nil case workerScaleChan <- ch.frankenPHPContext: @@ -333,8 +336,8 @@ func (worker *worker) handleRequest(ch contextHolder) error { case <-timeoutChan(maxWaitTime): // the request has timed out stalling worker.queuedRequests.Add(-1) - metrics.DequeuedWorkerRequest(worker.name) - metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt)) + metrics.DequeuedWorkerRequest(server, worker.name) + metrics.StopWorkerRequest(server, worker.name, time.Since(ch.frankenPHPContext.startedAt)) ch.frankenPHPContext.reject(ErrMaxWaitTimeExceeded)