From 3fbcdb4f2b5f468e79e220c60a8f3101e4fd3e2d Mon Sep 17 00:00:00 2001 From: Oleksii Sholik Date: Thu, 4 Jun 2026 13:02:52 +0200 Subject: [PATCH 1/5] Export serve_shape request metrics with status, live and known_error tags --- .../lib/electric/telemetry/stack_telemetry.ex | 5 +++++ .../sync-service/lib/electric/plug/serve_shape_plug.ex | 7 ++++++- packages/sync-service/lib/electric/shapes/api/response.ex | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/electric-telemetry/lib/electric/telemetry/stack_telemetry.ex b/packages/electric-telemetry/lib/electric/telemetry/stack_telemetry.ex index 9393adf765..105d1c0c35 100644 --- a/packages/electric-telemetry/lib/electric/telemetry/stack_telemetry.ex +++ b/packages/electric-telemetry/lib/electric/telemetry/stack_telemetry.ex @@ -93,6 +93,11 @@ defmodule ElectricTelemetry.StackTelemetry do unit: :byte, keep: fn metadata -> metadata[:live] != true end ), + counter("electric.plug.serve_shape.requests.count", + event_name: [:electric, :plug, :serve_shape], + measurement: :count, + tags: [:status, :known_error, :live] + ), distribution("electric.shape.response_size.bytes", unit: :byte, tags: [:root_table, :is_live, :stack_id] diff --git a/packages/sync-service/lib/electric/plug/serve_shape_plug.ex b/packages/sync-service/lib/electric/plug/serve_shape_plug.ex index d7a67d64e4..d3fd935554 100644 --- a/packages/sync-service/lib/electric/plug/serve_shape_plug.ex +++ b/packages/sync-service/lib/electric/plug/serve_shape_plug.ex @@ -451,7 +451,8 @@ defmodule Electric.Plug.ServeShapePlug do shape_handle: get_handle(assigns) || conn.query_params["handle"], client_ip: conn.remote_ip, status: conn.status, - stack_id: stack_id + stack_id: stack_id, + known_error: has_known_error_header?(conn) } ) @@ -546,4 +547,8 @@ defmodule Electric.Plug.ServeShapePlug do defp bare_map(%_{} = struct), do: Map.from_struct(struct) defp bare_map(map) when is_map(map), do: map + + defp has_known_error_header?(conn) do + Conn.get_resp_header(conn, Api.Response.known_error_header()) == ["true"] + end end diff --git a/packages/sync-service/lib/electric/shapes/api/response.ex b/packages/sync-service/lib/electric/shapes/api/response.ex index 121762e2ae..77fdfff44d 100644 --- a/packages/sync-service/lib/electric/shapes/api/response.ex +++ b/packages/sync-service/lib/electric/shapes/api/response.ex @@ -477,4 +477,10 @@ defmodule Electric.Shapes.Api.Response do end def electric_headers, do: @electric_headers + + @doc """ + The response header Electric sets to mark an error as a "known" (expected, + typically retryable) error. + """ + def known_error_header, do: @electric_known_error_header end From 307ba33f1906bb1e7e138fb0b9c64898b612a367 Mon Sep 17 00:00:00 2001 From: Oleksii Sholik Date: Thu, 4 Jun 2026 13:05:42 +0200 Subject: [PATCH 2/5] chore: add changeset for serve_shape request metrics Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/serve-shape-request-metrics.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .changeset/serve-shape-request-metrics.md diff --git a/.changeset/serve-shape-request-metrics.md b/.changeset/serve-shape-request-metrics.md new file mode 100644 index 0000000000..367e558fcb --- /dev/null +++ b/.changeset/serve-shape-request-metrics.md @@ -0,0 +1,17 @@ +--- +'@core/electric-telemetry': patch +'@core/sync-service': patch +--- + +Export a per-request `electric.plug.serve_shape.requests.count` metric tagged +by `status`, `known_error` and `live`. + +Existing `serve_shape` metrics drop live (long-poll) requests and are not +dimensioned by response status, so they can't answer "what's my request mix / +error rate right now". This counter intentionally counts every request +(including live) and is unsampled, making it a reliable request-health signal +that doesn't depend on trace sampling. Admission-control rejections show up +here as `status=503, known_error=true` (the conn is halted but still flows +through `emit_shape_telemetry/1`), so overload is visible alongside normal +traffic. The `known_error` tag mirrors the `electric-internal-known-error` +response header, so it matches the classification downstream consumers key on. From 8ee563274eb12393db374d5f0924107c199f6acf Mon Sep 17 00:00:00 2001 From: Oleksii Sholik Date: Thu, 4 Jun 2026 13:51:49 +0200 Subject: [PATCH 3/5] Add missing test coverage for the known_error telemetry metadata --- .../electric/plug/serve_shape_plug_test.exs | 98 ++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/packages/sync-service/test/electric/plug/serve_shape_plug_test.exs b/packages/sync-service/test/electric/plug/serve_shape_plug_test.exs index 6aa840f8de..44cb0b4dc6 100644 --- a/packages/sync-service/test/electric/plug/serve_shape_plug_test.exs +++ b/packages/sync-service/test/electric/plug/serve_shape_plug_test.exs @@ -86,7 +86,8 @@ defmodule Electric.Plug.ServeShapePlugTest do sse_timeout: sse_timeout(ctx), max_age: max_age(ctx), stale_age: stale_age(ctx), - max_concurrent_requests: %{initial: 300, existing: 10_000} + max_concurrent_requests: + Access.get(ctx, :max_concurrent_requests, %{initial: 300, existing: 10_000}) ) end @@ -1044,6 +1045,101 @@ defmodule Electric.Plug.ServeShapePlugTest do :telemetry.detach(handler_id) end end + + test "[:electric, :plug, :serve_shape] tags a successful response with known_error: false", + ctx do + expect_shape_cache( + get_or_create_shape_handle: fn @test_shape, _stack_id, _opts -> + {@test_shape_handle, @test_offset} + end, + await_snapshot_start: fn @test_shape_handle, _ -> :started end + ) + + patch_shape_cache(has_shape?: fn @test_shape_handle, _opts -> true end) + + next_offset = LogOffset.increment(@first_offset) + + expect_storage( + get_chunk_end_log_offset: fn @before_all_offset, _ -> + @first_offset + end, + get_log_stream: fn @before_all_offset, @first_offset, @test_opts -> + [Jason.encode!(%{key: "log", value: "foo", headers: %{}, offset: next_offset})] + end + ) + + test_pid = self() + ref = make_ref() + handler_id = "test-serve-shape-known-error-false-#{inspect(ref)}" + + :telemetry.attach( + handler_id, + [:electric, :plug, :serve_shape], + fn event, measurements, metadata, _config -> + send(test_pid, {:telemetry_serve_shape, event, measurements, metadata}) + end, + nil + ) + + stack_id = ctx.stack_id + + try do + conn = + ctx + |> conn(:get, %{"table" => "public.users"}, "?offset=-1") + |> call_serve_shape_plug(ctx) + + assert conn.status == 200 + + assert_receive {:telemetry_serve_shape, [:electric, :plug, :serve_shape], _measurements, + %{stack_id: ^stack_id} = metadata} + + assert metadata.status == 200 + assert metadata.known_error == false + after + :telemetry.detach(handler_id) + end + end + + test "[:electric, :plug, :serve_shape] tags an admission-rejected 503 with known_error: true", + ctx do + # Force the load-shedding path: with a zero concurrency limit, check_admission + # rejects immediately with a 503 carrying the `electric-internal-known-error` + # header. + ctx = Map.put(ctx, :max_concurrent_requests, %{initial: 0, existing: 0}) + + test_pid = self() + ref = make_ref() + handler_id = "test-serve-shape-known-error-true-#{inspect(ref)}" + + :telemetry.attach( + handler_id, + [:electric, :plug, :serve_shape], + fn event, measurements, metadata, _config -> + send(test_pid, {:telemetry_serve_shape, event, measurements, metadata}) + end, + nil + ) + + stack_id = ctx.stack_id + + try do + conn = + ctx + |> conn(:get, %{"table" => "public.users"}, "?offset=-1") + |> call_serve_shape_plug(ctx) + + assert conn.status == 503 + + assert_receive {:telemetry_serve_shape, [:electric, :plug, :serve_shape], _measurements, + %{stack_id: ^stack_id} = metadata} + + assert metadata.status == 503 + assert metadata.known_error == true + after + :telemetry.detach(handler_id) + end + end end describe "serving shapes with sse mode" do From 1261fcb7510497787f643f4c5216a7eaae0d29d8 Mon Sep 17 00:00:00 2001 From: Oleksii Sholik Date: Thu, 4 Jun 2026 13:55:26 +0200 Subject: [PATCH 4/5] Future-proof the known_error header check via code locality --- .../lib/electric/plug/serve_shape_plug.ex | 6 +----- .../sync-service/lib/electric/shapes/api/response.ex | 12 ++++++------ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/packages/sync-service/lib/electric/plug/serve_shape_plug.ex b/packages/sync-service/lib/electric/plug/serve_shape_plug.ex index d3fd935554..2f3609e862 100644 --- a/packages/sync-service/lib/electric/plug/serve_shape_plug.ex +++ b/packages/sync-service/lib/electric/plug/serve_shape_plug.ex @@ -452,7 +452,7 @@ defmodule Electric.Plug.ServeShapePlug do client_ip: conn.remote_ip, status: conn.status, stack_id: stack_id, - known_error: has_known_error_header?(conn) + known_error: Api.Response.conn_has_known_error?(conn) } ) @@ -547,8 +547,4 @@ defmodule Electric.Plug.ServeShapePlug do defp bare_map(%_{} = struct), do: Map.from_struct(struct) defp bare_map(map) when is_map(map), do: map - - defp has_known_error_header?(conn) do - Conn.get_resp_header(conn, Api.Response.known_error_header()) == ["true"] - end end diff --git a/packages/sync-service/lib/electric/shapes/api/response.ex b/packages/sync-service/lib/electric/shapes/api/response.ex index 77fdfff44d..eea39c55ff 100644 --- a/packages/sync-service/lib/electric/shapes/api/response.ex +++ b/packages/sync-service/lib/electric/shapes/api/response.ex @@ -390,6 +390,12 @@ defmodule Electric.Shapes.Api.Response do Plug.Conn.put_resp_header(conn, @electric_known_error_header, "#{known_error}") end + # keeping this function close to `put_known_error_header/2` above so that we know exactly + # which value to expect for a set known_error header: i.e. "true" or "" (as opposed to e.g. "1" etc). + def conn_has_known_error?(conn) do + Plug.Conn.get_resp_header(conn, @electric_known_error_header) == ["true"] + end + defp put_retry_after_header(conn, %__MODULE__{retry_after: nil}) do conn end @@ -477,10 +483,4 @@ defmodule Electric.Shapes.Api.Response do end def electric_headers, do: @electric_headers - - @doc """ - The response header Electric sets to mark an error as a "known" (expected, - typically retryable) error. - """ - def known_error_header, do: @electric_known_error_header end From d3bd19bf107c78fd8e502842346affd33a49cae4 Mon Sep 17 00:00:00 2001 From: Oleksii Sholik Date: Thu, 4 Jun 2026 14:19:44 +0200 Subject: [PATCH 5/5] Fix comment wording on known_error header values Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/sync-service/lib/electric/shapes/api/response.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/sync-service/lib/electric/shapes/api/response.ex b/packages/sync-service/lib/electric/shapes/api/response.ex index eea39c55ff..f7276e6cd7 100644 --- a/packages/sync-service/lib/electric/shapes/api/response.ex +++ b/packages/sync-service/lib/electric/shapes/api/response.ex @@ -391,7 +391,8 @@ defmodule Electric.Shapes.Api.Response do end # keeping this function close to `put_known_error_header/2` above so that we know exactly - # which value to expect for a set known_error header: i.e. "true" or "" (as opposed to e.g. "1" etc). + # which value to expect for a set known_error header: i.e. "true" or "false" (and absent when + # known_error is nil), as opposed to e.g. "1" etc. def conn_has_known_error?(conn) do Plug.Conn.get_resp_header(conn, @electric_known_error_header) == ["true"] end