diff --git a/CHANGELOG.md b/CHANGELOG.md index 3367ab10..3e53013a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). --- +## v26.06.97 (2026-06-11) + +### Fixed + +- **Process-global, idempotent metric registration.** `MetricsRegistry` cached its + Prometheus collectors in **per-instance** dicts (`self._counters`/`_histograms`/ + `_gauges`), but prometheus_client registers every `Counter`/`Histogram`/`Gauge` on the + process-global default `REGISTRY` keyed by name. A second `MetricsRegistry` (e.g. a + second `create_app()` in one pytest process) therefore re-created an already-registered + collector and prometheus raised `ValueError: Duplicated timeseries in CollectorRegistry: + pyfly_db_query_duration_seconds...`. The collector caches are now **module-level** + (process-global), so get-or-create is idempotent across every `MetricsRegistry` in the + process — matching prometheus's one-collector-per-name model. The global default registry + is still used for `/metrics` exposition (production behavior unchanged). This retires the + per-app `conftest` fixture workaround that downstream services carried to reset the + registry between apps. Regression tests in `tests/observability/test_metrics_idempotent.py`. + +--- + ## v26.06.96 (2026-06-10) ### Fixed diff --git a/README.md b/README.md index ff106c35..3b2d3a37 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Firefly Framework Python 3.12+ License: Apache 2.0 - Version: 26.06.96 + Version: 26.06.97 Type Checked: mypy strict Code Style: Ruff Async First diff --git a/pyproject.toml b/pyproject.toml index d9495372..f51d1420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "pyfly" # CalVer YY.MM.PATCH — package metadata uses PEP 440 normalized form (26.5.4); # git tag, GitHub release and human-readable display use leading-zero form # (v26.05.04) to match the Java/.NET/Go siblings. -version = "26.6.96" +version = "26.6.97" description = "The official Python implementation of the Firefly Framework — DI, CQRS, EDA, hexagonal architecture, and more." readme = "README.md" license = "Apache-2.0" diff --git a/src/pyfly/observability/metrics.py b/src/pyfly/observability/metrics.py index 513b813f..094b9d80 100644 --- a/src/pyfly/observability/metrics.py +++ b/src/pyfly/observability/metrics.py @@ -35,12 +35,29 @@ F = TypeVar("F", bound=Callable[..., Any]) +# Metric collector caches are PROCESS-GLOBAL (module-level), not per-instance. +# prometheus_client registers every Counter/Histogram/Gauge on the process-global +# default ``REGISTRY`` keyed by metric name, so a name may only be created once per +# process. Caching per ``MetricsRegistry`` instance meant a second registry (e.g. a +# second ``create_app()`` in one pytest process) re-created an already-registered +# collector and prometheus raised "Duplicated timeseries in CollectorRegistry". Keying +# the caches at module scope makes get-or-create idempotent across every +# ``MetricsRegistry`` in the process, matching prometheus's one-collector-per-name model. +_COUNTERS: dict[str, Counter] = {} +_HISTOGRAMS: dict[str, Histogram] = {} +_GAUGES: dict[str, Gauge] = {} + class MetricsRegistry(MetricsRecorder): """Registry for application metrics — the Prometheus :class:`MetricsRecorder` adapter. - Wraps prometheus_client to provide a clean API for creating and - managing metrics. Ensures each metric name is registered only once. + Wraps prometheus_client to provide a clean API for creating and managing metrics. + Registration is **process-global and idempotent**: collector caches live at module + scope, so every metric name is created exactly once per process no matter how many + ``MetricsRegistry`` instances exist. This mirrors prometheus_client's own + one-collector-per-name model on the global default ``REGISTRY`` and means a second + application (e.g. a second ``create_app()`` in one test process) reuses the existing + collectors instead of raising "Duplicated timeseries in CollectorRegistry". """ def __init__(self) -> None: @@ -48,15 +65,12 @@ def __init__(self) -> None: raise ImportError( "prometheus_client is required for metrics. Install the observability extra: pyfly[observability]" ) - self._counters: dict[str, Counter] = {} - self._histograms: dict[str, Histogram] = {} - self._gauges: dict[str, Gauge] = {} def counter(self, name: str, description: str, labels: list[str] | None = None) -> Counter: - """Get or create a counter metric.""" - if name not in self._counters: - self._counters[name] = Counter(name, description, labels or []) - return self._counters[name] + """Get or create a counter metric (idempotent process-wide).""" + if name not in _COUNTERS: + _COUNTERS[name] = Counter(name, description, labels or []) + return _COUNTERS[name] def histogram( self, @@ -65,19 +79,19 @@ def histogram( labels: list[str] | None = None, buckets: tuple[float, ...] | None = None, ) -> Histogram: - """Get or create a histogram metric.""" - if name not in self._histograms: + """Get or create a histogram metric (idempotent process-wide).""" + if name not in _HISTOGRAMS: kwargs: dict[str, Any] = {} if buckets: kwargs["buckets"] = buckets - self._histograms[name] = Histogram(name, description, labels or [], **kwargs) - return self._histograms[name] + _HISTOGRAMS[name] = Histogram(name, description, labels or [], **kwargs) + return _HISTOGRAMS[name] def gauge(self, name: str, description: str, labels: list[str] | None = None) -> Gauge: - """Get or create a gauge metric.""" - if name not in self._gauges: - self._gauges[name] = Gauge(name, description, labels or []) - return self._gauges[name] + """Get or create a gauge metric (idempotent process-wide).""" + if name not in _GAUGES: + _GAUGES[name] = Gauge(name, description, labels or []) + return _GAUGES[name] def _sanitize(name: str) -> str: diff --git a/tests/observability/test_metrics_idempotent.py b/tests/observability/test_metrics_idempotent.py new file mode 100644 index 00000000..c7d1bbdc --- /dev/null +++ b/tests/observability/test_metrics_idempotent.py @@ -0,0 +1,77 @@ +# Copyright 2026 Firefly Software Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Process-global, idempotent metric registration (v26.06.97). + +Two ``MetricsRegistry`` instances (e.g. a second ``create_app()`` in one pytest +process) used to re-create already-registered prometheus collectors and raise +"Duplicated timeseries in CollectorRegistry". Collector caches are now module-level, +so get-or-create is idempotent across every registry in the process. +""" + +from __future__ import annotations + +import pytest + +pytest.importorskip("prometheus_client") + +from pyfly.observability.metrics import MetricsRegistry # noqa: E402 + + +def test_two_registries_share_the_same_counter() -> None: + # The exact scenario that used to raise "Duplicated timeseries". + name = "pyfly_test_idempotent_counter_total" + first = MetricsRegistry() + second = MetricsRegistry() + + c1 = first.counter(name, "first registry counter", ["route"]) + c2 = second.counter(name, "second registry counter", ["route"]) + + assert c1 is c2 + + +def test_two_registries_share_the_same_histogram() -> None: + name = "pyfly_test_idempotent_histogram_seconds" + first = MetricsRegistry() + second = MetricsRegistry() + + h1 = first.histogram(name, "first registry histogram", ["op"], buckets=(0.1, 0.5)) + h2 = second.histogram(name, "second registry histogram", ["op"], buckets=(0.1, 0.5)) + + assert h1 is h2 + + +def test_two_registries_share_the_same_gauge() -> None: + name = "pyfly_test_idempotent_gauge" + first = MetricsRegistry() + second = MetricsRegistry() + + g1 = first.gauge(name, "first registry gauge", ["pool"]) + g2 = second.gauge(name, "second registry gauge", ["pool"]) + + assert g1 is g2 + + +def test_second_registry_does_not_raise_duplicated_timeseries() -> None: + # Creating every collector type twice across two registries must not raise. + first = MetricsRegistry() + second = MetricsRegistry() + + first.counter("pyfly_test_dup_counter_total", "c", ["a"]) + first.histogram("pyfly_test_dup_histogram_seconds", "h", ["a"]) + first.gauge("pyfly_test_dup_gauge", "g", ["a"]) + + # Would previously raise ValueError("Duplicated timeseries in CollectorRegistry: ..."). + second.counter("pyfly_test_dup_counter_total", "c", ["a"]) + second.histogram("pyfly_test_dup_histogram_seconds", "h", ["a"]) + second.gauge("pyfly_test_dup_gauge", "g", ["a"]) diff --git a/tests/observability/test_observability.py b/tests/observability/test_observability.py index 7911227a..032ded15 100644 --- a/tests/observability/test_observability.py +++ b/tests/observability/test_observability.py @@ -73,7 +73,7 @@ async def my_operation(fail: bool = False) -> str: await my_operation() await my_operation() - counter = registry._counters["operation_calls"] # prometheus appends _total + counter = registry.counter("operation_calls", "Operation calls") # prometheus appends _total success = counter.labels(method="my_operation", result="success", exception="none", **{"class": ""}) assert success._value.get() == 2.0 diff --git a/tests/test_integration.py b/tests/test_integration.py index 30e94cf6..c53a85fb 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -187,7 +187,7 @@ async def tracked_create(data: dict) -> dict: return data await tracked_create(order) - counter = metrics._counters["orders_created"] + counter = metrics.counter("orders_created", "Total orders created") child = counter.labels(method="tracked_create", result="success", exception="none", **{"class": ""}) assert child._value.get() == 1.0 diff --git a/uv.lock b/uv.lock index d57ed1b7..64ea0222 100644 --- a/uv.lock +++ b/uv.lock @@ -2160,7 +2160,7 @@ wheels = [ [[package]] name = "pyfly" -version = "26.6.96" +version = "26.6.97" source = { editable = "." } dependencies = [ { name = "pydantic" },