Skip to content

Commit 953bcac

Browse files
feat: memory debug mode (RSS vs tracked heap + top allocations) (#100)
1 parent 98235b5 commit 953bcac

4 files changed

Lines changed: 137 additions & 1 deletion

File tree

chart/templates/s3proxy/configmap.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,7 @@ data:
2222
S3PROXY_DASHBOARD_UI: "true"
2323
S3PROXY_DASHBOARD_PATH: {{ .Values.dashboard.path | quote }}
2424
{{- end }}
25+
{{- /* Arbitrary extra env (e.g. memory debug mode: S3PROXY_MEMORY_DEBUG=1). */ -}}
26+
{{- range $k, $v := .Values.extraConfig }}
27+
{{ $k }}: {{ $v | quote }}
28+
{{- end }}

chart/values.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,4 +264,10 @@ topologySpreadConstraints: []
264264
podDisruptionBudget:
265265
enabled: true
266266
minAvailable: 1
267-
# maxUnavailable: 1 # Alternative to minAvailable
267+
# maxUnavailable: 1 # Alternative to minAvailable
268+
# Arbitrary extra S3PROXY_* env, injected via the config ConfigMap (envFrom).
269+
# Used for time-boxed diagnostics, e.g. memory debug mode:
270+
# extraConfig: { S3PROXY_MEMORY_DEBUG: "1" }
271+
# which logs RSS vs tracked heap + top allocations every interval (raise the pod
272+
# memory limit first so it survives long enough to dump).
273+
extraConfig: {}

s3proxy/app.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@
22

33
from __future__ import annotations
44

5+
import asyncio
6+
import contextlib
57
import logging
68
import os
9+
import signal
710
import sys
811
import time
12+
import tracemalloc
913
import uuid
1014
from collections.abc import AsyncIterator
1115
from contextlib import asynccontextmanager
@@ -17,6 +21,7 @@
1721
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
1822
from structlog.stdlib import BoundLogger
1923

24+
from . import concurrency
2025
from .client import SigV4Verifier
2126
from .config import Settings
2227
from .errors import S3Error, get_s3_error_code
@@ -65,6 +70,78 @@ def _silence_health_probe_access_logs() -> None:
6570
access_logger.addFilter(_health_probe_filter)
6671

6772

73+
def _rss_mb() -> float | None:
74+
"""Process resident set size in MB from /proc (Linux). None elsewhere."""
75+
try:
76+
with open("/proc/self/status") as f:
77+
for line in f:
78+
if line.startswith("VmRSS:"):
79+
return int(line.split()[1]) / 1024 # kB -> MB
80+
except OSError:
81+
return None
82+
return None
83+
84+
85+
def _dump_tracemalloc(limit: int = 20) -> None:
86+
"""Log real RSS vs tracked Python heap + the top live allocations by call site.
87+
88+
Diagnostic only (memory debug mode). The whole point is the gap: RSS is what
89+
the kernel OOM-kills on, while tracemalloc only sees Python allocations. A
90+
large rss-minus-tracked gap means the memory is C-level (uvicorn/httptools
91+
socket buffers, openssl, allocator retention) -- NOT something a call site in
92+
the top list will explain. A small gap means it IS Python, and the top list
93+
names the exact line. Logging both each interval settles which world we're in.
94+
"""
95+
if not tracemalloc.is_tracing():
96+
return
97+
snap = tracemalloc.take_snapshot()
98+
stats = snap.statistics("lineno")
99+
tracked_mb = sum(s.size for s in stats) / 1024 / 1024
100+
rss = _rss_mb()
101+
governed_mb = concurrency.get_active_memory() / 1024 / 1024
102+
logger.warning(
103+
"MEMORY_DEBUG",
104+
rss_mb=round(rss, 1) if rss is not None else None,
105+
tracked_mb=round(tracked_mb, 1),
106+
untracked_mb=round(rss - tracked_mb, 1) if rss is not None else None,
107+
governed_active_mb=round(governed_mb, 1),
108+
shown=limit,
109+
)
110+
for i, st in enumerate(stats[:limit], 1):
111+
fr = st.traceback[0]
112+
logger.warning(
113+
"MEMORY_DEBUG_TOP",
114+
rank=i,
115+
size_mb=round(st.size / 1024 / 1024, 2),
116+
count=st.count,
117+
loc=f"{fr.filename}:{fr.lineno}",
118+
)
119+
120+
121+
async def _periodic_tracemalloc(interval: int) -> None:
122+
while True:
123+
await asyncio.sleep(interval)
124+
_dump_tracemalloc()
125+
126+
127+
def _maybe_start_tracemalloc() -> asyncio.Task | None:
128+
"""Enable memory debug mode (RSS + tracemalloc heap dumps) when requested.
129+
130+
Gated by S3PROXY_MEMORY_DEBUG (alias: S3PROXY_TRACEMALLOC). No-op with zero
131+
overhead when unset. Used for one-pod, time-boxed profiling: dumps every
132+
S3PROXY_MEMORY_DEBUG_INTERVAL secs and on SIGUSR1.
133+
"""
134+
if not (os.environ.get("S3PROXY_MEMORY_DEBUG") or os.environ.get("S3PROXY_TRACEMALLOC")):
135+
return None
136+
frames = int(os.environ.get("S3PROXY_MEMORY_DEBUG_FRAMES", "4"))
137+
interval = int(os.environ.get("S3PROXY_MEMORY_DEBUG_INTERVAL", "15"))
138+
tracemalloc.start(frames)
139+
logger.warning("MEMORY_DEBUG_ENABLED", frames=frames, interval_sec=interval, rss_mb=_rss_mb())
140+
with contextlib.suppress(NotImplementedError, RuntimeError):
141+
asyncio.get_running_loop().add_signal_handler(signal.SIGUSR1, _dump_tracemalloc)
142+
return asyncio.create_task(_periodic_tracemalloc(interval))
143+
144+
68145
def create_lifespan(settings: Settings, credentials_store: dict[str, str]) -> AsyncIterator[None]:
69146
"""Create lifespan context manager for FastAPI app.
70147
@@ -114,8 +191,12 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
114191
app.state.stats_store = stats_store
115192
app.state.start_time = time.monotonic()
116193

194+
tracemalloc_task = _maybe_start_tracemalloc()
195+
117196
yield
118197

198+
if tracemalloc_task is not None:
199+
tracemalloc_task.cancel()
119200
await stats_store.aclose() # flush buffered samples before Redis closes
120201
await close_redis()
121202
await close_http_client()
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""Self-check for the gated tracemalloc heap-dump diagnostic.
2+
3+
Off by default (no env) => zero overhead, no tracing started. When enabled it
4+
must take a snapshot and not raise. Used for one-pod, time-boxed prod profiling
5+
to identify the live allocations driving the OOM.
6+
"""
7+
8+
import tracemalloc
9+
10+
from s3proxy import app
11+
12+
13+
def test_disabled_by_default(monkeypatch):
14+
monkeypatch.delenv("S3PROXY_MEMORY_DEBUG", raising=False)
15+
monkeypatch.delenv("S3PROXY_TRACEMALLOC", raising=False)
16+
assert app._maybe_start_tracemalloc() is None
17+
18+
19+
def test_dump_is_noop_when_not_tracing():
20+
# Should not raise even if tracemalloc isn't running.
21+
if tracemalloc.is_tracing():
22+
tracemalloc.stop()
23+
app._dump_tracemalloc() # no exception = pass
24+
25+
26+
def test_dump_reports_allocations_when_tracing():
27+
tracemalloc.start(2)
28+
try:
29+
blob = bytearray(4 * 1024 * 1024) # 4MB, should show up
30+
# Capture warning logs to confirm it emits a snapshot + top lines.
31+
events = []
32+
import structlog
33+
34+
app.logger = structlog.wrap_logger(
35+
app.logger, processors=[lambda _l, _m, ev: events.append(ev) or ev]
36+
)
37+
app._dump_tracemalloc(limit=5)
38+
assert blob is not None
39+
snap = next(e for e in events if e.get("event") == "MEMORY_DEBUG")
40+
# The debug line must carry the tracked total; rss/untracked are present
41+
# on Linux (None elsewhere) -- the gap between them is the whole point.
42+
assert "tracked_mb" in snap and "untracked_mb" in snap
43+
assert any(e.get("event") == "MEMORY_DEBUG_TOP" for e in events)
44+
finally:
45+
tracemalloc.stop()

0 commit comments

Comments
 (0)