diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index a8ae7c3c..6052a4be 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -41,8 +41,9 @@ from app.core.logging import get_logger from app.core.problem_details import EMBEDDING_AUTH_CODE, ERROR_TYPES from app.features.demo import workspace -from app.features.demo.schemas import DemoRunRequest, StepEvent, StepStatus +from app.features.demo.schemas import DemoRunRequest, StepEvent, StepStatus, UserScope from app.shared.seeder.config import ScenarioPreset +from app.shared.seeder.overrides import SeederOverrides logger = get_logger(__name__) @@ -261,6 +262,12 @@ class DemoContext: # E3 (#392) -- workspace label for plan tagging. Set alongside # workspace_id in run_pipeline's keep-branch; None on ephemeral runs. workspace_name: str | None = None + # E3 (#409) -- additive Optional start-frame config. seed_overrides is + # forwarded verbatim to /seeder/generate by step_seed (None on legacy + # frames); user_scope is the operator-selected focus pair step_status + # validates and adopts (warn + fallback to discovery when dangling). + seed_overrides: SeederOverrides | None = None + user_scope: UserScope | None = None # ============================================================================= @@ -546,12 +553,33 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult: ctx.scenario, _SeedProfile(DEMO_SEED_STORES, DEMO_SEED_PRODUCTS, DEMO_SEED_SPAN_DAYS), ) - stores, products = profile.stores, profile.products - if profile.window is not None: + # E3 (#409) -- effective dims = override-or-profile, used for BOTH the POST + # scalars and the detail line so the step card tells the truth. The nested + # object is ALSO forwarded verbatim; the seeder applies it last (wins). 
+ overrides = ctx.seed_overrides + stores = ( + overrides.stores + if overrides is not None and overrides.stores is not None + else profile.stores + ) + products = ( + overrides.products + if overrides is not None and overrides.products is not None + else profile.products + ) + if overrides is not None and overrides.window_days is not None: + # The DemoRunRequest validator guarantees window_days is never set on + # the calendar-pinned holiday_rush preset, so today-anchored is safe. + seed_end = datetime.now(UTC).date() + seed_start = seed_end - timedelta(days=overrides.window_days) + elif profile.window is not None: seed_start, seed_end = profile.window else: seed_end = datetime.now(UTC).date() seed_start = seed_end - timedelta(days=profile.span_days) + # Scalar sparsity stays 0.0 (preserves preset character per the + # `if params.sparsity > 0` guard); overrides.sparsity is the only way the + # demo overrides sparsity. body = await client.request( "seed", "POST", @@ -565,6 +593,11 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult: "end_date": seed_end.isoformat(), "sparsity": 0.0, "dry_run": False, + **( + {"overrides": overrides.model_dump(exclude_none=True)} + if overrides is not None + else {} + ), }, ) raw_records: dict[str, Any] = body.get("records_created", {}) @@ -572,10 +605,21 @@ async def step_seed(ctx: DemoContext, client: _Client) -> StepResult: ctx.seed_records = records # GenerateResult.records_created uses "sales" (singular), not "sales_daily". 
sales = records.get("sales", records.get("sales_daily", 0)) + overrides_applied = ( + sorted(overrides.model_dump(exclude_none=True)) if overrides is not None else [] + ) + detail = f"{ctx.scenario.value}: {stores} stores x {products} products, {sales} sales rows" + if overrides_applied: + detail += f" (overrides: {', '.join(overrides_applied)})" return ( "pass", - f"{ctx.scenario.value}: {stores} stores x {products} products, {sales} sales rows", - {"records_created": records, "scenario": ctx.scenario.value}, + detail, + { + "records_created": records, + "scenario": ctx.scenario.value, + # E3 (#409) -- additive echo of the applied override knobs. + "overrides_applied": overrides_applied, + }, ) @@ -593,6 +637,46 @@ async def step_status(ctx: DemoContext, client: _Client) -> StepResult: return ("fail", "no date_range in /seeder/status -- seed the database first", {}) ctx.date_start = date.fromisoformat(raw_start) ctx.date_end = date.fromisoformat(raw_end) + sales = body.get("sales", 0) + + # E3 (#409) -- operator-selected focus pair: validate both ids against the + # dimensions endpoints and adopt them. A dangling pair (e.g. after a + # reset+reseed re-issued ids -- sequences never reset) WARNS and falls back + # to discovery so a replayed reset=true workspace can never hard-fail here. 
+ scope_warning = "" + if ctx.user_scope is not None: + try: + await client.request( + "status[scope-store]", + "GET", + f"/dimensions/stores/{ctx.user_scope.store_id}", + ) + await client.request( + "status[scope-product]", + "GET", + f"/dimensions/products/{ctx.user_scope.product_id}", + ) + except _StepError: + scope_warning = ( + f"user_scope (store={ctx.user_scope.store_id}, " + f"product={ctx.user_scope.product_id}) not found -- " + "fell back to discovered pair; " + ) + else: + ctx.store_id = ctx.user_scope.store_id + ctx.product_id = ctx.user_scope.product_id + return ( + "pass", + f"date_range={raw_start}..{raw_end} sales={sales} " + f"store_id={ctx.store_id} product_id={ctx.product_id} (user-selected)", + { + "store_id": ctx.store_id, + "product_id": ctx.product_id, + "date_range_start": raw_start, + "date_range_end": raw_end, + "user_scope_applied": True, + }, + ) stores_body = await client.request( "status[stores]", "GET", "/dimensions/stores?page=1&page_size=1" @@ -617,16 +701,19 @@ async def step_status(ctx: DemoContext, client: _Client) -> StepResult: ctx.store_id = store_id_raw ctx.product_id = product_id_raw - sales = body.get("sales", 0) return ( - "pass", - f"date_range={raw_start}..{raw_end} sales={sales} " + # E3 (#409) -- "warn" (never "fail") when a requested scope dangled: + # only "fail" stops the run, so the pipeline proceeds on the + # discovered pair with the divergence visible on the step card. + "warn" if scope_warning else "pass", + f"{scope_warning}date_range={raw_start}..{raw_end} sales={sales} " f"store_id={ctx.store_id} product_id={ctx.product_id}", { "store_id": ctx.store_id, "product_id": ctx.product_id, "date_range_start": raw_start, "date_range_end": raw_end, + "user_scope_applied": False, }, ) @@ -2648,6 +2735,9 @@ async def run_pipeline(app: FastAPI, req: DemoRunRequest) -> AsyncIterator[StepE skip_seed=req.skip_seed, reset=req.reset, scenario=req.scenario, + # E3 (#409) -- thread the validated start-frame config verbatim. 
+ seed_overrides=req.seed_overrides, + user_scope=req.user_scope, ) # E1 (#390) -- create the workspace row BEFORE the first step executes so # even an early failure records the run config. create_workspace is diff --git a/app/features/demo/schemas.py b/app/features/demo/schemas.py index 58daf891..d5aa78ea 100644 --- a/app/features/demo/schemas.py +++ b/app/features/demo/schemas.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from app.shared.seeder.config import ScenarioPreset +from app.shared.seeder.overrides import SeederOverrides # One pipeline step's outcome. StepStatus = Literal["running", "pass", "fail", "skip", "warn"] @@ -26,6 +27,22 @@ def _utc_now() -> datetime: return datetime.now(UTC) +class UserScope(BaseModel): + """Operator-selected (store, product) focus pair (E3, issue #409). + + Ids are REAL discovered ids (Postgres sequences never reset -- ids are not + 1-based); ``step_status`` validates them against ``/dimensions/*/{id}`` + and warn-falls-back to discovery when the pair dangles (e.g. after a + reset+reseed re-issued ids). ``extra="forbid"`` keeps the slot schema + closed; additive keys need a documented schema change. + """ + + model_config = ConfigDict(strict=True, extra="forbid") + + store_id: int = Field(..., ge=1, description="Real store id from /dimensions/stores.") + product_id: int = Field(..., ge=1, description="Real product id from /dimensions/products.") + + class DemoRunRequest(BaseModel): """Request body for ``POST /demo/run`` and the ``WS /demo/stream`` start frame. @@ -34,7 +51,10 @@ class DemoRunRequest(BaseModel): override -- there is no ``date`` / ``datetime`` / ``UUID`` / ``Decimal`` field (see ``.claude/rules/security-patterns.md`` and ``test_strict_mode_policy.py``). The sole exception is ``scenario``, whose - enum-on-the-wire form carries its own override (PRP-38). + enum-on-the-wire form carries its own override (PRP-38). 
The nested + ``seed_overrides`` / ``user_scope`` models are themselves all-JSON-native + and validate from the JSON-parsed dict under the parent's strict mode + (runtime-verified on pydantic 2.12.5 -- E3 #409). """ model_config = ConfigDict(strict=True) @@ -85,6 +105,25 @@ class DemoRunRequest(BaseModel): pattern=r"^[0-9a-f]{32}$", # uuid4().hex shape of workspace_id description="workspace_id this run replays; requires preservation='keep'.", ) + # E3 (#409): curated seed overrides + operator-selected focus pair. Both + # additive Optional with None defaults so legacy frames stay byte-identical. + # The nested models carry their own ConfigDict(strict=True, extra="forbid"). + seed_overrides: SeederOverrides | None = Field( + default=None, + description=( + "Curated seeder overrides (allow-listed knobs); requires " + "skip_seed=false (Re-seed first). Forwarded verbatim to " + "POST /seeder/generate and recorded on a kept workspace row." + ), + ) + user_scope: UserScope | None = Field( + default=None, + description=( + "Operator-selected (store, product) focus pair the pipeline models " + "instead of the auto-discovered first pair; validated by the status " + "step (warn + fallback to discovery on a dangling pair)." + ), + ) @model_validator(mode="after") def _workspace_name_requires_keep(self) -> DemoRunRequest: @@ -100,6 +139,34 @@ def _replayed_from_requires_keep(self) -> DemoRunRequest: raise ValueError("replayed_from_workspace_id requires preservation='keep'") return self + @model_validator(mode="after") + def _seed_overrides_require_reseed(self) -> DemoRunRequest: + """Reject overrides on a run that skips the seed step (silent no-op trap). + + An empty overrides object (``{}`` on the wire) normalizes to ``None`` + so downstream code has a single "no overrides" representation. 
+ """ + if self.seed_overrides is not None and self.seed_overrides.is_empty(): + self.seed_overrides = None + if self.seed_overrides is not None and self.skip_seed: + raise ValueError("seed_overrides requires skip_seed=false (Re-seed first)") + return self + + @model_validator(mode="after") + def _window_days_forbidden_on_holiday_rush(self) -> DemoRunRequest: + """Reject window_days on the calendar-pinned holiday_rush preset. + + The preset's HolidayConfig spikes are fixed 2024 dates -- a shifted + window would silently drop every holiday spike, so this fails loudly. + """ + if ( + self.seed_overrides is not None + and self.seed_overrides.window_days is not None + and self.scenario is ScenarioPreset.HOLIDAY_RUSH + ): + raise ValueError("window_days cannot override the calendar-pinned holiday_rush window") + return self + class WorkspaceUpdateRequest(BaseModel): """Partial lifecycle update for ``PATCH /demo/workspaces/{workspace_id}``. @@ -266,6 +333,15 @@ class WorkspaceListItem(BaseModel): default=None, description="workspace_id this run replayed (soft reference; may dangle).", ) + # E3 (#409) -- the two replay-relevant story slots live on the LIST item + # (not detail-only): the frontend Replay reads list rows, and the + # replay-verbatim contract includes both slots. + seed_overrides: dict[str, Any] | None = Field( + default=None, description="Story slot (E3 #409): seeder-override payload." + ) + user_scope: dict[str, Any] | None = Field( + default=None, description="Story slot (E3 #409): operator-selected focus." + ) class WorkspaceDetailResponse(WorkspaceListItem): @@ -285,12 +361,8 @@ class WorkspaceDetailResponse(WorkspaceListItem): config_schema_version: int = Field( default=1, description="Version of the config + story-slot schema." ) - seed_overrides: dict[str, Any] | None = Field( - default=None, description="Story slot (E3 #409 writes): seeder-override payload." 
- ) - user_scope: dict[str, Any] | None = Field( - default=None, description="Story slot (E3 #409 writes): operator-selected focus." - ) + # E3 (#409) -- seed_overrides / user_scope moved UP to WorkspaceListItem + # (replay reads list rows); the four remaining story slots stay detail-only. approval_events: list[dict[str, Any]] | None = Field( default=None, description="Story slot (E5 #411 writes): HITL approval audit." ) diff --git a/app/features/demo/tests/test_pipeline.py b/app/features/demo/tests/test_pipeline.py index 197c2842..1fc4c1b4 100644 --- a/app/features/demo/tests/test_pipeline.py +++ b/app/features/demo/tests/test_pipeline.py @@ -16,8 +16,9 @@ from fastapi import FastAPI from app.features.demo import pipeline -from app.features.demo.schemas import DemoRunRequest +from app.features.demo.schemas import DemoRunRequest, UserScope from app.shared.seeder.config import ScenarioPreset +from app.shared.seeder.overrides import SeederOverrides # A bare app instance -- the fake clients ignore it; it only satisfies the # run_pipeline(app: FastAPI, ...) signature. @@ -2454,3 +2455,190 @@ async def test_step_seed_retail_standard_posts_demo_scaled_profile(): # sparsity stays 0.0 — the seeder override fires only when > 0, which is # what preserves the sparse preset's 50%-missing character. 
assert body["sparsity"] == 0.0 + + +# ============================================================================= +# E3 (#409) — seed overrides + user scope +# ============================================================================= + + +async def test_step_seed_forwards_seed_overrides(): + """E3 (#409) — the nested overrides ride the /seeder/generate body verbatim, + the POST scalars echo the effective dims, and scalar sparsity stays 0.0.""" + ctx = pipeline.DemoContext( + seed=42, + skip_seed=False, + reset=False, + scenario=ScenarioPreset.DEMO_MINIMAL, + seed_overrides=SeederOverrides(stores=8, products=20, promotion_intensity=0.3), + ) + client = _RecordingClient( + None, + responses={("POST", "/seeder/generate"): {"records_created": {"sales": 1}}}, + ) + status, detail, data = await pipeline.step_seed(ctx, _as_client(client)) + assert status == "pass" + body = client.calls[0][2] + assert body is not None + assert body["overrides"] == {"stores": 8, "products": 20, "promotion_intensity": 0.3} + # Effective dims on the scalars + the detail line (the card tells the truth). 
+ assert body["stores"] == 8 + assert body["products"] == 20 + assert body["sparsity"] == 0.0 # preset-character guard; nested wins anyway + assert "8 stores x 20 products" in detail + assert "overrides: products, promotion_intensity, stores" in detail + assert data["overrides_applied"] == ["products", "promotion_intensity", "stores"] + + +async def test_step_seed_without_overrides_is_legacy_identical(): + """E3 (#409) — a legacy ctx posts NO overrides key (byte-identical body).""" + ctx = pipeline.DemoContext( + seed=42, skip_seed=False, reset=False, scenario=ScenarioPreset.DEMO_MINIMAL + ) + client = _RecordingClient( + None, + responses={("POST", "/seeder/generate"): {"records_created": {"sales": 1}}}, + ) + status, _detail, data = await pipeline.step_seed(ctx, _as_client(client)) + assert status == "pass" + body = client.calls[0][2] + assert body is not None + assert "overrides" not in body + assert body["stores"] == 3 # demo_minimal profile + assert data["overrides_applied"] == [] + + +async def test_step_seed_window_days_overrides_profile_window(): + """E3 (#409) — window_days drives a today-anchored window of that length.""" + ctx = pipeline.DemoContext( + seed=42, + skip_seed=False, + reset=False, + scenario=ScenarioPreset.DEMO_MINIMAL, + seed_overrides=SeederOverrides(window_days=120), + ) + client = _RecordingClient( + None, + responses={("POST", "/seeder/generate"): {"records_created": {"sales": 1}}}, + ) + status, _detail, _data = await pipeline.step_seed(ctx, _as_client(client)) + assert status == "pass" + body = client.calls[0][2] + assert body is not None + start = date.fromisoformat(body["start_date"]) + end = date.fromisoformat(body["end_date"]) + assert end - start == timedelta(days=120) + assert body["overrides"] == {"window_days": 120} + + +def _status_discovery_responses() -> dict[tuple[str, str], Any]: + """Canned responses for the legacy first-pair discovery path.""" + return { + ("GET", "/seeder/status"): { + "date_range_start": 
"2026-01-01", + "date_range_end": "2026-03-31", + "sales": 900, + }, + ("GET", "/dimensions/stores?page=1&page_size=1"): {"stores": [{"id": 4}]}, + ("GET", "/dimensions/products?page=1&page_size=1"): {"products": [{"id": 9}]}, + } + + +async def test_step_status_honors_user_scope(): + """E3 (#409) — a valid pair is validated via GET-by-id and adopted.""" + ctx = pipeline.DemoContext( + seed=42, + skip_seed=True, + reset=False, + scenario=ScenarioPreset.DEMO_MINIMAL, + user_scope=UserScope(store_id=12, product_id=47), + ) + client = _RecordingClient( + None, + responses={ + ("GET", "/seeder/status"): { + "date_range_start": "2026-01-01", + "date_range_end": "2026-03-31", + "sales": 900, + }, + ("GET", "/dimensions/stores/12"): {"id": 12, "code": "S012"}, + ("GET", "/dimensions/products/47"): {"id": 47, "sku": "P047"}, + }, + ) + status, detail, data = await pipeline.step_status(ctx, _as_client(client)) + assert status == "pass" + assert ctx.store_id == 12 + assert ctx.product_id == 47 + assert "(user-selected)" in detail + assert data["user_scope_applied"] is True + # Both GET-by-id validations were issued; no discovery call happened. 
+ paths = [path for _method, path, _body in client.calls] + assert "/dimensions/stores/12" in paths + assert "/dimensions/products/47" in paths + assert "/dimensions/stores?page=1&page_size=1" not in paths + + +async def test_step_status_dangling_scope_warns_and_falls_back(): + """E3 (#409) — a 404 pair WARNS (never fails) and discovery takes over.""" + responses = _status_discovery_responses() + responses[("GET", "/dimensions/products/47")] = {"id": 47} + ctx = pipeline.DemoContext( + seed=42, + skip_seed=True, + reset=False, + scenario=ScenarioPreset.DEMO_MINIMAL, + user_scope=UserScope(store_id=12, product_id=47), + ) + client = _RecordingClient( + None, + responses=responses, + errors={ + ("GET", "/dimensions/stores/12"): pipeline._StepError( + "status[scope-store]", 404, {"title": "Not Found"} + ), + }, + ) + status, detail, data = await pipeline.step_status(ctx, _as_client(client)) + assert status == "warn" + assert ctx.store_id == 4 # discovered pair + assert ctx.product_id == 9 + assert "user_scope (store=12, product=47) not found" in detail + assert data["user_scope_applied"] is False + + +async def test_step_status_without_scope_unchanged(): + """E3 (#409) — the legacy discovery path is byte-identical (pass, no warn).""" + ctx = pipeline.DemoContext( + seed=42, skip_seed=True, reset=False, scenario=ScenarioPreset.DEMO_MINIMAL + ) + client = _RecordingClient(None, responses=_status_discovery_responses()) + status, detail, data = await pipeline.step_status(ctx, _as_client(client)) + assert status == "pass" + assert ctx.store_id == 4 + assert ctx.product_id == 9 + assert "user_scope" not in detail + assert data["user_scope_applied"] is False + + +async def test_run_pipeline_threads_e3_fields(monkeypatch): + """E3 (#409) — run_pipeline threads seed_overrides/user_scope into ctx.""" + captured: dict[str, Any] = {} + + async def _capturing_precheck(ctx: Any, _client: Any) -> Any: + captured["seed_overrides"] = ctx.seed_overrides + captured["user_scope"] = 
ctx.user_scope + return ("fail", "stop after capture", {}) + + monkeypatch.setattr(pipeline, "step_precheck", _capturing_precheck) + monkeypatch.setattr(pipeline, "_Client", _build_fake_client("unused", {})) + + req = DemoRunRequest.model_validate( + { + "skip_seed": False, + "seed_overrides": {"stores": 8, "noise_sigma": 0.25}, + "user_scope": {"store_id": 12, "product_id": 47}, + } + ) + _events = [e async for e in pipeline.run_pipeline(app=_FAKE_APP, req=req)] + assert captured["seed_overrides"] == SeederOverrides(stores=8, noise_sigma=0.25) + assert captured["user_scope"] == UserScope(store_id=12, product_id=47) diff --git a/app/features/demo/tests/test_schemas.py b/app/features/demo/tests/test_schemas.py index 866f708c..8019d219 100644 --- a/app/features/demo/tests/test_schemas.py +++ b/app/features/demo/tests/test_schemas.py @@ -10,6 +10,7 @@ DemoRunRequest, DemoRunResult, StepEvent, + UserScope, WorkspaceDetailResponse, WorkspaceListItem, WorkspaceListResponse, @@ -143,6 +144,95 @@ def test_demo_run_request_replayed_from_pattern_rejected(): ) +# ============================================================================= +# E3 (#409) -- seed_overrides + user_scope (advanced seed config + focus pair) +# ============================================================================= + + +def test_demo_run_request_e3_field_defaults(): + """E3 (#409) -- defaults None; a legacy 4-field frame stays byte-identical.""" + req = DemoRunRequest.model_validate( + {"seed": 7, "reset": False, "skip_seed": True, "scenario": "demo_minimal"} + ) + assert req.seed_overrides is None + assert req.user_scope is None + + +def test_demo_run_request_seed_overrides_json_path(): + """E3 (#409) -- the JSON wire form (validate_python on a parsed dict, the + path FastAPI uses) accepts a nested overrides object on a re-seed run.""" + req = DemoRunRequest.model_validate( + {"skip_seed": False, "seed_overrides": {"stores": 8, "promotion_intensity": 0.3}} + ) + assert req.seed_overrides is 
not None + assert req.seed_overrides.stores == 8 + assert req.seed_overrides.promotion_intensity == 0.3 + + +def test_demo_run_request_seed_overrides_require_reseed(): + """E3 (#409) -- overrides on a skip_seed run would be a silent no-op.""" + with pytest.raises(ValidationError): + DemoRunRequest.model_validate({"skip_seed": True, "seed_overrides": {"stores": 8}}) + # skip_seed defaults to True -- omitting it must also reject. + with pytest.raises(ValidationError): + DemoRunRequest.model_validate({"seed_overrides": {"stores": 8}}) + + +def test_demo_run_request_empty_seed_overrides_normalizes_to_none(): + """E3 (#409) -- {} on the wire collapses to None (single no-overrides form), + and is therefore legal even on a skip_seed run.""" + req = DemoRunRequest.model_validate({"skip_seed": True, "seed_overrides": {}}) + assert req.seed_overrides is None + + +def test_demo_run_request_window_days_rejected_on_holiday_rush(): + """E3 (#409) -- holiday_rush is calendar-pinned; window_days fails loudly.""" + with pytest.raises(ValidationError): + DemoRunRequest.model_validate( + { + "skip_seed": False, + "scenario": "holiday_rush", + "seed_overrides": {"window_days": 120}, + } + ) + # The same knob is fine on a today-anchored preset. 
+ req = DemoRunRequest.model_validate( + { + "skip_seed": False, + "scenario": "retail_standard", + "seed_overrides": {"window_days": 120}, + } + ) + assert req.seed_overrides is not None + assert req.seed_overrides.window_days == 120 + + +def test_demo_run_request_seed_overrides_unknown_knob_rejected(): + """E3 (#409) -- the nested extra='forbid' allow-list holds on the demo path.""" + with pytest.raises(ValidationError): + DemoRunRequest.model_validate({"skip_seed": False, "seed_overrides": {"bogus_knob": 1}}) + + +def test_demo_run_request_user_scope_json_path(): + """E3 (#409) -- user_scope accepts a real id pair; works with skip_seed.""" + req = DemoRunRequest.model_validate({"user_scope": {"store_id": 12, "product_id": 47}}) + assert req.user_scope is not None + assert req.user_scope.store_id == 12 + assert req.user_scope.product_id == 47 + + +def test_user_scope_rejects_extra_keys_and_bad_ids(): + """E3 (#409) -- closed schema; ids are ge=1; strict rejects string ints.""" + with pytest.raises(ValidationError): + UserScope.model_validate({"store_id": 1, "product_id": 1, "extra": True}) + with pytest.raises(ValidationError): + UserScope.model_validate({"store_id": 0, "product_id": 1}) + with pytest.raises(ValidationError): + UserScope.model_validate({"store_id": 1}) # product_id required + with pytest.raises(ValidationError): + UserScope.model_validate({"store_id": "1", "product_id": 1}) + + # ============================================================================= # E1 (#407) -- WorkspaceUpdateRequest (PATCH body) # ============================================================================= @@ -393,6 +483,28 @@ def test_workspace_detail_passes_e1_fields_through(): assert detail.job_ids == ["job-1", "job-2"] +def test_workspace_list_item_exposes_e3_slots(): + """E3 (#409) -- seed_overrides/user_scope live on the LIST item (replay + reads list rows), defaulting to None on rows without them.""" + bare = 
WorkspaceListItem.model_validate(_orm_like_workspace_row()) + assert bare.seed_overrides is None + assert bare.user_scope is None + + slotted = WorkspaceListItem.model_validate( + _orm_like_workspace_row( + seed_overrides={"stores": 8, "noise_sigma": 0.25}, + user_scope={"store_id": 12, "product_id": 47}, + ) + ) + assert slotted.seed_overrides == {"stores": 8, "noise_sigma": 0.25} + assert slotted.user_scope == {"store_id": 12, "product_id": 47} + # Detail inherits the same exposure. + detail = WorkspaceDetailResponse.model_validate( + _orm_like_workspace_row(seed_overrides={"sparsity": 0.3}) + ) + assert detail.seed_overrides == {"sparsity": 0.3} + + def test_workspace_list_response_shape(): """E4 (#393) -- page shape mirrors the scenarios list (items + total).""" item = WorkspaceListItem.model_validate(_orm_like_workspace_row()) diff --git a/app/features/demo/tests/test_workspace.py b/app/features/demo/tests/test_workspace.py index cb28dea2..fcef7115 100644 --- a/app/features/demo/tests/test_workspace.py +++ b/app/features/demo/tests/test_workspace.py @@ -76,6 +76,37 @@ async def test_create_workspace_persists_config(db_session: AsyncSession) -> Non assert row.result_summary is None +async def test_create_workspace_persists_e3_slots(db_session: AsyncSession) -> None: + """E3 (#409) -- seed_overrides/user_scope land in the story slots, sparse.""" + workspace_id = await workspace.create_workspace( + _keep_request( + skip_seed=False, + seed_overrides={"stores": 8, "promotion_intensity": 0.3}, + user_scope={"store_id": 12, "product_id": 47}, + ) + ) + assert workspace_id is not None + + row = await workspace.get_workspace(db_session, workspace_id) + assert row is not None + # Sparse JSON: only the operator-set knobs appear. 
+ assert row.seed_overrides == {"stores": 8, "promotion_intensity": 0.3} + assert row.user_scope == {"store_id": 12, "product_id": 47} + + +async def test_create_workspace_without_e3_fields_persists_nulls( + db_session: AsyncSession, +) -> None: + """E3 (#409) -- a legacy keep-run stores NULL slots (never {}).""" + workspace_id = await workspace.create_workspace(_keep_request()) + assert workspace_id is not None + + row = await workspace.get_workspace(db_session, workspace_id) + assert row is not None + assert row.seed_overrides is None + assert row.user_scope is None + + async def test_finalize_workspace_completed(db_session: AsyncSession) -> None: """finalize(failed=False) settles to completed with collected ids.""" workspace_id = await workspace.create_workspace(_keep_request()) diff --git a/app/features/demo/workspace.py b/app/features/demo/workspace.py index 364b64fd..ca3002df 100644 --- a/app/features/demo/workspace.py +++ b/app/features/demo/workspace.py @@ -112,6 +112,21 @@ async def create_workspace(req: DemoRunRequest) -> str | None: # E1 (#407): replay provenance, recorded verbatim (soft # reference -- no existence check; dangles are designed). replayed_from_workspace_id=req.replayed_from_workspace_id, + # E3 (#409): the two replay-relevant story slots, recorded + # at create time (the REQUESTED config -- the effective + # grain lands separately on store_id/product_id at + # finalize, so a fallen-back scope stays visible). Sparse + # JSON: only operator-set knobs appear; never {}. 
+ seed_overrides=( + req.seed_overrides.model_dump(mode="json", exclude_none=True) + if req.seed_overrides is not None + else None + ), + user_scope=( + req.user_scope.model_dump(mode="json") + if req.user_scope is not None + else None + ), ) ) await db.commit() diff --git a/app/features/seeder/schemas.py b/app/features/seeder/schemas.py index 20a22dc3..42b697c1 100644 --- a/app/features/seeder/schemas.py +++ b/app/features/seeder/schemas.py @@ -7,6 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from app.shared.seeder.config import default_seed_end_date, default_seed_start_date +from app.shared.seeder.overrides import SeederOverrides VALID_CHANNELS: frozenset[str] = frozenset({"in_store", "online", "click_collect", "wholesale"}) """Allow-list for ``sales_daily.channel`` — mirrors the SQL CHECK.""" @@ -257,6 +258,17 @@ class GenerateParams(BaseModel): ), ) + # E3 (#409) — curated nested overrides. Absent field = byte-identical + # legacy behavior (the same promise as the Phase 1/2 blocks above). + overrides: SeederOverrides | None = Field( + default=None, + description=( + "Curated nested overrides (E3 #409); applied LAST — wins over the " + "scalar stores/products/sparsity. Unknown knobs are rejected " + "(extra=forbid). Absent = byte-identical legacy behavior." 
+ ), + ) + @model_validator(mode="after") def _validate_date_range(self) -> "GenerateParams": """Reject inverted date ranges with a clear message.""" diff --git a/app/features/seeder/service.py b/app/features/seeder/service.py index 87b20709..341afdef 100644 --- a/app/features/seeder/service.py +++ b/app/features/seeder/service.py @@ -52,6 +52,7 @@ from app.shared.seeder.generators.lifecycle import LifecycleGenerator from app.shared.seeder.generators.replenishment import ReplenishmentGenerator from app.shared.seeder.generators.returns import ReturnsGenerator +from app.shared.seeder.overrides import SeederOverrides logger = get_logger(__name__) @@ -199,6 +200,49 @@ def _apply_phase2_overrides(config: SeederConfig, params: schemas.GenerateParams ) +def _apply_seed_overrides(config: SeederConfig, overrides: SeederOverrides | None) -> None: + """Apply the curated nested overrides LAST -- wins over scalar params (E3, #409). + + Mutates ``config`` in place (the ``_apply_phaseN_overrides`` pattern). + ``dataclasses.replace`` is field-precise: preset-customized sibling fields + (region/category lists, ``random_gaps_*``) survive every knob. ``None`` + (or an all-``None`` object) is a no-op so legacy bodies stay + byte-identical. + """ + if overrides is None: + return + if overrides.stores is not None or overrides.products is not None: + config.dimensions = replace( + config.dimensions, + stores=overrides.stores if overrides.stores is not None else config.dimensions.stores, + products=( + overrides.products if overrides.products is not None else config.dimensions.products + ), + ) + if overrides.window_days is not None: + # Recompute the window length from the (scalar-or-default) end_date; + # end_date itself is untouched. 
+ config.start_date = config.end_date - timedelta(days=overrides.window_days) + if overrides.sparsity is not None: + config.sparsity = replace(config.sparsity, missing_combinations_pct=overrides.sparsity) + if overrides.promotion_intensity is not None or overrides.stockout_intensity is not None: + config.retail = replace( + config.retail, + promotion_probability=( + overrides.promotion_intensity + if overrides.promotion_intensity is not None + else config.retail.promotion_probability + ), + stockout_probability=( + overrides.stockout_intensity + if overrides.stockout_intensity is not None + else config.retail.stockout_probability + ), + ) + if overrides.noise_sigma is not None: + config.time_series = replace(config.time_series, noise_sigma=overrides.noise_sigma) + + def _build_config_from_params(params: schemas.GenerateParams) -> SeederConfig: """Build SeederConfig from API parameters. @@ -239,6 +283,9 @@ def _build_config_from_params(params: schemas.GenerateParams) -> SeederConfig: _apply_phase1_overrides(config, params) _apply_phase2_overrides(config, params) + # E3 (#409) — the curated nested overrides apply LAST so they win over + # the scalar stores/products/sparsity params above. 
+ _apply_seed_overrides(config, params.overrides) settings = get_settings() config.batch_size = settings.seeder_batch_size diff --git a/app/features/seeder/tests/test_routes.py b/app/features/seeder/tests/test_routes.py index f1733142..7da2e947 100644 --- a/app/features/seeder/tests/test_routes.py +++ b/app/features/seeder/tests/test_routes.py @@ -163,6 +163,52 @@ def test_generate_validation_error(self, client, mock_settings): assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + def test_generate_with_overrides(self, client, mock_settings, mock_db): + """E3 (#409) — the nested overrides object is accepted (201).""" + mock_result = schemas.GenerateResult( + success=True, + records_created={"stores": 8, "products": 20, "sales": 5000}, + duration_seconds=12.0, + message="Success", + seed=42, + ) + + with patch( + "app.features.seeder.routes.service.generate_data", return_value=mock_result + ) as mock_generate: + response = client.post( + "/seeder/generate", + json={ + "scenario": "demo_minimal", + "overrides": {"stores": 8, "promotion_intensity": 0.3}, + }, + ) + + assert response.status_code == status.HTTP_201_CREATED + # The validated params object carries the parsed nested model. 
+ params = mock_generate.call_args.args[1] + assert params.overrides is not None + assert params.overrides.stores == 8 + assert params.overrides.promotion_intensity == 0.3 + + def test_generate_overrides_out_of_bounds_rejected(self, client, mock_settings): + """E3 (#409) — an out-of-bounds knob is a 422.""" + response = client.post( + "/seeder/generate", + json={"overrides": {"stores": 0}}, + ) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + def test_generate_overrides_unknown_knob_rejected(self, client, mock_settings): + """E3 (#409) — extra='forbid' rejects knobs outside the allow-list.""" + response = client.post( + "/seeder/generate", + json={"overrides": {"bogus_knob": 1}}, + ) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + def test_generate_blocked_in_production(self, client, mock_db): """Test generate is blocked in production.""" with patch("app.features.seeder.routes.get_settings") as mock_settings: diff --git a/app/features/seeder/tests/test_service.py b/app/features/seeder/tests/test_service.py index f21aa28b..7a29058d 100644 --- a/app/features/seeder/tests/test_service.py +++ b/app/features/seeder/tests/test_service.py @@ -7,6 +7,7 @@ from app.features.seeder import schemas, service from app.shared.seeder.config import DEMO_MINIMAL_SPAN_DAYS, default_seed_end_date +from app.shared.seeder.overrides import SeederOverrides class TestListScenarios: @@ -198,6 +199,121 @@ def test_custom_scenario_preserves_holiday_list(self): assert config.time_series.monthly_seasonality == {10: 1.0, 11: 1.3, 12: 1.8} +class TestApplySeedOverrides: + """Tests for the E3 (#409) curated nested overrides layer.""" + + def test_each_knob_maps_to_its_config_field(self): + """Every knob lands on the documented SeederConfig target.""" + params = schemas.GenerateParams( + scenario="demo_minimal", + overrides=SeederOverrides( + stores=8, + products=20, + sparsity=0.3, + promotion_intensity=0.3, + stockout_intensity=0.1, + 
noise_sigma=0.25, + ), + ) + config = service._build_config_from_params(params) + + assert config.dimensions.stores == 8 + assert config.dimensions.products == 20 + assert config.sparsity.missing_combinations_pct == 0.3 + assert config.retail.promotion_probability == 0.3 + assert config.retail.stockout_probability == 0.1 + assert config.time_series.noise_sigma == 0.25 + + def test_overrides_win_over_scalar_params(self): + """Nested overrides apply LAST and beat the legacy scalar params.""" + params = schemas.GenerateParams( + scenario="demo_minimal", + stores=3, + products=10, + sparsity=0.5, + overrides=SeederOverrides(stores=8, products=20, sparsity=0.2), + ) + config = service._build_config_from_params(params) + + assert config.dimensions.stores == 8 + assert config.dimensions.products == 20 + assert config.sparsity.missing_combinations_pct == 0.2 + + def test_window_days_recomputes_start_from_end(self): + """window_days derives start_date from the request's end_date.""" + params = schemas.GenerateParams( + scenario="demo_minimal", + start_date=date(2025, 1, 1), + end_date=date(2025, 6, 30), + overrides=SeederOverrides(window_days=120), + ) + config = service._build_config_from_params(params) + + assert config.end_date == date(2025, 6, 30) + assert config.start_date == date(2025, 6, 30) - timedelta(days=120) + + def test_sparse_preset_gap_character_survives_sparsity_override(self): + """dataclasses.replace preserves the preset's random_gaps_* siblings.""" + baseline = service._build_config_from_params(schemas.GenerateParams(scenario="sparse")) + assert baseline.sparsity.random_gaps_per_series > 0 # preset character + + params = schemas.GenerateParams( + scenario="sparse", + overrides=SeederOverrides(sparsity=0.2), + ) + config = service._build_config_from_params(params) + + assert config.sparsity.missing_combinations_pct == 0.2 + assert config.sparsity.random_gaps_per_series == baseline.sparsity.random_gaps_per_series + assert config.sparsity.gap_min_days == 
baseline.sparsity.gap_min_days + assert config.sparsity.gap_max_days == baseline.sparsity.gap_max_days + + def test_partial_overrides_leave_other_fields_untouched(self): + """Setting one retail knob preserves the preset's other retail fields.""" + baseline = service._build_config_from_params( + schemas.GenerateParams(scenario="stockout_heavy") + ) + params = schemas.GenerateParams( + scenario="stockout_heavy", + overrides=SeederOverrides(promotion_intensity=0.4), + ) + config = service._build_config_from_params(params) + + assert config.retail.promotion_probability == 0.4 + assert config.retail.stockout_probability == baseline.retail.stockout_probability + assert config.retail.promotion_lift == baseline.retail.promotion_lift + + def test_no_overrides_is_byte_identical_regression(self): + """A body without overrides produces the exact config it does today.""" + + def _params(**extra: object) -> schemas.GenerateParams: + body: dict[str, object] = { + "scenario": "demo_minimal", + "seed": 42, + "stores": 3, + "products": 10, + "start_date": "2025-01-01", + "end_date": "2025-03-31", + "sparsity": 0.0, + } + body.update(extra) + return schemas.GenerateParams.model_validate(body) + + legacy = service._build_config_from_params(_params()) + with_none = service._build_config_from_params(_params(overrides=None)) + + assert legacy == with_none + + def test_empty_overrides_object_is_noop(self): + """An all-None overrides object changes nothing.""" + base = service._build_config_from_params(schemas.GenerateParams(scenario="demo_minimal")) + with_empty = service._build_config_from_params( + schemas.GenerateParams(scenario="demo_minimal", overrides=SeederOverrides()) + ) + + assert base == with_empty + + class TestGetStatus: """Tests for get_status function.""" diff --git a/app/shared/seeder/overrides.py b/app/shared/seeder/overrides.py new file mode 100644 index 00000000..11d8ed9f --- /dev/null +++ b/app/shared/seeder/overrides.py @@ -0,0 +1,90 @@ +"""Curated, allow-listed 
seed-override schema (E3, issue #409). + +Shared between the seeder slice (``GenerateParams.overrides``) and the demo +slice (``DemoRunRequest.seed_overrides``) -- ``app/shared`` is the sanctioned +cross-slice home (vertical-slice rule; precedent: ``ScenarioPreset`` is +imported by both slices from ``app.shared.seeder.config``). + +``extra="forbid"`` IS the allow-list: any knob not listed here is a 422 at +the HTTP boundary (umbrella #406 risk mitigation -- the seeder's full 25+ +knob surface stays preset-driven; only these 7 curated knobs are exposed). +""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class SeederOverrides(BaseModel): + """The 7 curated seed knobs, applied LAST in ``_build_config_from_params``. + + Precedence: preset -> scalar ``stores``/``products``/``sparsity`` params -> + phase 1/2 overrides -> THIS object (wins). Each knob maps onto one + ``SeederConfig`` sub-dataclass field via ``dataclasses.replace`` so + preset-customized sibling fields survive. + """ + + # strict=True catches JSON-native coercion bugs ("5" -> 5); every field is + # int/float so no Field(strict=False) override is needed (see + # docs/_base/SECURITY.md -> "Pydantic v2 strict mode"). + model_config = ConfigDict(strict=True, extra="forbid") + + stores: int | None = Field( + default=None, + ge=1, + le=100, + description=("Store count -> DimensionConfig.stores; wins over the scalar `stores` param."), + ) + products: int | None = Field( + default=None, + ge=1, + le=500, + description=( + "Product count -> DimensionConfig.products; wins over the scalar `products` param." + ), + ) + window_days: int | None = Field( + default=None, + ge=75, + le=365, + description=( + "Seeded window length; start_date = end_date - window_days. >=75 keeps " + "the showcase historical_backfill gate clear. Rejected on the " + "calendar-pinned holiday_rush preset (demo surface)." 
+ ), + ) + sparsity: float | None = Field( + default=None, + ge=0.0, + le=0.9, + description=( + "Missing (store,product) grain fraction -> " + "SparsityConfig.missing_combinations_pct; preserves the preset's gap " + "config. 1.0 disallowed (would seed zero series)." + ), + ) + promotion_intensity: float | None = Field( + default=None, + ge=0.0, + le=0.5, + description="-> RetailPatternConfig.promotion_probability (preset max 0.25).", + ) + stockout_intensity: float | None = Field( + default=None, + ge=0.0, + le=0.5, + description=( + "-> RetailPatternConfig.stockout_probability. High values can " + "legitimately NaN-WAPE-fail the backtest (documented expected outcome)." + ), + ) + noise_sigma: float | None = Field( + default=None, + ge=0.0, + le=0.5, + description="-> TimeSeriesConfig.noise_sigma (preset max 0.4).", + ) + + def is_empty(self) -> bool: + """True when no knob is set (``{}`` on the wire) -- treated as None everywhere.""" + return not self.model_dump(exclude_none=True) diff --git a/app/shared/seeder/tests/test_overrides.py b/app/shared/seeder/tests/test_overrides.py new file mode 100644 index 00000000..06ad6034 --- /dev/null +++ b/app/shared/seeder/tests/test_overrides.py @@ -0,0 +1,95 @@ +"""Unit tests for the curated SeederOverrides allow-list model (E3, #409).""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.shared.seeder.overrides import SeederOverrides + + +class TestBounds: + """Each knob rejects out-of-bounds values at both edges.""" + + @pytest.mark.parametrize( + ("knob", "low", "high"), + [ + ("stores", 0, 101), + ("products", 0, 501), + ("window_days", 74, 366), + ], + ) + def test_int_knob_bounds(self, knob: str, low: int, high: int) -> None: + with pytest.raises(ValidationError): + SeederOverrides.model_validate({knob: low}) + with pytest.raises(ValidationError): + SeederOverrides.model_validate({knob: high}) + + @pytest.mark.parametrize( + ("knob", "low", "high"), + [ + 
("sparsity", -0.1, 0.91), + ("promotion_intensity", -0.1, 0.51), + ("stockout_intensity", -0.1, 0.51), + ("noise_sigma", -0.1, 0.51), + ], + ) + def test_float_knob_bounds(self, knob: str, low: float, high: float) -> None: + with pytest.raises(ValidationError): + SeederOverrides.model_validate({knob: low}) + with pytest.raises(ValidationError): + SeederOverrides.model_validate({knob: high}) + + def test_boundary_values_accepted(self) -> None: + ov = SeederOverrides.model_validate( + { + "stores": 100, + "products": 500, + "window_days": 75, + "sparsity": 0.9, + "promotion_intensity": 0.5, + "stockout_intensity": 0.0, + "noise_sigma": 0.5, + } + ) + assert ov.stores == 100 + assert ov.window_days == 75 + + +class TestAllowList: + """extra='forbid' is the machine-enforced allow-list.""" + + def test_unknown_knob_rejected(self) -> None: + with pytest.raises(ValidationError): + SeederOverrides.model_validate({"stores": 5, "bogus_knob": 1}) + + def test_strict_rejects_string_int(self) -> None: + # strict=True: a JSON string is not coerced (validate_python path). 
+ with pytest.raises(ValidationError): + SeederOverrides.model_validate({"stores": "5"}) + + +class TestJsonPath: + """JSON-dict validation (FastAPI's validate_python path) happy paths.""" + + def test_partial_object_validates(self) -> None: + ov = SeederOverrides.model_validate({"stores": 8, "promotion_intensity": 0.3}) + assert ov.stores == 8 + assert ov.promotion_intensity == 0.3 + assert ov.products is None + + def test_model_dump_exclude_none_is_sparse(self) -> None: + ov = SeederOverrides.model_validate({"stores": 8, "noise_sigma": 0.25}) + assert ov.model_dump(exclude_none=True) == {"stores": 8, "noise_sigma": 0.25} + + +class TestIsEmpty: + """is_empty() truth table -- {} on the wire collapses to None everywhere.""" + + def test_empty_object_is_empty(self) -> None: + assert SeederOverrides().is_empty() is True + assert SeederOverrides.model_validate({}).is_empty() is True + + def test_any_knob_makes_non_empty(self) -> None: + assert SeederOverrides(stores=1).is_empty() is False + assert SeederOverrides(noise_sigma=0.0).is_empty() is False diff --git a/docs/_base/API_CONTRACTS.md b/docs/_base/API_CONTRACTS.md index 70e6f5ab..e9c2ff7a 100644 --- a/docs/_base/API_CONTRACTS.md +++ b/docs/_base/API_CONTRACTS.md @@ -56,12 +56,12 @@ All endpoints serve JSON; error responses use `application/problem+json` (RFC 78 | agents | POST | `/agents/sessions/{session_id}/approve` | Approve/reject a pending tool call (HITL gate) | | agents | DELETE | `/agents/sessions/{session_id}` | Close session | | agents | WS | `/agents/stream` | Token-by-token streaming + tool-call events | -| seeder | (see `app/features/seeder/routes.py`) | `/seeder/*` | Trigger scenarios, status, customization | +| seeder | (see `app/features/seeder/routes.py`) | `/seeder/*` | Trigger scenarios, status, customization. 
**E3 (#409)** — `POST /seeder/generate` accepts an additive Optional `overrides` object (`SeederOverrides`, `app/shared/seeder/overrides.py`) with 7 allow-listed knobs: `stores` (1-100), `products` (1-500), `window_days` (75-365; recomputes `start_date` from `end_date`), `sparsity` (0-0.9), `promotion_intensity` (0-0.5), `stockout_intensity` (0-0.5), `noise_sigma` (0-0.5). `extra=forbid` → an unknown knob is a `422`; applied LAST in `_build_config_from_params` so it wins over the scalar `stores`/`products`/`sparsity` params; absent = byte-identical legacy behavior | | seeder | POST | `/seeder/phase2-enrichment` | PRP-38 — run Phase 2 generators (lifecycle, replenishment, exogenous, returns) against the existing seeded data. `422 application/problem+json` on an empty database. | -| demo | POST | `/demo/run` | Run the end-to-end demo pipeline in-process; returns a `DemoRunResult`. `409 application/problem+json` if a run is already active. **PRP-38** — body accepts an Optional `scenario: 'demo_minimal' \| 'showcase_rich' \| 'sparse'` field; default `'demo_minimal'` (back-compat). **E1 (#390)** — body accepts additive Optional `preservation: 'ephemeral' \| 'keep'` (default `'ephemeral'`, today's no-row behavior) and `workspace_name: str \| null` (pattern `^[a-z0-9][a-z0-9\-_]*$`, ≤100 chars); `workspace_name` without `preservation='keep'` → `422 application/problem+json`. `preservation='keep'` records the run as a `showcase_workspace` row; `DemoRunResult` gains an additive Optional `workspace_id: str \| null`. **E2 (#391)** — `scenario` accepts all 8 `ScenarioPreset` values (`retail_standard` / `holiday_rush` / `high_variance` / `stockout_heavy` / `new_launches` / `sparse` / `demo_minimal` / `showcase_rich`); only `showcase_rich` changes the step table (24 rows), every other preset runs the legacy 11-row flow. 
**E1 (#407)** — body accepts additive Optional `replayed_from_workspace_id: str \| null` (`^[0-9a-f]{32}$`); requires `preservation='keep'` (else `422 application/problem+json`); recorded verbatim on the new `showcase_workspace` row as a SOFT reference (no existence check — dangles are designed). | +| demo | POST | `/demo/run` | Run the end-to-end demo pipeline in-process; returns a `DemoRunResult`. `409 application/problem+json` if a run is already active. **PRP-38** — body accepts an Optional `scenario: 'demo_minimal' \| 'showcase_rich' \| 'sparse'` field; default `'demo_minimal'` (back-compat). **E1 (#390)** — body accepts additive Optional `preservation: 'ephemeral' \| 'keep'` (default `'ephemeral'`, today's no-row behavior) and `workspace_name: str \| null` (pattern `^[a-z0-9][a-z0-9\-_]*$`, ≤100 chars); `workspace_name` without `preservation='keep'` → `422 application/problem+json`. `preservation='keep'` records the run as a `showcase_workspace` row; `DemoRunResult` gains an additive Optional `workspace_id: str \| null`. **E2 (#391)** — `scenario` accepts all 8 `ScenarioPreset` values (`retail_standard` / `holiday_rush` / `high_variance` / `stockout_heavy` / `new_launches` / `sparse` / `demo_minimal` / `showcase_rich`); only `showcase_rich` changes the step table (24 rows), every other preset runs the legacy 11-row flow. **E1 (#407)** — body accepts additive Optional `replayed_from_workspace_id: str \| null` (`^[0-9a-f]{32}$`); requires `preservation='keep'` (else `422 application/problem+json`); recorded verbatim on the new `showcase_workspace` row as a SOFT reference (no existence check — dangles are designed). 
**E3 (#409)** — body accepts additive Optional `seed_overrides` (the same `SeederOverrides` object as `POST /seeder/generate`; requires `skip_seed=false` else `422`; `window_days` rejected on the calendar-pinned `holiday_rush` preset; `{}` normalizes to `null`) and `user_scope` (`{store_id: int>=1, product_id: int>=1}`, `extra=forbid` — the focus pair the pipeline models instead of the auto-discovered first pair; validated by the status step, WARN + fallback to discovery on a dangling pair). Both persist into the kept workspace row's story slots and replay verbatim. | | demo | WS | `/demo/stream` | Stream one `StepEvent` per pipeline step for the live Showcase page | -| demo | GET | `/demo/workspaces` | **E4 (#393)** — list saved showcase workspaces, newest first (`limit` 1-100 default 20 / `offset`); `200` + empty list on an empty table. **E1 (#407)** — list items additively carry `archived`, `pinned`, `tags`, `replayed_from_workspace_id`. **E2 (#408)** — additive query params: `q` (name ILIKE search, min 2 chars), repeated `tags` (JSONB containment — all listed tags must match), `include_archived` (default `false` — archived rows are now HIDDEN by default), allow-listed `sort_by` (`created_at`/`name`/`seed`/`status`; unknown → default `created_at desc`, no 422) + `sort_order` (`asc`/`desc`); pinned rows always order first; `total` respects the active filters | -| demo | GET | `/demo/workspaces/{workspace_id}` | **E4 (#393)** — full workspace row incl. `created_objects` soft references + grain/window columns; `404 application/problem+json` when missing. 
**E1 (#407)** — response additively carries the list-item lifecycle fields plus `notes`, `config_schema_version`, and the six story slots (`seed_overrides` / `user_scope` / `approval_events` / `rag_events` / `job_ids` / `phase_summaries` — all `null` until their writer epic lands; schemas in `docs/_base/DOMAIN_MODEL.md`) | +| demo | GET | `/demo/workspaces` | **E4 (#393)** — list saved showcase workspaces, newest first (`limit` 1-100 default 20 / `offset`); `200` + empty list on an empty table. **E1 (#407)** — list items additively carry `archived`, `pinned`, `tags`, `replayed_from_workspace_id`. **E2 (#408)** — additive query params: `q` (name ILIKE search, min 2 chars), repeated `tags` (JSONB containment — all listed tags must match), `include_archived` (default `false` — archived rows are now HIDDEN by default), allow-listed `sort_by` (`created_at`/`name`/`seed`/`status`; unknown → default `created_at desc`, no 422) + `sort_order` (`asc`/`desc`); pinned rows always order first; `total` respects the active filters. **E3 (#409)** — list items additively carry the `seed_overrides` / `user_scope` story slots (`null` on runs without them) — deliberately on the LIST item, because the frontend Replay builds its verbatim start frame from list rows | +| demo | GET | `/demo/workspaces/{workspace_id}` | **E4 (#393)** — full workspace row incl. `created_objects` soft references + grain/window columns; `404 application/problem+json` when missing. **E1 (#407)** — response additively carries the list-item lifecycle fields plus `notes`, `config_schema_version`, and the six story slots (`seed_overrides` / `user_scope` / `approval_events` / `rag_events` / `job_ids` / `phase_summaries` — `null` until their writer epic lands; schemas in `docs/_base/DOMAIN_MODEL.md`). 
**E3 (#409)** — `seed_overrides` and `user_scope` are now WRITTEN (recorded at create time from the start frame) and surfaced on the LIST item as well (Detail inherits) | | demo | GET | `/demo/workspaces/{workspace_id}/health` | **E2 (#408)** — probe the workspace's soft references in-process (model runs, scenario plans, alias, batch, agent session, `job_ids` slot) via `httpx.ASGITransport`; per-reference `status` ∈ `alive` (2xx) / `dead` (404 — deleted after the run) / `unknown` (anything else — never a 500), plus `alive`/`dead`/`unknown` counts and `partial_run` (true when the row's status ≠ `completed`); non-probeable keys (`v2_model_path`, `scenario_artifact_key`, `train_model_types`) are skipped; `404 application/problem+json` when the workspace is missing | | demo | PATCH | `/demo/workspaces/{workspace_id}` | **E1 (#407)** — partial lifecycle update (`name` / `notes` / `tags` / `archived` / `pinned`; `exclude_unset` semantics — only provided fields change; explicit `null` clears `name`/`notes`; explicit `null` on `archived`/`pinned`/`tags` → `422` (send `[]` to clear tags); `status` NOT patchable — the pipeline owns it); returns the updated `WorkspaceDetailResponse`; empty body = `200` no-op; `404 application/problem+json` when missing; `422` on unknown keys / bad name pattern / >20 tags | | demo | DELETE | `/demo/workspaces/{workspace_id}` | Delete one saved workspace METADATA row; `204` on success, `404 application/problem+json` when missing. The run's created objects (model runs, scenario plans, aliases, jobs, artifacts) are soft references and are NOT deleted | @@ -88,7 +88,7 @@ Verified against `app/features/agents/websocket.py` and `app/features/agents/sch Drives the end-to-end demo pipeline for the dashboard Showcase page. Verified against `app/features/demo/routes.py` and `app/features/demo/schemas.py` (`StepEvent`). 
-- **Client → server (one start frame):** `{"seed": int, "reset": bool, "skip_seed": bool, "scenario"?: "demo_minimal" | "showcase_rich" | "sparse", "preservation"?: "ephemeral" | "keep", "workspace_name"?: str}` — all fields optional (`DemoRunRequest` supplies defaults `seed=42`, `reset=false`, `skip_seed=true`, `scenario="demo_minimal"`, `preservation="ephemeral"`, `workspace_name=null`). E1 (#390) — `workspace_name` requires `preservation="keep"` (else one `error` event from validation); unknown start-frame keys remain ignored (forward/backward compat). E2 (#391) — `scenario` accepts all 8 `ScenarioPreset` values (`retail_standard` / `holiday_rush` / `high_variance` / `stockout_heavy` / `new_launches` / `sparse` / `demo_minimal` / `showcase_rich`); only `showcase_rich` changes the step table (24 rows), every other preset runs the legacy 11-row flow. E1 (#407) — the start frame additively accepts `replayed_from_workspace_id?: str` (`^[0-9a-f]{32}$`, requires `preservation="keep"` else one `error` event from validation); the Showcase Replay button sends the source row's `workspace_id`, recorded verbatim on the NEW row as a soft reference. The pipeline runs once, then the server closes. +- **Client → server (one start frame):** `{"seed": int, "reset": bool, "skip_seed": bool, "scenario"?: "demo_minimal" | "showcase_rich" | "sparse", "preservation"?: "ephemeral" | "keep", "workspace_name"?: str}` — all fields optional (`DemoRunRequest` supplies defaults `seed=42`, `reset=false`, `skip_seed=true`, `scenario="demo_minimal"`, `preservation="ephemeral"`, `workspace_name=null`). E1 (#390) — `workspace_name` requires `preservation="keep"` (else one `error` event from validation); unknown start-frame keys remain ignored (forward/backward compat). 
E2 (#391) — `scenario` accepts all 8 `ScenarioPreset` values (`retail_standard` / `holiday_rush` / `high_variance` / `stockout_heavy` / `new_launches` / `sparse` / `demo_minimal` / `showcase_rich`); only `showcase_rich` changes the step table (24 rows), every other preset runs the legacy 11-row flow. E1 (#407) — the start frame additively accepts `replayed_from_workspace_id?: str` (`^[0-9a-f]{32}$`, requires `preservation="keep"` else one `error` event from validation); the Showcase Replay button sends the source row's `workspace_id`, recorded verbatim on the NEW row as a soft reference. E3 (#409) — the start frame additively accepts `seed_overrides?: {stores?, products?, window_days?, sparsity?, promotion_intensity?, stockout_intensity?, noise_sigma?}` (allow-listed — unlike unknown top-level start-frame keys, which stay ignored, an unknown knob inside `seed_overrides` is one `error` event; requires an explicit `skip_seed=false`, since the default is `true`; `window_days` rejected on `holiday_rush`) and `user_scope?: {store_id, product_id}`; the seed step forwards `seed_overrides` verbatim to `POST /seeder/generate` (the seed step's `data` echoes `overrides_applied`), the status step adopts a valid `user_scope` (detail says "(user-selected)", `data.user_scope_applied=true`) or WARNS and falls back to discovery on a dangling pair; both persist to the kept workspace row and replay verbatim. The pipeline runs once, then the server closes. - **Server → client (every frame):** Pydantic-serialized `StepEvent` — `{"event_type", "step_name", "step_index", "total_steps", "status", "detail", "duration_ms", "data", "timestamp", "phase_name"?, "phase_index"?, "phase_total"?}`. PRP-38 — the three `phase_*` fields are Optional + Nullable so legacy clients that don't render phases keep working. - **`event_type` values (Literal in `StepEvent`):** - `step_start` — a step began; `status` is `null`. 
diff --git a/docs/_base/DOMAIN_MODEL.md b/docs/_base/DOMAIN_MODEL.md index 24137fc2..a7493219 100644 --- a/docs/_base/DOMAIN_MODEL.md +++ b/docs/_base/DOMAIN_MODEL.md @@ -61,22 +61,22 @@ - **Stored metadata:** replay config (`seed`, `scenario`, `reset`, `skip_seed`), showcase grain + window (`store_id`, `product_id`, `date_start`, `date_end` — NULL on early failure), lifecycle (`status`, `created_at`/`updated_at`), and the JSONB payloads below. E1 (#407) adds operator-curation columns `archived` / `pinned` (booleans, default false, PATCH-mutable, orthogonal to `status` — the pipeline owns the run lifecycle), `notes` (free text, 2000-char cap at the Pydantic boundary), `tags` (a queryable JSONB string array — its own GIN-indexed column, exact `scenario_plan.tags` pattern, ≤20 items at the PATCH boundary), `config_schema_version` (int, default 1 — versions the workspace config + story-slot schema as a whole; any epic that changes a documented slot shape bumps the ORM default and documents the delta here), and the provenance column `replayed_from_workspace_id` (String(32), btree-indexed SOFT reference — see Invariants). - **JSONB fields:** `created_objects` (sparse soft-reference keys — `winning_run_id`, `v2_run_id`, `v2_model_path`, `alias`, `agent_session_id`, `batch_id`, `scenario_plan_ids`, `scenario_artifact_key`, `train_model_types`, `stale_alias_run_id`) and `result_summary` (winner / WAPE / wall-clock display payload). - **JSONB story slots (E1 #407 — authoritative per-slot schema):** six dedicated nullable JSONB columns; `NULL` = "slot never written" (distinct from empty). E1 ships the columns only — each slot has an assigned writer epic: - - `seed_overrides` (E3 #409 writes) — dict: the curated seeder-override payload from the start frame, stored verbatim (`model_dump(mode="json")`); replay echoes it. - - `user_scope` (E3 #409 writes) — dict: operator-selected focus, `{"store_id": int, "product_id": int}` (additive keys allowed later). 
+ - `seed_overrides` (**WRITTEN since E3 #409**) — SPARSE dict: only operator-set knobs appear, `{}` is never stored (`None` instead). Allow-listed keys (the `SeederOverrides` schema, `app/shared/seeder/overrides.py`): `stores` int 1-100, `products` int 1-500, `window_days` int 75-365, `sparsity` float 0-0.9, `promotion_intensity` float 0-0.5, `stockout_intensity` float 0-0.5, `noise_sigma` float 0-0.5. Persisted via `model_dump(mode="json", exclude_none=True)` at create time; replay re-submits it verbatim. Records the REQUESTED config — the data the run actually seeded follows from it deterministically. + - `user_scope` (**WRITTEN since E3 #409**) — dict: operator-selected focus, `{"store_id": int>=1, "product_id": int>=1}` (`UserScope` schema, `extra=forbid`; additive keys need a documented schema change). Records the REQUESTED pair; the row's `store_id`/`product_id` columns record the EFFECTIVE grain the run modeled — the two legitimately diverge when the requested pair dangled and the status step warn-fell-back to discovery (divergence is visible by design). Both slots are exposed on the workspace LIST item (not detail-only) because the frontend Replay builds its start frame from list rows. - `approval_events` (E5 #411 writes) — list[dict], append-only: `{"action_id": str, "tool_name": str, "decision": "approved"|"rejected", "decided_at": iso8601-str, "session_id": str}`. - `rag_events` (E5 #411 writes) — list[dict], append-only: `{"event": "index"|"retrieve"|"skip", "detail": str, "count": int, "occurred_at": iso8601-str}`. - `job_ids` (later parallel epic — E2 #408 / E4 #410 agree on the writer) — list[str]: job / batch sub-job ids the run submitted (soft references). - `phase_summaries` (later parallel epic) — list[dict], one per phase: `{"phase_name": str, "status": "pass"|"fail"|"warn"|"skip", "steps": int, "duration_ms": float}`. 
- **Relationship to demo pipeline runs:** one workspace row per kept pipeline run — `create_workspace` inserts it as `running` before the first step; `finalize_workspace` settles it with the run's collected ids. NOT a seeder `scenario`: a preset is a reusable data-generation recipe; a workspace is the record of ONE concrete run (which preset it used, with what seed, and what it produced). - **Invariants:** - - The config columns (`seed`, `scenario`, `reset`, `skip_seed`) are sufficient for a verbatim Replay through the normal run path — replay never mutates the original row; it creates a NEW row. + - The config columns (`seed`, `scenario`, `reset`, `skip_seed`) — plus, since E3 #409, the `seed_overrides`/`user_scope` story slots — are sufficient for a verbatim Replay through the normal run path; replay never mutates the original row; it creates a NEW row. - `name` is deliberately NON-unique; `workspace_id` (UUID hex) is the unique handle. - `created_objects` carries SOFT references only — **no ForeignKeys by design**. The workspace row is an audit record, not an ownership root: the referenced runs/plans/aliases are independently operator-deletable, and a workspace must never block (or cascade) their deletion. - Deletion is METADATA-ONLY, symmetric with the no-FK design: `DELETE /demo/workspaces/{id}` removes the `showcase_workspace` row and nothing else — the soft-referenced model runs, scenario plans, aliases, jobs, agent sessions, and artifacts survive, and a workspace whose references already dangle still deletes cleanly. - Persistence is warn-and-continue: a workspace write failure must never break the demo pipeline (the run completes with `workspace_id: null`). The HTTP-backed helpers (`update_workspace` for PATCH, like get/list/delete) take a caller-owned session and raise normally — warn-and-continue is pipeline-only. 
- E1 (#407): `replayed_from_workspace_id` is a SOFT reference — **no ForeignKey, not even self-referential**: ancestor workspace rows must stay independently deletable (metadata-only delete) without cascading to or blocking descendants. The value is recorded verbatim from the request (no existence check); dangling lineage pointers after an ancestor delete are expected and harmless, like every `created_objects` id. - E1 (#407): `status` is NOT patchable — `PATCH /demo/workspaces/{id}` covers `name`/`notes`/`tags`/`archived`/`pinned` only; `archived` is an orthogonal curation flag and the `ck_showcase_workspace_status` CHECK is untouched. -- **Out of scope (deliberately not modeled yet):** export bundles under `artifacts/showcase/<workspace_id>/`, RAG-event / approval-decision capture (columns exist as E1 story slots; the writers are E5 #411), advanced seed config (slot exists; writer is E3 #409), and per-phase interactive configuration — see `docs/_base/RUNBOOKS.md` § Showcase workspace. +- **Out of scope (deliberately not modeled yet):** export bundles under `artifacts/showcase/<workspace_id>/`, RAG-event / approval-decision capture (columns exist as E1 story slots; the writers are E5 #411), and per-phase interactive configuration — see `docs/_base/RUNBOOKS.md` § Showcase workspace. (Advanced seed config + scope selection shipped in E3 #409 — the `seed_overrides`/`user_scope` slots above are now written.) ## Key Invariants — NEVER violate diff --git a/docs/_base/RUNBOOKS.md b/docs/_base/RUNBOOKS.md index 22e06e49..f7aa35a5 100644 --- a/docs/_base/RUNBOOKS.md +++ b/docs/_base/RUNBOOKS.md @@ -139,6 +139,11 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log - `holiday_rush` — seeds a **pinned Oct–Dec 2024 window** (the preset's `HolidayConfig` spikes are fixed 2024 dates; a today-anchored window would never contain them). Re-seeding ADDS rows without wiping prior data, so after a holiday_rush re-seed `/seeder/status` reports the union range (e.g. 
`2024-10-01..today`); tick **Reset database** together with **Re-seed first** for a clean pinned window, and again when switching back to a today-anchored preset. Expected green on the 11-step flow. - `retail_standard` / `high_variance` / `stockout_heavy` — demo-scaled 5×15×180d, today-anchored; `new_launches` — 5×25×180d. All expected **green** on the legacy 11-step flow (only `showcase_rich` runs the 24-step table). Fix: none for the documented outcomes above. If a normally-green preset fails, make sure **Re-seed first** was ticked (without it the run reuses the currently seeded dataset, whatever preset produced it), then re-run. +29. **Seed-overrides / focus-pair failures (E3 #409)** — the Advanced seed config panel and the store/product focus-pair selector add four documented outcomes: + - **`POST /demo/run` (or the WS start frame) 422s with `seed_overrides requires skip_seed=false`** — overrides on a run that skips the seed step would be a silent no-op, so the backend rejects the combination. Fix: tick **Re-seed first** (the panel is only rendered while it's ticked; direct API callers must send `skip_seed: false`). + - **422 `window_days cannot override the calendar-pinned holiday_rush window`** — expected; the preset's holiday spikes are fixed 2024 dates and a shifted window would silently drop all of them (the UI disables the window control on `holiday_rush`). Fix: pick a today-anchored preset or drop `window_days`. + - **`status` step shows ⚠️ `user_scope (store=X, product=Y) not found — fell back to discovered pair`** — expected after a reset/reseed re-issued entity ids (Postgres sequences never reset). The run continues on the discovered pair; the workspace row's `user_scope` slot keeps the REQUESTED pair while the `store_id`/`product_id` columns record the EFFECTIVE grain (divergence is visible by design). Fix: re-pick the pair from the live dropdowns after the run. 
+ - **`backtest` step ❌ NaN WAPE after high `stockout_intensity` / `sparsity` overrides** — documented expected outcome, same semantics as the `sparse` preset (incident 28); the panel shows a caveat badge at risky values. Deliberately not graceful-skipped — a skip would mask real regressions on healthy configs. Fix: lower the knob or accept the documented fail. > ⚠️ **RAG embedding-dim mismatch can orphan chunks (R4).** PRP-40 indexes a curated 5-file subset; if the operator switches the embedding provider mid-showcase, indexed chunks are orphaned (pgvector assumes one fixed dimension per column). PRP-40 does NOT ship a `clear_rag` UI toggle — that's a future PRP. Stick to one provider for the showcase run. @@ -155,9 +160,9 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log 4. **Deleting a workspace deletes METADATA ONLY.** The delete removes just the `showcase_workspace` row — the model runs, scenario plans, aliases, jobs, agent sessions, and on-disk artifacts the run created are NOT touched (and the seeded data is not reverted). `created_objects` ids are SOFT references (deliberately no FKs), so deletion in either direction never cascades: an operator-issued `DELETE /registry/runs/{id}` or scenario-plan delete leaves dangling deep links on a loaded workspace's artifact cards — expected; the workspace row records what WAS created, not what still exists. E2 (#408) — that staleness now SURFACES instead of dangling silently: loading a workspace probes its references via `GET /demo/workspaces/{id}/health`, dead references get a warning marker on the artifact cards, and a summary chip shows alive/dead counts plus a partial-run warning for never-completed rows. 5. **`holiday_rush` workspaces replay the pinned 2024 window.** The preset seeds a fixed Oct–Dec 2024 window (incident 28 above); a Replay with `reset=false` ADDS those rows to a today-anchored dataset, so `/seeder/status` reports the union range afterwards.
For a clean pinned window, save the workspace from a run with **Reset database** ticked — its (destructive) Replay then reproduces the pinned window exactly. -**Notes:** keep-runs are recorded by warn-and-continue hooks — a DB hiccup during `create_workspace` yields a green pipeline with `workspace_id: null` and no row (check uvicorn logs for `demo.workspace_create_failed`). Ephemeral runs write no workspace rows and stay in the localStorage Run-history strip; kept runs appear ONLY in the server-backed panel. On `showcase_rich` keep-runs, the planning-phase scenario plans carry the `workspace:` tag (E3 #392) — retrieve them via `GET /scenarios?tags=workspace: