From 977269b4a2afd3b730b029385574a5f394052e94 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Sun, 24 May 2026 22:50:24 -0700 Subject: [PATCH 1/2] explorer: B1 viewport-aware facet counts (#234 step 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The facet legend now scopes per-source / per-material / per-context / per-object_type counts to the current viewport bbox instead of the whole dataset. Pan or zoom → counts recompute. Resolves the silent disagreement between the legend and the "Samples in View" / table-count surfaces. What changed - New `isGlobalView()` helper: altitude shortcut (h > 10,000 km) plus a rect-saturation check on `paddedViewportBounds`. Returns true on the initial zoomed-out view so the legend doesn't jiggle on tiny rotations. - `updateCrossFilteredCounts` snapshots `bboxSQL` once at function entry. Cube fast-path is gated additionally on `bboxSQL === null` — the pre-aggregated cube has no spatial granularity, so it's invalid for any non-global view. The no-filter + non-global case now falls through to the slow path (B1's whole point: "what's in view" even with zero facet filters). - Slow path: when `bboxSQL` is set, JOIN `facets_url f` to `lite_url l` on `l.pid = f.pid` and apply the bbox predicate against `(l.latitude, l.longitude)`. `buildCrossFilterWhere` accepts an optional column prefix so the JOIN-shape WHERE qualifies columns with `f.` to avoid ambiguity with `lite_url.source`. - `viewer.camera.moveStart` → synchronous `markFacetCountsRecomputing()` so the legend shows italic-stale state the instant the user starts panning, instead of waiting for the 250 ms debounce. Cleared when `applyFacetCounts` writes the post-moveEnd query result. - `viewer.camera.moveEnd` → `refreshFacetCounts()`. Reuses the existing `facetCountsReqId` + 250 ms debounce, so bursts of moveEnd coalesce to one query and any superseded in-flight query discards its result on `await` resume via the stale guard. - On query error: existing `applyFacetCounts(key, null)` baseline fallback covers the Q3 decision (show globals, not error indicator). Q1 / Q2 / Q3 plan decisions (locked with rdhyee before implementation) - Q1: JOIN to `lite_url` for this PR; no existing parquet carries both the four facet columns and lat/lon. If real-world latency is bad, follow-up to bake a pre-joined facet+coords parquet. - Q2: Single-pass full query (no coarse-then-fine progressive scaffold yet — defer until latency is shown to need it). - Q3: On bbox-query throw, fall back to global baseline counts. Tests - New `tests/playwright/facet-viewport.spec.js`: 3 specs covering zoom-in-then-out (counts shrink, then restore to within 1 %), moveStart-sync `.recomputing` affordance, and the negative control that a cold global-view boot leaves no `.recomputing` stuck. - Regression: facetnote-url-load (2), search-real-count (3), url-roundtrip (5) all pass — 13 tests green locally. Known follow-ups - Latency was not formally benchmarked; the JOIN turns 4 parallel GROUP-BY queries into 4 parallel JOIN+GROUP-BY queries against the lite parquet (≈300 MB). If real-world UX feels slow, bake a pre-joined facets-with-coords parquet and route the bbox path through it (Q1 fallback). - A cancellation-race Playwright spec (two pans back-to-back inside the 250 ms window) is conceptually covered by the existing `facetCountsReqId` stale guard but not yet exercised by a test — add if Codex review flags it. Closes the B1 row in #234. --- explorer.qmd | 130 ++++++++++++-- tests/playwright/facet-viewport.spec.js | 230 ++++++++++++++++++++++++ 2 files changed, 349 insertions(+), 11 deletions(-) create mode 100644 tests/playwright/facet-viewport.spec.js diff --git a/explorer.qmd b/explorer.qmd index e564eab..8cc7622 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -993,6 +993,50 @@ function viewerBboxSQL(latCol, lngCol, padFactor) { return ` AND ${latCol} BETWEEN ${south} AND ${north} AND ${lngClause}`; } +// True when the current camera shows ≈the whole world (initial zoom-out +// state), or when the camera can't compute a rectangle at all (off-globe). +// In either case a viewport bbox predicate would be a no-op, so fast-paths +// that depend on "no spatial constraint" (e.g. the pre-aggregated facet +// cross-filter cube) remain valid and can be used. +// +// Used by `updateCrossFilteredCounts` (B1, issue #234 step 3) to decide +// whether to gate the cube fast-path off and JOIN to `lite_url` for a +// bbox-scoped slow-path query. +// +// Implementation note — why an altitude shortcut: +// At high altitudes Cesium's `computeViewRectangle` saturates at the +// sphere's extent for some camera angles but returns a tight bounding +// rect for others (e.g. at alt=5000 km over the equator the rect is +// ≈hemispheric, not full). The user's intuition is "max zoom-out = global +// counts," and at the explorer's default `globalRect` (which fits the +// `[-180,-60,180,80]` data extent and sits roughly at altitude ≈12000 km) +// the per-source legend totals should match the baselines, not jiggle on +// tiny camera rotations. The altitude check captures that "I'm zoomed all +// the way out" intent regardless of the exact rect Cesium reports. Below +// the altitude threshold we still let the rect vote "global" so flyTo +// to a true world-rect destination keeps working. +// +// Constants live inside the function (not top-level `const`) because +// Quarto `{ojs}` cells reject top-level `const`/`let`/`var` and a bad +// declaration cascades — breaking every dependent cell (memory note +// `feedback_qmd_ojs_top_level`). +function isGlobalView() { + const ALT_THRESHOLD_M = 1.0e7; // 10,000 km + const LNG_PAD_DEG = 2; + const LAT_PAD_DEG = 2; + if (typeof viewer === 'undefined') return true; + let h = NaN; + try { h = viewer.camera.positionCartographic.height; } catch { /* mid-init */ } + if (Number.isFinite(h) && h > ALT_THRESHOLD_M) return true; + const b = paddedViewportBounds(0); + if (!b) return true; // off-globe → no meaningful bbox + if (b.west > b.east) return false; // antimeridian-wrapping is never "global" + return b.west <= -180 + LNG_PAD_DEG + && b.east >= 180 - LNG_PAD_DEG + && b.south <= -90 + LAT_PAD_DEG + && b.north >= 90 - LAT_PAD_DEG; +} + // === Cross-filter facet count UI helpers === function applyFacetCounts(facetKey, countsMap) { const baseline = (viewer && viewer._baselineCounts) ? viewer._baselineCounts[facetKey] : null; @@ -2644,7 +2688,10 @@ zoomWatcher = { }; } - function buildCrossFilterWhere(excludeFacet) { + // `colPrefix` lets callers qualify column names when the WHERE is going + // to be used inside a JOIN (B1 bbox-aware path). Defaults to '' so the + // pre-B1 single-table call sites keep working unchanged. + function buildCrossFilterWhere(excludeFacet, colPrefix = '') { const { activeDims, sourceImpossible } = describeCrossFilters(); if (sourceImpossible && excludeFacet !== 'source') return '1=0'; @@ -2652,7 +2699,7 @@ zoomWatcher = { .filter(d => d.key !== excludeFacet) .map(d => { const list = d.values.map(v => `'${escSql(v)}'`).join(','); - return `${d.col} IN (${list})`; + return `${colPrefix}${d.col} IN (${list})`; }); return conds.length > 0 ? conds.join(' AND ') : '1=1'; @@ -2662,17 +2709,40 @@ zoomWatcher = { if (myReq !== facetCountsReqId) return; const { dims, activeDims, totalActiveValues, sourceImpossible } = describeCrossFilters(); - if (!sourceImpossible && activeDims.length === 0) { + // --- B1 (issue #234 step 3): bbox-aware counts --- + // + // Snapshot the viewport state at function entry. If the camera moves + // after this, a fresh `refreshFacetCounts()` call (from the moveEnd + // listener) bumps `facetCountsReqId` and supersedes this in-flight + // request — every `await` resume checks `myReq !== facetCountsReqId`. + // + // `isGlobalView()` true → no spatial constraint → cube fast-path and + // baseline early-return remain valid as before. + // `isGlobalView()` false → snapshot a bbox SQL fragment scoped to the + // `lite_url` lat/lon columns (which we JOIN to in the slow path, + // since `facets_url` carries no coordinates today). Cube fast-path + // is unconditionally gated off (it is pre-aggregated globally and + // can't answer viewport-scoped questions). + const isGlobal = isGlobalView(); + const bboxSQL = isGlobal ? null : viewerBboxSQL('l.latitude', 'l.longitude', 0); + + // Baseline early-return only applies when there is no filter AND no + // spatial constraint. In a non-global view with no facet filter, B1 + // still wants per-value counts scoped to what's visible — fall + // through to the slow path with `where = '1=1'`. + if (!sourceImpossible && activeDims.length === 0 && bboxSQL === null) { for (const d of dims) applyFacetCounts(d.key, null); return; } markFacetCountsRecomputing(); + // Cube fast-path: pre-aggregated globally, so it's valid only when + // the camera is at (or close to) the global view. const singleActiveDim = !sourceImpossible && activeDims.length === 1 && activeDims[0].values.length === 1 ? activeDims[0] : null; - if (singleActiveDim && totalActiveValues === 1) { + if (singleActiveDim && totalActiveValues === 1 && bboxSQL === null) { try { const filterCols = ['filter_source', 'filter_material', 'filter_context', 'filter_object_type']; const filterColForKey = { @@ -2708,14 +2778,31 @@ zoomWatcher = { } await Promise.all(dims.map(async (d) => { - const where = buildCrossFilterWhere(d.key); try { - const rows = await db.query(` - SELECT ${d.col} AS value, COUNT(*) AS count - FROM read_parquet('${facets_url}') - WHERE ${where} AND ${d.col} IS NOT NULL - GROUP BY ${d.col} - `); + let rows; + if (bboxSQL) { + // B1 bbox-scoped slow path: JOIN facets_url to lite_url + // on pid so we can filter by lite.latitude / lite.longitude. + // facets_url has no coordinates of its own. Per-PR follow-up: + // if this JOIN turns out too slow in practice, bake a + // pre-joined parquet (decision Q1 in plan). + const where = buildCrossFilterWhere(d.key, 'f.'); + rows = await db.query(` + SELECT f.${d.col} AS value, COUNT(*) AS count + FROM read_parquet('${facets_url}') f + JOIN read_parquet('${lite_url}') l ON l.pid = f.pid + WHERE ${where} AND f.${d.col} IS NOT NULL${bboxSQL} + GROUP BY f.${d.col} + `); + } else { + const where = buildCrossFilterWhere(d.key); + rows = await db.query(` + SELECT ${d.col} AS value, COUNT(*) AS count + FROM read_parquet('${facets_url}') + WHERE ${where} AND ${d.col} IS NOT NULL + GROUP BY ${d.col} + `); + } if (myReq !== facetCountsReqId) return; const map = new Map(); for (const r of rows) map.set(r.value, Number(r.count)); @@ -2723,6 +2810,8 @@ zoomWatcher = { } catch (err) { if (myReq !== facetCountsReqId) return; console.warn(`Cross-filter count query failed for ${d.key}:`, err); + // Q3 in plan: on bbox-query throw, fall back to global + // baseline rather than clobber with an error indicator. applyFacetCounts(d.key, null); } })); @@ -2971,10 +3060,29 @@ zoomWatcher = { // the "Samples in View" stat / phase-msg stay in lockstep with the // table's row count even on small sub-10% pans that `camera.changed` // doesn't fire for. + // B1 (issue #234 step 3): mark facet counts as "recomputing" the moment + // the user starts a camera move, so the legend shows italic-stale state + // immediately instead of waiting for moveEnd + the 250ms debounce. The + // class is cleared only when `applyFacetCounts` runs, so it stays + // sticky-italic across a continuous pan/zoom — exactly the intended + // "we're checking if counts changed" affordance. + // + // We skip the mark when there are no facet-count spans rendered yet + // (initial boot, before facet UI hydrates) to avoid touching DOM during + // the very first camera-position events. + viewer.camera.moveStart.addEventListener(() => { + if (document.querySelector('.facet-count')) markFacetCountsRecomputing(); + }); + viewer.camera.moveEnd.addEventListener(() => { if (!viewer._suppressHashWrite) { history.replaceState(null, '', buildHash(viewer)); } + // B1: viewport-aware facet counts. Bouncing through refreshFacetCounts + // reuses the existing 250ms debounce + facetCountsReqId stale-guard, + // so bursts of moveEnd (drag-pan, wheel-zoom) coalesce into one query + // and any in-flight superseded query discards its result on resume. + refreshFacetCounts(); if (getMode() !== 'point') return; const h = viewer.camera.positionCartographic.height; if (h > EXIT_POINT_ALT) { diff --git a/tests/playwright/facet-viewport.spec.js b/tests/playwright/facet-viewport.spec.js new file mode 100644 index 0000000..27cbe4e --- /dev/null +++ b/tests/playwright/facet-viewport.spec.js @@ -0,0 +1,230 @@ +/** + * B1 viewport-aware facet counts (issue #234, roadmap step 3). + * + * Behavior change: the facet-legend counts (per-source, per-material, + * per-context, per-object_type) now reflect only the samples currently + * visible on the map, not the global dataset. Pan or zoom → counts + * recompute. The `.recomputing` italic state appears synchronously on + * `camera.moveStart` and clears once the post-moveEnd debounced query + * lands and `applyFacetCounts` writes the new values. + * + * Implementation summary (see explorer.qmd): + * - `isGlobalView()` returns true when the camera shows ≈the whole + * world (or is off-globe). Used to gate the cube fast-path off and + * decide whether to JOIN to `lite_url` for a bbox-scoped query. + * - `viewer.camera.moveStart` → `markFacetCountsRecomputing()` (sync). + * - `viewer.camera.moveEnd` → `refreshFacetCounts()` (debounced 250ms, + * stale-guarded by `facetCountsReqId`). + * - `updateCrossFilteredCounts` snapshots bboxSQL once at function + * entry; cube fast-path skipped when bboxSQL is set; slow path + * JOINs facets_url f ↔ lite_url l ON l.pid = f.pid with the bbox + * predicate on (l.latitude, l.longitude). + * + * What this spec covers: + * - Global-view boot → counts populated, no `.recomputing` left over. + * - flyTo small viewport (Cyprus) → `.recomputing` appears, counts + * shrink relative to global, `.recomputing` clears. + * - flyTo back to global → counts restore to (within 1% of) original. + * - moveStart marks `.recomputing` synchronously, before any 250ms + * debounce can have run. + * + * What this spec does NOT cover: + * - Cancellation race when two pans land back-to-back inside the + * 250ms debounce window — covered by the `facetCountsReqId` stale + * guard in the implementation; a reliable Playwright race here + * would require monkey-patching `db.query` to inject latency and + * was deferred to a follow-up if Codex flags it as missing. + * - JOIN performance under stress (the Q1(a) decision was to accept + * the JOIN cost for this PR and bake a combined parquet only if + * real-world latency proves bad). The local-validation step in + * the implementation plan logs query latency for visibility. + */ +const { test, expect } = require('@playwright/test'); + +const EXPLORER_PATH = '/explorer.html'; + +// Global view position. alt=15000000 (15,000 km) is above the +// `GLOBAL_VIEW_ALT_M = 1e7` shortcut in `isGlobalView()`, so the +// implementation will treat this as the no-bbox baseline path +// regardless of any per-angle quirks in `computeViewRectangle`. +// Without the altitude shortcut, at lower altitudes (e.g. 5000 km) +// Cesium reports an ≈hemispheric bounding rect over the equator and +// `isGlobalView()` would return false — which would conflate the +// boot-time bbox query with the "true global" baseline read. +const GLOBAL_HASH = '#v=1&lat=0&lng=0&alt=15000000'; + +// Cyprus-centered small viewport (~35°N, 33°E, alt 500km). Cyprus has +// archaeology data in iSamples (PKAP, OpenContext) and is small enough +// that the bbox-scoped count must drop substantially below global. +const CYPRUS_LAT = 35; +const CYPRUS_LNG = 33; +const CYPRUS_ALT = 500000; + +/** Wait until facet UI hydrates: at least one `.facet-count[data-facet="source"]` + * span is in the DOM. The facetFilters cell renders these after + * facet_summaries.parquet loads, which is the same precondition existing + * tests (facetnote-url-load) wait for. */ +async function waitForFacetUI(page, ms = 60000) { + await page.waitForFunction( + () => document.querySelectorAll('.facet-count[data-facet="source"]').length > 0, + null, { timeout: ms } + ); + // Also wait for materialFilterBody to populate — the existing + // facetnote-url-load spec uses this as the "facet UI is fully wired" + // signal, and we want to match its boot-readiness criterion. + await page.waitForFunction( + () => document.querySelectorAll('#materialFilterBody input[type="checkbox"]').length > 0, + null, { timeout: ms } + ); +} + +/** Wait until no `.facet-count` carries the `.recomputing` class. Catches + * both the cold-boot path (no recompute fires, condition true immediately) + * and the post-pan path (italic clears once the bbox query completes). */ +async function waitForFacetCountsStable(page, ms = 60000) { + await page.waitForFunction( + () => document.querySelectorAll('.facet-count.recomputing').length === 0, + null, { timeout: ms } + ); +} + +/** Read the per-source counts off the DOM. Returns `{[uri]: integer}`. + * Tolerates the `Number.toLocaleString()` thousands-grouping the renderer + * applies (en-US locale produces `1,234`; we strip commas). + * Source is the most stable facet for a viewport-aware assertion: only a + * handful of values, and `_baselineCounts.source` populates at boot. */ +async function readSourceCounts(page) { + return page.evaluate(() => { + const out = {}; + document.querySelectorAll('.facet-count[data-facet="source"]').forEach(el => { + const val = el.getAttribute('data-value'); + const m = (el.textContent || '').match(/\(([\d,]+)\)/); + if (m && val) out[val] = parseInt(m[1].replace(/,/g, ''), 10); + }); + return out; + }); +} + +/** flyTo a destination, then wait for the post-moveEnd debounce + bbox query + * to settle. Uses a zero-duration flight to make the test deterministic. */ +async function flyToAndSettle(page, lat, lng, alt) { + await page.evaluate(async ({ lat, lng, alt }) => { + const v = await window._ojs?.ojsConnector?.mainModule?.value('viewer'); + if (!v) throw new Error('viewer not in OJS module'); + v.camera.flyTo({ + destination: window.Cesium.Cartesian3.fromDegrees(lng, lat, alt), + duration: 0, + }); + }, { lat, lng, alt }); + // Give the debounce + query a chance to land. waitForFacetCountsStable + // is the real synchronization point; this just kicks the event loop. + await page.waitForTimeout(50); + await waitForFacetCountsStable(page); +} + +function totalOf(counts) { + return Object.values(counts).reduce((a, b) => a + b, 0); +} + +test.describe('B1 viewport-aware facet counts (#234 step 3)', () => { + test.setTimeout(180000); + + test('zoom in → counts shrink to viewport; zoom out → counts restore', async ({ page }) => { + await page.goto(`${EXPLORER_PATH}${GLOBAL_HASH}`); + await waitForFacetUI(page); + await waitForFacetCountsStable(page); + + // Baseline at global view. Sources are stable URIs, so we can + // compare value-by-value across the two viewport states. + const globalCounts = await readSourceCounts(page); + const totalGlobal = totalOf(globalCounts); + expect(totalGlobal).toBeGreaterThan(0); + + // Fly to Cyprus. + await flyToAndSettle(page, CYPRUS_LAT, CYPRUS_LNG, CYPRUS_ALT); + + const cyprusCounts = await readSourceCounts(page); + const totalCyprus = totalOf(cyprusCounts); + // Viewport-scoped total must be strictly less than global and + // strictly greater than zero (Cyprus is known to carry data). + expect(totalCyprus).toBeGreaterThan(0); + expect(totalCyprus).toBeLessThan(totalGlobal); + + // No individual per-source count should exceed its global value + // (the bbox is a subset of the world, monotonically). + for (const [uri, n] of Object.entries(cyprusCounts)) { + const g = globalCounts[uri] ?? 0; + expect(n).toBeLessThanOrEqual(g); + } + + // Fly back to the global view (alt above GLOBAL_VIEW_ALT_M). + await flyToAndSettle(page, 0, 0, 15000000); + + const restoredCounts = await readSourceCounts(page); + const totalRestored = totalOf(restoredCounts); + // Should match global to within 1% (same parquet, same WHERE clause + // shape — the only difference is the bbox JOIN vs. no JOIN, which + // the isGlobalView() epsilon should collapse to "no JOIN"). + expect(Math.abs(totalRestored - totalGlobal) / totalGlobal).toBeLessThan(0.01); + }); + + test('moveStart marks .recomputing before the debounce can run', async ({ page }) => { + await page.goto(`${EXPLORER_PATH}${GLOBAL_HASH}`); + await waitForFacetUI(page); + await waitForFacetCountsStable(page); + + // Verify the italic-stale state appears during the moveStart event + // firing — i.e., before `refreshFacetCounts()`'s 250 ms debounce + // can have completed. We attach our own moveStart listener AFTER + // the explorer's; Cesium fires listeners in registration order, so + // ours runs after the explorer's `markFacetCountsRecomputing()` + // and observes `.recomputing` already present. + // + // We use `flyTo` (not `lookLeft`): Cesium only fires + // `Camera.moveStart` on input-driven moves or programmatic flights, + // not on direct attribute mutations like `lookLeft`/`setView`. + // `duration > 0` is required so the flight actually animates and + // the event sequence is moveStart → ... → moveEnd (rather than + // collapsing to a single tick). + const appearedAtMoveStart = await page.evaluate(async () => { + const v = await window._ojs?.ojsConnector?.mainModule?.value('viewer'); + if (!v) throw new Error('viewer not in OJS module'); + return new Promise((resolve, reject) => { + const off = v.camera.moveStart.addEventListener(() => { + off(); + resolve(document.querySelector('.facet-count.recomputing') !== null); + }); + // Safety timeout so a regression that suppresses moveStart + // fails the test loudly instead of hanging. + setTimeout(() => { off(); reject(new Error('moveStart did not fire')); }, 10000); + v.camera.flyTo({ + destination: window.Cesium.Cartesian3.fromDegrees(30, 30, 1000000), + duration: 0.2, + }); + }); + }); + expect(appearedAtMoveStart).toBe(true); + + // And it should clear after the post-moveEnd query lands. + await waitForFacetCountsStable(page); + }); + + test('global-view boot does not leave .recomputing stuck', async ({ page }) => { + // Negative control: a cold boot at the global view goes through the + // baseline early-return path (no facet filter, bboxSQL===null) and + // must NOT leave any `.recomputing` class behind. Guards against a + // future refactor that moves `markFacetCountsRecomputing()` above + // the early-return. + await page.goto(`${EXPLORER_PATH}${GLOBAL_HASH}`); + await waitForFacetUI(page); + await waitForFacetCountsStable(page); + const stuck = await page.evaluate( + () => document.querySelectorAll('.facet-count.recomputing').length + ); + expect(stuck).toBe(0); + + // Counts must be populated (baselines). + const counts = await readSourceCounts(page); + expect(totalOf(counts)).toBeGreaterThan(0); + }); +}); From 63b0457fea6125b2692bedf89bf56d2a58c55e05 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Mon, 25 May 2026 09:49:32 -0700 Subject: [PATCH 2/2] B1 viewport-aware facets: address Codex round-1 review of #237 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex caught three real issues + one documentation polish. 1. (BLOCKING) moveStart did not supersede in-flight or debounced facet count work. An older `updateCrossFilteredCounts` could land mid-pan, pass the stale guard at its `await` resume, call `applyFacetCounts`, and CLEAR `.recomputing` while writing pre-move counts — leaving the legend looking authoritative but stale until moveEnd's 250 ms debounce fired the new query. Fix: the moveStart listener now `clearTimeout(facetCountsDebounce); ++facetCountsReqId;` in addition to marking the italic-stale class. moveEnd's `refreshFacetCounts()` bumps the id AGAIN, harmlessly — supersession of already-superseded work, debounce coalesced. 2. (TEST GAP) The PR exercised the no-filter bbox path but NOT the filter-active JOIN path — exactly the new SQL shape this change introduces. Added a 4th Playwright spec `bbox-aware count under an active source filter (JOIN + filter)`: - activate a single source filter (cube fast-path at global view) - capture filtered material counts at global view (cube-derived) - fly to Cyprus → forces slow path with `f.source IN (…)` + JOIN - assert material counts shrink (a silent fallback to baseline on a column-ambiguity SQL error would write global counts, which would GROW back to the un-filtered globals — caught by the `cyprusTotal < filteredTotal` assertion) 3. (TOLERANCE TOO LOOSE) `zoom in → zoom out → restore` accepted a 1 % delta on the restored totals, but at `alt=15e6` the altitude shortcut in `isGlobalView()` forces the no-bbox baseline path → the counts should be value-by-value EXACTLY equal to the first global capture. Switched to `expect(restoredCounts).toEqual(globalCounts)`. A loose tolerance could hide a stale-read race writing a bbox-scoped count that just happens to look "close enough." 4. (POLISH) Sharpened the `moveStart` listener comment to explain why bumping the id is necessary (was missing the rationale). Test results: 4/4 new + 10/10 regression (facetnote-url-load, search-real-count, url-roundtrip) — 14 green locally. Codex's point about the altitude-shortcut UX (high-altitude pivot returns "global" even if camera angle doesn't show the full data extent) is an intentional product decision per the design rationale in `isGlobalView()`'s comment; nothing to change there. --- explorer.qmd | 36 ++++++--- tests/playwright/facet-viewport.spec.js | 100 +++++++++++++++++++++--- 2 files changed, 113 insertions(+), 23 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index 8cc7622..146cb6c 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -3060,18 +3060,34 @@ zoomWatcher = { // the "Samples in View" stat / phase-msg stay in lockstep with the // table's row count even on small sub-10% pans that `camera.changed` // doesn't fire for. - // B1 (issue #234 step 3): mark facet counts as "recomputing" the moment - // the user starts a camera move, so the legend shows italic-stale state - // immediately instead of waiting for moveEnd + the 250ms debounce. The - // class is cleared only when `applyFacetCounts` runs, so it stays - // sticky-italic across a continuous pan/zoom — exactly the intended - // "we're checking if counts changed" affordance. + // B1 (issue #234 step 3): on the very first sign of a camera move, + // (a) mark the legend italic-stale so the user sees "checking…" the + // moment they grab the globe (instead of waiting moveEnd + 250 ms); + // (b) bump `facetCountsReqId` and clear the pending debounce so any + // in-flight or debounced `updateCrossFilteredCounts` for the + // PRE-MOVE viewport is invalidated. // - // We skip the mark when there are no facet-count spans rendered yet - // (initial boot, before facet UI hydrates) to avoid touching DOM during - // the very first camera-position events. + // Without (b), a query that was already in flight at moveStart could + // resume after its SELECT lands, pass the stale guard, and call + // `applyFacetCounts` — which would WRITE OLD COUNTS for a viewport the + // user has already abandoned AND clear `.recomputing` before moveEnd + // schedules the new query, leaving the legend looking authoritative + // (no italic) but stale until moveEnd's 250 ms debounce fires. + // Codex round-1 review of PR #237 caught this. + // + // moveEnd's `refreshFacetCounts()` below will bump `facetCountsReqId` + // AGAIN — that's intentional and harmless: a second supersession of + // already-superseded work, debounce coalesced. + // + // We skip the mark/bump when there are no facet-count spans rendered + // yet (initial boot, before facet UI hydrates) to avoid DOM thrash + // and a no-op debounce reset during the very first camera-position + // events that fire before the OJS facet cells have settled. viewer.camera.moveStart.addEventListener(() => { - if (document.querySelector('.facet-count')) markFacetCountsRecomputing(); + if (!document.querySelector('.facet-count')) return; + markFacetCountsRecomputing(); + clearTimeout(facetCountsDebounce); + ++facetCountsReqId; }); viewer.camera.moveEnd.addEventListener(() => { diff --git a/tests/playwright/facet-viewport.spec.js b/tests/playwright/facet-viewport.spec.js index 27cbe4e..419e6f6 100644 --- a/tests/playwright/facet-viewport.spec.js +++ b/tests/playwright/facet-viewport.spec.js @@ -88,23 +88,26 @@ async function waitForFacetCountsStable(page, ms = 60000) { ); } -/** Read the per-source counts off the DOM. Returns `{[uri]: integer}`. +/** Read the per-value counts for a facet off the DOM. Returns `{[uri]: integer}`. * Tolerates the `Number.toLocaleString()` thousands-grouping the renderer - * applies (en-US locale produces `1,234`; we strip commas). - * Source is the most stable facet for a viewport-aware assertion: only a - * handful of values, and `_baselineCounts.source` populates at boot. */ -async function readSourceCounts(page) { - return page.evaluate(() => { + * applies (en-US locale produces `1,234`; we strip commas). */ +async function readFacetCounts(page, facet) { + return page.evaluate((facet) => { const out = {}; - document.querySelectorAll('.facet-count[data-facet="source"]').forEach(el => { + document.querySelectorAll(`.facet-count[data-facet="${facet}"]`).forEach(el => { const val = el.getAttribute('data-value'); const m = (el.textContent || '').match(/\(([\d,]+)\)/); if (m && val) out[val] = parseInt(m[1].replace(/,/g, ''), 10); }); return out; - }); + }, facet); } +/** Source is the most stable facet for the unfiltered viewport-aware + * assertion: only a handful of values, and `_baselineCounts.source` + * populates at boot. */ +async function readSourceCounts(page) { return readFacetCounts(page, 'source'); } + /** flyTo a destination, then wait for the post-moveEnd debounce + bbox query * to settle. Uses a zero-duration flight to make the test deterministic. */ async function flyToAndSettle(page, lat, lng, alt) { @@ -161,11 +164,82 @@ test.describe('B1 viewport-aware facet counts (#234 step 3)', () => { await flyToAndSettle(page, 0, 0, 15000000); const restoredCounts = await readSourceCounts(page); - const totalRestored = totalOf(restoredCounts); - // Should match global to within 1% (same parquet, same WHERE clause - // shape — the only difference is the bbox JOIN vs. no JOIN, which - // the isGlobalView() epsilon should collapse to "no JOIN"). - expect(Math.abs(totalRestored - totalGlobal) / totalGlobal).toBeLessThan(0.01); + // At alt=15e6 the altitude shortcut in `isGlobalView()` forces the + // no-bbox baseline path, so restored counts must be VALUE-BY-VALUE + // EXACTLY equal to the first global capture — not a 1 % tolerance. + // Codex round-1 review of PR #237 caught that a loose tolerance + // could hide a real regression (e.g. a stale-read race writing a + // bbox-scoped count and us mistaking it for "close enough"). + expect(restoredCounts).toEqual(globalCounts); + }); + + test('bbox-aware count under an active source filter (JOIN + filter)', async ({ page }) => { + // The most important new SQL shape: `buildCrossFilterWhere(d.key, 'f.')` + // emitted as `f.source IN (...)` joined to `lite_url l` for the + // bbox predicate. This guards against column-prefix regressions + // (e.g. dropping the `f.` qualifier → ambiguous-column error from + // DuckDB since `lite_url` also carries a `source` column) and + // against the JOIN inadvertently double-counting via duplicate pids. + // Codex round-1 review of PR #237 called out the coverage gap. + await page.goto(`${EXPLORER_PATH}${GLOBAL_HASH}`); + await waitForFacetUI(page); + await waitForFacetCountsStable(page); + + // Pick the source with the most data in our viewport-baseline so the + // assertion has signal: bbox-scoped count > 0 and ≤ its global. + const globalCounts = await readSourceCounts(page); + const [topSource] = Object.entries(globalCounts) + .sort((a, b) => b[1] - a[1]) + .map(([uri]) => uri); + expect(topSource).toBeTruthy(); + + // Toggle the source-checkbox state so only `topSource` remains + // active. The sourceFilter change handler runs refreshFacetCounts; + // at global view the cube fast-path kicks in (single value selected). + await page.evaluate((keep) => { + document.querySelectorAll('#sourceFilter input[type="checkbox"]').forEach(cb => { + cb.checked = (cb.value === keep); + }); + document.getElementById('sourceFilter').dispatchEvent(new Event('change', { bubbles: true })); + }, topSource); + await waitForFacetCountsStable(page); + + // Capture filtered-but-global material counts. With source filtered + // to a single value (cube fast-path), these are the cube-derived + // material counts under that source. + const globalMaterialUnderFilter = await readFacetCounts(page, 'material'); + const materialsPresent = Object.values(globalMaterialUnderFilter).filter(n => n > 0).length; + expect(materialsPresent).toBeGreaterThan(0); + + // Fly to Cyprus. The slow path now runs WITH the JOIN AND the + // `f.source IN ('topSource')` predicate (this is the new shape + // Codex round-1 flagged as untested). Source-dim query has WHERE + // '1=1' (its own dim excluded) — no `f.` prefix appears — but the + // material/context/object_type queries DO emit `f.source IN (...)`. + await flyToAndSettle(page, CYPRUS_LAT, CYPRUS_LNG, CYPRUS_ALT); + + const cyprusSourceCounts = await readSourceCounts(page); + // Top source must shrink from global (we're now in a small viewport) + // but remain > 0 (Cyprus carries data for the most-represented source). + expect(cyprusSourceCounts[topSource]).toBeGreaterThan(0); + expect(cyprusSourceCounts[topSource]).toBeLessThanOrEqual(globalCounts[topSource]); + for (const [uri, n] of Object.entries(cyprusSourceCounts)) { + const g = globalCounts[uri] ?? 0; + expect(n).toBeLessThanOrEqual(g); + } + + // Material counts must come back from the JOIN+filter path and + // also shrink. A silent fallback to `applyFacetCounts(key, null)` + // (e.g. on an ambiguous-column SQL error) would write GLOBAL + // baseline counts here — those baselines are unfiltered, so the + // material total would actually GROW back to its global value, + // which violates the shrink assertion below. This is the JOIN-path + // smoke test Codex round-1 called for. + const cyprusMaterialUnderFilter = await readFacetCounts(page, 'material'); + const filteredTotal = Object.values(globalMaterialUnderFilter).reduce((a, b) => a + b, 0); + const cyprusTotal = Object.values(cyprusMaterialUnderFilter).reduce((a, b) => a + b, 0); + expect(cyprusTotal).toBeGreaterThan(0); + expect(cyprusTotal).toBeLessThan(filteredTotal); }); test('moveStart marks .recomputing before the debounce can run', async ({ page }) => {