Skip to content

Commit 7d426a5

Browse files
committed
fix: normalize leaderboard display labels
1 parent 5615606 commit 7d426a5

4 files changed

Lines changed: 141 additions & 13 deletions

File tree

src/pages/LeaderboardPage.tsx

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ import { useCallback, useEffect, useMemo, useState } from "react";
22
import { useAuth } from "../auth/AuthContext";
33
import { getGraphLabel } from "../config";
44
import {
5+
formatModelDisplayName,
6+
getLeaderboardCalibrationNotice,
57
getPublicModelBoardScore,
68
getPublicNodeBoardScore,
79
MODEL_BOARD_PUBLIC_COLUMNS,
@@ -166,6 +168,10 @@ export default function LeaderboardPage() {
166168
}, [selectedGraphId]);
167169

168170
const updatedAt = view === "model" ? modelUpdatedAt : nodeUpdatedAt;
171+
const calibrationNotice = useMemo(
172+
() => getLeaderboardCalibrationNotice(modelRows),
173+
[modelRows],
174+
);
169175

170176
return (
171177
<div className="pt-16 min-h-screen overflow-hidden">
@@ -320,18 +326,32 @@ export default function LeaderboardPage() {
320326
Score Contract
321327
</p>
322328
{view === "model" ? (
323-
<p className="text-sm text-text-secondary leading-relaxed">
324-
Public V1 display shows one `Score` column only. In this first
325-
batch, that visible score is the required judged idea-quality
326-
signal, while optional execution and usage signals stay hidden
327-
until they materially exist.
328-
</p>
329+
<div className="space-y-3">
330+
<p className="text-sm text-text-secondary leading-relaxed">
331+
Public V1 display shows one `Score` column only. In this first
332+
batch, that visible score is the required judged idea-quality
333+
signal, while optional execution and usage signals stay hidden
334+
until they materially exist.
335+
</p>
336+
{calibrationNotice && (
337+
<p className="text-sm text-amber-700 leading-relaxed">
338+
{calibrationNotice}
339+
</p>
340+
)}
341+
</div>
329342
) : (
330-
<p className="text-sm text-text-secondary leading-relaxed">
331-
Node rows also expose one visible `Score` only. Long titles are
332-
shortened in-table for readability, and the full node content is
333-
available from the detail drawer after opening the row.
334-
</p>
343+
<div className="space-y-3">
344+
<p className="text-sm text-text-secondary leading-relaxed">
345+
Node rows also expose one visible `Score` only. Long titles are
346+
shortened in-table for readability, and the full node content is
347+
available from the detail drawer after opening the row.
348+
</p>
349+
{calibrationNotice && (
350+
<p className="text-sm text-amber-700 leading-relaxed">
351+
{calibrationNotice}
352+
</p>
353+
)}
354+
</div>
335355
)}
336356
</div>
337357

@@ -496,7 +516,7 @@ function ModelLeaderboardTable({ rows }: { rows: ModelLeaderboardRow[] }) {
496516
</td>
497517
<td className="py-4 px-4">
498518
<div className="font-semibold text-text-primary">
499-
{row.generator_model}
519+
{formatModelDisplayName(row.generator_model)}
500520
</div>
501521
</td>
502522
<td className="py-4 px-4 text-right font-bold text-cyan-light">
@@ -598,7 +618,7 @@ function NodeLeaderboardTable(
598618
{formatScore(getPublicNodeBoardScore(row))}
599619
</td>
600620
<td className="py-4 px-4 text-right text-text-primary font-semibold">
601-
{row.generator_model || "unknown"}
621+
{formatModelDisplayName(row.generator_model)}
602622
</td>
603623
</tr>
604624
);

src/utils/api.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,7 @@ function normalizeModelLeaderboardRow(raw: unknown): ModelLeaderboardRow | null
654654
}
655655

656656
const rawTopNodeIds = Array.isArray(item.top_node_ids) ? item.top_node_ids : [];
657+
const rawScoreAdjustments = Array.isArray(item.score_adjustments) ? item.score_adjustments : [];
657658

658659
return {
659660
generator_model: item.generator_model,
@@ -667,12 +668,24 @@ function normalizeModelLeaderboardRow(raw: unknown): ModelLeaderboardRow | null
667668
typeof item.scored_idea_count === "number"
668669
? item.scored_idea_count
669670
: Number(item.scored_idea_count ?? 0),
671+
raw_model_total_score:
672+
typeof item.raw_model_total_score === "number"
673+
? item.raw_model_total_score
674+
: item.raw_model_total_score == null
675+
? null
676+
: Number(item.raw_model_total_score),
670677
model_total_score:
671678
typeof item.model_total_score === "number"
672679
? item.model_total_score
673680
: item.model_total_score == null
674681
? null
675682
: Number(item.model_total_score),
683+
raw_model_idea_gen_score:
684+
typeof item.raw_model_idea_gen_score === "number"
685+
? item.raw_model_idea_gen_score
686+
: item.raw_model_idea_gen_score == null
687+
? null
688+
: Number(item.raw_model_idea_gen_score),
676689
model_idea_gen_score:
677690
typeof item.model_idea_gen_score === "number"
678691
? item.model_idea_gen_score
@@ -702,6 +715,26 @@ function normalizeModelLeaderboardRow(raw: unknown): ModelLeaderboardRow | null
702715
typeof item.last_generated_at === "string" && item.last_generated_at.trim()
703716
? item.last_generated_at
704717
: null,
718+
score_adjustments: rawScoreAdjustments
719+
.filter((value): value is Record<string, unknown> => Boolean(value && typeof value === "object"))
720+
.map((value) => ({
721+
target:
722+
typeof value.target === "string" && value.target.trim()
723+
? value.target
724+
: "node_agent_score",
725+
delta:
726+
typeof value.delta === "number"
727+
? value.delta
728+
: Number(value.delta ?? 0),
729+
reason:
730+
typeof value.reason === "string" && value.reason.trim()
731+
? value.reason
732+
: null,
733+
scope:
734+
typeof value.scope === "string" && value.scope.trim()
735+
? value.scope
736+
: null,
737+
})),
705738
};
706739
}
707740

@@ -715,6 +748,7 @@ function normalizeNodeLeaderboardRow(raw: unknown): NodeLeaderboardRow | null {
715748
? item.linked_seevomap_node_id
716749
: null;
717750
const detailNodeId = linkedNodeId || (!item.node_id.includes(":") ? item.node_id : null);
751+
const rawScoreAdjustments = Array.isArray(item.score_adjustments) ? item.score_adjustments : [];
718752

719753
return {
720754
node_id: item.node_id,
@@ -730,12 +764,24 @@ function normalizeNodeLeaderboardRow(raw: unknown): NodeLeaderboardRow | null {
730764
typeof item.field === "string" && item.field.trim()
731765
? item.field
732766
: null,
767+
raw_node_total_score:
768+
typeof item.raw_node_total_score === "number"
769+
? item.raw_node_total_score
770+
: item.raw_node_total_score == null
771+
? null
772+
: Number(item.raw_node_total_score),
733773
node_total_score:
734774
typeof item.node_total_score === "number"
735775
? item.node_total_score
736776
: item.node_total_score == null
737777
? null
738778
: Number(item.node_total_score),
779+
raw_node_agent_score:
780+
typeof item.raw_node_agent_score === "number"
781+
? item.raw_node_agent_score
782+
: item.raw_node_agent_score == null
783+
? null
784+
: Number(item.raw_node_agent_score),
739785
node_agent_score:
740786
typeof item.node_agent_score === "number"
741787
? item.node_agent_score
@@ -756,6 +802,26 @@ function normalizeNodeLeaderboardRow(raw: unknown): NodeLeaderboardRow | null {
756802
: Number(item.node_usage_score),
757803
linked_seevomap_node_id: linkedNodeId,
758804
detail_node_id: detailNodeId,
805+
score_adjustments: rawScoreAdjustments
806+
.filter((value): value is Record<string, unknown> => Boolean(value && typeof value === "object"))
807+
.map((value) => ({
808+
target:
809+
typeof value.target === "string" && value.target.trim()
810+
? value.target
811+
: "node_agent_score",
812+
delta:
813+
typeof value.delta === "number"
814+
? value.delta
815+
: Number(value.delta ?? 0),
816+
reason:
817+
typeof value.reason === "string" && value.reason.trim()
818+
? value.reason
819+
: null,
820+
scope:
821+
typeof value.scope === "string" && value.scope.trim()
822+
? value.scope
823+
: null,
824+
})),
759825
};
760826
}
761827

src/utils/leaderboardDisplay.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,35 @@ export function getPublicNodeBoardScore(row: NodeLeaderboardRow): number | null
2222
return row.node_total_score;
2323
}
2424

25+
/**
 * Builds the calibration banner text for the model leaderboard.
 *
 * The banner is shown when any row for `deepseek-v3` carries a score
 * adjustment targeting `node_agent_score` with a delta of exactly `-1.1`.
 *
 * Every row and every adjustment is inspected, so the result does not depend
 * on how the rows are sorted or on where the matching adjustment sits inside
 * a row's `score_adjustments` list.
 *
 * @param rows - Model leaderboard rows; rows without `score_adjustments` are ignored.
 * @returns The notice text, or `null` when no matching calibration is present.
 */
export function getLeaderboardCalibrationNotice(
  rows: ModelLeaderboardRow[],
): string | null {
  const hasDeepseekCalibration = rows.some(
    (row) =>
      row.generator_model === "deepseek-v3"
      && (row.score_adjustments ?? []).some(
        (adjustment) =>
          adjustment.target === "node_agent_score" && adjustment.delta === -1.1,
      ),
  );

  if (!hasDeepseekCalibration) {
    return null;
  }

  return "Current version applies a temporary -1.1 calibration to deepseek-v3 because of the server-side inference issue in this batch. Later refined versions should not inherit it.";
}
42+
43+
/**
 * Converts a raw generator-model identifier into its public display label.
 *
 * Blank, `null` and `undefined` inputs become `"unknown"`. A small set of
 * known identifiers is matched case-insensitively and mapped to a canonical
 * label; anything else is returned trimmed but otherwise unchanged.
 *
 * @param model - Raw model identifier as delivered by the leaderboard API.
 * @returns The label to render in leaderboard tables.
 */
export function formatModelDisplayName(model: string | null | undefined): string {
  const trimmed = String(model || "").trim();
  if (trimmed.length === 0) {
    return "unknown";
  }

  // Only the comparison is lower-cased; unrecognised names keep their casing.
  switch (trimmed.toLowerCase()) {
    case "gpt5":
      return "gpt-5";
    case "claude_4_5_sonnet":
      return "claude-4.5-sonnet";
    case "claude_4_5_opus":
      return "claude-4.5-opus";
    default:
      return trimmed;
  }
}
53+
2554
export function summarizeLeaderboardQuestion(question: string, maxLength = 140): string {
2655
const compact = String(question || "").replace(/\s+/g, " ").trim();
2756
if (!compact) return "Untitled node";

src/utils/types.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,31 +89,44 @@ export interface UsageLeaderboardRow {
8989
last_used_at?: string | null;
9090
}
9191

92+
/**
 * One score correction attached to a leaderboard row.
 *
 * Both `ModelLeaderboardRow` and `NodeLeaderboardRow` carry a list of these
 * in their `score_adjustments` field.
 */
export interface ScoreAdjustment {
93+
target: string; // Name of the adjusted score, e.g. "node_agent_score" (the value the calibration notice checks for).
94+
delta: number; // Size of the adjustment; NOTE(review): presumably added to the raw score, so negative lowers it — confirm.
95+
reason?: string | null; // Optional explanation text; may be absent or null.
96+
scope?: string | null; // Optional scope label; NOTE(review): allowed values are not visible here — confirm with the API.
97+
}
98+
9299
/**
 * One row of the model leaderboard, keyed by generator model.
 *
 * Score fields are `null` when no value is available. The `raw_*` fields and
 * `score_adjustments` are optional, so a row may omit them entirely.
 */
export interface ModelLeaderboardRow {
93100
generator_model: string; // Raw model identifier; rendered through formatModelDisplayName in the tables.
94101
generator_family?: string | null;
95102
idea_count: number;
96103
scored_idea_count: number;
104+
raw_model_total_score?: number | null; // NOTE(review): presumably the total score before score_adjustments — confirm.
97105
model_total_score: number | null;
106+
raw_model_idea_gen_score?: number | null; // NOTE(review): presumably the idea-gen score before score_adjustments — confirm.
98107
model_idea_gen_score: number | null;
99108
model_execution_score: number | null;
100109
model_usage_score: number | null;
101110
score_confidence: number;
102111
top_node_ids: string[];
103112
last_generated_at?: string | null; // Timestamp string, or null when none was supplied.
113+
score_adjustments?: ScoreAdjustment[]; // Adjustments recorded for this row; getLeaderboardCalibrationNotice reads these.
104114
}
105115

106116
/**
 * One row of the node leaderboard, describing a single scored node.
 *
 * Score fields are `null` when no value is available. The `raw_*` fields and
 * `score_adjustments` are optional, so a row may omit them entirely.
 */
export interface NodeLeaderboardRow {
107117
node_id: string;
108118
question: string;
109119
generator_model?: string | null; // Raw model identifier; rendered through formatModelDisplayName, which shows "unknown" when blank.
110120
field?: string | null;
121+
raw_node_total_score?: number | null; // NOTE(review): presumably the total score before score_adjustments — confirm.
111122
node_total_score: number | null; // Value returned by getPublicNodeBoardScore as the visible score.
123+
raw_node_agent_score?: number | null; // NOTE(review): presumably the agent score before score_adjustments — confirm.
112124
node_agent_score: number | null;
113125
node_execution_score: number | null;
114126
node_usage_score: number | null;
115127
linked_seevomap_node_id?: string | null;
116128
detail_node_id?: string | null; // Set by the API normalizer to the linked node id, else node_id when it contains no ":", else null.
129+
score_adjustments?: ScoreAdjustment[]; // Adjustments recorded for this row.
117130
}
118131

119132
export type LeaderboardRow =

0 commit comments

Comments
 (0)