Skip to content

Commit 9fe8a22

Browse files
radu-mocanu and claude committed
feat: custom evaluator scaffolding via UI, MCP, and Claude Code skill
Backend:
- Add POST /api/custom-evaluators/scaffold to create a boilerplate Python file in evaluations/evaluators/custom/ with the correct class structure
- Add POST /api/custom-evaluators/register to run the registration logic

Frontend:
- Add "+ Custom Evaluator (Python)" button in the evaluators sidebar
- Inline form for the name; creates the file and shows a toast with next steps
- Links to the custom evaluator docs

MCP:
- Add scaffold_custom_evaluator and register_custom_evaluator tools

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cab4939 commit 9fe8a22

8 files changed

Lines changed: 275 additions & 58 deletions

File tree

src/uipath/dev/server/frontend/src/api/eval-client.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,25 @@ export async function updateEvalSetEvaluators(
150150
});
151151
}
152152

153+
/**
 * POSTs a scaffold request for a new custom evaluator to
 * `${BASE}/custom-evaluators/scaffold`.
 *
 * @param body - The evaluator `name` and an optional `description`, sent as JSON.
 * @returns The path, filename, class name and evaluator id reported by the server.
 */
export async function scaffoldCustomEvaluator(body: {
  name: string;
  description?: string;
}): Promise<{ file_path: string; filename: string; class_name: string; evaluator_id: string }> {
  const endpoint = `${BASE}/custom-evaluators/scaffold`;
  const payload = JSON.stringify(body);
  return fetchJson(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });
}
163+
164+
/**
 * POSTs a registration request for a custom evaluator file to
 * `${BASE}/custom-evaluators/register`.
 *
 * @param filename - Name of the evaluator file, sent as `{ filename }` in the JSON body.
 * @returns The evaluator id and spec path reported by the server.
 */
export async function registerCustomEvaluator(filename: string): Promise<{ evaluator_id: string; spec_path: string }> {
  const endpoint = `${BASE}/custom-evaluators/register`;
  const payload = JSON.stringify({ filename });
  return fetchJson(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });
}
171+
153172
export async function updateLocalEvaluator(
154173
id: string,
155174
body: {

src/uipath/dev/server/frontend/src/components/evaluators/EvaluatorsSidebar.tsx

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import { useState } from "react";
12
import { useEvalStore } from "../../store/useEvalStore";
23
import { useHashRoute } from "../../hooks/useHashRoute";
4+
import { useToastStore } from "../../store/useToastStore";
5+
import { scaffoldCustomEvaluator } from "../../api/eval-client";
36

47
const categories: { type: string; label: string; badgeColor: string }[] = [
58
{ type: "deterministic", label: "Deterministic", badgeColor: "var(--success)" },
@@ -10,6 +13,26 @@ const categories: { type: string; label: string; badgeColor: string }[] = [
1013
export default function EvaluatorsSidebar() {
1114
const localEvaluators = useEvalStore((s) => s.localEvaluators);
1215
const { evaluatorFilter, evaluatorCreateType, navigate } = useHashRoute();
16+
const addToast = useToastStore((s) => s.addToast);
17+
const [customName, setCustomName] = useState("");
18+
const [showCustom, setShowCustom] = useState(false);
19+
const [scaffolding, setScaffolding] = useState(false);
20+
21+
/**
 * Scaffolds a custom evaluator from the name typed into the inline form.
 *
 * On success a toast describes the next steps and the form is reset and
 * closed; on failure an error toast is shown and the form stays open.
 */
const handleScaffold = async () => {
  const name = customName.trim();
  // The text input calls this handler directly on Enter, and only the button is
  // disabled while a request is running, so re-entry has to be blocked here.
  if (!name || scaffolding) return;
  setScaffolding(true);
  try {
    const result = await scaffoldCustomEvaluator({ name });
    addToast("success", `Created ${result.filename} — implement evaluate() then run: uv run uipath register evaluator ${result.filename}`);
    setCustomName("");
    setShowCustom(false);
  } catch (err: unknown) {
    // Narrow at runtime instead of asserting: `detail` is only shown when it
    // really is a string, otherwise the generic message is used.
    const detail =
      typeof err === "object" && err !== null && "detail" in err
        ? (err as { detail: unknown }).detail
        : undefined;
    addToast("error", typeof detail === "string" ? detail : "Failed to scaffold evaluator");
  } finally {
    setScaffolding(false);
  }
};
1336

1437
// "All" is active when on #/evaluators (no filter, no create, no detail)
1538
const isAllActive = !evaluatorFilter && !evaluatorCreateType;
@@ -33,6 +56,64 @@ export default function EvaluatorsSidebar() {
3356
+ New Evaluator
3457
</button>
3558

59+
{/* Create Custom */}
60+
{showCustom ? (
61+
<div className="mx-3 mb-1 p-2 rounded border" style={{ borderColor: "var(--border)", background: "var(--bg-secondary)" }}>
62+
<input
63+
type="text"
64+
value={customName}
65+
onChange={(e) => setCustomName(e.target.value)}
66+
onKeyDown={(e) => { if (e.key === "Enter") handleScaffold(); if (e.key === "Escape") setShowCustom(false); }}
67+
placeholder="e.g. DiscountEvaluator"
68+
autoFocus
69+
className="w-full rounded px-2 py-1 text-[11px] mb-1.5"
70+
style={{ background: "var(--bg-primary)", border: "1px solid var(--border)", color: "var(--text-primary)" }}
71+
/>
72+
<div className="flex gap-1.5">
73+
<button
74+
onClick={handleScaffold}
75+
disabled={scaffolding || !customName.trim()}
76+
className="flex-1 py-1 text-[10px] font-semibold rounded cursor-pointer disabled:opacity-40"
77+
style={{ background: "var(--accent)", color: "var(--bg-primary)", border: "none" }}
78+
>
79+
{scaffolding ? "Creating..." : "Create"}
80+
</button>
81+
<button
82+
onClick={() => setShowCustom(false)}
83+
className="px-2 py-1 text-[10px] rounded cursor-pointer"
84+
style={{ color: "var(--text-muted)", background: "transparent", border: "1px solid var(--border)" }}
85+
>
86+
Cancel
87+
</button>
88+
</div>
89+
<a
90+
href="https://uipath.github.io/uipath-python/eval/custom_evaluators/"
91+
target="_blank"
92+
rel="noopener noreferrer"
93+
className="block mt-1.5 text-[10px] no-underline"
94+
style={{ color: "var(--accent)" }}
95+
>
96+
Custom evaluator docs &rarr;
97+
</a>
98+
</div>
99+
) : (
100+
<button
101+
onClick={() => setShowCustom(true)}
102+
className="mx-3 mb-1 px-3 py-1.5 text-[11px] font-medium rounded border border-dashed transition-colors cursor-pointer"
103+
style={{ color: "var(--text-muted)", borderColor: "var(--border)", background: "transparent" }}
104+
onMouseEnter={(e) => {
105+
e.currentTarget.style.color = "var(--text-primary)";
106+
e.currentTarget.style.borderColor = "var(--text-muted)";
107+
}}
108+
onMouseLeave={(e) => {
109+
e.currentTarget.style.color = "var(--text-muted)";
110+
e.currentTarget.style.borderColor = "var(--border)";
111+
}}
112+
>
113+
+ Custom Evaluator (Python)
114+
</button>
115+
)}
116+
36117
{/* Categories label */}
37118
<div className="px-3 pt-3 pb-1 text-[11px] uppercase tracking-widest font-semibold" style={{ color: "var(--text-muted)" }}>
38119
Categories

src/uipath/dev/server/routes/evaluators.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,123 @@ def _discover_local_evaluators() -> list[dict[str, Any]]:
235235
return results
236236

237237

238+
# Source text for a newly scaffolded custom evaluator module.
# ``{class_name}`` and ``{description}`` are ``str.format`` placeholders; no
# other braces may appear in the template. The body uses 4-space indentation
# so that the rendered result is valid Python.
_CUSTOM_EVALUATOR_TEMPLATE = '''\
from typing import Optional

from uipath.eval.evaluators import (
    BaseEvaluationCriteria,
    BaseEvaluator,
    BaseEvaluatorConfig,
)
from uipath.eval.models import (
    AgentExecution,
    ErrorEvaluationResult,
    EvaluationResult,
    NumericEvaluationResult,
)


class {class_name}Criteria(BaseEvaluationCriteria):
    """Per-item evaluation criteria for {class_name}."""

    pass  # TODO: add fields like expected_value: str


class {class_name}Config(BaseEvaluatorConfig[{class_name}Criteria]):
    """Configuration for {class_name}."""

    name: str = "{class_name}"
    default_evaluation_criteria: Optional[{class_name}Criteria] = None


class {class_name}(
    BaseEvaluator[{class_name}Criteria, {class_name}Config, None]
):
    """{description}"""

    @classmethod
    def get_evaluator_id(cls) -> str:
        return "{class_name}"

    async def evaluate(
        self,
        agent_execution: AgentExecution,
        evaluation_criteria: {class_name}Criteria,
    ) -> EvaluationResult:
        try:
            output = agent_execution.agent_output
            # TODO: implement evaluation logic
            score = 0.0
            return NumericEvaluationResult(score=score, details="Not implemented yet")
        except Exception as e:
            return ErrorEvaluationResult(error=str(e))
'''
289+
290+
291+
class ScaffoldCustomEvaluatorBody(BaseModel):
    """Request payload for the custom-evaluator scaffold endpoint."""

    # Human-supplied evaluator name; the class name and filename derive from it.
    name: str
    # Optional text for the generated class docstring; empty means "use a default".
    description: str = ""
296+
297+
298+
def _escape_for_docstring(text: str) -> str:
    """Escape backslashes and double quotes so text stays inside a triple-quoted docstring."""
    return text.replace("\\", "\\\\").replace('"', '\\"')


@router.post("/custom-evaluators/scaffold")
async def scaffold_custom_evaluator(body: ScaffoldCustomEvaluatorBody) -> dict[str, Any]:
    """Scaffold a new custom evaluator Python file.

    The class name is ``body.name`` with spaces, dashes and underscores
    removed, suffixed with ``Evaluator`` when missing. The file is written to
    ``evaluations/evaluators/custom/<snake_case_class_name>.py`` under the
    current working directory.

    Returns:
        A dict with ``file_path``, ``filename``, ``class_name`` and
        ``evaluator_id`` (the latter two are the same value).

    Raises:
        HTTPException: 400 when the name does not reduce to a plain ASCII
            identifier; 409 when the target file already exists.
    """
    class_name = body.name.replace(" ", "").replace("-", "").replace("_", "")

    # The name ends up in a filename and in generated source code. Rejecting
    # anything that is not a plain identifier blocks path traversal ("../x"),
    # path separators, and names that would not compile as a class.
    if not (class_name.isascii() and class_name.isidentifier()):
        raise HTTPException(
            status_code=400,
            detail=(
                "Invalid evaluator name: use only ASCII letters and digits "
                "(spaces, dashes and underscores are ignored), starting with a letter."
            ),
        )

    if not class_name.endswith("Evaluator"):
        class_name += "Evaluator"

    # snake_case filename: underscore before an uppercase letter that follows
    # a non-uppercase character.
    snake = ""
    for i, ch in enumerate(class_name):
        if ch.isupper() and i > 0 and not class_name[i - 1].isupper():
            snake += "_"
        snake += ch.lower()
    filename = f"{snake}.py"

    custom_dir = Path.cwd() / "evaluations" / "evaluators" / "custom"
    custom_dir.mkdir(parents=True, exist_ok=True)
    filepath = custom_dir / filename

    # The description is placed inside a triple-quoted docstring; escape it so
    # quotes or backslashes cannot end the docstring early.
    description = body.description or f"Custom evaluator: {body.name}"
    content = _CUSTOM_EVALUATOR_TEMPLATE.format(
        class_name=class_name,
        description=_escape_for_docstring(description),
    )

    # Mode "x" creates the file exclusively, so a concurrent request cannot
    # slip in between an existence check and the write.
    try:
        with filepath.open("x", encoding="utf-8") as handle:
            handle.write(content)
    except FileExistsError:
        raise HTTPException(
            status_code=409, detail=f"File already exists: {filepath}"
        ) from None

    return {
        "file_path": str(filepath),
        "filename": filename,
        "class_name": class_name,
        "evaluator_id": class_name,
    }
332+
333+
334+
class RegisterCustomEvaluatorBody(BaseModel):
    """Request payload for the custom-evaluator register endpoint."""

    # Passed unchanged to the registration routine.
    filename: str
338+
339+
340+
@router.post("/custom-evaluators/register")
async def register_custom_evaluator(body: RegisterCustomEvaluatorBody) -> dict[str, Any]:
    """Run evaluator registration for ``body.filename``.

    Returns:
        A dict with the ``evaluator_id`` and ``spec_path`` produced by
        ``register_evaluator``.

    Raises:
        HTTPException: 400 carrying the message of any exception raised while
            importing or running the registration routine.
    """
    try:
        # Imported lazily, inside the guarded region, so an import failure is
        # reported as a 400 as well.
        from uipath.eval.evaluators.registration import register_evaluator

        registration = register_evaluator(body.filename)
        evaluator_id, spec_path = registration
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from None

    response: dict[str, Any] = {
        "evaluator_id": evaluator_id,
        "spec_path": spec_path,
    }
    return response
353+
354+
238355
class CreateEvaluatorBody(BaseModel):
239356
"""Body for creating a new local evaluator JSON file."""
240357

src/uipath/dev/server/static/assets/ChatPanel-BNRio83n.js renamed to src/uipath/dev/server/static/assets/ChatPanel-DJtePF36.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uipath/dev/server/static/assets/index-Cd97ab0I.js renamed to src/uipath/dev/server/static/assets/index-C0mGt6tw.js

Lines changed: 23 additions & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uipath/dev/server/static/assets/index-CIbY4UMG.css

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uipath/dev/server/static/assets/index-EAfwuOZs.css

Lines changed: 0 additions & 32 deletions
This file was deleted.

src/uipath/dev/server/static/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
66
<title>UiPath Developer Console</title>
77
<link rel="icon" type="image/x-icon" href="/favicon.ico" />
8-
<script type="module" crossorigin src="/assets/index-Cd97ab0I.js"></script>
8+
<script type="module" crossorigin src="/assets/index-C0mGt6tw.js"></script>
99
<link rel="modulepreload" crossorigin href="/assets/vendor-react-VzyiTEsu.js">
1010
<link rel="modulepreload" crossorigin href="/assets/vendor-reactflow-B_2yZyR4.js">
1111
<link rel="stylesheet" crossorigin href="/assets/vendor-reactflow-B5DZHykP.css">
12-
<link rel="stylesheet" crossorigin href="/assets/index-EAfwuOZs.css">
12+
<link rel="stylesheet" crossorigin href="/assets/index-CIbY4UMG.css">
1313
</head>
1414
<body>
1515
<div id="root"></div>

0 commit comments

Comments
 (0)