-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval.yaml
More file actions
99 lines (92 loc) · 2.72 KB
/
eval.yaml
File metadata and controls
99 lines (92 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Evaluation definition "stackrox-mcp-e2e".
#
# Declares the agent under test, the LLM-judge settings, the MCP config file,
# and a list of task sets. Each task set points at a task file and carries
# assertions about which tools of the `stackrox-mcp` server are used, with
# which arguments, and how many tool calls are allowed.
#
# NOTE(review): the nesting below follows key order; confirm against the
# evaluation tool's schema - in particular that minToolCalls/maxToolCalls
# belong to `assertions` rather than to individual `toolsUsed` entries, and
# that `argumentsMatch` belongs to the `toolsUsed` entry it follows.
kind: Eval
metadata:
  name: "stackrox-mcp-e2e"
config:
  agent:
    type: "builtin.openai-agent"
    model: "gpt-4o"
  llmJudge:
    # Presumably these values are the NAMES of environment variables that
    # hold the judge's base URL, API key and model name - TODO confirm.
    env:
      baseUrlKey: JUDGE_BASE_URL
      apiKeyKey: JUDGE_API_KEY
      modelNameKey: JUDGE_MODEL_NAME
  mcpConfigFile: mcp-config.yaml
  taskSets:
    # Test 1: List clusters
    - path: tasks/list-clusters.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "list_clusters"
        minToolCalls: 1
        maxToolCalls: 1

    # Test 2: CVE affecting workloads
    - path: tasks/cve-affecting-workloads.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "get_deployments_for_cve"
            argumentsMatch:
              cveName: "CVE-2021-31805"
        minToolCalls: 1
        maxToolCalls: 1

    # Test 3: CVE affecting clusters - basic
    - path: tasks/cve-affecting-clusters.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "get_clusters_for_cve"
            argumentsMatch:
              cveName: "CVE-2016-1000031"
        minToolCalls: 1
        maxToolCalls: 3

    # Test 4: Non-existent CVE
    - path: tasks/cve-nonexistent.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "get_clusters_for_cve"
            argumentsMatch:
              cveName: "CVE-2099-00001"
        minToolCalls: 1
        maxToolCalls: 2

    # Test 5: CVE with specific cluster filter (scooby)
    - path: tasks/cve-cluster-scooby.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "list_clusters"
          - server: stackrox-mcp
            toolPattern: "get_clusters_for_cve"
            argumentsMatch:
              cveName: "CVE-2016-1000031"
        minToolCalls: 1
        maxToolCalls: 2

    # Test 6: CVE with specific cluster filter (maria)
    # NOTE(review): unlike Test 5, this task set asserts only `list_clusters`
    # and has no `get_clusters_for_cve` / `cveName` assertion - confirm that
    # this is intentional.
    - path: tasks/cve-cluster-maria.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "list_clusters"
        minToolCalls: 1
        maxToolCalls: 2

    # Test 7: CVE affecting clusters - general
    - path: tasks/cve-clusters-general.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "get_clusters_for_cve"
            argumentsMatch:
              cveName: "CVE-2021-31805"
        minToolCalls: 1
        maxToolCalls: 5

    # Test 8: CVE check with cluster list reference
    - path: tasks/cve-cluster-list.yaml
      assertions:
        toolsUsed:
          - server: stackrox-mcp
            toolPattern: "get_clusters_for_cve"
            argumentsMatch:
              cveName: "CVE-2024-52577"
        minToolCalls: 1
        maxToolCalls: 5