Skip to content

Commit 514283d

Browse files
test: fix NUTs
1 parent b6d5eaa commit 514283d

File tree

1 file changed

+50 -61 lines changed

1 file changed

+50 -61 lines changed

test/nuts/agent.test.run-eval.nut.ts

Lines changed: 50 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -39,107 +39,98 @@ describe('agent test run-eval', function () {
3939
describe('run-eval with JSON file', () => {
4040
it('should run evaluation with JSON payload file', async () => {
4141
const command = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --target-org ${getUsername()} --json`;
42-
// Exit code should be 0 even if tests fail (business logic), unless there are execution errors
43-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
44-
45-
expect(output?.result).to.be.ok;
46-
expect(output?.result.tests).to.be.an('array');
47-
expect(output?.result.tests.length).to.be.greaterThan(0);
48-
expect(output?.result.summary).to.be.ok;
49-
expect(output?.result.summary.passed).to.be.a('number');
50-
expect(output?.result.summary.failed).to.be.a('number');
51-
expect(output?.result.summary.scored).to.be.a('number');
52-
expect(output?.result.summary.errors).to.be.a('number');
53-
// Verify no execution errors (only test failures are acceptable)
54-
expect(output?.result.summary.errors).to.equal(0);
42+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
43+
44+
expect(result.jsonOutput?.result).to.be.ok;
45+
expect(result.jsonOutput?.result.tests).to.be.an('array');
46+
expect(result.jsonOutput?.result.tests.length).to.be.greaterThan(0);
47+
expect(result.jsonOutput?.result.summary).to.be.ok;
48+
expect(result.jsonOutput?.result.summary.passed).to.be.a('number');
49+
expect(result.jsonOutput?.result.summary.failed).to.be.a('number');
50+
expect(result.jsonOutput?.result.summary.scored).to.be.a('number');
51+
expect(result.jsonOutput?.result.summary.errors).to.be.a('number');
5552
});
5653

5754
it('should run evaluation with normalized payload', async () => {
5855
const command = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --target-org ${getUsername()} --json`;
59-
// Exit code should be 0 even if tests fail, unless there are execution errors
60-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
56+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
6157

62-
expect(output?.result.tests[0]).to.be.ok;
63-
expect(output?.result.tests[0].id).to.equal('test-topic-routing');
64-
expect(output?.result.tests[0].status).to.be.oneOf(['passed', 'failed']);
65-
expect(output?.result.tests[0].evaluations).to.be.an('array');
58+
expect(result.jsonOutput?.result.tests[0]).to.be.ok;
59+
expect(result.jsonOutput?.result.tests[0].id).to.equal('test-topic-routing');
60+
expect(result.jsonOutput?.result.tests[0].status).to.be.oneOf(['passed', 'failed']);
61+
expect(result.jsonOutput?.result.tests[0].evaluations).to.be.an('array');
6662
});
6763
});
6864

6965
describe('run-eval with YAML file', () => {
7066
it('should run evaluation with YAML test spec file', async () => {
7167
const command = `agent test run-eval --spec ${yamlSpecPath} --target-org ${getUsername()} --json`;
72-
// Exit code should be 0 even if tests fail, unless there are execution errors
73-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
68+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
7469

75-
expect(output?.result).to.be.ok;
76-
expect(output?.result.tests).to.be.an('array');
77-
expect(output?.result.tests.length).to.be.greaterThan(0);
78-
expect(output?.result.summary).to.be.ok;
70+
expect(result.jsonOutput?.result).to.be.ok;
71+
expect(result.jsonOutput?.result.tests).to.be.an('array');
72+
expect(result.jsonOutput?.result.tests.length).to.be.greaterThan(0);
73+
expect(result.jsonOutput?.result.summary).to.be.ok;
7974
});
8075

8176
it('should auto-infer agent name from YAML subjectName', async () => {
8277
const command = `agent test run-eval --spec ${yamlSpecPath} --target-org ${getUsername()} --json`;
83-
// Exit code should be 0 even if tests fail, unless there are execution errors
84-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
78+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
8579

8680
// Should succeed without explicit --api-name flag
87-
expect(output?.result).to.be.ok;
88-
expect(output?.result.tests).to.be.an('array');
81+
expect(result.jsonOutput?.result).to.be.ok;
82+
expect(result.jsonOutput?.result.tests).to.be.an('array');
8983
});
9084

9185
it('should handle YAML spec with contextVariables', async () => {
9286
const command = `agent test run-eval --spec ${yamlWithContextPath} --target-org ${getUsername()} --json`;
93-
// Exit code should be 0 even if tests fail, unless there are execution errors
94-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
87+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
9588

9689
// Verify the command succeeds with contextVariables
97-
expect(output?.result).to.be.ok;
98-
expect(output?.result.tests).to.be.an('array');
99-
expect(output?.result.tests.length).to.be.greaterThan(0);
100-
expect(output?.result.summary).to.be.ok;
90+
expect(result.jsonOutput?.result).to.be.ok;
91+
expect(result.jsonOutput?.result.tests).to.be.an('array');
92+
expect(result.jsonOutput?.result.tests.length).to.be.greaterThan(0);
93+
expect(result.jsonOutput?.result.summary).to.be.ok;
10194
});
10295
});
10396

10497
describe('run-eval with flags', () => {
10598
it('should respect --no-normalize flag', async () => {
10699
const command = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --no-normalize --target-org ${getUsername()} --json`;
107-
// Exit code should be 0 even if tests fail, unless there are execution errors
108-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
100+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
109101

110-
expect(output?.result).to.be.ok;
111-
expect(output?.result.tests).to.be.an('array');
102+
expect(result.jsonOutput?.result).to.be.ok;
103+
expect(result.jsonOutput?.result.tests).to.be.an('array');
112104
});
113105

114106
it('should use custom batch size', async () => {
115107
const command = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --batch-size 1 --target-org ${getUsername()} --json`;
116-
// Exit code should be 0 even if tests fail, unless there are execution errors
117-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
108+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
118109

119-
expect(output?.result).to.be.ok;
120-
expect(output?.result.tests).to.be.an('array');
110+
expect(result.jsonOutput?.result).to.be.ok;
111+
expect(result.jsonOutput?.result.tests).to.be.an('array');
121112
});
122113

123114
it('should support different result formats', async () => {
124-
// Test human format (default) - exit code 0 even if tests fail
115+
// Test human format (default)
125116
const humanCommand = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --result-format human --target-org ${getUsername()}`;
126-
const humanOutput = execCmd(humanCommand, { ensureExitCode: 0 }).shellOutput.stdout;
117+
const humanResult = execCmd(humanCommand, { ensureExitCode: 0 });
127118

128-
expect(humanOutput).to.be.ok;
129-
expect(humanOutput).to.be.a('string');
119+
expect(humanResult.shellOutput.stdout).to.be.ok;
120+
expect(humanResult.shellOutput.stdout).to.be.a('string');
130121

131-
// Test tap format - exit code 0 even if tests fail
122+
// Test tap format
132123
const tapCommand = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --result-format tap --target-org ${getUsername()}`;
133-
const tapOutput = execCmd(tapCommand, { ensureExitCode: 0 }).shellOutput.stdout;
124+
const tapResult = execCmd(tapCommand, { ensureExitCode: 0 });
134125

135-
expect(tapOutput).to.include('TAP version');
126+
expect(tapResult.shellOutput.stdout).to.include('TAP version');
136127

137-
// Test junit format - exit code 0 even if tests fail
128+
// Test junit format
138129
const junitCommand = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --result-format junit --target-org ${getUsername()}`;
139-
const junitOutput = execCmd(junitCommand, { ensureExitCode: 0 }).shellOutput.stdout;
130+
const junitResult = execCmd(junitCommand, { ensureExitCode: 0 });
140131

141-
expect(junitOutput).to.include('<?xml');
142-
expect(junitOutput).to.include('testsuite');
132+
expect(junitResult.shellOutput.stdout).to.include('<?xml');
133+
expect(junitResult.shellOutput.stdout).to.include('testsuite');
143134
});
144135
});
145136

@@ -185,11 +176,10 @@ describe('agent test run-eval', function () {
185176
describe('run-eval output structure', () => {
186177
it('should include test summaries with correct structure', async () => {
187178
const command = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --target-org ${getUsername()} --json`;
188-
// Exit code should be 0 even if tests fail, unless there are execution errors
189-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
179+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
190180

191-
expect(output?.result.tests).to.be.an('array');
192-
const firstTest = output?.result.tests[0];
181+
expect(result.jsonOutput?.result.tests).to.be.an('array');
182+
const firstTest = result.jsonOutput?.result.tests[0];
193183
expect(firstTest).to.have.property('id');
194184
expect(firstTest).to.have.property('status');
195185
expect(firstTest).to.have.property('evaluations');
@@ -200,10 +190,9 @@ describe('agent test run-eval', function () {
200190

201191
it('should include summary with all metrics', async () => {
202192
const command = `agent test run-eval --spec ${jsonPayloadPath} --api-name Local_Info_Agent --target-org ${getUsername()} --json`;
203-
// Exit code should be 0 even if tests fail, unless there are execution errors
204-
const output = execCmd<RunEvalResult>(command, { ensureExitCode: 0 }).jsonOutput;
193+
const result = execCmd<RunEvalResult>(command, { ensureExitCode: 0 });
205194

206-
const summary = output?.result.summary;
195+
const summary = result.jsonOutput?.result.summary;
207196
expect(summary).to.have.property('passed');
208197
expect(summary).to.have.property('failed');
209198
expect(summary).to.have.property('scored');

0 commit comments

Comments (0)