Skip to content

Commit c014c04

Browse files
committed
feat(ci): show actual scenario outcomes in E2E results
- Capture and display meaningful output from each sample
- Show completion messages, warnings, and actual results
- ASCII-safe output for cross-platform compatibility
- Better-organized results display with details per scenario
1 parent 1326bc4 commit c014c04

1 file changed

Lines changed: 84 additions & 8 deletions

File tree

scripts/run_agent_scenarios.py

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,12 @@ async def run_sample_module(sample_path: Path, test_inputs: dict | None = None)
7777
else:
7878
sys.argv = [str(sample_path)]
7979

80-
# Redirect output to suppress sample output
80+
# Capture output
8181
import io
82-
sys.stdout = io.StringIO()
83-
sys.stderr = io.StringIO()
82+
stdout_capture = io.StringIO()
83+
stderr_capture = io.StringIO()
84+
sys.stdout = stdout_capture
85+
sys.stderr = stderr_capture
8486
sys.stdin = io.StringIO() # Prevent waiting for input
8587

8688
spec.loader.exec_module(module)
@@ -89,7 +91,55 @@ async def run_sample_module(sample_path: Path, test_inputs: dict | None = None)
8991
if hasattr(module, "main"):
9092
timeout_s = float(os.getenv("COPILOT_E2E_TIMEOUT", "45"))
9193
await asyncio.wait_for(module.main(), timeout=timeout_s)
92-
return ScenarioResult(name, True, "OK")
94+
95+
# Get captured output
96+
output = stdout_capture.getvalue()
97+
errors = stderr_capture.getvalue()
98+
99+
# Clean output (ASCII-only for cross-platform compatibility)
100+
def clean_text(text: str) -> str:
    """Return *text* reduced to plain ASCII, with status symbols spelled out.

    Check marks, crosses and the warning sign are rewritten as ``[OK]``,
    ``[FAIL]`` and ``[WARN]``; every other non-ASCII character is removed.

    Args:
        text: Captured sample output, possibly containing emoji or other
            non-ASCII characters.

    Returns:
        An ASCII-only string. ASCII characters (including whitespace and
        newlines) are passed through unchanged.
    """
    # Keyed on the base code point only. Emoji such as the warning sign are
    # often followed by a variation selector (U+FE0F); matching the base
    # character handles both the bare and the emoji-styled form, and the
    # selector itself is stripped by the ASCII filter below.
    status_markers = str.maketrans({
        "\u2705": "[OK]",    # white heavy check mark
        "\u2713": "[OK]",    # check mark
        "\u274c": "[FAIL]",  # cross mark
        "\u2717": "[FAIL]",  # ballot x
        "\u26a0": "[WARN]",  # warning sign
    })
    translated = text.translate(status_markers)

    # Purely decorative symbols need no table entry: anything that is still
    # outside ASCII at this point is simply dropped.
    return translated.encode("ascii", "ignore").decode("ascii")
128+
129+
# Return the last meaningful line, or a fallback summary
130+
if output:
131+
output = clean_text(output)
132+
lines = [l.strip() for l in output.split('\n') if l.strip()]
133+
# Get the last substantial line (often the result)
134+
meaningful = [l for l in lines if not l.startswith(('Running', 'Loading', 'Connecting'))]
135+
if meaningful:
136+
detail = meaningful[-1][:120]
137+
else:
138+
detail = lines[-1][:120] if lines else "Completed"
139+
else:
140+
detail = "Completed successfully"
141+
142+
return ScenarioResult(name, True, detail)
93143
else:
94144
return ScenarioResult(name, False, "No main() function found")
95145
finally:
@@ -105,7 +155,7 @@ async def run_sample_module(sample_path: Path, test_inputs: dict | None = None)
105155
except SystemExit as e:
106156
# Some samples use sys.exit() for usage errors
107157
if e.code == 0:
108-
return ScenarioResult(name, True, "OK")
158+
return ScenarioResult(name, True, "Completed")
109159
return ScenarioResult(name, False, f"Exit code {e.code}")
110160
except Exception as e:
111161
error_msg = str(e)[:80]
@@ -183,7 +233,11 @@ async def run(provider: str, model: str) -> int:
183233

184234
status = "PASS" if result.ok else "FAIL"
185235
print(status)
186-
print("RESULTS")
236+
237+
# Print summary
238+
print()
239+
print("=" * 80)
240+
print("SCENARIO RESULTS")
187241
print("=" * 80)
188242
print()
189243

@@ -192,13 +246,35 @@ async def run(provider: str, model: str) -> int:
192246
for r in results:
193247
status = "PASS" if r.ok else "FAIL"
194248
marker = "+" if r.ok else "!"
195-
print(f"{marker} {r.name:25} {status:6} {r.details}")
249+
250+
# Print scenario with details
251+
print(f"{marker} {r.name}")
252+
print(f" Status: {status}")
253+
if r.details and r.details != r.name:
254+
# Wrap long details
255+
detail_lines = []
256+
current_line = ""
257+
words = r.details.split()
258+
for word in words:
259+
if len(current_line) + len(word) + 1 <= 74:
260+
current_line += (" " if current_line else "") + word
261+
else:
262+
if current_line:
263+
detail_lines.append(current_line)
264+
current_line = word
265+
if current_line:
266+
detail_lines.append(current_line)
267+
268+
print(f" Result: {detail_lines[0]}")
269+
for line in detail_lines[1:]:
270+
print(f" {line}")
271+
print()
272+
196273
if r.ok:
197274
passed += 1
198275
else:
199276
failed += 1
200277

201-
print()
202278
print("=" * 80)
203279
print(f"Summary: {passed} passed, {failed} failed out of {len(results)} scenarios")
204280
print("=" * 80)

0 commit comments

Comments
 (0)