Skip to content

Commit 90ea173

Browse files
committed
feat: add CLI support for json2xml-py
- Add json2xml/cli.py with the same flags as the Go version
- Add console script entry point as json2xml-py
- Add comprehensive CLI tests (15 tests)
- Add benchmark scripts to compare Python vs Go performance

Flags: -w/--wrapper, -r/--root, -p/--pretty, -t/--type, -i/--item-wrap, -x/--xpath, -c/--cdata, -l/--list-headers, -u/--url, -s/--string, -o/--output, -v/--version, -h/--help
1 parent 0b603c7 commit 90ea173

5 files changed

Lines changed: 1041 additions & 0 deletions

File tree

benchmark.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Benchmark script for json2xml-py vs json2xml-go.
4+
5+
Compares performance of Python and Go implementations across
6+
different JSON sizes.
7+
"""
8+
from __future__ import annotations
9+
10+
import json
11+
import os
12+
import random
13+
import string
14+
import subprocess
15+
import sys
16+
import tempfile
17+
import time
18+
from pathlib import Path
19+
20+
21+
# Paths
22+
PYTHON_CLI = [sys.executable, "-m", "json2xml.cli"]
23+
GO_CLI = Path("/Users/vinitkumar/projects/go/json2xml-go/json2xml-go")
24+
EXAMPLES_DIR = Path("/Users/vinitkumar/projects/python/json2xml/examples")
25+
26+
# Colors for terminal output
27+
class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    # Foreground colors.
    RED = "\033[0;31m"
    GREEN = "\033[0;32m"
    BLUE = "\033[0;34m"
    CYAN = "\033[0;36m"
    YELLOW = "\033[1;33m"
    # Text attributes.
    BOLD = "\033[1m"
    # Reset: restores the terminal's default rendering ("no color").
    NC = "\033[0m"
35+
36+
37+
def colorize(text: str, color: str) -> str:
    """Return *text* preceded by *color* and followed by the ANSI reset code."""
    return color + text + Colors.NC
40+
41+
42+
def random_string(length: int = 10, alphabet: str = string.ascii_letters) -> str:
    """Return a random string of *length* characters drawn from *alphabet*.

    Uses the ``random`` module rather than ``secrets``: the output is
    benchmark filler data, not security-sensitive material.

    Args:
        length: Number of characters to generate.
        alphabet: Pool of characters to sample from (with replacement);
            defaults to ASCII letters, matching the original behavior.

    Returns:
        A string of exactly *length* characters; empty when *length* is 0.
    """
    return "".join(random.choices(alphabet, k=length))
45+
46+
47+
def generate_large_json(num_records: int = 1000) -> str:
    """Serialize *num_records* synthetic user records as a JSON array string.

    Each record mixes scalars, a list, and a three-level nested object so
    converters are exercised on realistic structure rather than flat data.
    """
    records = [
        {
            "id": index,
            "name": random_string(20),
            "email": f"{random_string(8)}@example.com",
            "active": random.choice([True, False]),
            "score": round(random.uniform(0, 100), 2),
            "tags": [random_string(5) for _ in range(5)],
            "metadata": {
                "created": "2024-01-15T10:30:00Z",
                "updated": "2024-01-15T12:45:00Z",
                "version": random.randint(1, 100),
                "nested": {"level1": {"level2": {"value": random_string(10)}}},
            },
        }
        for index in range(num_records)
    ]
    return json.dumps(records)
71+
72+
73+
def run_benchmark(
    cmd: list[str],
    iterations: int = 10,
    warmup: int = 2
) -> dict[str, float]:
    """Time *cmd* over several runs and summarize the wall-clock results.

    Args:
        cmd: Full command line to execute (argv list, no shell).
        iterations: Number of timed executions.
        warmup: Untimed executions run first to prime OS/file caches.

    Returns:
        Mapping with ``avg``, ``min`` and ``max`` durations in
        milliseconds; all zeros when every timed run failed.
    """
    # Untimed warmup runs; their output and status are discarded.
    for _ in range(warmup):
        subprocess.run(cmd, capture_output=True)

    samples: list[float] = []
    for _ in range(iterations):
        started = time.perf_counter()
        completed = subprocess.run(cmd, capture_output=True)
        elapsed_ms = (time.perf_counter() - started) * 1000

        # Failed runs are reported but excluded from the statistics.
        if completed.returncode != 0:
            print(f"Error: {completed.stderr.decode()}")
            continue
        samples.append(elapsed_ms)

    if not samples:
        return {"avg": 0, "min": 0, "max": 0}

    return {
        "avg": sum(samples) / len(samples),
        "min": min(samples),
        "max": max(samples),
    }
110+
111+
112+
def format_time(ms: float) -> str:
    """Render a duration given in milliseconds with a human-friendly unit.

    Sub-millisecond values are shown in microseconds, values under one
    second in milliseconds, and everything longer in seconds.
    """
    if ms >= 1000:
        return f"{ms / 1000:.2f}s"
    if ms >= 1:
        return f"{ms:.2f}ms"
    return f"{ms * 1000:.2f}µs"
120+
121+
122+
def print_header(title: str) -> None:
    """Print *title* as a bold banner framed by blue separator lines."""
    rule = colorize("=" * 50, Colors.BLUE)
    print(rule)
    print(colorize(f"  {title}", Colors.BOLD))
    print(rule)
127+
128+
129+
def print_result(name: str, result: dict[str, float]) -> None:
    """Print one benchmark result as an indented avg/min/max line."""
    stats = " | ".join(
        f"{label}: {format_time(result[key])}"
        for label, key in (("Avg", "avg"), ("Min", "min"), ("Max", "max"))
    )
    print(f"  {name}:")
    print(f"    {stats}")
135+
136+
137+
def main() -> int:
    """Run the benchmark suite comparing the Python and Go CLIs.

    Benchmarks four payload sizes (inline small, on-disk medium, and two
    generated large files), prints per-size statistics, and finishes with
    an overall speedup summary.

    Improvements over the original: the four copy-pasted benchmark
    sections are driven by a scenario table, and a missing sample file no
    longer crashes the run (it is warned about and skipped).

    Returns:
        0 on success, 1 when the Go binary has not been built.
    """
    print_header("json2xml Benchmark: Python vs Go")
    print()

    # Fail fast when the Go binary is missing: every scenario needs it.
    print(colorize("Checking prerequisites...", Colors.YELLOW))
    if not GO_CLI.exists():
        print(colorize(f"Error: Go binary not found at {GO_CLI}", Colors.RED))
        print("Please build it first: cd json2xml-go && go build -o json2xml-go ./cmd/json2xml-go")
        return 1
    print(colorize("✓ Prerequisites met", Colors.GREEN))
    print()

    iterations = 10
    results: dict[str, dict[str, dict[str, float]]] = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        # Small payload is passed inline via -s; the rest come from files.
        small_json = '{"name": "John", "age": 30, "city": "New York"}'
        medium_json_file = EXAMPLES_DIR / "bigexample.json"

        # Generated payloads live in the temp dir and are removed on exit.
        large_json_file = Path(tmpdir) / "large.json"
        large_json_file.write_text(generate_large_json(1000))
        very_large_json_file = Path(tmpdir) / "very_large.json"
        very_large_json_file.write_text(generate_large_json(5000))

        # Robustness: the original called .stat() unconditionally and
        # crashed when the sample file was absent.
        have_medium = medium_json_file.exists()

        print(colorize("Test file sizes:", Colors.CYAN))
        print(f"  Small:      {len(small_json)} bytes (inline)")
        if have_medium:
            print(f"  Medium:     {medium_json_file.stat().st_size:,} bytes")
        else:
            print(colorize(f"  Medium:     missing ({medium_json_file}) - skipped", Colors.YELLOW))
        print(f"  Large:      {large_json_file.stat().st_size:,} bytes (1000 records)")
        print(f"  Very Large: {very_large_json_file.stat().st_size:,} bytes (5000 records)")
        print()

        # One entry per scenario: (results key, banner label, CLI arguments
        # shared by the Python and Go commands).
        scenarios = [
            ("small", "Small JSON (inline string)", ["-s", small_json]),
            ("medium", "Medium JSON (bigexample.json)", [str(medium_json_file)]),
            ("large", "Large JSON (1000 records)", [str(large_json_file)]),
            ("very_large", "Very Large JSON (5000 records)", [str(very_large_json_file)]),
        ]
        for key, label, args in scenarios:
            if key == "medium" and not have_medium:
                continue
            print(colorize(f"--- {label} ---", Colors.BLUE))
            py_stats = run_benchmark(PYTHON_CLI + args, iterations)
            go_stats = run_benchmark([str(GO_CLI)] + args, iterations)
            print_result("Python", py_stats)
            print_result("Go", go_stats)
            results[key] = {"python": py_stats, "go": go_stats}
            print()

    # Per-size summary with the Python-vs-Go speedup factor.
    print_header("SUMMARY")
    print()
    for size, data in results.items():
        py_avg = data["python"]["avg"]
        go_avg = data["go"]["avg"]

        if go_avg > 0:
            speedup_str = colorize(f"{py_avg / go_avg:.1f}x faster", Colors.GREEN)
        else:
            # Avoid division by zero when all Go runs failed.
            speedup_str = "N/A"

        print(colorize(f"{size.replace('_', ' ').title()} JSON:", Colors.BOLD))
        print(f"  Python: {format_time(py_avg)}")
        print(f"  Go:     {format_time(go_avg)}")
        print(f"  Go is {speedup_str}")
        print()

    # Aggregate speedup across all scenarios that produced timings.
    total_py = sum(r["python"]["avg"] for r in results.values())
    total_go = sum(r["go"]["avg"] for r in results.values())
    if total_go > 0:
        print(colorize(
            f"Overall: Go is {total_py / total_go:.1f}x faster than Python",
            Colors.GREEN + Colors.BOLD,
        ))

    print()
    print(colorize("=" * 50, Colors.BLUE))
    print(colorize("Benchmark complete!", Colors.GREEN))
    print(colorize("=" * 50, Colors.BLUE))

    return 0
251+
252+
253+
if __name__ == "__main__":
    # Script entry point: propagate main()'s return code as the exit status.
    raise SystemExit(main())

0 commit comments

Comments (0)