forked from Mercidaiha/IRT-Router
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerge_csv.py
More file actions
71 lines (56 loc) · 2.06 KB
/
merge_csv.py
File metadata and controls
71 lines (56 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python3
"""Merge (concatenate) two CSV files with the same schema.
- Writes the header once (from the first file).
- Streams rows so it works with large files.
- Validates that both input headers match exactly.
Usage:
    python merge_csv.py --input1 input1.csv --input2 input2.csv --output-file output.csv
"""
from __future__ import annotations
import argparse
import csv
import sys
from pathlib import Path
# Raise the csv field size limit as high as the platform allows so very large
# fields can be read. The limit is stored in a C long, which is narrower than
# sys.maxsize on some platforms (e.g. 64-bit Windows); there the direct call
# raises OverflowError, so halve the value until it is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
    except OverflowError:
        _field_limit //= 2
    else:
        break
def merge_csv(input1: Path, input2: Path, output: Path) -> None:
    """Concatenate two CSV files that share a header into ``output``.

    The header row is written once (taken from ``input1``), followed by the
    data rows of ``input1`` and then those of ``input2``. Rows are passed
    from the csv readers straight to the writer instead of being collected
    in memory first.

    Args:
        input1: First input CSV, read as UTF-8.
        input2: Second input CSV, read as UTF-8.
        output: Destination CSV, written as UTF-8. Missing parent
            directories are created; an existing file is overwritten.

    Raises:
        ValueError: If ``output`` resolves to the same path as either input,
            if either input has no header row, or if the two headers differ.
        OSError: Propagated from the underlying file operations.
    """
    # Compare resolved paths so that a different spelling of the same file
    # (relative vs. absolute, ".." segments, symlinks) is still caught.
    # Opening such an output in "w" mode would truncate the input being read.
    resolved_inputs = (input1.resolve(), input2.resolve())
    if output.resolve() in resolved_inputs:
        raise ValueError("Output path must be different from input paths")

    with input1.open("r", newline="", encoding="utf-8") as f1, input2.open(
        "r", newline="", encoding="utf-8"
    ) as f2:
        r1 = csv.reader(f1)
        r2 = csv.reader(f2)

        # A reader that yields nothing means the file has no header row.
        header1 = next(r1, None)
        if header1 is None:
            raise ValueError(f"{input1} is empty")
        header2 = next(r2, None)
        if header2 is None:
            raise ValueError(f"{input2} is empty")

        if header1 != header2:
            raise ValueError(
                "CSV headers do not match.\n"
                f"{input1} header: {header1}\n"
                f"{input2} header: {header2}"
            )

        # The output is only created after both headers have been validated,
        # so a failed validation leaves no partial output file behind.
        output.parent.mkdir(parents=True, exist_ok=True)
        with output.open("w", newline="", encoding="utf-8") as out:
            w = csv.writer(out)
            w.writerow(header1)
            # Both readers are already positioned past their header rows.
            w.writerows(r1)
            w.writerows(r2)
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and merge the two CSV files.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` if the merge raised an exception (its message
        is printed to stderr).

    Raises:
        SystemExit: Raised by argparse when a required flag is missing or the
            arguments are otherwise invalid.
    """
    p = argparse.ArgumentParser(description="Merge two CSV files with the same schema")
    # All three paths are mandatory. Without required=True an omitted flag
    # parses as None and only fails later, inside the merge, with a confusing
    # error instead of a usage message naming the missing flag.
    p.add_argument("--input1", type=Path, required=True, help="First input CSV")
    p.add_argument("--input2", type=Path, required=True, help="Second input CSV")
    p.add_argument("--output-file", type=Path, required=True, help="Output CSV")
    args = p.parse_args(argv)

    try:
        merge_csv(args.input1, args.input2, args.output_file)
    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it through the
        # exit status instead of showing a traceback.
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())