-
Notifications
You must be signed in to change notification settings - Fork 83
Expand file tree
/
Copy pathTEI2LossyJSON_cli.py
More file actions
131 lines (101 loc) · 3.71 KB
/
TEI2LossyJSON_cli.py
File metadata and controls
131 lines (101 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
"""
Standalone CLI for TEI2LossyJSON converter.
This script provides a command-line interface for converting TEI XML files to JSON format
using the TEI2LossyJSONConverter.
"""
import argparse
import json
import logging
import sys
from pathlib import Path
from .TEI2LossyJSON import TEI2LossyJSONConverter
def setup_logging(verbose: bool = False):
"""Setup logging configuration."""
level = logging.INFO if verbose else logging.WARNING
logging.basicConfig(
level=level,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
def convert_single_file(input_file: Path, output_file: Path, verbose: bool = False) -> bool:
"""Convert a single TEI file to JSON format."""
try:
if verbose:
logging.info(f"Converting {input_file} to {output_file}")
converter = TEI2LossyJSONConverter()
result = converter.convert_tei_file(input_file, stream=False)
if result is None:
logging.error(f"Failed to convert {input_file}: TEI file is not well-formed or empty")
return False
# Ensure output directory exists
output_file.parent.mkdir(parents=True, exist_ok=True)
# Write JSON output
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(result, f, indent=2, ensure_ascii=False)
if verbose:
logging.info(f"Successfully converted {input_file} to {output_file}")
return True
except Exception as e:
logging.error(f"Error converting {input_file}: {str(e)}")
return False
def main():
"""Main CLI entry point."""
parser = argparse.ArgumentParser(
description="Convert TEI XML files to JSON format using TEI2LossyJSON converter",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Convert a single TEI file
python -m grobid_client.format.TEI2LossyJSON --input input.tei.xml --output output.json
# Convert with verbose logging
python -m grobid_client.format.TEI2LossyJSON --input input.tei.xml --output output.json --verbose
# Convert and output to stdout
python -m grobid_client.format.TEI2LossyJSON --input input.tei.xml
"""
)
parser.add_argument(
"--input", "-i",
type=Path,
required=True,
help="Input TEI XML file to convert"
)
parser.add_argument(
"--output", "-o",
type=Path,
help="Output JSON file (if not specified, prints to stdout)"
)
parser.add_argument(
"--verbose", "-v",
action="store_true",
help="Enable verbose logging"
)
args = parser.parse_args()
# Setup logging
setup_logging(args.verbose)
# Validate input file
if not args.input.exists():
logging.error(f"Input file does not exist: {args.input}")
sys.exit(1)
if not args.input.is_file():
logging.error(f"Input path is not a file: {args.input}")
sys.exit(1)
# Convert the file
if args.output:
success = convert_single_file(args.input, args.output, args.verbose)
sys.exit(0 if success else 1)
else:
# Output to stdout
try:
converter = TEI2LossyJSONConverter()
result = converter.convert_tei_file(args.input, stream=False)
if result is None:
logging.error(f"Failed to convert {args.input}: TEI file is not well-formed or empty")
sys.exit(1)
# Print JSON to stdout
print(json.dumps(result, indent=2, ensure_ascii=False))
except Exception as e:
logging.error(f"Error converting {args.input}: {str(e)}")
sys.exit(1)
if __name__ == "__main__":
main()