-
Notifications
You must be signed in to change notification settings - Fork 83
Expand file tree
/
Copy pathTEI2Markdown_cli.py
More file actions
130 lines (100 loc) · 3.6 KB
/
TEI2Markdown_cli.py
File metadata and controls
130 lines (100 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3
"""
Standalone CLI for TEI2Markdown converter.
This script provides a command-line interface for converting TEI XML files to Markdown format
using the TEI2MarkdownConverter.
"""
import argparse
import logging
import sys
from pathlib import Path
from .TEI2Markdown import TEI2MarkdownConverter
def setup_logging(verbose: bool = False):
    """Configure the root logger through ``logging.basicConfig``.

    Args:
        verbose: When true, the threshold is ``logging.INFO``; otherwise it
            is ``logging.WARNING``.
    """
    if verbose:
        threshold = logging.INFO
    else:
        threshold = logging.WARNING

    # Records are rendered as "<timestamp> - <level name> - <message>".
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=threshold,
    )
def convert_single_file(input_file: Path, output_file: Path, verbose: bool = False) -> bool:
    """Convert one TEI file and write the resulting Markdown to disk.

    Args:
        input_file: Path of the TEI XML file handed to the converter.
        output_file: Destination path; missing parent directories are created.
        verbose: When true, progress messages are logged at INFO level.

    Returns:
        True once the Markdown has been written. False when the converter
        returns ``None`` or when any exception is raised along the way; the
        failure is logged as an error instead of being propagated.
    """
    try:
        if verbose:
            logging.info(f"Converting {input_file} to {output_file}")

        markdown = TEI2MarkdownConverter().convert_tei_file(input_file)
        if markdown is None:
            logging.error(f"Failed to convert {input_file}: TEI file is not well-formed or empty")
            return False

        # Create the destination directory tree before opening the file.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with output_file.open('w', encoding='utf-8') as handle:
            handle.write(markdown)

        if verbose:
            logging.info(f"Successfully converted {input_file} to {output_file}")
    except Exception as exc:
        # CLI boundary: report the problem and signal failure to the caller.
        logging.error(f"Error converting {input_file}: {exc!s}")
        return False

    return True
def _build_parser():
    """Create the argument parser for the TEI-to-Markdown command line."""
    # NOTE(review): the examples below invoke `grobid_client.format.TEI2Markdown`;
    # confirm that this is the module path that actually exposes this CLI.
    parser = argparse.ArgumentParser(
        description="Convert TEI XML files to Markdown format using TEI2Markdown converter",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Convert a single TEI file
python -m grobid_client.format.TEI2Markdown --input input.tei.xml --output output.md
# Convert with verbose logging
python -m grobid_client.format.TEI2Markdown --input input.tei.xml --output output.md --verbose
# Convert and output to stdout
python -m grobid_client.format.TEI2Markdown --input input.tei.xml
"""
    )
    parser.add_argument(
        "--input", "-i",
        type=Path,
        required=True,
        help="Input TEI XML file to convert"
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        help="Output Markdown file (if not specified, prints to stdout)"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging"
    )
    return parser


def _print_markdown(input_file) -> bool:
    """Convert *input_file* and print the Markdown to stdout; return True on success."""
    try:
        markdown = TEI2MarkdownConverter().convert_tei_file(input_file)
        if markdown is None:
            logging.error(f"Failed to convert {input_file}: TEI file is not well-formed or empty")
            return False
        print(markdown)
    except Exception as exc:
        # CLI boundary: report the problem instead of showing a traceback.
        logging.error(f"Error converting {input_file}: {exc!s}")
        return False
    return True


def main(argv=None):
    """Main CLI entry point.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default) argparse reads ``sys.argv``,
            so existing ``main()`` callers behave exactly as before.

    Raises:
        SystemExit: With code 2 on invalid arguments (raised by argparse),
            code 1 when the input path is missing, is not a file, or the
            conversion fails, and code 0 after a successful conversion to an
            output file. A successful conversion to stdout returns normally.
    """
    args = _build_parser().parse_args(argv)

    # Setup logging
    setup_logging(args.verbose)

    # Validate input file
    if not args.input.exists():
        logging.error(f"Input file does not exist: {args.input}")
        sys.exit(1)
    if not args.input.is_file():
        logging.error(f"Input path is not a file: {args.input}")
        sys.exit(1)

    # Convert to the requested output file ...
    if args.output is not None:
        success = convert_single_file(args.input, args.output, args.verbose)
        sys.exit(0 if success else 1)

    # ... or, without --output, print the Markdown to stdout.
    if not _print_markdown(args.input):
        sys.exit(1)
if __name__ == "__main__":
    # Run the CLI only when this file is executed as a script, not on import.
    main()