Skip to content

Commit 368633a

Browse files
authored
Merge pull request #5 from n7space/feature-mbep#314-m2docx
Feature MBEP#314 m2docx
2 parents 5b4ecdf + 14ef8c0 commit 368633a

10 files changed

Lines changed: 440 additions & 34 deletions

File tree

LICENSE.MD2DOCX

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
This license applies to templateprocessor/md2docx.py file
2+
3+
MIT License
4+
5+
Copyright (c) 2024 Shlok T
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in all
15+
copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.

examples/generate_ecss_demo.sh

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,21 @@
11
#!/bin/bash
22
mkdir -p output
3-
4-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_1_software_static_architecture.tmplt
5-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_1_software_static_architecture.pdf output/ecss-e-st-40c_4_1_software_static_architecture.md
6-
7-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_2_software_dynamic_architecture.tmplt
8-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_2_software_dynamic_architecture.pdf output/ecss-e-st-40c_4_2_software_dynamic_architecture.md
9-
10-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_4_interfaces_context.tmplt
11-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_4_interfaces_context.pdf output/ecss-e-st-40c_4_4_interfaces_context.md
12-
13-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_2_overall_architecture.tmplt
14-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_2_overall_architecture.pdf output/ecss-e-st-40c_5_2_overall_architecture.md
15-
16-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_3_software_components_design.tmplt
17-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_3_software_components_design.pdf output/ecss-e-st-40c_5_3_software_components_design.md
18-
19-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_4_aspects_of_each_component.tmplt
20-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_4_aspects_of_each_component.pdf output/ecss-e-st-40c_5_4_aspects_of_each_component.md
21-
22-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_5_internal_interface_design.tmplt
23-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_5_internal_interface_design.pdf output/ecss-e-st-40c_5_5_internal_interface_design.md
24-
25-
template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_6_requirement_traceability.tmplt
26-
pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_6_requirement_traceability.pdf output/ecss-e-st-40c_6_requirement_traceability.md
3+
# Names of the ECSS templates to process (file names without the .tmplt suffix)
templates=(
  "ecss-e-st-40c_4_1_software_static_architecture"
  "ecss-e-st-40c_4_2_software_dynamic_architecture"
  "ecss-e-st-40c_4_4_interfaces_context"
  "ecss-e-st-40c_5_2_overall_architecture"
  "ecss-e-st-40c_5_3_software_components_design"
  "ecss-e-st-40c_5_4_aspects_of_each_component"
  "ecss-e-st-40c_5_5_internal_interface_design"
  "ecss-e-st-40c_6_requirement_traceability"
)

# Arguments shared by every template-processor invocation below
processor_args=(
  --verbosity info
  --value TARGET=ASW
  --iv demo-project/interfaceview.xml
  --dv demo-project/deploymentview.dv.xml
  -o output
)

# For each template: run template-processor with the default postprocessing,
# then with md2docx and md2html, and finally let pandoc turn the generated
# output/<template>.md into a PDF.
for template in "${templates[@]}"; do
  template_path="../data/ecss-template/${template}.tmplt"
  template-processor "${processor_args[@]}" -t "${template_path}"
  template-processor "${processor_args[@]}" -t "${template_path}" -p md2docx
  template-processor "${processor_args[@]}" -t "${template_path}" -p md2html
  pandoc --pdf-engine=pdfroff --output="output/${template}.pdf" "output/${template}.md"
done

examples/generate_so_list.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
22
mkdir -p output
33
template-processor --verbosity info --system-objects ../data/events.csv -o output -t so_list.tmplt
4-
pandoc --pdf-engine=pdfroff --output=output/so_list.pdf output/so_list.md
4+
pandoc --pdf-engine=pdfroff --output=output/so_list.pdf output/so_list.md
5+
template-processor --verbosity info --system-objects ../data/events.csv -o output -t so_list.tmplt -p md2docx

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@
33
pytest==7.4.2
44
black==24.3.0
55
mako==1.3.10
6-
6+
python-docx==1.2.0
7+
beautifulsoup4==4.12.3
8+
markdown2==2.5.4

setup.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@
3030
include_package_data=True,
3131
python_requires='>=3.8',
3232
install_requires=[
33-
"mako==1.3.10"
33+
"mako==1.3.10",
34+
"python-docx==1.2.0",
35+
"beautifulsoup4==4.12.3",
36+
"markdown2==2.5.4"
3437
],
3538
extras_require={
3639
'dev': [

templateprocessor/cli.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@
1414
from templateprocessor.soreader import SOReader
1515
from templateprocessor.dvreader import DVReader
1616
from templateprocessor.so import SystemObjectType
17+
from templateprocessor.postprocessor import (
18+
PostprocessorType,
19+
Md2docxPostprocessor,
20+
Md2HtmlPostprocessor,
21+
PassthroughPostprocessor,
22+
Postprocessor,
23+
)
1724

1825

1926
def parse_arguments() -> argparse.Namespace:
@@ -88,7 +95,7 @@ def parse_arguments() -> argparse.Namespace:
8895
parser.add_argument(
8996
"-p",
9097
"--postprocess",
91-
choices=["none", "md2docx"],
98+
choices=["none", "md2docx", "md2html"],
9299
help="Output postprocessing",
93100
default="none",
94101
)
@@ -107,6 +114,16 @@ def get_log_level(level_str: str) -> int:
107114
return log_levels.get(level_str.lower(), logging.WARNING)
108115

109116

117+
def get_postprocessor_type(type_str: str) -> PostprocessorType:
    """Map a postprocessor name to the matching PostprocessorType member.

    The name is lower-cased and compared against the values of the NONE,
    MD2DOCX and MD2HTML members; any other name yields PostprocessorType.NONE.
    """
    known_types = (
        PostprocessorType.NONE,
        PostprocessorType.MD2DOCX,
        PostprocessorType.MD2HTML,
    )
    requested = type_str.lower()
    for candidate in known_types:
        if candidate.value == requested:
            return candidate
    # Unknown names fall back to no postprocessing
    return PostprocessorType.NONE
125+
126+
110127
def get_values_dictionary(values: list[str]) -> dict[str, str]:
111128
if not values or not isinstance(values, list):
112129
return {}
@@ -143,8 +160,10 @@ def read_sots(file_names: list[str]) -> dict[str, SystemObjectType]:
143160

144161
def instantiate(
145162
instantiator: TemplateInstantiator,
163+
postprocessor: Postprocessor,
146164
template_file: str,
147165
module_directory: str,
166+
postprocessor_type: PostprocessorType,
148167
output_directory: str,
149168
):
150169
try:
@@ -157,10 +176,9 @@ def instantiate(
157176
logging.debug(f"Instantiating template:\n {template}")
158177
instantiated_template = instantiator.instantiate(template, module_directory)
159178
logging.debug(f"Instantiation:\n {instantiated_template}")
160-
output = Path(output_directory) / f"{name}.md"
161-
logging.debug(f"Saving to {output}")
162-
with open(output, "w") as f:
163-
f.write(instantiated_template)
179+
output = str(Path(output_directory) / f"{name}")
180+
logging.debug(f"Postprocessing with {postprocessor_type}")
181+
postprocessor.process(postprocessor_type, instantiated_template, output)
164182
except FileNotFoundError as e:
165183
logging.error(f"File not found: {e.filename}")
166184
except Exception as e:
@@ -173,6 +191,7 @@ def main():
173191
args = parse_arguments()
174192
logging_level = get_log_level(args.verbosity)
175193
logging.basicConfig(level=logging_level)
194+
postprocessor_type = get_postprocessor_type(args.postprocess)
176195

177196
logging.info("Template Processor")
178197
logging.debug(f"Interface View: {args.iv}")
@@ -182,6 +201,7 @@ def main():
182201
logging.debug(f"Templates: {args.template}")
183202
logging.debug(f"Output Directory: {args.output}")
184203
logging.debug(f"Module directory: {args.module_directory}")
204+
logging.debug(f"Postprocessing: {postprocessor_type.value}")
185205

186206
logging.info(f"Reading Interface View from {args.iv}")
187207
iv = IVReader().read(args.iv) if args.iv else InterfaceView()
@@ -198,10 +218,26 @@ def main():
198218
logging.info(f"Instantiating the TemplateInstantiator")
199219
instantiator = TemplateInstantiator(iv, dv, sots, values)
200220

221+
logging.info(f"Instantiating the Postprocessor")
222+
postprocessor = Postprocessor(
223+
{
224+
PostprocessorType.NONE: PassthroughPostprocessor(),
225+
PostprocessorType.MD2DOCX: Md2docxPostprocessor(),
226+
PostprocessorType.MD2HTML: Md2HtmlPostprocessor(),
227+
}
228+
)
229+
201230
if args.template:
202231
logging.info(f"Instantiating templates")
203232
for template_file in args.template:
204-
instantiate(instantiator, template_file, args.module_directory, args.output)
233+
instantiate(
234+
instantiator,
235+
postprocessor,
236+
template_file,
237+
args.module_directory,
238+
postprocessor_type,
239+
args.output,
240+
)
205241

206242
return 0
207243

templateprocessor/md2docx.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
"""
2+
3+
Markdown to DOCX conversion module extracted from md2docx-python project.
4+
5+
Project address: https://github.com/shloktech/md2docx-python/
6+
Project LICENSE: LICENSE.MD2DOCX
7+
8+
The code was extracted so that its API and features could be adapted to the needs of this project.
9+
Changes:
10+
- input is text, not file
11+
- markdown2 is used instead of markdown
12+
- table support is added via markdown2 extras and additional HTML processing
13+
14+
"""
15+
16+
import markdown2
17+
from docx import Document
18+
from bs4 import BeautifulSoup, Tag
19+
20+
21+
def get_element_text(element: Tag) -> str:
    """Return the whitespace-stripped text of a parsed HTML node.

    Objects that expose ``get_text`` are asked for their text with
    ``strip=True``; anything else is converted with ``str`` and stripped.
    """
    if not hasattr(element, "get_text"):
        return str(element).strip()
    return element.get_text(strip=True)
26+
27+
28+
def process_list_items(list_element: Tag, doc: Document, style_base: str, level=0):
    """Add the items of an HTML list to ``doc`` as styled paragraphs.

    Only the direct ``li`` children of ``list_element`` are handled here;
    lists nested inside an item are processed recursively, one level deeper.

    Args:
        list_element: The ``ul`` or ``ol`` element whose items are added.
        doc: Document that receives one paragraph per non-empty item.
        style_base: Paragraph style name used at level 0 (e.g. "List Bullet").
        level: Nesting depth; for depths above 0 the style name becomes
            ``"<style_base> <level + 1>"``.
    """
    for li in list_element.find_all("li", recursive=False):
        # Collect the item's own text, leaving nested lists to the recursion.
        # Empty fragments (whitespace-only nodes) are dropped so that joining
        # does not produce doubled spaces.
        text_parts = []
        for child in li.children:
            if child.name in ("ul", "ol"):
                continue
            part = get_element_text(child)
            if part:
                text_parts.append(part)

        text = " ".join(text_parts)

        if text:
            # NOTE(review): the document must define a style for every depth
            # used ("<style_base> 2", "<style_base> 3", ...) - confirm that
            # deeply nested lists are covered by the template.
            style = style_base if level == 0 else f"{style_base} {level + 1}"
            doc.add_paragraph(text, style=style)

        # Handle every directly nested list, in document order
        for nested in li.find_all(["ul", "ol"], recursive=False):
            nested_style = "List Bullet" if nested.name == "ul" else "List Number"
            process_list_items(nested, doc, nested_style, level + 1)
52+
53+
54+
def markdown_to_word_file(markdown_source: str, word_file_path: str):
    """Convert Markdown text to a Word document and save it to ``word_file_path``."""
    markdown_to_word_object(markdown_source).save(word_file_path)
57+
58+
59+
def markdown_to_word_object(markdown_source: str) -> Document:
    """Build a Word document from Markdown text.

    The Markdown is rendered to HTML by markdown2 (with the "tables" and
    "wiki-tables" extras) and parsed with BeautifulSoup. Each top-level HTML
    element is then mapped to document content:

    - ``h1`` .. ``h6`` become headings of the matching level,
    - ``p`` becomes a paragraph; ``strong`` and ``em`` children become bold
      and italic runs, any other child is added as plain text,
    - ``ul`` and ``ol`` become "List Bullet" and "List Number" paragraphs,
    - ``table`` becomes a "Table Grid" table whose first row is bold when
      that row contains ``th`` cells.

    Any other top-level element is skipped.

    Args:
        markdown_source: Markdown text to convert.

    Returns:
        The populated document object; nothing is written to disk here.
    """
    heading_levels = {f"h{level}": level for level in range(1, 7)}

    # Converting Markdown to HTML
    html_content = markdown2.markdown(markdown_source, extras=["tables", "wiki-tables"])

    # Creating a new Word Document
    doc = Document()

    soup = BeautifulSoup(html_content, "html.parser")

    # Adding content to the Word Document
    for element in soup:
        if element.name in heading_levels:
            doc.add_heading(element.text, level=heading_levels[element.name])
        elif element.name == "p":
            paragraph = doc.add_paragraph()
            for child in element.children:
                if child.name == "strong":
                    paragraph.add_run(child.text).bold = True
                elif child.name == "em":
                    paragraph.add_run(child.text).italic = True
                elif isinstance(child, Tag):
                    # Other inline tags: add their text, not the Tag object
                    paragraph.add_run(child.get_text())
                else:
                    paragraph.add_run(str(child))
        elif element.name == "ul":
            process_list_items(element, doc, "List Bullet")
        elif element.name == "ol":
            process_list_items(element, doc, "List Number")
        elif element.name == "table":
            rows_data = []
            for row in element.find_all("tr"):
                cells = row.find_all(["th", "td"])
                row_data = [cell.get_text(strip=True) for cell in cells]
                if row_data:
                    rows_data.append(row_data)

            if rows_data:
                # The first row decides the column count; extra cells in
                # later rows are dropped by the zip below.
                columns_count = len(rows_data[0])
                table = doc.add_table(rows=len(rows_data), cols=columns_count)
                table.style = "Table Grid"

                # Walk rows and cells once instead of re-indexing the table
                # for every single cell.
                for row_data, table_row in zip(rows_data, table.rows):
                    for cell_text, cell in zip(row_data, table_row.cells):
                        cell.text = cell_text

                # Make the first row bold if it is a header
                first_row = element.find("tr")
                if first_row and first_row.find("th"):
                    for cell in table.rows[0].cells:
                        for paragraph in cell.paragraphs:
                            for run in paragraph.runs:
                                run.bold = True

    return doc

0 commit comments

Comments
 (0)