querypie
diff --git a/confluence-mdx/bin/converter/cli.py b/confluence-mdx/bin/converter/cli.py
Lines changed: 22 additions & 15 deletions
diff --git a/confluence-mdx/bin/converter/core.py b/confluence-mdx/bin/converter/core.py
Lines changed: 69 additions & 7 deletions
diff --git a/confluence-mdx/bin/reverse_sync/mdx_to_xhtml_inline.py b/confluence-mdx/bin/reverse_sync/mdx_to_xhtml_inline.py
Lines changed: 31 additions & 0 deletions
@@ -120,6 +120,9 @@ def main():
                         help='Directory to save attachments (default: output file directory)')
     parser.add_argument('--skip-image-copy', action='store_true',
                         help='이미지 파일 복사를 생략 (경로만 지정대로 생성)')
+    parser.add_argument('--language',
+                        choices=['ko', 'ja', 'en'],
+                        help='언어 코드를 명시적으로 지정 (미지정 시 출력 경로에서 자동 감지)')
     parser.add_argument('--log-level',
                         choices=['debug', 'info', 'warning', 'error', 'critical'],
                         default='info',
@@ -144,21 +147,25 @@ def main():
         output_dir = os.path.join(os.path.dirname(args.output_file), output_file_stem)
         logging.info(f"Using default attachment directory: {output_dir}")
 
-    # Extract language code from the output file path
-    path_parts = ctx.OUTPUT_FILE_PATH.split(os.sep)
-
-    # Look for 2-letter language code in the path
-    detected_language = 'en'  # Default to English
-    for part in path_parts:
-        if len(part) == 2 and part.isalpha():
-            # Check if it's a known language code
-            if part in ['ko', 'ja', 'en']:
-                detected_language = part
-                break
-
-    # Update shared LANGUAGE variable
-    ctx.LANGUAGE = detected_language
-    logging.info(f"Detected language from output path: {ctx.LANGUAGE}")
+    # Determine language: explicit --language takes precedence over path detection
+    if args.language:
+        ctx.LANGUAGE = args.language
+        logging.info(f"Language set explicitly: {ctx.LANGUAGE}")
+    else:
+        # Extract language code from the output file path
+        path_parts = ctx.OUTPUT_FILE_PATH.split(os.sep)
+
+        # Look for 2-letter language code in the path
+        detected_language = 'en'  # Default to English
+        for part in path_parts:
+            if len(part) == 2 and part.isalpha():
+                # Check if it's a known language code
+                if part in ['ko', 'ja', 'en']:
+                    detected_language = part
+                    break
+
+        ctx.LANGUAGE = detected_language
+        logging.info(f"Detected language from output path: {ctx.LANGUAGE}")
 
     try:
         with open(args.input_file, 'r', encoding='utf-8') as f:
 
@@ -116,6 +116,18 @@ def as_markdown(self, caption: Optional[str] = None, width: Optional[str] = None
             return f'[{caption}]({self.output_dir}/{self.filename})'
 
 
+def _is_unicode_punctuation(ch: str) -> bool:
+    """Return whether *ch* starts with a Unicode punctuation character.
+
+    Implements the CommonMark spec's Unicode punctuation test: the result
+    is True when the Unicode general category begins with P (punctuation)
+    or S (symbol). ASCII punctuation is included as well.
+
+    Only the first character of *ch* is inspected; an empty string
+    yields False.
+    """
+    if not ch:
+        return False
+    # Classify the first character only, so longer strings are accepted too.
+    cat = unicodedata.category(ch[0])
+    return cat.startswith('P') or cat.startswith('S')
+
+
 class SingleLineParser:
     def __init__(self, node, collector: LostInfoCollector | None = None):
         self.node = node
@@ -202,13 +214,25 @@ def convert_recursively(self, node):
                 for child in node.children:
                     self.convert_recursively(child)
             else:
-                self.markdown_lines.append(" **")
-                self.markdown_lines.append(self.markdown_of_children(node).strip())
-                self.markdown_lines.append("** ")
+                inner = self.markdown_of_children(node).strip()
+                open_sp = " " if inner and _is_unicode_punctuation(inner[0]) else ""
+                close_sp = " " if inner and _is_unicode_punctuation(inner[-1]) else ""
+                # 연속 emphasis delimiter 충돌 방지
+                if not close_sp and isinstance(node.next_sibling, Tag) and node.next_sibling.name in ('strong', 'em'):
+                    close_sp = " "
+                self.markdown_lines.append(f"{open_sp}**")
+                self.markdown_lines.append(inner)
+                self.markdown_lines.append(f"**{close_sp}")
         elif node.name in ['em']:
-            self.markdown_lines.append(" *")
-            self.markdown_lines.append(self.markdown_of_children(node).strip())
-            self.markdown_lines.append("* ")
+            inner = self.markdown_of_children(node).strip()
+            open_sp = " " if inner and _is_unicode_punctuation(inner[0]) else ""
+            close_sp = " " if inner and _is_unicode_punctuation(inner[-1]) else ""
+            # 연속 emphasis delimiter 충돌 방지
+            if not close_sp and isinstance(node.next_sibling, Tag) and node.next_sibling.name in ('strong', 'em'):
+                close_sp = " "
+            self.markdown_lines.append(f"{open_sp}*")
+            self.markdown_lines.append(inner)
+            self.markdown_lines.append(f"*{close_sp}")
         elif node.name in ['code']:
             self.markdown_lines.append("`")
             self.markdown_lines.append(self.markdown_of_children(node).strip())
@@ -617,6 +641,43 @@ def is_standalone_dash(self):
 
         return True
 
+    @staticmethod
+    def _is_trailing_empty_p(node):
+        """Return True when *node* is a trailing empty top-level <p>/<div>.
+
+        Callers use this to skip the separator in front of a trailing empty
+        <p>/<div>, which guarantees a 1:1 mapping.
+
+        A blank line (separator) between blocks is mandatory in Markdown, so
+        keeping the separator turns N trailing empty <p> elements into N+1
+        blank lines:
+
+            XHTML empty <p> count | blank lines when separator is kept
+            0                     | 0
+            1                     | 2  <- "1" is impossible
+            2                     | 3
+            N                     | N+1
+
+        No XHTML state can produce exactly one blank line, so when a user
+        edits the trailing blanks from 2 down to 1 the result cannot be
+        reproduced on roundtrip.
+
+        Skipping the separator in front of trailing empty <p> elements maps
+        N -> N (1:1), so every trailing blank count can be expressed exactly
+        in XHTML.
+
+        This applies only in the top-level [document] context, so the inside
+        of nested containers such as expand macros is not affected.
+        """
+        # Only paragraph-like containers qualify.
+        if node.name not in ('p', 'div'):
+            return False
+        # Any visible text means the element is not empty.
+        # NOTE(review): emptiness is judged by text only; an element holding
+        # just non-text children would also pass this check -- confirm intended.
+        if node.get_text(strip=True):
+            return False
+        # Restrict to direct children of the parsed document root.
+        if node.parent.name != '[document]':
+            return False
+        # "Trailing" means nothing with text follows: every later sibling must
+        # be a whitespace-only string or an element without text.
+        for sibling in node.next_siblings:
+            if isinstance(sibling, NavigableString):
+                if sibling.strip():
+                    return False
+            else:
+                if sibling.get_text(strip=True):
+                    return False
+        return True
+
     def append_empty_line_unless_first_child(self, node):
         # Convert generator to list to check length
         children_list = list(node.parent.children)
@@ -708,7 +769,8 @@ def convert_recursively(self, node):
                 self.append_empty_line_unless_first_child(node)
                 self.markdown_lines.extend(TableToHtmlTable(node, collector=self.collector).as_markdown)
         elif node.name in ['p', 'div']:
-            self.append_empty_line_unless_first_child(node)
+            if not self._is_trailing_empty_p(node):
+                self.append_empty_line_unless_first_child(node)
             child_markdown = []
             for child in node.children:
                 if isinstance(child, NavigableString):
 
@@ -6,6 +6,7 @@
 import re
 from typing import List
 
+from bs4 import BeautifulSoup, Tag
 from mdx_to_storage.inline import convert_inline
 
 
@@ -23,10 +24,14 @@ def mdx_block_to_inner_xhtml(content: str, block_type: str) -> str:
         return _convert_heading(text)
     elif block_type == 'paragraph':
         return _convert_paragraph(text)
+    elif block_type == 'callout':
+        return _convert_callout_inner(text)
     elif block_type == 'list':
         return _convert_list_content(text)
     elif block_type == 'code_block':
         return _convert_code_block(text)
+    elif block_type == 'html_block':
+        return _convert_html_block_inner(text)
     else:
         return convert_inline(text)
 
@@ -55,6 +60,17 @@ def _convert_paragraph(text: str) -> str:
     return ' '.join(converted)
 
 
+def _convert_callout_inner(text: str) -> str:
+    """callout: strip the <Callout> wrapper tags and convert the inner text as a paragraph.
+
+    The first line is dropped when it starts with ``<Callout`` and the last
+    line is dropped when it starts with ``</Callout`` (leading whitespace is
+    ignored for both checks). The remaining lines are joined, stripped, and
+    passed to ``_convert_paragraph``.
+    """
+    lines = text.splitlines()
+    # NOTE(review): whole lines are removed, so any content sharing a line
+    # with the opening or closing tag is discarded -- confirm the tags always
+    # sit on their own lines.
+    if lines and lines[0].strip().startswith('<Callout'):
+        lines = lines[1:]
+    if lines and lines[-1].strip().startswith('</Callout'):
+        lines = lines[:-1]
+    inner = '\n'.join(lines).strip()
+    return _convert_paragraph(inner)
+
+
 def _convert_code_block(text: str) -> str:
     """code_block: 펜스 마커 제거, 코드 내용만 추출."""
     lines = text.split('\n')
@@ -66,6 +82,21 @@ def _convert_code_block(text: str) -> str:
     return '\n'.join(lines)
 
 
+def _convert_html_block_inner(text: str) -> str:
+    """html_block: run inline conversion, then return only the root element's innerHTML.
+
+    html_block content includes its outer tag, as in
+    ``<table>...**bold**...</table>``, so the root element has to be peeled
+    off after inline conversion; otherwise _replace_inner_html() would
+    produce nested elements.
+
+    When the converted text contains no tag at all, it is returned unchanged.
+    """
+    converted = convert_inline(text)
+    soup = BeautifulSoup(converted, 'html.parser')
+    # NOTE(review): only the first tag is unwrapped; anything before or after
+    # it in the converted text is not part of the result.
+    root = soup.find(True)  # first tag element
+    if isinstance(root, Tag):
+        return root.decode_contents()
+    return converted
+
+
 def _convert_code_spans(text: str) -> str:
     """Convert code spans only (`text` → <code>text</code>).

     Each run of one or more non-backtick characters enclosed in single
     backticks is replaced; all other text is left as is.
     """
     return re.sub(r'`([^`]+)`', r'<code>\1</code>', text)