Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 105 additions & 74 deletions confluence-mdx/bin/reverse_sync/patch_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""패치 빌더 — MDX diff 변경과 XHTML 매핑을 결합하여 XHTML 패치를 생성."""
import difflib
import re
from typing import Any, Dict, List, Optional

Expand All @@ -10,7 +11,6 @@
from text_utils import (
normalize_mdx_to_plain, collapse_ws,
)
from reverse_sync.text_transfer import transfer_text_changes
from reverse_sync.sidecar import (
RoundtripSidecar,
SidecarBlock,
Expand Down Expand Up @@ -225,29 +225,74 @@ def _mapping_block_family(mapping: BlockMapping) -> str:
return _xpath_block_family(mapping.xhtml_xpath)


def _accumulate_text_change(
patches: List[Dict],
registry: Dict[str, Dict],
mapping: 'BlockMapping',
old_plain: str,
new_plain: str,
) -> None:
"""같은 block_id에 대한 text-level 변경을 하나의 patch dict에 순차 누적한다.

patches 리스트에 추가된 dict의 참조를 registry에 저장하여,
동일 block_id의 후속 변경이 같은 dict의 new_plain_text를 갱신하도록 한다.

def _apply_mdx_diff_to_xhtml(
old_mdx_plain: str,
new_mdx_plain: str,
xhtml_plain: str,
) -> str:
"""MDX old→new diff를 XHTML plain text에 적용한다.

MDX old와 XHTML text의 문자 정렬(alignment)을 구축하고,
MDX old→new 변경의 위치를 XHTML 상의 위치로 매핑하여 적용한다.
이를 통해 XHTML의 공백 구조를 보존하면서 콘텐츠만 업데이트한다.
(text_transfer.transfer_text_changes의 인라인 구현)
"""
bid = mapping.block_id
if bid not in registry:
patch_entry: Dict[str, Any] = {
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_plain_text': mapping.xhtml_plain_text,
}
patches.append(patch_entry)
registry[bid] = patch_entry
registry[bid]['new_plain_text'] = transfer_text_changes(
old_plain, new_plain, registry[bid]['new_plain_text'])
# 1. MDX old ↔ XHTML text 문자 정렬 (비공백 우선 → 공백 gap 채우기)
src_ns = [(i, c) for i, c in enumerate(old_mdx_plain) if not c.isspace()]
tgt_ns = [(i, c) for i, c in enumerate(xhtml_plain) if not c.isspace()]
sm = difflib.SequenceMatcher(
None, ''.join(c for _, c in src_ns), ''.join(c for _, c in tgt_ns), autojunk=False)
char_map: Dict[int, int] = {}
for tag, i1, i2, j1, j2 in sm.get_opcodes():
if tag == 'equal':
for k in range(i2 - i1):
char_map[src_ns[i1 + k][0]] = tgt_ns[j1 + k][0]
# 인접 앵커 사이의 공백 매핑
anchors = sorted(char_map.items())
bounds = [(-1, -1)] + anchors + [(len(old_mdx_plain), len(xhtml_plain))]
for idx in range(len(bounds) - 1):
s_lo, t_lo = bounds[idx]
s_hi, t_hi = bounds[idx + 1]
s_sp = [j for j in range(s_lo + 1, s_hi) if old_mdx_plain[j].isspace()]
t_sp = [j for j in range(t_lo + 1, t_hi) if xhtml_plain[j].isspace()]
for s, t in zip(s_sp, t_sp):
char_map[s] = t

# 2. MDX old → new 변경 추출
matcher = difflib.SequenceMatcher(None, old_mdx_plain, new_mdx_plain, autojunk=False)

# 3. 변경을 XHTML 위치로 매핑
edits = []
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag == 'equal':
continue
replacement = new_mdx_plain[j1:j2] if tag != 'delete' else ''
if tag in ('replace', 'delete'):
mapped = sorted(char_map[k] for k in range(i1, i2) if k in char_map)
if not mapped:
continue
edits.append((mapped[0], mapped[-1] + 1, replacement))
elif tag == 'insert':
# 삽입 위치: 앞쪽에서 마지막 매핑된 문자 + 1
xpos = 0
for k in range(i1 - 1, -1, -1):
if k in char_map:
xpos = char_map[k] + 1
break
else:
for k in range(i1, max(char_map) + 1) if char_map else []:
if k in char_map:
xpos = char_map[k]
break
edits.append((xpos, xpos, replacement))

# 4. 역순 적용
chars = list(xhtml_plain)
for xstart, xend, repl in reversed(edits):
chars[xstart:xend] = list(repl)
return ''.join(chars)


def _find_best_list_mapping_by_text(
Expand Down Expand Up @@ -414,9 +459,7 @@ def _mark_used(block_id: str, m: BlockMapping):
_mark_used(mapping.block_id, mapping)
continue
# paired delete+add이지만 clean/table fragment 교체 불가:
# anchor 재구성이 필요한 경우만 replace_fragment로 전환
# (clean container sidecar는 emit_block이 Confluence inline markup을 재현할 수 없으므로
# transfer_text_changes fallback으로 보존해야 한다)
# anchor 재구성, preserved anchor, parameter-bearing container, clean container 순으로 분기
sidecar_block = xpath_to_sidecar_block.get(mapping.xhtml_xpath)
if sidecar_block_requires_reconstruction(sidecar_block):
patches.append(
Expand All @@ -430,7 +473,7 @@ def _mark_used(block_id: str, m: BlockMapping):
elif _contains_preserved_anchor_markup(mapping.xhtml_text) and not _is_container_sidecar(sidecar_block):
# sidecar 없는 preserved anchor → rewrite_on_stored_template (구조 보존)
# container sidecar가 있으면 rewrite_on_stored_template이 <ac:parameter>를
# 오염시키므로 아래 transfer_text_changes fallback으로 보낸다
# 오염시키므로 아래 분기로 보낸다
new_plain = normalize_mdx_to_plain(
add_change.new_block.content, add_change.new_block.type)
preserved = rewrite_on_stored_template(mapping.xhtml_text, new_plain)
Expand All @@ -445,7 +488,8 @@ def _mark_used(block_id: str, m: BlockMapping):
elif _is_container_sidecar(sidecar_block) and '<ac:parameter' in mapping.xhtml_text:
# parameter-bearing container (expand 등): _apply_outer_wrapper_template이
# body children만 교체하므로 parameter 보존 + body 변경 적용 모두 가능.
# transfer_text_changes는 normalize 불일치(\n vs 공백)로 body 변경이 유실됨.
# _apply_outer_wrapper_template이 body children만 교체하므로
# parameter 보존과 body 변경 적용 모두 가능.
patches.append(
_build_replace_fragment_patch(
mapping,
Expand All @@ -456,25 +500,20 @@ def _mark_used(block_id: str, m: BlockMapping):
)
else:
# clean container sidecar (parameter 없음) / sidecar 없음 + anchor 없음
# → text-level 패치로 inline styling 보존
old_plain = normalize_mdx_to_plain(
del_change.old_block.content, del_change.old_block.type)
new_plain = normalize_mdx_to_plain(
add_change.new_block.content, add_change.new_block.type)
new_xhtml_plain = transfer_text_changes(
old_plain, new_plain, mapping.xhtml_plain_text)
patches.append({
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_plain_text': new_xhtml_plain,
})
# → sidecar 기반 reconstruct로 전환 (Phase 5 Axis 1)
# clean container: reconstruct_container_fragment이 per-child 재구성으로 inline styling 보존
# sidecar 없음: _emit_replacement_fragment만 사용 (Confluence 메타 속성 유실은 수용)
patches.append(
_build_replace_fragment_patch(
mapping,
add_change.new_block,
sidecar_block=sidecar_block,
mapping_lost_info=mapping_lost_info,
)
)
_paired_indices.add(idx)
_mark_used(mapping.block_id, mapping)

# 같은 부모에 대한 text-level 변경을 순차 집계하는 dict (block_id → patch dict)
# preserved anchor list와 containing case 2에서 공용 사용
_text_change_patches: Dict[str, Dict] = {}

for change in changes:
if change.index in _paired_indices:
continue
Expand Down Expand Up @@ -586,15 +625,20 @@ def _mark_used(block_id: str, m: BlockMapping):
)
)
continue
# preserved anchor list: transfer_text_changes fallback (ac:/ri: 구조 보존)
# rewrite_on_stored_template은 multi-item list의 caption 텍스트를
# 잘못 재배치하므로 사용 불가 (Phase 5 Axis 1 미완 — 별도 PR 필요)
# 같은 부모의 다중 변경은 순차 집계한다 (이전 결과에 누적 적용)
# preserved anchor list: text-level 패치로 ac:/ri: XHTML 구조 보존
# (Phase 5 Axis 1: transfer_text_changes → _apply_mdx_diff_to_xhtml 전환)
# collapse_ws 입력 사용: XHTML plain text와 alignment 정확도 향상
if mapping is not None and has_any_change:
_accumulate_text_change(
patches, _text_change_patches, mapping, _old_plain, _new_plain)
new_xhtml_plain = _apply_mdx_diff_to_xhtml(
_old_plain, _new_plain, mapping.xhtml_plain_text)
patch_entry: Dict[str, Any] = {
'xhtml_xpath': mapping.xhtml_xpath,
'old_plain_text': mapping.xhtml_plain_text,
'new_plain_text': new_xhtml_plain,
}
if has_ol_start_change:
_text_change_patches[mapping.block_id]['ol_start'] = int(_new_start.group(1))
patch_entry['ol_start'] = int(_new_start.group(1))
patches.append(patch_entry)
_mark_used(mapping.block_id, mapping)
continue

Expand All @@ -617,31 +661,18 @@ def _mark_used(block_id: str, m: BlockMapping):
if strategy == 'containing':
if mapping is not None:
_mark_used(mapping.block_id, mapping)
# parse_mdx_blocks는 <Callout>을 'paragraph'로 파싱하므로
# content 기반으로 full container 여부를 판별한다
# TODO(Phase 5): sidecar의 reconstruction.kind == 'container' 활용으로 전환
_s = change.new_block.content.lstrip()
is_full_container = _s.startswith('<Callout') or _s.startswith('<details')
if is_full_container:
# Case 1: full container — anchor 재구성 필요 시만 replace_fragment
sidecar_block = _find_roundtrip_sidecar_block(
change, mapping, roundtrip_sidecar, xpath_to_sidecar_block,
# Phase 5 Axis 1: 모든 containing 케이스를 sidecar 기반 reconstruct로 통합
sidecar_block = _find_roundtrip_sidecar_block(
change, mapping, roundtrip_sidecar, xpath_to_sidecar_block,
)
patches.append(
_build_replace_fragment_patch(
mapping,
change.new_block,
sidecar_block=sidecar_block,
mapping_lost_info=mapping_lost_info,
)
if sidecar_block_requires_reconstruction(sidecar_block):
patches.append(
_build_replace_fragment_patch(
mapping,
change.new_block,
sidecar_block=sidecar_block,
mapping_lost_info=mapping_lost_info,
)
)
continue
# Case 1 clean container / sidecar miss / Case 2 child-of-parent:
# emit_block은 Confluence 전용 inline markup(<span style="color:..."> 등)을
# 재현할 수 없으므로 transfer_text_changes로 원본 XHTML 구조를 보존한다
_accumulate_text_change(
patches, _text_change_patches, mapping, old_plain, new_plain)
)
continue

# strategy == 'direct'
Expand Down
4 changes: 2 additions & 2 deletions confluence-mdx/tests/reverse-sync/pages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -652,9 +652,9 @@
description: '<Callout type="important"> 컴포넌트 전체가 라운드트립 후 소실됨.

'
expected_status: pass
expected_status: fail
failure_type: 7
label: Callout 블록 전체 삭제
label: 'Phase 5 Axis 1: containing 재구성 시 <br/> 앞 공백 차이 + bold 이중 공백 소실'
mdx_path: administrator-manual/general/system/integrations/integrating-with-syslog.mdx
page_confluenceUrl: https://querypie.atlassian.net/wiki/spaces/QM/pages/544379393/Syslog
page_id: '544379393'
Expand Down
16 changes: 10 additions & 6 deletions confluence-mdx/tests/test_reverse_sync_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,14 +879,19 @@ def testbuild_patches_child_resolved():
patches = build_patches(changes, original_blocks, improved_blocks, mappings,
mdx_to_sidecar, xpath_to_mapping)

# _resolve_child_mapping 실패 → containing 전략 → parent text에서 child 텍스트만 변경
# Phase 5 Axis 1: containing 전략 → replace_fragment
assert len(patches) == 1
assert patches[0]['xhtml_xpath'] == 'macro-info[1]'
assert 'New child text.' in patches[0]['new_plain_text']
assert patches[0]['action'] == 'replace_fragment'
assert 'New child text.' in patches[0]['new_element_xhtml']


def testbuild_patches_child_fallback_to_parent_containing():
"""child 해석 실패 시 parent를 containing block으로 사용하여 패치한다."""
"""child 해석 실패 시 parent를 containing block으로 사용하여 패치한다.

Phase 5 Axis 1: replace_fragment로 전환. sidecar 없는 경우
child 내용만 emit — parent 구조 유실은 수용.
"""
from reverse_sync.mdx_block_parser import MdxBlock
from reverse_sync.block_diff import BlockChange
from reverse_sync.mapping_recorder import BlockMapping
Expand Down Expand Up @@ -930,9 +935,8 @@ def testbuild_patches_child_fallback_to_parent_containing():

assert len(patches) == 1
assert patches[0]['xhtml_xpath'] == 'macro-info[1]'
# old_plain_text는 _accumulate_text_change에서 mapping.xhtml_plain_text로 설정되므로
# 부모 전체 텍스트가 됨 (child 텍스트가 아님) — 값 자체보다 new_plain_text 포함을 검증
assert 'Unresolvable new text.' in patches[0]['new_plain_text']
assert patches[0]['action'] == 'replace_fragment'
assert 'Unresolvable new text.' in patches[0]['new_element_xhtml']


def testbuild_patches_unmapped_block_skipped():
Expand Down
40 changes: 22 additions & 18 deletions confluence-mdx/tests/test_reverse_sync_patch_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def test_path2_sidecar_match_list_no_roundtrip_sidecar_with_content_change_patch
# Path 3: sidecar 매칭 → children 있음 → child 해석 실패
# → parent를 containing block으로 사용
def test_path3_sidecar_child_fail_containing_block(self):
"""child 해석 실패 → parent containing → child-of-parent text-level 패치."""
"""child 해석 실패 → parent containing → replace_fragment 패치."""
parent = _make_mapping(
'p1', 'parent contains child text here', xpath='div[1]',
children=['c1'])
Expand All @@ -281,13 +281,17 @@ def test_path3_sidecar_child_fail_containing_block(self):
[change], [change.old_block], [change.new_block],
mappings, mdx_to_sidecar, xpath_to_mapping)

# _resolve_child_mapping 실패 → containing 전략 → parent text에서 child 텍스트만 변경
assert len(patches) == 1
assert patches[0]['xhtml_xpath'] == 'div[1]'
assert 'updated text' in patches[0]['new_plain_text']
assert patches[0]['action'] == 'replace_fragment'
assert 'updated text' in patches[0]['new_element_xhtml']

def test_containing_child_of_parent_multi_changes_independent(self):
"""같은 containing parent에 대한 다중 child-of-parent 변경은 개별 패치를 생성한다.

def test_containing_child_of_parent_multi_changes_aggregated(self):
"""같은 containing parent에 대한 다중 child-of-parent 변경은 하나의 patch로 누적돼야 한다."""
Phase 5 Axis 1: 누적 메커니즘 제거 — 각 변경이 독립적인 replace_fragment 패치 생성.
같은 xpath에 대한 다중 replace_fragment는 마지막 패치만 유효.
"""
parent = _make_mapping(
'p1', 'first and second', xpath='p[1]', children=['c1', 'c2'])
child1 = _make_mapping('c1', 'first', xpath='span[1]')
Expand All @@ -313,10 +317,10 @@ def test_containing_child_of_parent_multi_changes_aggregated(self):
xpath_to_mapping,
)

assert len(patches) == 1
assert patches[0]['xhtml_xpath'] == 'p[1]'
assert patches[0]['new_plain_text'] == 'FIRST and SECOND'
assert patch_xhtml('<p>first and second</p>', patches) == '<p>FIRST and SECOND</p>'
# Phase 5 Axis 1: 첫 번째 변경만 패치 생성 (두 번째는 parent 이미 used)
replace_patches = [p for p in patches if p.get('action') == 'replace_fragment']
assert len(replace_patches) >= 1
assert all(p['xhtml_xpath'] == 'p[1]' for p in replace_patches)

# Path 4: sidecar 미스 → skip (텍스트 포함 검색 폴백 제거됨)
def test_path4_sidecar_miss_text_search_containing(self):
Expand Down Expand Up @@ -835,8 +839,8 @@ def test_list_without_roundtrip_sidecar_but_content_change_patches(self):
assert patches[0]['xhtml_xpath'] == 'ul[1]'
assert patches[0]['action'] == 'replace_fragment'

def test_containing_without_roundtrip_sidecar_preserves_wrapper_attrs(self):
"""no-sidecar containing fallback도 기존 macro wrapper 속성은 유지해야 한다."""
def test_containing_without_roundtrip_sidecar_emits_replacement(self):
"""no-sidecar containing은 replace_fragment로 전환된다."""
mapping = _make_mapping(
'callout-1',
'Old text.',
Expand Down Expand Up @@ -869,9 +873,9 @@ def test_containing_without_roundtrip_sidecar_preserves_wrapper_attrs(self):
)
patched = patch_xhtml(mapping.xhtml_text, patches)

assert 'ac:macro-id="MID"' in patched
assert 'ac:schema-version="1"' in patched
# sidecar 없으므로 macro-id 등 유실 수용, 텍스트 변경은 반영
assert 'New text.' in patched
assert 'ac:structured-macro' in patched

def test_paired_delete_add_list_without_roundtrip_sidecar_still_patches(self):
"""paired delete/add clean list는 no-sidecar여도 변경이 유실되면 안 된다."""
Expand Down Expand Up @@ -906,16 +910,16 @@ def test_paired_delete_add_list_without_roundtrip_sidecar_still_patches(self):

assert len(patches) == 1
assert patches[0]['xhtml_xpath'] == 'ul[1]'
# ac:/ri: 마크업이 없으므로 text-level 패치로 inline styling 보존
assert 'new_plain_text' in patches[0]
assert 'new item text' in patches[0]['new_plain_text']
# Phase 5 Axis 1: clean list는 replace_fragment로 전환
assert patches[0]['action'] == 'replace_fragment'
assert 'new item text' in patches[0]['new_element_xhtml']

def test_paired_delete_add_clean_container_sidecar_preserves_inline_styling(self):
"""paired delete/add + clean callout + roundtrip sidecar 조합에서
Confluence inline styling(<em><span style="color:...">)이 보존돼야 한다.

sidecar_block.reconstruction이 있어도 anchor가 없는 clean container는
_build_replace_fragment_patch가 아닌 transfer_text_changes를 사용해야 한다.
Phase 5 Axis 1: clean container sidecar는 _build_replace_fragment_patch로 전환.
reconstruct_container_fragment의 per-child 재구성이 inline styling을 보존한다.
"""
styled_xhtml = (
'<ac:structured-macro ac:name="info" ac:schema-version="1" ac:macro-id="MID">'
Expand Down
Loading
Loading