Skip to content

Commit 1b58f9f

Browse files
pboling and Copilot committed
Fix end_line for multi-line HTML blocks (types 1-5)
HTML block types 1-5 have explicit closing markers (e.g. </script>, -->, ?>, >, ]]>) that appear on the current line when the block is finalized. This is structurally identical to fenced code blocks, which were already handled by S_ends_on_current_line().

Without this fix, multi-line HTML blocks of types 1-5 report end_line as one less than the actual last line, because finalize() falls through to the 'ended on previous line' case (line_number - 1).

Types 6-7 correctly continue to use line_number - 1, since they end at a blank line (their last content is on the previous line).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 21eb348 commit 1b58f9f

2 files changed

Lines changed: 97 additions & 4 deletions

File tree

ext/markly/blocks.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,25 @@ static CMARK_INLINE bool S_is_space_or_tab(char c) {
7777
// - Document node (special case)
7878
// - Fenced code blocks (end on the closing fence line)
7979
// - Setext headings (end on the underline)
80-
// - Any block finalized on the same line it started (e.g., single-line HTML blocks)
80+
// - HTML blocks types 1-5 per CommonMark spec §4.6 (end on the line
81+
// containing the closing marker)
82+
// - Any block finalized on the same line it started (e.g., single-line blocks)
8183
static CMARK_INLINE bool S_ends_on_current_line(cmark_parser *parser, cmark_node *b) {
8284
return S_type(b) == CMARK_NODE_DOCUMENT ||
8385
(S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
8486
(S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext) ||
87+
// HTML block types per CommonMark spec §4.6:
88+
// 1: <script>, <pre>, <style>, <textarea> (ends at </tag>)
89+
// 2: <!-- (ends at -->)
90+
// 3: <? (ends at ?>)
91+
// 4: <! + letter (ends at >)
92+
// 5: <![CDATA[ (ends at ]]>)
93+
// All five end on the line containing their closing marker,
94+
// similar to fenced code blocks.
95+
// Types 6-7 end at a blank line, so their last content line is
96+
// the previous line and they should NOT match here.
97+
(S_type(b) == CMARK_NODE_HTML_BLOCK && b->as.html_block_type >= 1 &&
98+
b->as.html_block_type <= 5) ||
8599
// Single-line blocks: finalized on same line they started
86100
b->start_line == parser->line_number;
87101
}

test/markly/node.rb

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,16 +365,14 @@
365365
end
366366

367367
it "has correct position for multi-line HTML block" do
368-
# Multi-line HTML comment (4 lines total, but ends on line 4)
369368
doc = Markly.parse("<!--\nLine 1\nLine 2\n-->")
370369
html_node = doc.first_child
371370
pos = html_node.source_position
372371

373372
expect(html_node.type).to be == :html
374373
expect(pos[:start_line]).to be == 1
375374
# The block starts on line 1 and the closing --> is on line 4
376-
expect(pos[:end_line]).to be >= pos[:start_line]
377-
expect(pos[:end_line]).to be >= 3 # At least line 3 or more
375+
expect(pos[:end_line]).to be == 4
378376
end
379377

380378
it "has correct position for fenced code block" do
@@ -423,6 +421,87 @@
423421
expect(pos[:end_line]).to be == 1
424422
end
425423

424+
it "has correct position for multi-line HTML comment (type 2)" do
425+
doc = Markly.parse("Para 1\n\n<!--\nContent\n-->\n\nPara 2")
426+
427+
html_node = nil
428+
doc.each do |node|
429+
html_node = node if node.type == :html
430+
end
431+
432+
expect(html_node).not.to be_nil
433+
pos = html_node.source_position
434+
expect(pos[:start_line]).to be == 3
435+
expect(pos[:end_line]).to be == 5
436+
end
437+
438+
it "has correct position for multi-line script block (type 1)" do
439+
doc = Markly.parse("Para 1\n\n<script>\nalert(1);\n</script>\n\nPara 2")
440+
441+
html_node = nil
442+
doc.each do |node|
443+
html_node = node if node.type == :html
444+
end
445+
446+
expect(html_node).not.to be_nil
447+
pos = html_node.source_position
448+
expect(pos[:start_line]).to be == 3
449+
expect(pos[:end_line]).to be == 5
450+
end
451+
452+
it "has correct position for multi-line PHP block (type 3)" do
453+
doc = Markly.parse("Para 1\n\n<?php\necho;\n?>\n\nPara 2")
454+
455+
html_node = nil
456+
doc.each do |node|
457+
html_node = node if node.type == :html
458+
end
459+
460+
expect(html_node).not.to be_nil
461+
pos = html_node.source_position
462+
expect(pos[:start_line]).to be == 3
463+
expect(pos[:end_line]).to be == 5
464+
end
465+
466+
it "has correct position for multi-line declaration block (type 4)" do
467+
doc = Markly.parse("Para 1\n\n<!DOCTYPE\nhtml>\n\nPara 2")
468+
469+
html_node = nil
470+
doc.each do |node|
471+
html_node = node if node.type == :html
472+
end
473+
474+
expect(html_node).not.to be_nil
475+
pos = html_node.source_position
476+
expect(pos[:start_line]).to be == 3
477+
expect(pos[:end_line]).to be == 4
478+
end
479+
480+
it "has correct position for multi-line CDATA block (type 5)" do
481+
doc = Markly.parse("Para 1\n\n<![CDATA[\ndata\n]]>\n\nPara 2")
482+
483+
html_node = nil
484+
doc.each do |node|
485+
html_node = node if node.type == :html
486+
end
487+
488+
expect(html_node).not.to be_nil
489+
pos = html_node.source_position
490+
expect(pos[:start_line]).to be == 3
491+
expect(pos[:end_line]).to be == 5
492+
end
493+
494+
it "has correct position for blank-terminated HTML block (type 6)" do
495+
# Types 6-7 end at a blank line, so end_line is the last content line
496+
doc = Markly.parse("<div>\ncontent\n</div>\n\nPara 2")
497+
html_node = doc.first_child
498+
pos = html_node.source_position
499+
500+
expect(html_node.type).to be == :html
501+
expect(pos[:start_line]).to be == 1
502+
expect(pos[:end_line]).to be == 3
503+
end
504+
426505
it "ensures all nodes have valid position ranges" do
427506
# Comprehensive test: no node should have end_line < start_line
428507
markdown = <<~MD

0 commit comments

Comments
 (0)