From 2482465cab6c5883ff17296ab7901ffb0a687e49 Mon Sep 17 00:00:00 2001
From: st0012
Date: Sun, 1 Mar 2026 12:11:32 +0000
Subject: [PATCH] Enable break_on_newline extension by default for Markdown
Enable the existing break_on_newline extension in DEFAULT_EXTENSIONS so
the Markdown parser converts soft line breaks to HardBreak objects. This
produces visible `<br>` line breaks in HTML output, matching GFM rendering.
The conversion happens in the Markdown parser's paragraph() method,
which is the proper place for Markdown-specific behavior. The generic
accept_paragraph in ToHtml is unchanged and has no Markdown-specific
logic.
Also remove the CJK-aware newline-to-space gsub from accept_paragraph,
which is no longer needed. The RDoc markup parser already handles newline
joining at parse time in build_paragraph.
---
lib/rdoc/markdown.kpeg | 1 +
lib/rdoc/markdown.rb | 1 +
lib/rdoc/markup/to_html.rb | 3 -
test/rdoc/markup/to_html_test.rb | 38 +-
test/rdoc/parser/changelog_test.rb | 4 +-
test/rdoc/rdoc_markdown_test.rb | 47 +-
test/rdoc/rdoc_markdown_test_test.rb | 619 ++++++++++++++-------------
7 files changed, 348 insertions(+), 365 deletions(-)
diff --git a/lib/rdoc/markdown.kpeg b/lib/rdoc/markdown.kpeg
index d95a88a823..6896326740 100644
--- a/lib/rdoc/markdown.kpeg
+++ b/lib/rdoc/markdown.kpeg
@@ -202,6 +202,7 @@
# Extensions enabled by default
DEFAULT_EXTENSIONS = [
+ :break_on_newline,
:definition_lists,
:github,
:html,
diff --git a/lib/rdoc/markdown.rb b/lib/rdoc/markdown.rb
index e4d0ae9ff6..f62fcbd173 100644
--- a/lib/rdoc/markdown.rb
+++ b/lib/rdoc/markdown.rb
@@ -587,6 +587,7 @@ def self.rule_info(name, rendered)
# Extensions enabled by default
DEFAULT_EXTENSIONS = [
+ :break_on_newline,
:definition_lists,
:github,
:html,
diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
index 90eff47840..be195011ad 100644
--- a/lib/rdoc/markup/to_html.rb
+++ b/lib/rdoc/markup/to_html.rb
@@ -313,9 +313,6 @@ def accept_block_quote(block_quote)
def accept_paragraph(paragraph)
@res << "\n"
text = paragraph.text @hard_break
- text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
- defined?($2) && ' '
- }
@res << to_html(text)
@res << "
\n"
end
diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb
index 26d224eac2..92b033e481 100644
--- a/test/rdoc/markup/to_html_test.rb
+++ b/test/rdoc/markup/to_html_test.rb
@@ -236,7 +236,7 @@ def accept_paragraph_br
end
def accept_paragraph_break
- assert_equal "\nhello
world
\n", @to.res.join
+ assert_equal "\nhello
\nworld
\n", @to.res.join
end
def accept_paragraph_i
@@ -411,43 +411,13 @@ def test_accept_heading_dedup_resets_on_start_accepting
end
def test_accept_paragraph_newline
- hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
- worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]
- ohayo, sekai = %W"\u{304a 306f 3088 3046} \u{4e16 754c}"
-
- hellos.product(worlds) do |hello, world|
- @to.start_accepting
- @to.accept_paragraph para("#{hello}\n", "#{world}\n")
- assert_equal "\n#{hello} #{world}
\n", @to.res.join
- end
-
- hellos.each do |hello|
- @to.start_accepting
- @to.accept_paragraph para("#{hello}\n", "#{sekai}\n")
- assert_equal "\n#{hello}#{sekai}
\n", @to.res.join
- end
-
- worlds.each do |world|
- @to.start_accepting
- @to.accept_paragraph para("#{ohayo}\n", "#{world}\n")
- assert_equal "\n#{ohayo}#{world}
\n", @to.res.join
- end
-
@to.start_accepting
- @to.accept_paragraph para("#{ohayo}\n", "#{sekai}\n")
- assert_equal "\n#{ohayo}#{sekai}
\n", @to.res.join
+ @to.accept_paragraph para("hello\n", "world\n")
+ assert_equal "\nhello\nworld\n
\n", @to.res.join
@to.start_accepting
@to.accept_paragraph para("+hello+\n", "world\n")
- assert_equal "\nhello world
\n", @to.res.join
-
- @to.start_accepting
- @to.accept_paragraph para("hello\n", "+world+\n")
- assert_equal "\nhello world
\n", @to.res.join
-
- @to.start_accepting
- @to.accept_paragraph para("+hello+\n", "+world+\n")
- assert_equal "\nhello world
\n", @to.res.join
+ assert_equal "\nhello\nworld\n
\n", @to.res.join
end
def test_accept_heading_output_decoration
diff --git a/test/rdoc/parser/changelog_test.rb b/test/rdoc/parser/changelog_test.rb
index 533c012a54..7d091dc3a9 100644
--- a/test/rdoc/parser/changelog_test.rb
+++ b/test/rdoc/parser/changelog_test.rb
@@ -421,8 +421,8 @@ def test_scan_git
'Masataka Pocke Kuwabara', 'kuwabara@pocke.me', '2021-01-01 14:25:08 +0900',
[head(4, 'Make args info for RubyVM::AST to available on endless method without parens'),
head(5, 'Problem'),
- para("Arguments information is missing for endless method without parens.\n" +
- "For example:"),
+ para("Arguments information is missing for endless method without parens.",
+ hard_break, "For example:"),
verb("# ok\n").tap {|v| v.format = :ruby},
para('It causes an error if a program expects args node exists.'),
head(5, 'Solution'),
diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb
index 608974d7c9..5813c8b90a 100644
--- a/test/rdoc/rdoc_markdown_test.rb
+++ b/test/rdoc/rdoc_markdown_test.rb
@@ -56,7 +56,7 @@ def test_parse_block_quote
expected =
doc(
block(
- para("this is\na block quote")))
+ para("this is", hard_break, "a block quote")))
assert_equal expected, doc
end
@@ -70,11 +70,22 @@ def test_parse_block_quote_continue
expected =
doc(
block(
- para("this is\na block quote")))
+ para("this is", hard_break, "a block quote")))
assert_equal expected, doc
end
+ def test_parse_block_quote_continue_html
+ doc = parse <<-BLOCK_QUOTE
+> this is
+a block quote
+ BLOCK_QUOTE
+
+ html = doc.accept(RDoc::Markup::ToHtml.new)
+
+ assert_include html, "this is
\na block quote
"
+ end
+
def test_parse_block_quote_list
doc = parse <<-BLOCK_QUOTE
> text
@@ -104,7 +115,7 @@ def test_parse_block_quote_newline
expected =
doc(
block(
- para("this is\na block quote")))
+ para("this is", hard_break, "a block quote")))
assert_equal expected, doc
end
@@ -120,7 +131,7 @@ def test_parse_block_quote_separate
expected =
doc(
block(
- para("this is\na block quote"),
+ para("this is", hard_break, "a block quote"),
para("that continues")))
assert_equal expected, doc
@@ -262,7 +273,7 @@ def test_parse_code_github
assert_equal expected, parse(doc)
expected =
- doc(para("Example:\n\n""code goes here\n"))
+ doc(para("Example:", hard_break, "\n""code goes here\n"))
assert_equal expected, parse(doc.sub(/^\n/, ''))
end
@@ -296,7 +307,7 @@ def test_parse_code_github_format
assert_equal expected, parse(doc)
expected =
- doc(para("Example:\nruby\n""code goes here\n"))
+ doc(para("Example:", hard_break, "ruby\n""code goes here\n"))
assert_equal expected, parse(doc.sub(/^\n/, ''))
end
@@ -343,7 +354,7 @@ def test_parse_definition_list_indents
item(%w[one], para("Indented one characters")),
item(%w[two], para("Indented two characters")),
item(%w[three], para("Indented three characters"))),
- para("four\n : Indented four characters"))
+ para("four", hard_break, " : Indented four characters"))
assert_equal expected, doc
end
@@ -392,9 +403,9 @@ def test_parse_definition_list_multi_line
expected = doc(
list(:NOTE,
item(%w[one],
- para("This is a definition\nthat extends to two lines")),
+ para("This is a definition", hard_break, "that extends to two lines")),
item(%w[two],
- para("This is another definition\nthat also extends to two lines"))))
+ para("This is another definition", hard_break, "that also extends to two lines"))))
assert_equal expected, doc
end
@@ -430,8 +441,8 @@ def test_parse_definition_list_no
MD
expected = doc(
- para("one\n: This is a definition"),
- para("two\n: This is another definition"))
+ para("one", hard_break, ": This is a definition"),
+ para("two", hard_break, ": This is another definition"))
assert_equal expected, doc
end
@@ -779,7 +790,7 @@ def test_parse_list_bullet_multiline
expected = doc(
list(:BULLET,
- item(nil, para("one\n two"))))
+ item(nil, para("one", hard_break, " two"))))
assert_equal expected, doc
end
@@ -832,7 +843,7 @@ def test_parse_list_bullet_nest_continue
para("outer"),
list(:BULLET,
item(nil,
- para("inner\n continue inner")))),
+ para("inner", hard_break, " continue inner")))),
item(nil,
para("outer 2"))))
@@ -899,7 +910,7 @@ def test_parse_note_indent
expected = doc(
para("Some text.{*1}[rdoc-label:foottext-1:footmark-1]"),
rule(1),
- para("{^1}[rdoc-label:footmark-1:foottext-1] With a footnote\n\nmore"))
+ para("{^1}[rdoc-label:footmark-1:foottext-1] With a footnote", hard_break, "more"))
assert_equal expected, doc
end
@@ -940,8 +951,10 @@ def test_parse_note_multiple
MD
expected = doc(
- para("Some text{*1}[rdoc-label:foottext-1:footmark-1]\n" +
- "with inline notes{*2}[rdoc-label:foottext-2:footmark-2]\n" +
+ para("Some text{*1}[rdoc-label:foottext-1:footmark-1]",
+ hard_break,
+ "with inline notes{*2}[rdoc-label:foottext-2:footmark-2]",
+ hard_break,
"and an extra note.{*3}[rdoc-label:foottext-3:footmark-3]"),
rule(1),
@@ -1040,7 +1053,7 @@ def test_parse_paragraph_indent_three
def test_parse_paragraph_multiline
doc = parse "one\ntwo"
- expected = doc(para("one\ntwo"))
+ expected = doc(para("one", hard_break, "two"))
assert_equal expected, doc
end
diff --git a/test/rdoc/rdoc_markdown_test_test.rb b/test/rdoc/rdoc_markdown_test_test.rb
index ce9481e7ed..b0e88ed130 100644
--- a/test/rdoc/rdoc_markdown_test_test.rb
+++ b/test/rdoc/rdoc_markdown_test_test.rb
@@ -139,8 +139,8 @@ def test_backslash_escapes
para("Plus: \\+"),
para("Minus: \\-"),
- para("These should get escaped, even though they're matching pairs for\n" +
- "other Markdown constructs:"),
+ para("These should get escaped, even though they're matching pairs for",
+ hard_break, "other Markdown constructs:"),
para("\\*asterisks\\*"),
para("\\_underscores\\_"),
@@ -221,13 +221,13 @@ def test_hard_wrapped_paragraphs_with_list_like_lines
expected =
doc(
- para("In Markdown 1.0.0 and earlier. Version\n" +
- "8. This line turns into a list item.\n" +
- "Because a hard-wrapped line in the\n" +
- "middle of a paragraph looked like a\n" +
- "list item."),
- para("Here's one with a bullet.\n" +
- "\\* criminey."))
+ para("In Markdown 1.0.0 and earlier. Version",
+ hard_break, "8. This line turns into a list item.",
+ hard_break, "Because a hard-wrapped line in the",
+ hard_break, "middle of a paragraph looked like a",
+ hard_break, "list item."),
+ para("Here's one with a bullet.",
+ hard_break, "\\* criminey."))
assert_equal expected, doc
end
@@ -502,33 +502,33 @@ def test_markdown_documentation_basics
head(2, "Getting the Gist of Markdown's Formatting Syntax"),
- para("This page offers a brief overview of what it's like to use Markdown.\n" +
- "The {syntax page}[/projects/markdown/syntax] provides complete, detailed documentation for\n" +
- "every feature, but Markdown should be very easy to pick up simply by\n" +
- "looking at a few examples of it in action. The examples on this page\n" +
- "are written in a before/after style, showing example syntax and the\n" +
- "HTML output produced by Markdown."),
+ para("This page offers a brief overview of what it's like to use Markdown.",
+ hard_break, "The {syntax page}[/projects/markdown/syntax] provides complete, detailed documentation for",
+ hard_break, "every feature, but Markdown should be very easy to pick up simply by",
+ hard_break, "looking at a few examples of it in action. The examples on this page",
+ hard_break, "are written in a before/after style, showing example syntax and the",
+ hard_break, "HTML output produced by Markdown."),
- para("It's also helpful to simply try Markdown out; the {Dingus}[/projects/markdown/dingus] is a\n" +
- "web application that allows you type your own Markdown-formatted text\n" +
- "and translate it to XHTML."),
+ para("It's also helpful to simply try Markdown out; the {Dingus}[/projects/markdown/dingus] is a",
+ hard_break, "web application that allows you type your own Markdown-formatted text",
+ hard_break, "and translate it to XHTML."),
- para("Note: This document is itself written using Markdown; you\n" +
- "can {see the source for it by adding '.text' to the URL}[/projects/markdown/basics.text]."),
+ para("Note: This document is itself written using Markdown; you",
+ hard_break, "can {see the source for it by adding '.text' to the URL}[/projects/markdown/basics.text]."),
head(2, "Paragraphs, Headers, Blockquotes"),
- para("A paragraph is simply one or more consecutive lines of text, separated\n" +
- "by one or more blank lines. (A blank line is any line that looks like a\n" +
- "blank line -- a line containing nothing spaces or tabs is considered\n" +
- "blank.) Normal paragraphs should not be intended with spaces or tabs."),
+ para("A paragraph is simply one or more consecutive lines of text, separated",
+ hard_break, "by one or more blank lines. (A blank line is any line that looks like a",
+ hard_break, "blank line -- a line containing nothing spaces or tabs is considered",
+ hard_break, "blank.) Normal paragraphs should not be intended with spaces or tabs."),
- para("Markdown offers two styles of headers: _Setext_ and _atx_.\n" +
- "Setext-style headers for and are created by\n" +
- "\"underlining\" with equal signs (=) and hyphens (-), respectively.\n" +
- "To create an atx-style header, you put 1-6 hash marks (#) at the\n" +
- "beginning of the line -- the number of hashes equals the resulting\n" +
- "HTML header level."),
+ para("Markdown offers two styles of headers: _Setext_ and _atx_.",
+ hard_break, "Setext-style headers for and are created by",
+ hard_break, "\"underlining\" with equal signs (=) and hyphens (-), respectively.",
+ hard_break, "To create an atx-style header, you put 1-6 hash marks (#) at the",
+ hard_break, "beginning of the line -- the number of hashes equals the resulting",
+ hard_break, "HTML header level."),
para("Blockquotes are indicated using email-style '>' angle brackets."),
@@ -599,9 +599,9 @@ def test_markdown_documentation_basics
head(2, "Lists"),
- para("Unordered (bulleted) lists use asterisks, pluses, and hyphens (*,\n" +
- "+, and -) as list markers. These three markers are\n" +
- "interchangeable; this:"),
+ para("Unordered (bulleted) lists use asterisks, pluses, and hyphens (*,",
+ hard_break, "+, and -) as list markers. These three markers are",
+ hard_break, "interchangeable; this:"),
verb("* Candy.\n",
"* Gum.\n",
@@ -627,8 +627,8 @@ def test_markdown_documentation_basics
"Booze.\n",
"\n"),
- para("Ordered (numbered) lists use regular numbers, followed by periods, as\n" +
- "list markers:"),
+ para("Ordered (numbered) lists use regular numbers, followed by periods, as",
+ hard_break, "list markers:"),
verb("1. Red\n",
"2. Green\n",
@@ -642,9 +642,9 @@ def test_markdown_documentation_basics
"Blue\n",
"\n"),
- para("If you put blank lines between items, you'll get tags for the\n" +
- "list item text. You can create multi-paragraph list items by indenting\n" +
- "the paragraphs by 4 spaces or 1 tab:"),
+ para("If you put blank lines between items, you'll get tags for the",
+ hard_break, "list item text. You can create multi-paragraph list items by indenting",
+ hard_break, "the paragraphs by 4 spaces or 1 tab:"),
verb("* A list item.\n",
"\n",
@@ -662,12 +662,12 @@ def test_markdown_documentation_basics
head(3, "Links"),
- para("Markdown supports two styles for creating links: _inline_ and\n" +
- "_reference_. With both styles, you use square brackets to delimit the\n" +
- "text you want to turn into a link."),
+ para("Markdown supports two styles for creating links: _inline_ and",
+ hard_break, "_reference_. With both styles, you use square brackets to delimit the",
+ hard_break, "text you want to turn into a link."),
- para("Inline-style links use parentheses immediately after the link text.\n" +
- "For example:"),
+ para("Inline-style links use parentheses immediately after the link text.",
+ hard_break, "For example:"),
verb("This is an [example link](http://example.com/).\n"),
@@ -685,8 +685,8 @@ def test_markdown_documentation_basics
verb("This is an \n",
"example link.
\n"),
- para("Reference-style links allow you to refer to your links by names, which\n" +
- "you define elsewhere in your document:"),
+ para("Reference-style links allow you to refer to your links by names, which",
+ hard_break, "you define elsewhere in your document:"),
verb("I get 10 times more traffic from [Google][1] than from\n",
"[Yahoo][2] or [MSN][3].\n",
@@ -702,8 +702,8 @@ def test_markdown_documentation_basics
"title=\"Yahoo Search\">Yahoo or MSN.
\n"),
- para("The title attribute is optional. Link names may contain letters,\n" +
- "numbers and spaces, but are _not_ case sensitive:"),
+ para("The title attribute is optional. Link names may contain letters,",
+ hard_break, "numbers and spaces, but are _not_ case sensitive:"),
verb("I start my morning with a cup of coffee and\n",
"[The New York Times][NY Times].\n",
@@ -735,10 +735,10 @@ def test_markdown_documentation_basics
head(3, "Code"),
- para("In a regular paragraph, you can create code span by wrapping text in\n" +
- "backtick quotes. Any ampersands (&) and angle brackets (< or\n" +
- ">) will automatically be translated into HTML entities. This makes\n" +
- "it easy to use Markdown to write about HTML example code:"),
+ para("In a regular paragraph, you can create code span by wrapping text in",
+ hard_break, "backtick quotes. Any ampersands (&) and angle brackets (< or",
+ hard_break, ">) will automatically be translated into HTML entities. This makes",
+ hard_break, "it easy to use Markdown to write about HTML example code:"),
verb(
"I strongly recommend against using any `