Skip to content

Commit 24e81a9

Browse files
eamonnmcmanus authored and google-java-format Team committed
Initial support for fenced code blocks in Markdown Javadoc.
This removes one of the main areas where Markdown comments would be mangled.

PiperOrigin-RevId: 896076206
1 parent ffd5425 commit 24e81a9

File tree

7 files changed: 215 additions and 13 deletions.

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
4141
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
4242
import com.google.googlejavaformat.java.javadoc.Token.Literal;
43+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
4344
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
4445
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
4546
import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak;
@@ -134,6 +135,7 @@ private static String render(List<Token> input, int blockIndent, boolean classic
134135
case ParagraphCloseTag unused -> {}
135136
case ListItemCloseTag unused -> {}
136137
case OptionalLineBreak unused -> {}
138+
case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t);
137139
}
138140
}
139141
throw new AssertionError();

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,26 @@ private ImmutableList<Token> generateTokens() throws LexException {
127127
tokens.add(token);
128128

129129
while (!input.isExhausted()) {
130-
for (Token markdownToken : markdownPositions.tokensAt(input.position())) {
131-
boolean consumed = input.tryConsume(markdownToken.value());
132-
verify(consumed, "Did not consume markdown token: %s", markdownToken);
133-
var unused = input.readAndResetRecorded();
134-
tokens.add(markdownToken);
130+
boolean moreMarkdown;
131+
do {
132+
moreMarkdown = false;
133+
// If there are one or more markdown tokens at the current position, consume their text and
134+
// add them to the token list. If a token has non-empty text, consuming its text changes the
135+
// position, so we need to start looking for markdown tokens at the new position. It is
136+
// assumed that there are no other tokens (markdown or otherwise) in a non-empty text span
137+
// covered by a markdown token.
138+
for (Token markdownToken : markdownPositions.tokensAt(input.position())) {
139+
tokens.add(markdownToken);
140+
if (!markdownToken.value().isEmpty()) {
141+
boolean consumed = input.tryConsume(markdownToken.value());
142+
verify(consumed, "Did not consume markdown token: %s", markdownToken);
143+
var unused = input.readAndResetRecorded();
144+
moreMarkdown = true;
145+
}
146+
}
147+
} while (moreMarkdown);
148+
if (input.isExhausted()) {
149+
break;
135150
}
136151
token = readToken();
137152
tokens.add(token);

core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import static com.google.googlejavaformat.java.javadoc.JavadocWriter.RequestedWhitespace.NONE;
2424
import static com.google.googlejavaformat.java.javadoc.JavadocWriter.RequestedWhitespace.WHITESPACE;
2525

26+
import com.google.googlejavaformat.java.javadoc.Token.BrTag;
2627
import com.google.googlejavaformat.java.javadoc.Token.CodeCloseTag;
2728
import com.google.googlejavaformat.java.javadoc.Token.CodeOpenTag;
2829
import com.google.googlejavaformat.java.javadoc.Token.FooterJavadocTagStart;
@@ -33,6 +34,7 @@
3334
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
3435
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
3536
import com.google.googlejavaformat.java.javadoc.Token.Literal;
37+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
3638
import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment;
3739
import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment;
3840
import com.google.googlejavaformat.java.javadoc.Token.PreCloseTag;
@@ -310,7 +312,7 @@ void writeHtmlComment(HtmlComment token) {
310312
requestNewline();
311313
}
312314

313-
void writeBr(Token token) {
315+
void writeBr(BrTag token) {
314316
writeToken(token);
315317

316318
requestNewline();
@@ -324,6 +326,22 @@ void writeLiteral(Literal token) {
324326
writeToken(token);
325327
}
326328

329+
void writeMarkdownFencedCodeBlock(MarkdownFencedCodeBlock token) {
330+
flushWhitespace();
331+
output.append(token.start());
332+
token
333+
.literal()
334+
.lines()
335+
.forEach(
336+
line -> {
337+
writeNewline();
338+
output.append(line);
339+
});
340+
writeNewline();
341+
output.append(token.end());
342+
requestBlankLine();
343+
}
344+
327345
@Override
328346
public String toString() {
329347
return output.toString();
@@ -350,12 +368,13 @@ enum RequestedWhitespace {
350368
BLANK_LINE,
351369
}
352370

353-
private void writeToken(Token token) {
371+
private void flushWhitespace() {
354372
if (requestedMoeBeginStripComment != null) {
355373
requestNewline();
356374
}
357375

358-
if (requestedWhitespace == BLANK_LINE
376+
if (classicJavadoc
377+
&& requestedWhitespace == BLANK_LINE
359378
&& (!postWriteModifiedContinuingListStack.isEmpty() || continuingFooterTag)) {
360379
/*
361380
* We don't write blank lines inside lists or footer tags, even in cases where we otherwise
@@ -374,6 +393,14 @@ private void writeToken(Token token) {
374393
writeNewline();
375394
requestedWhitespace = NONE;
376395
}
396+
}
397+
398+
private void writeToken(Token token) {
399+
if (token.value().isEmpty()) {
400+
return;
401+
}
402+
403+
flushWhitespace();
377404
boolean needWhitespace = (requestedWhitespace == WHITESPACE);
378405

379406
/*

core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
import com.google.googlejavaformat.java.javadoc.Token.ListItemCloseTag;
2626
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
2727
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
28+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
2829
import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag;
2930
import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag;
3031
import java.util.regex.Matcher;
3132
import java.util.regex.Pattern;
3233
import org.commonmark.node.BulletList;
34+
import org.commonmark.node.FencedCodeBlock;
3335
import org.commonmark.node.Heading;
3436
import org.commonmark.node.ListItem;
3537
import org.commonmark.node.Node;
@@ -102,6 +104,25 @@ void visit(Node node) {
102104
visitNodeList(paragraph.getNext());
103105
}
104106
}
107+
case FencedCodeBlock fencedCodeBlock -> {
108+
// Any indentation before the code block is part of FencedCodeBlock. This makes sense
109+
// because the lines inside the code block must also be indented by that amount. That
110+
// indentation gets subtracted from FencedCodeBlock.getLiteral(), which is the actual text
111+
// represented by the code block.
112+
int start = startPosition(fencedCodeBlock) + fencedCodeBlock.getFenceIndent();
113+
MarkdownFencedCodeBlock token =
114+
new MarkdownFencedCodeBlock(
115+
input.substring(start, endPosition(fencedCodeBlock)),
116+
fencedCodeBlock
117+
.getFenceCharacter()
118+
.repeat(fencedCodeBlock.getOpeningFenceLength())
119+
+ fencedCodeBlock.getInfo(),
120+
fencedCodeBlock
121+
.getFenceCharacter()
122+
.repeat(fencedCodeBlock.getClosingFenceLength()),
123+
fencedCodeBlock.getLiteral());
124+
positionToToken.get(start).addLast(token);
125+
}
105126
// TODO: others
106127
default -> {}
107128
}
@@ -131,12 +152,17 @@ private void visitNodeList(Node node) {
131152
*/
132153
private void addSpan(Node node, Token startToken, Token endToken) {
133154
// We could write the first part more simply as a `put`, but we do it this way for symmetry.
134-
var first = node.getSourceSpans().getFirst();
135-
int startPosition = first.getInputIndex();
136-
positionToToken.get(startPosition).addLast(startToken);
155+
positionToToken.get(startPosition(node)).addLast(startToken);
156+
positionToToken.get(endPosition(node)).addFirst(endToken);
157+
}
158+
159+
private int startPosition(Node node) {
160+
return node.getSourceSpans().getFirst().getInputIndex();
161+
}
162+
163+
private int endPosition(Node node) {
137164
var last = node.getSourceSpans().getLast();
138-
int endPosition = last.getInputIndex() + last.getLength();
139-
positionToToken.get(endPosition).addFirst(endToken);
165+
return last.getInputIndex() + last.getLength();
140166
}
141167
}
142168

core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,27 @@ record HtmlComment(String value) implements Token {}
103103

104104
record BrTag(String value) implements Token {}
105105

106+
/**
107+
* A fenced code block, like:
108+
*
109+
* <pre>
110+
* ```java
111+
* code block
112+
* with an info string ("java")
113+
* ```
114+
* </pre>
115+
*
116+
* @param value the full text of the code block as it appeared in the input, including the start
117+
* and end fences and the literal content.
118+
* @param start the start fence, including the info string if any ({@code ```java} in the
119+
* example).
120+
* @param end the end fence.
121+
* @param literal the text that the code block represents. This does not include the start and end
122+
* fences, nor any indentation that precedes these fences and every intervening line.
123+
*/
124+
record MarkdownFencedCodeBlock(String value, String start, String end, String literal)
125+
implements Token {}
126+
106127
/**
107128
* Whitespace that is not in a {@code <pre>} or {@code <table>} section. Whitespace includes
108129
* leading newlines, asterisks, and tabs and spaces. In the output, it is translated to newlines

core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,6 +1717,60 @@ class Test {}
17171717
///
17181718
/// A following paragraph.
17191719
class Test {}
1720+
""";
1721+
doFormatTest(input, expected);
1722+
}
1723+
1724+
@Test
1725+
public void markdownFencedCodeBlocks() {
1726+
assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
1727+
// If fenced code blocks are not supported correctly, the contents of each one will be joined.
1728+
// If the input lines survive as separate lines, that means we identified the code block.
1729+
String input =
1730+
"""
1731+
/// ```
1732+
/// foo
1733+
/// bar
1734+
/// ```
1735+
///
1736+
/// - ```
1737+
/// code block
1738+
/// in a list
1739+
/// ```
1740+
///
1741+
/// ~~~java
1742+
/// code block
1743+
/// with tildes and an info string ("java")
1744+
/// ~~~
1745+
///
1746+
/// ````
1747+
/// code block
1748+
/// with more than three backticks and an extra leading space
1749+
/// ````
1750+
class Test {}
1751+
""";
1752+
String expected =
1753+
"""
1754+
/// ```
1755+
/// foo
1756+
/// bar
1757+
/// ```
1758+
///
1759+
/// - ```
1760+
/// code block
1761+
/// in a list
1762+
/// ```
1763+
///
1764+
/// ~~~java
1765+
/// code block
1766+
/// with tildes and an info string ("java")
1767+
/// ~~~
1768+
///
1769+
/// ````
1770+
/// code block
1771+
/// with more than three backticks and an extra leading space
1772+
/// ````
1773+
class Test {}
17201774
""";
17211775
doFormatTest(input, expected);
17221776
}

core/src/test/java/com/google/googlejavaformat/java/javadoc/MarkdownPositionsTest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.google.googlejavaformat.java.javadoc.Token.ListItemCloseTag;
2424
import com.google.googlejavaformat.java.javadoc.Token.ListItemOpenTag;
2525
import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag;
26+
import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock;
2627
import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag;
2728
import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag;
2829
import java.util.Map;
@@ -100,6 +101,62 @@ public void heading() {
100101
assertThat(map).isEqualTo(expected);
101102
}
102103

104+
@Test
105+
public void codeBlock() {
106+
String text =
107+
"""
108+
- ```
109+
foo
110+
bar
111+
```
112+
113+
~~~java
114+
code
115+
with tildes
116+
~~~
117+
118+
````
119+
indented code
120+
with more than three backticks
121+
````
122+
""";
123+
var positions = MarkdownPositions.parse(text);
124+
ImmutableListMultimap<Integer, Token> map = positionToToken(positions, text);
125+
int bullet = text.indexOf('-');
126+
int firstCodeStart = text.indexOf("```");
127+
int firstCodeEnd = text.indexOf("```", firstCodeStart + 3) + 3;
128+
int secondCodeStart = text.indexOf("~~~", firstCodeEnd);
129+
int secondCodeEnd = text.indexOf("~~~", secondCodeStart + 3) + 3;
130+
int thirdCodeStart = text.indexOf("````", secondCodeEnd);
131+
int thirdCodeEnd = text.indexOf("````", thirdCodeStart + 4) + 4;
132+
ImmutableListMultimap<Integer, Token> expected =
133+
ImmutableListMultimap.<Integer, Token>builder()
134+
.put(bullet, new ListOpenTag(""))
135+
.put(bullet, new ListItemOpenTag("- "))
136+
.put(
137+
firstCodeStart,
138+
new MarkdownFencedCodeBlock(
139+
text.substring(firstCodeStart, firstCodeEnd), "```", "```", "foo\nbar\n"))
140+
.put(firstCodeEnd, new ListItemCloseTag(""))
141+
.put(firstCodeEnd, new ListCloseTag(""))
142+
.put(
143+
secondCodeStart,
144+
new MarkdownFencedCodeBlock(
145+
text.substring(secondCodeStart, secondCodeEnd),
146+
"~~~java",
147+
"~~~",
148+
"code\nwith tildes\n"))
149+
.put(
150+
thirdCodeStart,
151+
new MarkdownFencedCodeBlock(
152+
text.substring(thirdCodeStart, thirdCodeEnd),
153+
"````",
154+
"````",
155+
"indented code\nwith more than three backticks\n"))
156+
.build();
157+
assertThat(map).isEqualTo(expected);
158+
}
159+
103160
private static ImmutableListMultimap<Integer, Token> positionToToken(
104161
MarkdownPositions positions, String input) {
105162
return IntStream.rangeClosed(0, input.length())

0 commit comments

Comments
 (0)