Skip to content

Commit 59103b9

Browse files
committed
Add temporary replacement for MarkdownRenderer
This includes the change needed from commonmark/commonmark-java#361 and replaces the previous 0.24.1-SNAPSHOT commonmark dependency with the released 0.24.0.
1 parent a4e1738 commit 59103b9

3 files changed

Lines changed: 131 additions & 6 deletions

File tree

langchain4j-parent/pom.xml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@
9393
<infinispan.version>15.0.0.Final</infinispan.version>
9494
<kotlin.version>1.9.25</kotlin.version>
9595
<kotlinx.version>1.9.0</kotlinx.version>
96-
<!-- SNAPSHOT including https://github.com/commonmark/commonmark-java/pull/361 -->
97-
<commonmark.version>0.24.1-SNAPSHOT</commonmark.version>
96+
<commonmark.version>0.24.0</commonmark.version>
9897
</properties>
9998

10099
<dependencyManagement>

langchain4j/src/main/java/dev/langchain4j/data/document/splitter/MarkdownSectionSplitter.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.google.gson.Gson;
44
import com.google.gson.GsonBuilder;
5+
import dev.langchain4j.data.document.DefaultDocument;
56
import dev.langchain4j.data.document.Document;
67
import dev.langchain4j.data.document.DocumentSplitter;
78
import dev.langchain4j.data.document.Metadata;
@@ -23,7 +24,6 @@
2324
import org.commonmark.renderer.markdown.CoreMarkdownNodeRenderer;
2425
import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
2526
import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
26-
import org.commonmark.renderer.markdown.MarkdownRenderer;
2727

2828
import java.util.ArrayList;
2929
import java.util.Collections;
@@ -108,10 +108,10 @@ public List<TextSegment> split(Document document) {
108108
}
109109

110110
MarkdownSplitterContext context = new MarkdownSplitterContext(document.metadata());
111-
MarkdownRenderer renderer = MarkdownRenderer.builder()
111+
TempMarkdownRenderer renderer = TempMarkdownRenderer.builder()
112112
.nodeRendererFactory(new MarkdownSectionSplitterNodeRendererFactory(context))
113113
.extensions(EXTENSIONS)
114-
.build();
114+
.tempBuild();
115115
// We use the Appendable allowed by the renderer as the hook in.
116116
// I tried a few other approaches, but this is the only one I can find that works...
117117
renderer.render(node, context.getBuffer());
@@ -375,7 +375,7 @@ private void addSectionSegments(Header header, String sectionText) {
375375
// Document constructor does not like blank text
376376
sectionText = emptySectionPlaceholderText;
377377
}
378-
Document document = new Document(sectionText, metadata);
378+
Document document = new DefaultDocument(sectionText, metadata);
379379
document = documentAdjuster.adjust(document);
380380

381381
List<TextSegment> segments = sectionSplitter.split(document);
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package dev.langchain4j.data.document.splitter;
2+
3+
import org.commonmark.Extension;
4+
import org.commonmark.internal.renderer.NodeRendererMap;
5+
import org.commonmark.node.Node;
6+
import org.commonmark.renderer.NodeRenderer;
7+
import org.commonmark.renderer.Renderer;
8+
import org.commonmark.renderer.markdown.CoreMarkdownNodeRenderer;
9+
import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
10+
import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
11+
import org.commonmark.renderer.markdown.MarkdownRenderer;
12+
import org.commonmark.renderer.markdown.MarkdownWriter;
13+
14+
import java.util.ArrayList;
15+
import java.util.Collections;
16+
import java.util.HashSet;
17+
import java.util.List;
18+
import java.util.Set;
19+
20+
/**
21+
* Temporary replacement for MarkdownRenderer, including the fix in
22+
* https://github.com/commonmark/commonmark-java/pull/361.
23+
*
24+
* It contains that fix, and adjustments needed to be able to use this replacement.
25+
*/
26+
class TempMarkdownRenderer implements Renderer {
27+
28+
private final List<MarkdownNodeRendererFactory> nodeRendererFactories;
29+
30+
private TempMarkdownRenderer(Builder builder) {
31+
this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1);
32+
this.nodeRendererFactories.addAll(builder.nodeRendererFactories);
33+
// Add as last. This means clients can override the rendering of core nodes if they want.
34+
this.nodeRendererFactories.add(new MarkdownNodeRendererFactory() {
35+
@Override
36+
public NodeRenderer create(MarkdownNodeRendererContext context) {
37+
return new CoreMarkdownNodeRenderer(context);
38+
}
39+
40+
@Override
41+
public Set<Character> getSpecialCharacters() {
42+
return Set.of();
43+
}
44+
});
45+
}
46+
47+
public static Builder builder() {
48+
return new Builder();
49+
}
50+
51+
@Override
52+
public void render(Node node, Appendable output) {
53+
RendererContext context = new RendererContext(new MarkdownWriter(output));
54+
context.render(node);
55+
}
56+
57+
@Override
58+
public String render(Node node) {
59+
StringBuilder sb = new StringBuilder();
60+
render(node, sb);
61+
return sb.toString();
62+
}
63+
64+
static class Builder extends MarkdownRenderer.Builder {
65+
66+
private final List<MarkdownNodeRendererFactory> nodeRendererFactories = new ArrayList<>();
67+
68+
public TempMarkdownRenderer tempBuild() {
69+
return new TempMarkdownRenderer(this);
70+
}
71+
72+
public Builder nodeRendererFactory(MarkdownNodeRendererFactory nodeRendererFactory) {
73+
this.nodeRendererFactories.add(nodeRendererFactory);
74+
return this;
75+
}
76+
77+
public Builder extensions(Iterable<? extends Extension> extensions) {
78+
for (Extension extension : extensions) {
79+
if (extension instanceof MarkdownRenderer.MarkdownRendererExtension) {
80+
MarkdownRenderer.MarkdownRendererExtension markdownRendererExtension = (MarkdownRenderer.MarkdownRendererExtension) extension;
81+
markdownRendererExtension.extend(this);
82+
}
83+
}
84+
return this;
85+
}
86+
}
87+
88+
private class RendererContext implements MarkdownNodeRendererContext {
89+
private final MarkdownWriter writer;
90+
private final NodeRendererMap nodeRendererMap = new NodeRendererMap();
91+
private final Set<Character> additionalTextEscapes;
92+
93+
private RendererContext(MarkdownWriter writer) {
94+
// Set fields that are used by interface
95+
this.writer = writer;
96+
Set<Character> escapes = new HashSet<>();
97+
for (MarkdownNodeRendererFactory factory : nodeRendererFactories) {
98+
escapes.addAll(factory.getSpecialCharacters());
99+
}
100+
additionalTextEscapes = Collections.unmodifiableSet(escapes);
101+
102+
// The first node renderer for a node type "wins". The NodeRendererMap
103+
// disallows overwriting.
104+
for (MarkdownNodeRendererFactory nodeRendererFactory : nodeRendererFactories) {
105+
// Pass in this as context here, which uses the fields set above
106+
NodeRenderer nodeRenderer = nodeRendererFactory.create(this);
107+
nodeRendererMap.add(nodeRenderer);
108+
}
109+
}
110+
111+
@Override
112+
public MarkdownWriter getWriter() {
113+
return writer;
114+
}
115+
116+
@Override
117+
public void render(Node node) {
118+
nodeRendererMap.render(node);
119+
}
120+
121+
@Override
122+
public Set<Character> getSpecialCharacters() {
123+
return additionalTextEscapes;
124+
}
125+
}
126+
}

0 commit comments

Comments
 (0)