Skip to content

Commit 9d0016e

Browse files
committed
#1602 - Upgrade dependencies
- Revert migration to Jackson and instead upgrade to modern JAXB
1 parent 8676525 commit 9d0016e

31 files changed

Lines changed: 326 additions & 361 deletions

File tree

dkpro-core-io-bioc-asl/pom.xml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,12 @@
2828
<url>https://dkpro.github.io/dkpro-core/</url>
2929
<dependencies>
3030
<dependency>
31-
<groupId>com.fasterxml.jackson.core</groupId>
32-
<artifactId>jackson-databind</artifactId>
31+
<groupId>jakarta.xml.bind</groupId>
32+
<artifactId>jakarta.xml.bind-api</artifactId>
3333
</dependency>
3434
<dependency>
35-
<groupId>com.fasterxml.jackson.core</groupId>
36-
<artifactId>jackson-annotations</artifactId>
37-
</dependency>
38-
<dependency>
39-
<groupId>com.fasterxml.jackson.dataformat</groupId>
40-
<artifactId>jackson-dataformat-xml</artifactId>
35+
<groupId>org.glassfish.jaxb</groupId>
36+
<artifactId>jaxb-runtime</artifactId>
4137
</dependency>
4238
<dependency>
4339
<groupId>org.apache.uima</groupId>

dkpro-core-io-bioc-asl/src/main/java/org/dkpro/core/io/bioc/BioCReader.java

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@
2020
import static org.dkpro.core.io.bioc.BioCComponent.addCollectionMetadataField;
2121

2222
import java.io.IOException;
23-
import java.io.StringWriter;
2423
import java.util.Optional;
2524

26-
import javax.xml.stream.XMLOutputFactory;
2725
import javax.xml.stream.XMLStreamException;
2826

2927
import org.apache.uima.UimaContext;
@@ -38,10 +36,10 @@
3836
import org.dkpro.core.io.bioc.internal.BioCToCas;
3937
import org.dkpro.core.io.bioc.internal.model.BioCDocument;
4038

41-
import com.fasterxml.jackson.databind.DeserializationFeature;
42-
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
43-
4439
import eu.openminted.share.annotations.api.DocumentationResource;
40+
import jakarta.xml.bind.JAXBContext;
41+
import jakarta.xml.bind.JAXBException;
42+
import jakarta.xml.bind.Unmarshaller;
4543

4644
/**
4745
* Reader for the BioC format.
@@ -53,20 +51,26 @@
5351
public class BioCReader
5452
extends BioCReaderImplBase
5553
{
56-
private XmlMapper mapper;
54+
private JAXBContext context;
55+
private Unmarshaller unmarshaller;
5756
private Optional<BioCDocument> nextDocument;
5857

5958
@Override
6059
public void initialize(UimaContext aContext) throws ResourceInitializationException
6160
{
6261
super.initialize(aContext);
63-
mapper = new XmlMapper();
64-
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
62+
63+
try {
64+
context = JAXBContext.newInstance(BioCDocument.class);
65+
}
66+
catch (JAXBException e) {
67+
throw new ResourceInitializationException(e);
68+
}
6569

6670
try {
6771
nextDocument = nextBioCDocument();
6872
}
69-
catch (CollectionException | XMLStreamException | IOException e) {
73+
catch (CollectionException | XMLStreamException | JAXBException | IOException e) {
7074
throw new ResourceInitializationException(e);
7175
}
7276
}
@@ -90,14 +94,14 @@ public void getNext(JCas aJCas) throws IOException, CollectionException
9094
// DocumentMetaData.get(aJCas).setDocumentId(document.getId());
9195
// }
9296

93-
var jb = new JCasBuilder(aJCas);
97+
JCasBuilder jb = new JCasBuilder(aJCas);
9498
new BioCToCas().readDocument(jb, document);
9599
jb.close();
96100

97101
try {
98102
nextDocument = nextBioCDocument();
99103
}
100-
catch (XMLStreamException | IOException e) {
104+
catch (XMLStreamException | JAXBException e) {
101105
throw new IOException(e);
102106
}
103107
}
@@ -109,7 +113,7 @@ public boolean hasNext() throws IOException, CollectionException
109113
}
110114

111115
private Optional<BioCDocument> nextBioCDocument()
112-
throws XMLStreamException, CollectionException, IOException
116+
throws XMLStreamException, JAXBException, CollectionException, IOException
113117
{
114118
if (!isFileOpen()) {
115119
openNextFile();
@@ -127,43 +131,26 @@ private Optional<BioCDocument> nextBioCDocument()
127131
protected void openNextFile() throws IOException, XMLStreamException, CollectionException
128132
{
129133
super.openNextFile();
130-
// no-op for XmlMapper-based parsing
134+
try {
135+
unmarshaller = context.createUnmarshaller();
136+
}
137+
catch (JAXBException e) {
138+
throw new IOException(e); // propagate: without 'throw' the failure was silently discarded
139+
}
131140
}
132141

133142
@Override
134143
protected void closeFile()
135144
{
136-
// mapper is reused, nothing to clear per-file
145+
unmarshaller = null;
137146
super.closeFile();
138147
}
139148

140-
private Optional<BioCDocument> nextBioCDocumentInFile() throws XMLStreamException, IOException
149+
private Optional<BioCDocument> nextBioCDocumentInFile() throws XMLStreamException, JAXBException
141150
{
142151
if (seekNextBioCDocumentInFile()) {
143-
// Serialize the current <document> event sequence to a string and parse with XmlMapper
144-
var sw = new StringWriter();
145-
var outFactory = XMLOutputFactory.newFactory();
146-
var xew = outFactory.createXMLEventWriter(sw);
147-
148-
int depth = 0;
149-
while (getXmlEventReader().hasNext()) {
150-
var e = getXmlEventReader().nextEvent();
151-
xew.add(e);
152-
if (e.isStartElement()) {
153-
depth++;
154-
}
155-
else if (e.isEndElement()) {
156-
depth--;
157-
if (depth == 0) {
158-
break;
159-
}
160-
}
161-
}
162-
xew.flush();
163-
xew.close();
164-
165-
var xml = sw.toString();
166-
var document = mapper.readValue(xml, BioCDocument.class);
152+
var document = unmarshaller.unmarshal(getXmlEventReader(), BioCDocument.class)
153+
.getValue();
167154
return Optional.of(document);
168155
}
169156

dkpro-core-io-bioc-asl/src/main/java/org/dkpro/core/io/bioc/BioCWriter.java

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
import static org.dkpro.core.io.bioc.BioCComponent.getCollectionMetadataField;
2121

22-
import java.nio.charset.StandardCharsets;
23-
2422
import org.apache.uima.UimaContext;
2523
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
2624
import org.apache.uima.fit.descriptor.ConfigurationParameter;
@@ -35,11 +33,11 @@
3533
import org.dkpro.core.io.bioc.internal.CasToBioC;
3634
import org.dkpro.core.io.bioc.internal.model.BioCCollection;
3735

38-
import com.fasterxml.jackson.databind.SerializationFeature;
39-
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
40-
4136
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
4237
import eu.openminted.share.annotations.api.DocumentationResource;
38+
import jakarta.xml.bind.JAXBContext;
39+
import jakarta.xml.bind.JAXBException;
40+
import jakarta.xml.bind.Marshaller;
4341

4442
/**
4543
* Writer for the BioC format.
@@ -75,19 +73,22 @@ public class BioCWriter
7573
defaultValue = ComponentParameters.DEFAULT_ENCODING)
7674
private String targetEncoding;
7775

78-
private XmlMapper mapper;
76+
private JAXBContext context;
77+
private Marshaller marshaller;
7978

8079
@Override
8180
public void initialize(UimaContext aContext) throws ResourceInitializationException
8281
{
8382
super.initialize(aContext);
84-
mapper = new XmlMapper();
85-
mapper.configure(SerializationFeature.INDENT_OUTPUT, indent);
86-
mapper.setSerializationInclusion(
87-
com.fasterxml.jackson.annotation.JsonInclude.Include.NON_EMPTY);
88-
mapper.getFactory().configure(
89-
com.fasterxml.jackson.dataformat.xml.ser.ToXmlGenerator.Feature.WRITE_XML_DECLARATION,
90-
false);
83+
try {
84+
context = JAXBContext.newInstance(BioCCollection.class);
85+
marshaller = context.createMarshaller();
86+
marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, indent);
87+
marshaller.setProperty(Marshaller.JAXB_ENCODING, targetEncoding);
88+
}
89+
catch (JAXBException e) {
90+
throw new ResourceInitializationException(e);
91+
}
9192
}
9293

9394
@Override
@@ -110,24 +111,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException
110111

111112
new CasToBioC().convert(aJCas, bioCCollection);
112113

113-
var xml = mapper.writeValueAsString(bioCCollection);
114-
115-
// Replace 2-space indents with 4-space indents
116-
if (indent) {
117-
var pattern = java.util.regex.Pattern.compile("(?m)^( )+");
118-
var matcher = pattern.matcher(xml);
119-
var sb = new StringBuffer();
120-
while (matcher.find()) {
121-
int spaces = matcher.group().length();
122-
matcher.appendReplacement(sb, " ".repeat(spaces * 2));
123-
}
124-
matcher.appendTail(sb);
125-
xml = sb.toString();
126-
}
127-
128-
var encoding = targetEncoding != null ? targetEncoding
129-
: StandardCharsets.UTF_8.name();
130-
docOS.write(xml.getBytes(encoding));
114+
marshaller.marshal(bioCCollection, docOS);
131115
}
132116
catch (Exception e) {
133117
throw new AnalysisEngineProcessException(e);

dkpro-core-io-bioc-asl/src/main/java/org/dkpro/core/io/bioc/internal/BioCToCas.java

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,11 @@
3434
import static org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XCasSchemaAnalyzer.isRelationLayer;
3535
import static org.slf4j.LoggerFactory.getLogger;
3636

37-
import java.io.IOException;
3837
import java.io.InputStream;
39-
import java.io.StringWriter;
4038
import java.util.LinkedHashMap;
4139
import java.util.List;
4240
import java.util.Map;
4341

44-
import javax.xml.stream.XMLOutputFactory;
4542
import javax.xml.stream.XMLStreamException;
4643

4744
import org.apache.uima.cas.Type;
@@ -58,17 +55,16 @@
5855
import org.dkpro.core.io.bioc.internal.model.BioCSentence;
5956
import org.slf4j.Logger;
6057

61-
import com.fasterxml.jackson.databind.DeserializationFeature;
62-
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
63-
6458
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Div;
6559
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
60+
import jakarta.xml.bind.JAXBContext;
61+
import jakarta.xml.bind.JAXBException;
6662

6763
public class BioCToCas
6864
{
6965
private static final Logger LOG = getLogger(lookup().lookupClass());
7066

71-
public void parseXml(InputStream aReader, JCas aJCas) throws XMLStreamException, IOException
67+
public void parseXml(InputStream aReader, JCas aJCas) throws XMLStreamException, JAXBException
7268
{
7369
var collection = loadBioCCollection(aReader);
7470

@@ -83,26 +79,15 @@ public void convert(BioCCollection collection, JCas aJCas)
8379
}
8480

8581
public BioCCollection loadBioCCollection(InputStream aReader)
86-
throws XMLStreamException, IOException
82+
throws XMLStreamException, JAXBException
8783
{
8884
var xmlInputFactory = XmlParserUtils.newXmlInputFactory();
8985
var xmlEventReader = xmlInputFactory.createXMLEventReader(aReader);
9086

91-
var sw = new StringWriter();
92-
var outFactory = XMLOutputFactory.newFactory();
93-
var xew = outFactory.createXMLEventWriter(sw);
94-
while (xmlEventReader.hasNext()) {
95-
var e = xmlEventReader.nextEvent();
96-
xew.add(e);
97-
}
98-
xew.flush();
99-
xew.close();
100-
101-
var xml = sw.toString();
102-
var mapper = new XmlMapper();
103-
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
87+
var context = JAXBContext.newInstance(BioCCollection.class);
88+
var unmarshaller = context.createUnmarshaller();
10489

105-
var collection = mapper.readValue(xml, BioCCollection.class);
90+
var collection = unmarshaller.unmarshal(xmlEventReader, BioCCollection.class).getValue();
10691
return collection;
10792
}
10893

@@ -241,7 +226,7 @@ private void readRelations(JCasBuilder aBuilder, List<BioCRelation> aRelations,
241226
var infons = bioCRelation.infonMap();
242227
var nodes = bioCRelation.nodeMap();
243228

244-
var uimaType = guessBestRelationType(cas.getTypeSystem(), infons);
229+
Type uimaType = guessBestRelationType(cas.getTypeSystem(), infons);
245230
if (uimaType == null || !isRelationLayer(uimaType)) {
246231
LOG.debug("Unable to find suitable UIMA type for relation annotation");
247232
continue;

dkpro-core-io-bioc-asl/src/main/java/org/dkpro/core/io/bioc/internal/model/BioCAnnotation.java

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,17 @@
1919

2020
import java.util.List;
2121

22-
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
23-
import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper;
24-
import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty;
22+
import jakarta.xml.bind.annotation.XmlAttribute;
23+
import jakarta.xml.bind.annotation.XmlElement;
2524

26-
@JsonPropertyOrder({ "infons", "id", "locations", "text" })
2725
public class BioCAnnotation
2826
extends BioCObject
2927
{
30-
@JacksonXmlProperty(isAttribute = true, localName = "id")
3128
private String id;
32-
33-
@JacksonXmlElementWrapper(useWrapping = false)
34-
@JacksonXmlProperty(localName = "location")
3529
private List<BioCLocation> locations;
36-
37-
@JacksonXmlProperty(localName = "text")
3830
private String text;
3931

32+
@XmlAttribute(name = "id")
4033
public String getId()
4134
{
4235
return id;
@@ -47,23 +40,26 @@ public void setId(String aId)
4740
id = aId;
4841
}
4942

50-
public String getText()
43+
@XmlElement(name = "location")
44+
public List<BioCLocation> getLocations()
5145
{
52-
return text;
46+
return locations;
5347
}
5448

55-
public void setText(String aText)
49+
public void setLocations(List<BioCLocation> aLocations)
5650
{
57-
text = aText;
51+
locations = aLocations;
5852
}
5953

60-
public List<BioCLocation> getLocations()
54+
@XmlElement(name = "text")
55+
public String getText()
56+
6157
{
62-
return locations;
58+
return text;
6359
}
6460

65-
public void setLocations(List<BioCLocation> aLocations)
61+
public void setText(String aText)
6662
{
67-
locations = aLocations;
63+
text = aText;
6864
}
6965
}

0 commit comments

Comments
 (0)