2020import static org .dkpro .core .io .bioc .BioCComponent .addCollectionMetadataField ;
2121
2222import java .io .IOException ;
23- import java .io .StringWriter ;
2423import java .util .Optional ;
2524
26- import javax .xml .stream .XMLOutputFactory ;
2725import javax .xml .stream .XMLStreamException ;
2826
2927import org .apache .uima .UimaContext ;
3836import org .dkpro .core .io .bioc .internal .BioCToCas ;
3937import org .dkpro .core .io .bioc .internal .model .BioCDocument ;
4038
41- import com .fasterxml .jackson .databind .DeserializationFeature ;
42- import com .fasterxml .jackson .dataformat .xml .XmlMapper ;
43-
4439import eu .openminted .share .annotations .api .DocumentationResource ;
40+ import jakarta .xml .bind .JAXBContext ;
41+ import jakarta .xml .bind .JAXBException ;
42+ import jakarta .xml .bind .Unmarshaller ;
4543
4644/**
4745 * Reader for the BioC format.
5351public class BioCReader
5452 extends BioCReaderImplBase
5553{
56- private XmlMapper mapper ;
54+ private JAXBContext context ;
55+ private Unmarshaller unmarshaller ;
5756 private Optional <BioCDocument > nextDocument ;
5857
5958 @ Override
6059 public void initialize (UimaContext aContext ) throws ResourceInitializationException
6160 {
6261 super .initialize (aContext );
63- mapper = new XmlMapper ();
64- mapper .configure (DeserializationFeature .FAIL_ON_UNKNOWN_PROPERTIES , false );
62+
63+ try {
64+ context = JAXBContext .newInstance (BioCDocument .class );
65+ }
66+ catch (JAXBException e ) {
67+ throw new ResourceInitializationException (e );
68+ }
6569
6670 try {
6771 nextDocument = nextBioCDocument ();
6872 }
69- catch (CollectionException | XMLStreamException | IOException e ) {
73+ catch (CollectionException | XMLStreamException | JAXBException | IOException e ) {
7074 throw new ResourceInitializationException (e );
7175 }
7276 }
@@ -90,14 +94,14 @@ public void getNext(JCas aJCas) throws IOException, CollectionException
9094 // DocumentMetaData.get(aJCas).setDocumentId(document.getId());
9195 // }
9296
93- var jb = new JCasBuilder (aJCas );
97+ JCasBuilder jb = new JCasBuilder (aJCas );
9498 new BioCToCas ().readDocument (jb , document );
9599 jb .close ();
96100
97101 try {
98102 nextDocument = nextBioCDocument ();
99103 }
100- catch (XMLStreamException | IOException e ) {
104+ catch (XMLStreamException | JAXBException e ) {
101105 throw new IOException (e );
102106 }
103107 }
@@ -109,7 +113,7 @@ public boolean hasNext() throws IOException, CollectionException
109113 }
110114
111115 private Optional <BioCDocument > nextBioCDocument ()
112- throws XMLStreamException , CollectionException , IOException
116+ throws XMLStreamException , JAXBException , CollectionException , IOException
113117 {
114118 if (!isFileOpen ()) {
115119 openNextFile ();
@@ -127,43 +131,26 @@ private Optional<BioCDocument> nextBioCDocument()
127131 protected void openNextFile () throws IOException , XMLStreamException , CollectionException
128132 {
129133 super .openNextFile ();
130- // no-op for XmlMapper-based parsing
134+ try {
135+ unmarshaller = context .createUnmarshaller ();
136+ }
137+ catch (JAXBException e ) {
138+ new IOException (e );
139+ }
131140 }
132141
133142 @ Override
134143 protected void closeFile ()
135144 {
136- // mapper is reused, nothing to clear per-file
145+ unmarshaller = null ;
137146 super .closeFile ();
138147 }
139148
140- private Optional <BioCDocument > nextBioCDocumentInFile () throws XMLStreamException , IOException
149+ private Optional <BioCDocument > nextBioCDocumentInFile () throws XMLStreamException , JAXBException
141150 {
142151 if (seekNextBioCDocumentInFile ()) {
143- // Serialize the current <document> event sequence to a string and parse with XmlMapper
144- var sw = new StringWriter ();
145- var outFactory = XMLOutputFactory .newFactory ();
146- var xew = outFactory .createXMLEventWriter (sw );
147-
148- int depth = 0 ;
149- while (getXmlEventReader ().hasNext ()) {
150- var e = getXmlEventReader ().nextEvent ();
151- xew .add (e );
152- if (e .isStartElement ()) {
153- depth ++;
154- }
155- else if (e .isEndElement ()) {
156- depth --;
157- if (depth == 0 ) {
158- break ;
159- }
160- }
161- }
162- xew .flush ();
163- xew .close ();
164-
165- var xml = sw .toString ();
166- var document = mapper .readValue (xml , BioCDocument .class );
152+ var document = unmarshaller .unmarshal (getXmlEventReader (), BioCDocument .class )
153+ .getValue ();
167154 return Optional .of (document );
168155 }
169156
0 commit comments