77import java .io .ByteArrayInputStream ;
88import java .io .ByteArrayOutputStream ;
99import java .io .IOException ;
10- import java .util .ArrayList ;
1110import java .util .List ;
1211import javax .imageio .ImageIO ;
1312import org .apache .pdfbox .Loader ;
@@ -63,7 +62,7 @@ public BasePDFExtractor(LocalInputSource source) throws IOException {
6362 * @return a valid ImageIO buffer.
6463 * @throws IOException Throws if the file can't be accessed.
6564 */
66- public static BufferedImage byteArrayToBufferedImage (byte [] byteArray ) throws IOException {
65+ private static BufferedImage byteArrayToBufferedImage (byte [] byteArray ) throws IOException {
6766 try (ByteArrayInputStream stream = new ByteArrayInputStream (byteArray )) {
6867 return ImageIO .read (stream );
6968 }
@@ -76,10 +75,8 @@ public static BufferedImage byteArrayToBufferedImage(byte[] byteArray) throws IO
7675 * @return A list of extracted files.
7776 * @throws IOException Throws if the file can't be accessed.
7877 */
79- public List <ExtractedPDF > extractSubDocuments (
80- List <List <Integer >> pageIndexes
81- ) throws IOException {
82- var extractedPDFs = new ArrayList <ExtractedPDF >();
78+ public ExtractedPDFs extractSubDocuments (List <List <Integer >> pageIndexes ) throws IOException {
79+ var extractedPDFs = new ExtractedPDFs ();
8380
8481 for (List <Integer > pageIndexElement : pageIndexes ) {
8582 if (pageIndexElement .isEmpty ()) {
@@ -94,10 +91,7 @@ public List<ExtractedPDF> extractSubDocuments(
9491 .replace (" " , "0" )
9592 + "."
9693 + splitName [1 ];
97- extractedPDFs
98- .add (
99- new ExtractedPDF (mergePdfPages (this .sourcePdf , pageIndexElement , false ), fieldFilename )
100- );
94+ extractedPDFs .add (extractSinglePage (pageIndexElement , fieldFilename , false ));
10195 }
10296 return extractedPDFs ;
10397 }
@@ -136,11 +130,27 @@ private static byte[] createPdfFromExistingPdf(
136130 return output ;
137131 }
138132
139- public byte [] mergePdfPages (
140- PDDocument document ,
133+ public ExtractedPDF extractSinglePage (
134+ List <Integer > pageNumbers ,
135+ String fieldFilename ,
136+ boolean closeOriginal
137+ ) throws IOException {
138+ var pdfBytes = createPdfFromExistingPdf (this .sourcePdf , pageNumbers , closeOriginal );
139+ return new ExtractedPDF (pdfBytes , fieldFilename );
140+ }
141+
142+ public ExtractedPDF extractSinglePage (
141143 List <Integer > pageNumbers ,
142144 boolean closeOriginal
143145 ) throws IOException {
144- return createPdfFromExistingPdf (document , pageNumbers , closeOriginal );
146+ var pdfBytes = createPdfFromExistingPdf (this .sourcePdf , pageNumbers , closeOriginal );
147+ String [] splitName = InputSourceUtils .splitNameStrict (filename );
148+ String fieldFilename = splitName [0 ]
149+ + String .format ("_%3s" , pageNumbers .get (0 ) + 1 ).replace (" " , "0" )
150+ + "-"
151+ + String .format ("%3s" , pageNumbers .get (pageNumbers .size () - 1 ) + 1 ).replace (" " , "0" )
152+ + "."
153+ + splitName [1 ];
154+ return new ExtractedPDF (pdfBytes , fieldFilename );
145155 }
146156}
0 commit comments