| title | Invoice OCR Java |
|---|---|
| category | 622b805aaec68102ea7fcbc2 |
| slug | java-invoice-ocr |
| parentDoc | 631a062c3718850f3519b793 |
The Java OCR SDK supports the Invoice API.
Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.

import com.mindee.MindeeClient;
import com.mindee.input.LocalInputSource;
import com.mindee.parsing.common.PredictResponse;
import com.mindee.product.invoice.InvoiceV4;
import java.io.File;
import java.io.IOException;
/**
 * Minimal synchronous sample: loads a local file, sends it to the Invoice V4
 * product and prints a summary of the response.
 */
public class SimpleMindeeClient {

    /**
     * Runs the sample.
     *
     * @param args command-line arguments (unused)
     * @throws IOException propagated from the SDK calls below
     */
    public static void main(String[] args) throws IOException {
        // Placeholders: replace with your own API key and document path
        String apiKey = "my-api-key";
        String filePath = "/path/to/the/file.ext";

        // Init a new client
        MindeeClient mindeeClient = new MindeeClient(apiKey);

        // Load a file from disk (same File-based form as the asynchronous sample)
        LocalInputSource inputSource = new LocalInputSource(new File(filePath));

        // Parse the file
        PredictResponse<InvoiceV4> response = mindeeClient.parse(
            InvoiceV4.class,
            inputSource
        );

        // Print a summary of the response
        System.out.println(response.toString());

        // Print a summary of the predictions
        // System.out.println(response.getDocument().toString());

        // Print the document-level predictions
        // System.out.println(response.getDocument().getInference().getPrediction().toString());

        // Print the page-level predictions
        // response.getDocument().getInference().getPages().forEach(
        //     page -> System.out.println(page.toString())
        // );
    }
}

You can also call this product asynchronously:
import com.mindee.MindeeClient;
import com.mindee.input.LocalInputSource;
import com.mindee.parsing.common.AsyncPredictResponse;
import com.mindee.product.invoice.InvoiceV4;
import java.io.File;
import java.io.IOException;
/**
 * Asynchronous variant of the sample: loads a local file, submits it to the
 * Invoice V4 product through {@code enqueueAndParse} and prints a summary of
 * the response.
 */
public class SimpleMindeeClient {

    /**
     * Runs the asynchronous sample.
     *
     * @param args command-line arguments (unused)
     * @throws IOException propagated from the SDK calls below
     * @throws InterruptedException propagated from the SDK calls below
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        final String apiKey = "my-api-key";
        final String filePath = "/path/to/the/file.ext";

        // Init a new client
        final MindeeClient client = new MindeeClient(apiKey);

        // Load a file from disk
        final File documentFile = new File(filePath);
        final LocalInputSource source = new LocalInputSource(documentFile);

        // Parse the file asynchronously
        final AsyncPredictResponse<InvoiceV4> response =
            client.enqueueAndParse(InvoiceV4.class, source);

        // Print a summary of the response
        final String summary = response.toString();
        System.out.println(summary);

        // Print a summary of the predictions
        // System.out.println(response.getDocumentObj().toString());

        // Print the document-level predictions
        // System.out.println(response.getDocumentObj().getInference().getPrediction().toString());

        // Print the page-level predictions
        // response.getDocumentObj().getInference().getPages().forEach(
        //     page -> System.out.println(page.toString())
        // );
    }
}

Output (RST):
########
Document
########
:Mindee ID: 744748d5-9051-461c-b70c-bbf81f5ff943
:Filename: default_sample.jpg
Inference
#########
:Product: mindee/invoices v4.11
:Rotation applied: Yes
Prediction
==========
:Locale: en-CA; en; CA; CAD;
:Invoice Number: 14
:Purchase Order Number: AD29094
:Reference Numbers: AD29094
:Purchase Date: 2018-09-25
:Due Date:
:Payment Date:
:Total Net: 2145.00
:Total Amount: 2608.20
:Total Tax: 193.20
:Taxes:
+---------------+--------+----------+---------------+
| Base | Code | Rate (%) | Amount |
+===============+========+==========+===============+
| 2145.00 | | 8.00 | 193.20 |
+---------------+--------+----------+---------------+
:Supplier Payment Details:
:Supplier Name: TURNPIKE DESIGNS
:Supplier Company Registrations:
:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
:Supplier Phone Number: 4165551212
:Supplier Website:
:Supplier Email: j_coi@example.com
:Customer Name: JIRO DOI
:Customer Company Registrations:
:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
:Customer ID:
:Shipping Address:
:Billing Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
:Document Type: INVOICE
:Document Type Extended: INVOICE
:Purchase Subcategory:
:Purchase Category: miscellaneous
:Line Items:
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price |
+======================================+==============+==========+============+==============+==============+=================+============+
| Platinum web hosting package Down... | | 1.00 | | | 65.00 | | 65.00 |
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
| 2 page website design Includes ba... | | 3.00 | | | 2100.00 | | 2100.00 |
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
| Mobile designs Includes responsiv... | | 1.00 | | | 250.00 | 1 | 250.00 |
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
Page Predictions
================
Page 0
------
:Locale: en-CA; en; CA; CAD;
:Invoice Number: 14
:Purchase Order Number: AD29094
:Reference Numbers: AD29094
:Purchase Date: 2018-09-25
:Due Date:
:Payment Date:
:Total Net: 2145.00
:Total Amount: 2608.20
:Total Tax: 193.20
:Taxes:
+---------------+--------+----------+---------------+
| Base | Code | Rate (%) | Amount |
+===============+========+==========+===============+
| 2145.00 | | 8.00 | 193.20 |
+---------------+--------+----------+---------------+
:Supplier Payment Details:
:Supplier Name: TURNPIKE DESIGNS
:Supplier Company Registrations:
:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
:Supplier Phone Number: 4165551212
:Supplier Website:
:Supplier Email: j_coi@example.com
:Customer Name: JIRO DOI
:Customer Company Registrations:
:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
:Customer ID:
:Shipping Address:
:Billing Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
:Document Type: INVOICE
:Document Type Extended: INVOICE
:Purchase Subcategory:
:Purchase Category: miscellaneous
:Line Items:
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price |
+======================================+==============+==========+============+==============+==============+=================+============+
| Platinum web hosting package Down... | | 1.00 | | | 65.00 | | 65.00 |
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
| 2 page website design Includes ba... | | 3.00 | | | 2100.00 | | 2100.00 |
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
| Mobile designs Includes responsiv... | | 1.00 | | | 250.00 | 1 | 250.00 |
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+

These fields are generic and used in several products.
Each prediction object contains a set of fields that inherit from the generic BaseField class.
A typical BaseField object will have the following attributes:
- confidence (`Double`): the confidence score of the field prediction.
- boundingBox (`Polygon`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
- polygon (`Polygon`): contains the relative vertices coordinates (`polygon` extends `List<Point>`) of a polygon containing the field in the image.
- pageId (`Integer`): the ID of the page, always `null` when at document-level.
Note: A `Point` simply refers to a List of `Double`.
Aside from the previous attributes, all basic fields have access to a custom toString method that can be used to print their value as a string.
Aside from the basic BaseField attributes, the address field AddressField also implements the following:
- streetNumber (`String`): String representation of the street number. Can be `null`.
- streetName (`String`): Name of the street. Can be `null`.
- poBox (`String`): String representation of the PO Box number. Can be `null`.
- addressComplement (`String`): Address complement. Can be `null`.
- city (`String`): City name. Can be `null`.
- postalcode (`String`): String representation of the postal code. Can be `null`.
- state (`String`): State name. Can be `null`.
- country (`String`): Country name. Can be `null`.
Note: The value field of an AddressField should be a concatenation of the rest of the values.
An amount field AmountField extends BaseField, but also implements:
- value (`Double`): corresponds to the field value. Can be `null` if no value was extracted.
The classification field ClassificationField extends BaseField, but also implements:
- value (`String`): corresponds to the field value.
- confidence (`double`): the confidence score of the field prediction.
Note: a classification field's `value` is always a `String`.
Aside from the basic BaseField attributes, the company registration field CompanyRegistrationField also implements the following:
- type (`String`): the type of company.
- value (`String`): corresponds to the field value.
- toTableLine(): a method that formats the data to fit in a .rst display.
The text field StringField extends BaseField, but also implements:
- value (`String`): corresponds to the field value.
- rawValue (`String`): corresponds to the raw value as it appears on the document.
The date field DateField extends BaseField, but also implements:
- value (`LocalDate`): an accessible representation of the value as a Java object. Can be `null`.
The locale field LocaleField extends BaseField, but also implements:
- value (`String`): the locale as a string (e.g.: `en-CA`). Can be `null`.
- language (`String`): ISO 639-1 language code (e.g.: `en` for English). Can be `null`.
- country (`String`): ISO 3166-1 alpha-2 or ISO 3166-1 alpha-3 code for countries (e.g.: `GBR` or `GB` for "Great Britain"). Can be `null`.
- currency (`String`): ISO 4217 code for currencies (e.g.: `USD` for "US Dollars"). Can be `null`.
Aside from the basic BaseField attributes, the tax field TaxField also implements the following:
- rate (`Double`): the tax rate applied to an item expressed as a percentage. Can be `null`.
- code (`String`): tax code (or equivalent, depending on the origin of the document).
- base (`Double`): base amount used for the tax. Can be `null`.
- value (`Double`): the value of the tax. Can be `null`.
Note: currently `TaxField` is not used on its own, and is accessed through a parent `Taxes` object, a list-like structure.
The Taxes field represents a List of TaxField objects. As it is the representation of several objects, it has access to a custom toString method that can render a TaxField object as a table line.
Fields which are specific to this product; they are not used in any other product.
List of all the line items present on the invoice.
An `InvoiceV4LineItem` implements the following attributes:
- description (`String`): The item description.
- productCode (`String`): The product code of the item.
- quantity (`Double`): The item quantity.
- taxAmount (`Double`): The item tax amount.
- taxRate (`Double`): The item tax rate in percentage.
- totalAmount (`Double`): The item total amount.
- unitMeasure (`String`): The item unit of measure.
- unitPrice (`Double`): The item unit price.
The following fields are extracted for Invoice V4:
billingAddress: The customer billing address.
System.out.println(result.getDocument().getInference().getPrediction().getBillingAddress().value);category: The purchase category.
- 'toll'
- 'food'
- 'parking'
- 'transport'
- 'accommodation'
- 'telecom'
- 'miscellaneous'
- 'software'
- 'shopping'
- 'energy'
System.out.println(result.getDocument().getInference().getPrediction().getCategory().value);customerAddress: The address of the customer.
System.out.println(result.getDocument().getInference().getPrediction().getCustomerAddress().value);customerCompanyRegistrations: List of company registration numbers associated to the customer.
for (customerCompanyRegistrationsElem : result.getDocument().getInference().getPrediction().getCustomerCompanyRegistrations())
{
System.out.println(customerCompanyRegistrationsElem.value);
}customerId: The customer account number or identifier from the supplier.
System.out.println(result.getDocument().getInference().getPrediction().getCustomerId().value);customerName: The name of the customer or client.
System.out.println(result.getDocument().getInference().getPrediction().getCustomerName().value);date: The date the purchase was made.
System.out.println(result.getDocument().getInference().getPrediction().getDate().value);documentType: Document type: INVOICE or CREDIT NOTE.
- 'INVOICE'
- 'CREDIT NOTE'
System.out.println(result.getDocument().getInference().getPrediction().getDocumentType().value);documentTypeExtended: Document type extended.
- 'CREDIT NOTE'
- 'INVOICE'
- 'OTHER'
- 'OTHER_FINANCIAL'
- 'PAYSLIP'
- 'PURCHASE ORDER'
- 'QUOTE'
- 'RECEIPT'
- 'STATEMENT'
System.out.println(result.getDocument().getInference().getPrediction().getDocumentTypeExtended().value);dueDate: The date on which the payment is due.
System.out.println(result.getDocument().getInference().getPrediction().getDueDate().value);invoiceNumber: The invoice number or identifier.
System.out.println(result.getDocument().getInference().getPrediction().getInvoiceNumber().value);lineItems(List<InvoiceV4LineItem>): List of all the line items present on the invoice.
for (lineItemsElem : result.getDocument().getInference().getPrediction().getLineItems())
{
System.out.println(lineItemsElem.value);
}locale: The locale of the document.
System.out.println(result.getDocument().getInference().getPrediction().getLocale().value);
paymentDate: The date on which the payment is due / was fulfilled.
System.out.println(result.getDocument().getInference().getPrediction().getPaymentDate().value);poNumber: The purchase order number.
System.out.println(result.getDocument().getInference().getPrediction().getPoNumber().value);referenceNumbers: List of all reference numbers on the invoice, including the purchase order number.
for (referenceNumbersElem : result.getDocument().getInference().getPrediction().getReferenceNumbers())
{
System.out.println(referenceNumbersElem.value);
}shippingAddress: Customer's delivery address.
System.out.println(result.getDocument().getInference().getPrediction().getShippingAddress().value);subcategory: The purchase subcategory for transport, food and shopping.
- 'plane'
- 'taxi'
- 'train'
- 'restaurant'
- 'shopping'
- 'other'
- 'groceries'
- 'cultural'
- 'electronics'
- 'office_supplies'
- 'micromobility'
- 'car_rental'
- 'public'
- 'delivery'
- null
System.out.println(result.getDocument().getInference().getPrediction().getSubcategory().value);supplierAddress: The address of the supplier or merchant.
System.out.println(result.getDocument().getInference().getPrediction().getSupplierAddress().value);supplierCompanyRegistrations: List of company registration numbers associated to the supplier.
for (supplierCompanyRegistrationsElem : result.getDocument().getInference().getPrediction().getSupplierCompanyRegistrations())
{
System.out.println(supplierCompanyRegistrationsElem.value);
}supplierEmail: The email address of the supplier or merchant.
System.out.println(result.getDocument().getInference().getPrediction().getSupplierEmail().value);supplierName: The name of the supplier or merchant.
System.out.println(result.getDocument().getInference().getPrediction().getSupplierName().value);supplierPaymentDetails: List of payment details associated to the supplier of the invoice.
// Print the attributes of every payment-details entry found for the supplier
for (supplierPaymentDetailsElem : result.getDocument().getInference().getPrediction().getSupplierPaymentDetails())
{
    System.out.println(supplierPaymentDetailsElem.value);
    System.out.println(supplierPaymentDetailsElem.rate);
    System.out.println(supplierPaymentDetailsElem.code);
    System.out.println(supplierPaymentDetailsElem.base);
}
supplierPhoneNumber: The phone number of the supplier or merchant.
System.out.println(result.getDocument().getInference().getPrediction().getSupplierPhoneNumber().value);supplierWebsite: The website URL of the supplier or merchant.
System.out.println(result.getDocument().getInference().getPrediction().getSupplierWebsite().value);taxes: List of taxes. Each item contains the detail of the tax.
for (taxesElem : result.getDocument().getInference().getPrediction().getTaxes())
{
System.out.println(taxesElem.value);
}totalAmount: The total amount of the invoice: includes taxes, tips, fees, and other charges.
System.out.println(result.getDocument().getInference().getPrediction().getTotalAmount().value);totalNet: The net amount of the invoice: does not include taxes, fees, and discounts.
System.out.println(result.getDocument().getInference().getPrediction().getTotalNet().value);totalTax: The total tax: the sum of all the taxes for this invoice.
System.out.println(result.getDocument().getInference().getPrediction().getTotalTax().value);