Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"clean": "rm -rf ./dist ./docs/_build",
"test": "mocha \"tests/**/*.spec.ts\"",
"test-integration": "mocha \"tests/**/*.integration.ts\"",
"test-v1": "mocha \"tests/v1/**/*.spec.ts\"",
"test-v2": "mocha \"tests/v2/**/*.spec.ts\"",
"lint": "tsc --noEmit && eslint './src/**/*.ts' --report-unused-disable-directives && echo 'Your .ts files look good.'",
"lint-fix": "eslint './src/**/*.ts' --fix",
Expand Down
4 changes: 3 additions & 1 deletion src/input/inputSource.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { MindeeInputSourceError } from "@/errors/index.js";

/**
* @param {string} inputType - the type of input used in file ("base64", "path", "dummy").
* NB: dummy is only used for testing purposes
Expand All @@ -17,7 +19,7 @@ export abstract class InputSource {
protected initialized: boolean = false;

async init() {
throw new Error("not Implemented");
throw new MindeeInputSourceError("not Implemented");
}

public isInitialized() {
Expand Down
30 changes: 25 additions & 5 deletions src/input/streamInput.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Readable } from "stream";
import { LocalInputSource } from "./localInputSource.js";
import { INPUT_TYPE_STREAM } from "./inputSource.js";
import { logger } from "@/logger.js";
import { MindeeError } from "@/errors/index.js";
import { MindeeInputSourceError } from "@/errors/index.js";

interface StreamInputProps {
inputStream: Readable;
Expand Down Expand Up @@ -31,17 +31,37 @@ export class StreamInput extends LocalInputSource {
this.initialized = true;
}

async stream2buffer(stream: Readable): Promise<Buffer> {
async stream2buffer(stream: Readable, signal?: AbortSignal): Promise<Buffer> {
return new Promise<Buffer>((resolve, reject) => {
if (stream.closed || stream.destroyed) {
return reject(new MindeeError("Stream is already closed"));
return reject(new MindeeInputSourceError("Stream is already closed"));
}

if (signal?.aborted) {
return reject(new MindeeInputSourceError("Operation aborted"));
}
const onAbort = () => {
stream.destroy();
reject(new MindeeInputSourceError("Operation aborted"));
};
if (signal) {
signal.addEventListener("abort", onAbort, { once: true });
}
const cleanup = () => {
signal?.removeEventListener("abort", onAbort);
};

const _buf: Buffer[] = [];
stream.pause();
stream.on("data", (chunk) => _buf.push(chunk));
stream.on("end", () => resolve(Buffer.concat(_buf)));
stream.on("error", (err) => reject(new Error(`Error converting stream - ${err}`)));
stream.on("end", () => {
cleanup();
resolve(Buffer.concat(_buf));
});
stream.on("error", (err) => {
cleanup();
reject(new MindeeInputSourceError(`Error converting stream - ${err}`));
});
stream.resume();
});
}
Expand Down
13 changes: 8 additions & 5 deletions src/input/urlInput.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ import { basename, extname } from "path";
import { randomBytes } from "crypto";
import { writeFile } from "fs/promises";
import { request, Dispatcher, getGlobalDispatcher } from "undici";
import { BytesInput } from "./bytesInput.js";
import { logger } from "@/logger.js";
import { MindeeInputSourceError } from "@/errors/index.js";
import { BytesInput } from "./bytesInput.js";

export class UrlInput extends InputSource {
public readonly url: string;
public readonly dispatcher;

constructor({ url, dispatcher }: { url: string, dispatcher?: Dispatcher }) {
constructor(
{ url, dispatcher }: { url: string, dispatcher?: Dispatcher }
) {
super();
this.url = url;
this.dispatcher = dispatcher ?? getGlobalDispatcher();
Expand All @@ -24,7 +27,7 @@ export class UrlInput extends InputSource {
}
logger.debug(`source URL: ${this.url}`);
if (!this.url.toLowerCase().startsWith("https")) {
throw new Error("URL must be HTTPS");
throw new MindeeInputSourceError("URL must be HTTPS");
}
this.fileObject = this.url;
this.initialized = true;
Expand Down Expand Up @@ -129,7 +132,7 @@ export class UrlInput extends InputSource {
if (response.statusCode && response.statusCode >= 300 && response.statusCode < 400) {
logger.debug(`Redirecting to: ${response.headers.location}`);
if (redirects === maxRedirects) {
throw new Error(
throw new MindeeInputSourceError(
`Can't reach URL after ${redirects} out of ${maxRedirects} redirects, aborting operation.`
);
}
Expand All @@ -138,7 +141,7 @@ export class UrlInput extends InputSource {
response.headers.location.toString(), auth, headers, redirects + 1, maxRedirects
);
}
throw new Error("Redirect location not found");
throw new MindeeInputSourceError("Redirect location not found");
}

if (!response.statusCode || response.statusCode >= 400 || response.statusCode < 200) {
Expand Down
223 changes: 223 additions & 0 deletions tests/input/compression.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
import {
PathInput,
} from "@/input/index.js";
import * as fs from "fs";
import * as path from "path";
import { expect } from "chai";
import sharp from "sharp";
import { compressImage } from "@/image/index.js";
import { compressPdf } from "@/pdf/index.js";
import { extractTextFromPdf } from "@/pdf/pdfUtils.js";
import { logger } from "@/logger.js";
import { RESOURCE_PATH, V1_PRODUCT_PATH } from "../index.js";

/**
 * Compression and resize checks for local input sources.
 *
 * Each test loads a fixture, compresses and/or resizes it, then compares on-disk
 * sizes, image dimensions, or extracted PDF text. Files produced along the way are
 * written to `<RESOURCE_PATH>/output` and removed again by the `after` hook.
 */
describe("Input Sources - compression and resize", () => {
  const outputPath = path.join(RESOURCE_PATH, "output");

  // Fixtures shared by several tests.
  const receiptPath = path.join(RESOURCE_PATH, "file_types/receipt.jpg");
  const multipagePdfPath = path.join(RESOURCE_PATH, "file_types/pdf/multipage.pdf");
  const invoiceSplitterPdfPath = path.join(V1_PRODUCT_PATH, "invoice_splitter/default_sample.pdf");

  /** Resolves a file name inside the output directory. */
  const inOutput = (fileName: string): string => path.join(outputPath, fileName);

  /** Returns the size in bytes reported by `stat` for the given path. */
  const sizeOf = async (filePath: string): Promise<number> => {
    const stats = await fs.promises.stat(filePath);
    return stats.size;
  };

  /** Returns the sizes of the named output files, in the order given. */
  const outputSizes = (fileNames: string[]): Promise<number[]> =>
    Promise.all(fileNames.map((fileName) => sizeOf(inOutput(fileName))));

  /** Asserts that every size is strictly greater than the one following it. */
  const expectStrictlyDecreasing = (sizes: number[]): void => {
    for (let i = 0; i + 1 < sizes.length; i++) {
      expect(sizes[i]).to.be.greaterThan(sizes[i + 1]);
    }
  };

  before(async () => {
    // The output directory may not exist on a fresh checkout.
    await fs.promises.mkdir(outputPath, { recursive: true });
  });

  it("Image Quality Compress From Input Source", async () => {
    const receiptInput = new PathInput({ inputPath: receiptPath });
    await receiptInput.compress(40);
    await fs.promises.writeFile(inOutput("compress_indirect.jpg"), receiptInput.fileObject);

    const initialSize = await sizeOf(receiptPath);
    const renderedSize = await sizeOf(inOutput("compress_indirect.jpg"));
    expect(renderedSize).to.be.lessThan(initialSize);
  });

  it("Image Quality Compresses From Compressor", async () => {
    const receiptInput = new PathInput({ inputPath: receiptPath });
    await receiptInput.init();

    // Compressions run one after another, in this order. The second call relies on
    // compressImage's default quality (presumably 75, going by the output file name).
    const variants = [
      { fileName: "compress100.jpg", data: await compressImage(receiptInput.fileObject, 100) },
      { fileName: "compress75.jpg", data: await compressImage(receiptInput.fileObject) },
      { fileName: "compress50.jpg", data: await compressImage(receiptInput.fileObject, 50) },
      { fileName: "compress10.jpg", data: await compressImage(receiptInput.fileObject, 10) },
      { fileName: "compress1.jpg", data: await compressImage(receiptInput.fileObject, 1) },
    ];
    for (const { fileName, data } of variants) {
      await fs.promises.writeFile(inOutput(fileName), data);
    }

    const initialSize = await sizeOf(receiptPath);
    const [q100, q75, q50, q10, q1] = await outputSizes(variants.map(({ fileName }) => fileName));

    // The two highest-quality outputs are expected to be larger than the source file.
    expect(initialSize).to.be.lessThan(q100);
    expect(initialSize).to.be.lessThan(q75);
    expectStrictlyDecreasing([q75, q50, q10, q1]);
  });

  it("Image Resize From InputSource", async () => {
    const imageResizeInput = new PathInput({ inputPath: receiptPath });
    await imageResizeInput.init();

    await imageResizeInput.compress(75, 250, 1000);
    await fs.promises.writeFile(inOutput("resize_indirect.jpg"), imageResizeInput.fileObject);

    const initialSize = await sizeOf(receiptPath);
    const renderedSize = await sizeOf(inOutput("resize_indirect.jpg"));
    expect(renderedSize).to.be.lessThan(initialSize);

    // A 250 / 1000 request is expected to produce a 250x333 image.
    const { width, height } = await sharp(imageResizeInput.fileObject).metadata();
    expect(width).to.equal(250);
    expect(height).to.equal(333);
  });

  it("Image Resize From Compressor", async () => {
    const imageResizeInput = new PathInput({ inputPath: receiptPath });
    await imageResizeInput.init();

    // File names mirror the third and fourth compressImage arguments
    // (presumably width and height limits - confirm against compressImage).
    const variants = [
      { fileName: "resize500xnull.jpg", data: await compressImage(imageResizeInput.fileObject, 75, 500) },
      { fileName: "resize250x500.jpg", data: await compressImage(imageResizeInput.fileObject, 75, 250, 500) },
      { fileName: "resize500x250.jpg", data: await compressImage(imageResizeInput.fileObject, 75, 500, 250) },
      { fileName: "resizenullx250.jpg", data: await compressImage(imageResizeInput.fileObject, 75, null, 250) },
    ];
    for (const { fileName, data } of variants) {
      await fs.promises.writeFile(inOutput(fileName), data);
    }

    const initialSize = await sizeOf(receiptPath);
    const [w500, w250h500, w500h250, h250] = await outputSizes(
      variants.map(({ fileName }) => fileName)
    );

    expectStrictlyDecreasing([initialSize, w500, w250h500, w500h250]);
    // The last two requests are expected to yield files of identical size.
    expect(w500h250).to.be.equals(h250);
  });


  it("PDF Input Has Text", async () => {
    const blankPdfPath = path.join(RESOURCE_PATH, "file_types/pdf/blank_1.pdf");

    const pdfWithText = new PathInput({ inputPath: multipagePdfPath });
    const pdfWithoutText = new PathInput({ inputPath: blankPdfPath });
    // An image is not a PDF at all, so it must report no source text either.
    const imageInput = new PathInput({ inputPath: receiptPath });

    expect(await pdfWithText.hasSourceText()).to.be.true;
    expect(await pdfWithoutText.hasSourceText()).to.be.false;
    expect(await imageInput.hasSourceText()).to.be.false;
  });

  it("PDF Compress From InputSource", async () => {
    const pdfResizeInput = new PathInput({ inputPath: invoiceSplitterPdfPath });
    await pdfResizeInput.init();

    const compressedPdf = await compressPdf(pdfResizeInput.fileObject, 75, true);
    await fs.promises.writeFile(inOutput("resize_indirect.pdf"), compressedPdf);

    const initialSize = await sizeOf(invoiceSplitterPdfPath);
    const renderedSize = await sizeOf(inOutput("resize_indirect.pdf"));
    expect(renderedSize).to.be.lessThan(initialSize);
  }).timeout(10000);

  it("PDF Compress From Compressor", async () => {
    const pdfResizeInput = new PathInput({ inputPath: invoiceSplitterPdfPath });
    await pdfResizeInput.init();

    // Sequential on purpose: each quality level is compressed before the next starts.
    const variants = [
      { fileName: "compress85.pdf", data: await compressPdf(pdfResizeInput.fileObject, 85) },
      { fileName: "compress75.pdf", data: await compressPdf(pdfResizeInput.fileObject, 75) },
      { fileName: "compress50.pdf", data: await compressPdf(pdfResizeInput.fileObject, 50) },
      { fileName: "compress10.pdf", data: await compressPdf(pdfResizeInput.fileObject, 10) },
    ];
    for (const { fileName, data } of variants) {
      await fs.promises.writeFile(inOutput(fileName), data);
    }

    const initialSize = await sizeOf(invoiceSplitterPdfPath);
    const renderedSizes = await outputSizes(variants.map(({ fileName }) => fileName));

    // Source file first, then each lower quality level must be smaller than the previous one.
    expectStrictlyDecreasing([initialSize, ...renderedSizes]);
  }).timeout(20000);

  it("PDF Compress With Text Keeps Text", async () => {
    const initialWithText = new PathInput({ inputPath: multipagePdfPath });
    await initialWithText.init();

    const compressedWithText = await compressPdf(initialWithText.fileObject, 100, true, false);

    const originalExtraction = await extractTextFromPdf(initialWithText.fileObject);
    const originalText = originalExtraction.getConcatenatedText();
    const compressedExtraction = await extractTextFromPdf(compressedWithText);
    const compressedText = compressedExtraction.getConcatenatedText();

    expect(compressedText).to.equal(originalText);
  }).timeout(60000);

  it("PDF Compress With Text Does Not Compress", async () => {
    const initialWithText = new PathInput({ inputPath: multipagePdfPath });
    await initialWithText.init();

    // With only a quality argument, a PDF containing text is expected back unchanged.
    const compressedWithText = await compressPdf(initialWithText.fileObject, 50);

    expect(compressedWithText).to.deep.equal(initialWithText.fileObject);
  }).timeout(10000);

  after(async () => {
    // Every file the tests above may have written to the output directory.
    const createdFiles: string[] = [
      "compress10.pdf",
      "compress50.pdf",
      "compress75.pdf",
      "compress85.pdf",
      "resize_indirect.pdf",
      "compress1.jpg",
      "compress10.jpg",
      "compress50.jpg",
      "compress75.jpg",
      "compress100.jpg",
      "compress_indirect.jpg",
      "resize250x500.jpg",
      "resize500x250.jpg",
      "resize500xnull.jpg",
      "resize_indirect.jpg",
      "resizenullx250.jpg",
    ];

    for (const filePath of createdFiles) {
      try {
        await fs.promises.unlink(inOutput(filePath));
      } catch (error) {
        // A missing file only means its test did not run or failed early; stay quiet.
        const isMissing = (error as NodeJS.ErrnoException).code === "ENOENT";
        if (!isMissing) {
          logger.warn(`Could not delete file '${filePath}': ${(error as Error).message}`);
        }
      }
    }
  });
});
2 changes: 1 addition & 1 deletion tests/input/pageOperations.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import * as path from "path";
import { expect } from "chai";
import { RESOURCE_PATH } from "../index.js";

describe("High level multi-page operations", () => {
describe("Input Sources - high level multi-page operations", () => {
it("should cut a PDF", async () => {
const input = new PathInput({
inputPath: path.join(RESOURCE_PATH, "file_types/pdf/multipage.pdf"),
Expand Down
Loading
Loading