diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dfe1cd0..5d63b0a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ - Preserve existing PageMode instead of overwriting when adding outlines - Add userUnit option for custom page units (PDF 1.6) - Support outlines that jump to specific page positions with custom zoom level +- Add robust handling of null byte padding in JPEG images +- Replace outdated jpeg-exif with minimal implementation ### [v0.17.2] - 2025-08-30 diff --git a/lib/image/jpeg.js b/lib/image/jpeg.js index 9fadb8bf..8510df32 100644 --- a/lib/image/jpeg.js +++ b/lib/image/jpeg.js @@ -1,4 +1,79 @@ -import exif from 'jpeg-exif'; +/** + * Parse EXIF orientation from JPEG buffer + * @param {Buffer} data - JPEG image data + * @returns {number|null} Orientation value (1-8) or null if not found + */ +const parseExifOrientation = (data) => { + if (!data || data.length < 20) return null; + + let pos = 2; // Skip SOI marker + + while (pos < data.length - 4) { + // Skip padding bytes (some JPEG files have null bytes between segments) + while (pos < data.length && data[pos] !== 0xff) pos++; + if (pos >= data.length - 4) return null; + + const marker = data.readUInt16BE(pos); + pos += 2; + + // SOS marker - image data starts, stop searching + if (marker === 0xffda) return null; + + // Skip standalone markers + if ((marker >= 0xffd0 && marker <= 0xffd9) || marker === 0xff01) continue; + + if (pos + 2 > data.length) return null; + const segmentLength = data.readUInt16BE(pos); + + // APP1 (EXIF) marker + if (marker === 0xffe1 && pos + 8 <= data.length) { + const exifHeader = data.subarray(pos + 2, pos + 8).toString('binary'); + if (exifHeader === 'Exif\x00\x00') { + const tiffStart = pos + 8; + if (tiffStart + 8 > data.length) return null; + + // Byte order + const byteOrder = data + .subarray(tiffStart, tiffStart + 2) + .toString('ascii'); + const isLittleEndian = byteOrder === 'II'; + if (!isLittleEndian && byteOrder !== 'MM') return null; + + const read16 = isLittleEndian + ? (o) => data.readUInt16LE(o) + : (o) => data.readUInt16BE(o); + const read32 = isLittleEndian + ? (o) => data.readUInt32LE(o) + : (o) => data.readUInt32BE(o); + + // Verify TIFF magic number (42) + if (read16(tiffStart + 2) !== 42) return null; + + // IFD0 offset + const ifdPos = tiffStart + read32(tiffStart + 4); + if (ifdPos + 2 > data.length) return null; + + const entryCount = read16(ifdPos); + + // Scan IFD entries for Orientation tag (0x0112) + for (let i = 0; i < entryCount; i++) { + const entryPos = ifdPos + 2 + i * 12; + if (entryPos + 12 > data.length) return null; + + if (read16(entryPos) === 0x0112) { + const value = read16(entryPos + 8); + return value >= 1 && value <= 8 ? value : null; + } + } + return null; + } + } + + pos += segmentLength; + } + + return null; +}; const MARKERS = [ 0xffc0, 0xffc1, 0xffc2, 0xffc3, 0xffc5, 0xffc6, 0xffc7, 0xffc8, 0xffc9, @@ -21,10 +96,14 @@ class JPEG { } // Parse the EXIF orientation - this.orientation = exif.fromBuffer(this.data).Orientation || 1; + this.orientation = parseExifOrientation(this.data) || 1; let pos = 2; while (pos < this.data.length) { + // Skip padding bytes (some JPEG files have null bytes between segments) + while (pos < this.data.length && this.data[pos] !== 0xff) pos++; + if (pos >= this.data.length) break; + marker = this.data.readUInt16BE(pos); pos += 2; if (MARKERS.includes(marker)) { diff --git a/package.json b/package.json index 25164ae7..d488e834 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,6 @@ "dependencies": { "crypto-js": "^4.2.0", "fontkit": "^2.0.4", - "jpeg-exif": "^1.1.4", "linebreak": "^1.1.0", "png-js": "^1.0.0" }, diff --git a/rollup.config.js b/rollup.config.js index c3a8b867..7e20d154 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -11,8 +11,7 @@ const external = [ 'linebreak', 'png-js', 'crypto-js', - 'saslprep', - 'jpeg-exif' + 'saslprep' ]; const supportedBrowsers = [ diff --git a/tests/images/issue-1175.jpeg b/tests/images/issue-1175.jpeg new file mode 100644 index 00000000..c95aeb58 Binary files /dev/null and b/tests/images/issue-1175.jpeg differ diff --git a/tests/unit/image.spec.js b/tests/unit/image.spec.js index c75c414d..af6e4cf1 100644 --- a/tests/unit/image.spec.js +++ b/tests/unit/image.spec.js @@ -1,4 +1,6 @@ import PDFDocument from '../../lib/document'; +import fs from 'fs'; +import JPEG from '../../lib/image/jpeg'; describe('Image', function () { /** @@ -18,4 +20,12 @@ describe('Image', function () { document.image('./tests/images/bee.png'); expect(document.y).toBe(originalY + imageHeight); }); + + test('parse JPEG with null byte padding in EXIF (issue #1175)', () => { + const data = fs.readFileSync('./tests/images/issue-1175.jpeg'); + const jpeg = new JPEG(data, 'test'); + expect(jpeg.width).toBe(375); + expect(jpeg.height).toBe(500); + expect(jpeg.orientation).toBe(1); + }); }); diff --git a/yarn.lock b/yarn.lock index 65ef81cb..1c2560ba 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5324,13 +5324,6 @@ __metadata: languageName: node linkType: hard -"jpeg-exif@npm:^1.1.4": - version: 1.1.4 - resolution: "jpeg-exif@npm:1.1.4" - checksum: 10c0/0f9225b2423184d60c66b3d7361176801c17ede92fc9b3c044fcf00f379a5a1d424b360ecf0027dda47d405d253c7b62bf5b353fb08b2589e3650f38cc575e82 - languageName: node - linkType: hard - "js-stringify@npm:^1.0.2": version: 1.0.2 resolution: "js-stringify@npm:1.0.2" @@ -6325,7 +6318,6 @@ __metadata: globals: "npm:^15.14.0" jest: "npm:^29.7.0" jest-image-snapshot: "npm:^6.4.0" - jpeg-exif: "npm:^1.1.4" linebreak: "npm:^1.1.0" markdown: "npm:~0.5.0" pdfjs-dist: "npm:^2.14.305"