diff --git a/lib/api/apiUtils/integrity/crcCombine.js b/lib/api/apiUtils/integrity/crcCombine.js new file mode 100644 index 0000000000..9dc43b0da1 --- /dev/null +++ b/lib/api/apiUtils/integrity/crcCombine.js @@ -0,0 +1,185 @@ +'use strict'; + +// Combine two right-shift CRCs (zlib's gf2_matrix_* trick) without using BigInt +// inside the hot loops. Each GF(2) operator matrix is stored as a Uint32Array +// of `2 * dim` words, where row n is packed as [lo32, hi32]. For 32-bit CRCs +// the high halves stay zero and the per-row loop exits early; for the 64-bit +// CRC (crc64nvme) the pair-of-u32s representation lets every XOR/shift stay on +// 32-bit ints. +// +// References: +// zlib crc32_combine (canonical C implementation): +// https://github.com/madler/zlib/blob/master/crc32.c +// Mark Adler, "How does CRC32 work?" — derivation of the matrix trick: +// https://stackoverflow.com/a/23126768 +// AWS S3 multipart upload full-object checksums: +// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html + +function gf2MatrixTimes(mat, vecLo, vecHi) { + let sumLo = 0; + let sumHi = 0; + let lo = vecLo; + let hi = vecHi; + let i = 0; + while ((lo | hi) !== 0) { + if (lo & 1) { + sumLo ^= mat[2 * i]; + sumHi ^= mat[2 * i + 1]; + } + lo = (lo >>> 1) | ((hi & 1) << 31); + hi = hi >>> 1; + i += 1; + } + return [sumLo >>> 0, sumHi >>> 0]; +} + +function gf2MatrixSquare(square, mat, dim) { + for (let n = 0; n < dim; n += 1) { + const r = gf2MatrixTimes(mat, mat[2 * n], mat[2 * n + 1]); + // In-place mutation of the caller's scratch buffer is intentional — + // the callers (crcCombine, ensureChainLen) own `square` and re-use + // it across iterations to avoid re-allocating per squaring step. + /* eslint-disable no-param-reassign */ + square[2 * n] = r[0]; + square[2 * n + 1] = r[1]; + /* eslint-enable no-param-reassign */ + } +} + +// Per (polyReversed, dim), a lazily-grown chain of zero-byte operators. +// state.byteOps[j] is the GF(2) operator for prepending 2^j zero bytes +// (i.e. M^(8 * 2^j)). Building this chain is the dominant cost of crcCombine +// and depends only on the polynomial, so we cache it across calls. +const chainCache = new Map(); + +function getOrInitChain(polyReversed, dim) { + let state = chainCache.get(polyReversed); + if (state !== undefined) { + return state; + } + + // M^1: one-zero-bit operator. Column 0 is the polynomial; column k>0 is + // 1 << (k - 1) — what right-shifting a state with bit k set produces. + const m1 = new Uint32Array(2 * dim); + m1[0] = Number(polyReversed & 0xffffffffn); + m1[1] = Number((polyReversed >> 32n) & 0xffffffffn); + for (let k = 1; k < dim; k += 1) { + const bit = k - 1; + if (bit < 32) { + m1[2 * k] = (1 << bit) >>> 0; + } else { + m1[2 * k + 1] = (1 << (bit - 32)) >>> 0; + } + } + + const m2 = new Uint32Array(2 * dim); + gf2MatrixSquare(m2, m1, dim); + const m4 = new Uint32Array(2 * dim); + gf2MatrixSquare(m4, m2, dim); + const m8 = new Uint32Array(2 * dim); // operator for 1 zero byte + gf2MatrixSquare(m8, m4, dim); + + state = { dim, byteOps: [m8] }; + chainCache.set(polyReversed, state); + return state; +} + +function ensureChainLen(state, j) { + while (state.byteOps.length <= j) { + const prev = state.byteOps[state.byteOps.length - 1]; + const next = new Uint32Array(prev.length); + gf2MatrixSquare(next, prev, state.dim); + state.byteOps.push(next); + } +} + +/** + * Combine two CRCs of adjacent byte chunks. 
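+ * crc1 is advanced over len2 zero bytes with the cached GF(2) operators and
+ * then XORed with crc2, which yields the identity: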
+ * + * crcCombine(crc(a), crc(b), len(b), polyReversed, dim) === crc(a ‖ b) + * + * Works for any right-shift CRC of width `dim` (32 or 64) given its + * bit-reversed polynomial. The squaring chain for `polyReversed` is cached + * across calls, so the per-call cost is just popcount(len2) cheap operator + * applications plus the BigInt boundary conversions. + * + * @param {bigint} crc1 - CRC of the first chunk + * @param {bigint} crc2 - CRC of the second chunk + * @param {bigint} len2 - byte length of the second chunk + * @param {bigint} polyReversed - bit-reversed polynomial + * @param {number} dim - CRC width in bits (32 or 64) + * @returns {bigint} CRC of the concatenated chunk, masked to `dim` bits + */ +function crcCombine(crc1, crc2, len2, polyReversed, dim) { + const mask = (1n << BigInt(dim)) - 1n; + if (len2 === 0n) { + return crc1 & mask; + } + + const state = getOrInitChain(polyReversed, dim); + + let cLo = Number(crc1 & 0xffffffffn); + let cHi = Number((crc1 >> 32n) & 0xffffffffn); + + // Walk the bits of len2 (each bit represents a power-of-two number of + // zero bytes to prepend); apply the cached operator for every set bit. + let n = len2; + let j = 0; + while (n !== 0n) { + if ((n & 1n) === 1n) { + ensureChainLen(state, j); + const r = gf2MatrixTimes(state.byteOps[j], cLo, cHi); + cLo = r[0]; + cHi = r[1]; + } + n >>= 1n; + j += 1; + } + + const c2Lo = Number(crc2 & 0xffffffffn); + const c2Hi = Number((crc2 >> 32n) & 0xffffffffn); + cLo = (cLo ^ c2Lo) >>> 0; + cHi = (cHi ^ c2Hi) >>> 0; + + return ((BigInt(cHi) << 32n) | BigInt(cLo)) & mask; +} + +function base64ToBigInt(b64) { + const buf = Buffer.from(b64, 'base64'); + let r = 0n; + for (let i = 0; i < buf.length; i += 1) { + r = (r << 8n) | BigInt(buf[i]); + } + return r; +} + +function bigIntToBase64(value, dim) { + const nBytes = dim / 8; + const buf = Buffer.alloc(nBytes); + let v = value; + for (let i = nBytes - 1; i >= 0; i -= 1) { + buf[i] = Number(v & 0xffn); + v >>= 8n; + } + return buf.toString('base64'); +} + +/** + * Combine N per-part CRCs into the full-object CRC, base64-encoded. 
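+ *
+ * Illustrative call shape (the checksum strings are placeholders, not
+ * real CRCs):
+ *
+ *   combineCrcs([
+ *       { value: '<b64 crc of part 1>', length: 5242880 },
+ *       { value: '<b64 crc of part 2>', length: 1024 },
+ *   ], 0x82f63b78n, 32) // => base64 CRC32C of the two-part object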
+ * + * @param {Array<{value: string, length: number}>} parts - per-part data in + * part order; `value` is the base64-encoded per-part CRC, `length` is the + * byte length of that part + * @param {bigint} polyReversed - bit-reversed polynomial + * @param {number} dim - CRC width in bits (32 or 64) + * @returns {string} base64-encoded combined CRC + */ +function combineCrcs(parts, polyReversed, dim) { + let combined = base64ToBigInt(parts[0].value); + for (let i = 1; i < parts.length; i += 1) { + combined = crcCombine(combined, base64ToBigInt(parts[i].value), BigInt(parts[i].length), polyReversed, dim); + } + return bigIntToBase64(combined, dim); +} + +module.exports = { combineCrcs, crcCombine }; diff --git a/lib/api/apiUtils/integrity/validateChecksums.js b/lib/api/apiUtils/integrity/validateChecksums.js index c3ce6018c1..d2f7059eb8 100644 --- a/lib/api/apiUtils/integrity/validateChecksums.js +++ b/lib/api/apiUtils/integrity/validateChecksums.js @@ -4,6 +4,7 @@ const { Crc32c } = require('@aws-crypto/crc32c'); const { CrtCrc64Nvme } = require('@aws-sdk/crc64-nvme-crt'); const { errors: ArsenalErrors, errorInstances } = require('arsenal'); const { config } = require('../../../Config'); +const { combineCrcs } = require('./crcCombine'); const defaultChecksumData = Object.freeze({ algorithm: 'crc64nvme', isTrailer: false, expected: undefined }); @@ -152,6 +153,20 @@ const algorithms = Object.freeze({ }, }); +/** + * Validate body integrity for a buffered (non-chunked) request. + * + * The whole request body has already been read into memory. This function + * picks the single x-amz-checksum- header, recomputes the digest of + * `body` with that algorithm, and compares it to the header value. It is the + * authoritative body-checksum check for small APIs (e.g. multiObjectDelete, + * bucket configuration PUTs) where the body is not streamed. + * + * @param {object} headers - HTTP request headers (lowercased keys) + * @param {Buffer} body - the entire buffered request body + * @returns {Promise} - + * null on success; otherwise a ChecksumError with details. + */ async function validateXAmzChecksums(headers, body) { const checksumHeaders = Object.keys(headers).filter(header => header.startsWith('x-amz-checksum-')); const xAmzChecksumCnt = checksumHeaders.length; @@ -205,6 +220,28 @@ async function validateXAmzChecksums(headers, body) { return null; } +/** + * Extract checksum intent from request headers for a streaming upload. + * + * Inspects x-amz-checksum-, x-amz-trailer, and x-amz-sdk-checksum-algorithm + * to decide which algorithm the streaming pipeline (e.g. ChecksumTransform, + * TrailingChecksumTransform) should compute over the body, and what digest + * value (if any) the body must match. Does NOT read the body — the actual + * comparison happens later as bytes flow through the stream. + * + * Used by streaming write paths: PutObject, UploadPart, replication writes + * via routeBackbeat. + * + * @param {object} headers - HTTP request headers (lowercased keys) + * @returns {null + * | { algorithm: string, isTrailer: boolean, expected: string|undefined } + * | { error: string, details: object }} - + * - null when no checksum metadata is present (caller decides default). + * - { algorithm, isTrailer, expected } describing what the stream must + * produce. `isTrailer` is true when the digest will arrive in the + * request trailer (`expected` is undefined until the trailer parses). + * - { error, details } on header mismatch. 
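+ *
+ * Illustrative examples (header values are placeholders):
+ *
+ *   getChecksumDataFromHeaders({ 'x-amz-checksum-crc32c': '<b64>' })
+ *   // => { algorithm: 'crc32c', isTrailer: false, expected: '<b64>' }
+ *   getChecksumDataFromHeaders({ 'x-amz-trailer': 'x-amz-checksum-crc64nvme' })
+ *   // => { algorithm: 'crc64nvme', isTrailer: true, expected: undefined }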
+ */ function getChecksumDataFromHeaders(headers) { const checkSdk = algo => { if (!('x-amz-sdk-checksum-algorithm' in headers)) { @@ -489,6 +526,85 @@ function getChecksumDataFromMPUHeaders(headers) { return { algorithm: algo, type: defaultChecksumType[algo], isDefault: false }; } +// ============================================================================= +// MPU final-object checksum computation +// ============================================================================= +// +// CompleteMultipartUpload composes a final-object checksum from the per-part +// checksums recorded at UploadPart time. AWS defines two modes: +// +// COMPOSITE : finalChecksum = base64(algo(decode(c1) || ... || decode(cN))) +// + "-N" suffix, where N is the number of parts. +// Supported on CRC32, CRC32C, SHA1, SHA256. +// +// FULL_OBJECT : finalChecksum is the CRC of the entire object's bytes, +// reconstructed by combining the per-part CRCs via CRC +// linearization. CRC-only: CRC32, CRC32C, +// CRC64NVME. + +// Bit-reversed polynomials used by the right-shift CRC implementations that +// the @aws-crypto/* and @aws-sdk/crc64-nvme-crt packages produce. +const FULL_OBJECT_POLYS = Object.freeze({ + crc32: { polyReversed: 0xedb88320n, dim: 32 }, + crc32c: { polyReversed: 0x82f63b78n, dim: 32 }, + crc64nvme: { polyReversed: 0x9a6c9329ac4bc9b5n, dim: 64 }, +}); + +// Algorithms whose digest is synchronous, which is the full set AWS allows +// for COMPOSITE MPUs. crc64nvme is excluded because (a) AWS does not allow +// COMPOSITE for CRC64NVME and (b) its CRT-backed digest is async. +const COMPOSITE_ALGOS = new Set(['crc32', 'crc32c', 'sha1', 'sha256']); + +/** + * Compute the COMPOSITE final-object checksum for a CompleteMultipartUpload. + * + * final = base64(algo(decode(c1) || decode(c2) || ... || decode(cN))) + "-N" + * + * Supported algorithms: crc32, crc32c, sha1, sha256. (crc64nvme is excluded — + * AWS does not allow COMPOSITE for CRC64NVME.) + * + * @param {string} algorithm - lowercase algorithm name + * @param {string[]} partChecksumsBase64 - per-part checksums in part order, + * each base64-encoded (the format stored on MPU part metadata) + * @returns {{ checksum: string, error: null } + * | { checksum: null, error: { code: string, details: object } }} + */ +function computeCompositeMPUChecksum(algorithm, partChecksumsBase64) { + if (!COMPOSITE_ALGOS.has(algorithm)) { + return { checksum: null, error: { code: ChecksumError.MPUAlgoNotSupported, details: { algorithm } } }; + } + + const concat = Buffer.concat(partChecksumsBase64.map(c => Buffer.from(c, 'base64'))); + const digest = algorithms[algorithm].digest(concat); + return { + checksum: `${digest}-${partChecksumsBase64.length}`, + error: null, + }; +} + +/** + * Compute the FULL_OBJECT final-object checksum for a CompleteMultipartUpload. + * + * Returns the CRC of the assembled object's bytes, derived purely from the + * per-part CRCs and part lengths via CRC linearization. + * + * Supported algorithms: crc32, crc32c, crc64nvme. 
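+ *
+ * Illustrative call shape (checksum strings are placeholders):
+ *
+ *   computeFullObjectMPUChecksum('crc64nvme', [
+ *       { value: '<b64 part-1 crc>', length: 5242880 },
+ *       { value: '<b64 part-2 crc>', length: 1024 },
+ *   ]) // => { checksum: '<b64 full-object crc>', error: null }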
+ * + * @param {string} algorithm - lowercase algorithm name + * @param {Array<{value: string, length: number}>} parts - per-part data in + * part order; `value` is the base64-encoded per-part CRC, `length` is the + * byte length of that part + * @returns {{ checksum: string, error: null } + * | { checksum: null, error: { code: string, details: object } }} + */ +function computeFullObjectMPUChecksum(algorithm, parts) { + const params = FULL_OBJECT_POLYS[algorithm]; + if (!params) { + return { checksum: null, error: { code: ChecksumError.MPUAlgoNotSupported, details: { algorithm } } }; + } + return { checksum: combineCrcs(parts, params.polyReversed, params.dim), error: null }; +} + module.exports = { ChecksumError, defaultChecksumData, @@ -499,4 +615,6 @@ module.exports = { algorithms, checksumedMethods, getChecksumDataFromMPUHeaders, + computeCompositeMPUChecksum, + computeFullObjectMPUChecksum, }; diff --git a/tests/unit/api/apiUtils/integrity/computeMpuChecksums.js b/tests/unit/api/apiUtils/integrity/computeMpuChecksums.js new file mode 100644 index 0000000000..3640238dc5 --- /dev/null +++ b/tests/unit/api/apiUtils/integrity/computeMpuChecksums.js @@ -0,0 +1,188 @@ +const assert = require('assert'); +const crypto = require('crypto'); + +const { + algorithms, + computeCompositeMPUChecksum, + computeFullObjectMPUChecksum, +} = require('../../../../../lib/api/apiUtils/integrity/validateChecksums'); + +// Random part bodies. Per-test randomness still satisfies the assertions +// because each test only checks combine(parts) === algo(concat(parts)), +// which holds for any byte sequence. +function makeParts(count, size) { + const parts = []; + for (let i = 0; i < count; i += 1) { + parts.push(crypto.randomBytes(size)); + } + return parts; +} + +// -- COMPOSITE ------------------------------------------------------------ + +describe('computeCompositeMPUChecksum', () => { + const parts = makeParts(3, 1024); + + const COMPOSITE_ALGOS = ['crc32', 'crc32c', 'sha1', 'sha256']; + + COMPOSITE_ALGOS.forEach(algo => { + const label = algo.toUpperCase(); + it(`should match ${label}(decode(c1) || ... 
|| decode(cN)) + "-N"`, () => { + const partChecksums = parts.map(p => algorithms[algo].digest(p)); + const expectedConcat = Buffer.concat(partChecksums.map(c => Buffer.from(c, 'base64'))); + const expected = `${algorithms[algo].digest(expectedConcat)}-3`; + + const got = computeCompositeMPUChecksum(algo, partChecksums); + assert.strictEqual(got.error, null); + assert.strictEqual(got.checksum, expected); + }); + }); + + it('should return N=1 for a single part', () => { + const partChecksums = [algorithms.sha256.digest(parts[0])]; + const got = computeCompositeMPUChecksum('sha256', partChecksums); + assert.strictEqual(got.error, null); + assert(got.checksum.endsWith('-1')); + }); + + it('should return an error object on unsupported algorithm', () => { + const got = computeCompositeMPUChecksum('md5', ['AAAA']); + assert.strictEqual(got.checksum, null); + assert(got.error); + assert.strictEqual(got.error.code, 'MPUAlgoNotSupported'); + assert.deepStrictEqual(got.error.details, { algorithm: 'md5' }); + }); + + it('should return an error object for crc64nvme (not allowed for COMPOSITE)', () => { + const got = computeCompositeMPUChecksum('crc64nvme', ['AQIDBAUGBwg=']); + assert.strictEqual(got.checksum, null); + assert.strictEqual(got.error.code, 'MPUAlgoNotSupported'); + }); +}); + +// -- FULL_OBJECT ---------------------------------------------------------- + +describe('computeFullObjectMPUChecksum', () => { + // Validation strategy: build N concrete part bodies, run each through the + // canonical CRC implementation to get the per-part CRC, then compare the + // combined result against the CRC of the concatenation of all bodies. + + const FULL_OBJECT_ALGOS = ['crc32', 'crc32c', 'crc64nvme']; + + async function buildPartInputs(parts, algo) { + const partInputs = []; + for (const b of parts) { + // `await` is a no-op for the sync CRC32/CRC32C digests and resolves + // the Promise for the async CRC64NVME digest. + partInputs.push({ + value: await algorithms[algo].digest(b), + length: b.length, + }); + } + return partInputs; + } + + FULL_OBJECT_ALGOS.forEach(algo => { + const label = algo.toUpperCase(); + + it(`should match ${label}(concat(parts)) for varied part sizes`, async () => { + const parts = [ + crypto.randomBytes(5 * 1024 * 1024), + crypto.randomBytes(5 * 1024 * 1024 + 7), + crypto.randomBytes(19), + ]; + const partInputs = await buildPartInputs(parts, algo); + const result = computeFullObjectMPUChecksum(algo, partInputs); + const direct = await algorithms[algo].digest(Buffer.concat(parts)); + assert.strictEqual(result.error, null); + assert.strictEqual(result.checksum, direct); + }); + + it(`should return the part CRC unchanged for a single-part ${label} MPU`, async () => { + const buf = crypto.randomBytes(15); + const partCrc = await algorithms[algo].digest(buf); + const got = computeFullObjectMPUChecksum(algo, [ + { + value: partCrc, + length: buf.length, + }, + ]); + assert.strictEqual(got.error, null); + assert.strictEqual(got.checksum, partCrc); + }); + + it(`should handle many small ${label} parts (16 × 1 MiB)`, async () => { + // Exercises multiple combine iterations and the matrix-squaring loop. 
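+ // Each of the 15 combine steps has len2 = 2^20 (a single set bit),
+ // so every step applies the same cached byteOps[20] operator.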
+ const parts = makeParts(16, 1 * 1024 * 1024); + const partInputs = await buildPartInputs(parts, algo); + const result = computeFullObjectMPUChecksum(algo, partInputs); + const direct = await algorithms[algo].digest(Buffer.concat(parts)); + assert.strictEqual(result.error, null); + assert.strictEqual(result.checksum, direct); + }); + }); + + it('should return an error object on unsupported algorithm', () => { + const got = computeFullObjectMPUChecksum('sha256', [{ value: 'AAAA', length: 4 }]); + assert.strictEqual(got.checksum, null); + assert(got.error); + assert.strictEqual(got.error.code, 'MPUAlgoNotSupported'); + assert.deepStrictEqual(got.error.details, { algorithm: 'sha256' }); + }); + + it('should handle 10000 CRC64NVME parts of uniform 5 MiB (cache hits)', async function f() { + // 10 000 parts is the AWS MPU max; CRC64NVME has the largest + // (64-bit) combine matrix. Validates correctness against the CRC + // of the equivalent 50 GiB object, computed by streaming the same + // chunk through CrtCrc64Nvme without materializing the object. + this.timeout(120000); + + const partLen = 5 * 1024 * 1024; + const nParts = 10000; + const chunk = crypto.randomBytes(partLen); + const partCrc = await algorithms.crc64nvme.digest(chunk); + + const parts = new Array(nParts); + for (let i = 0; i < nParts; i += 1) { + parts[i] = { value: partCrc, length: partLen }; + } + + const got = computeFullObjectMPUChecksum('crc64nvme', parts); + assert.strictEqual(got.error, null); + + const ref = algorithms.crc64nvme.createHash(); + for (let i = 0; i < nParts; i += 1) { + ref.update(chunk); + } + const expected = await algorithms.crc64nvme.digestFromHash(ref); + assert.strictEqual(got.checksum, expected); + }); + + it('should handle 10000 CRC64NVME parts of distinct lengths (cache misses)', async function f() { + // Every part has a strictly different length, so each combine call + // touches a different mix of `len2` bit positions. Validates + // correctness against a streaming reference over independently + // generated part bodies. + this.timeout(60000); + + const baseLen = 64 * 1024; + const nParts = 10000; + const parts = new Array(nParts); + const ref = algorithms.crc64nvme.createHash(); + for (let i = 0; i < nParts; i += 1) { + const len = baseLen + i; + const buf = crypto.randomBytes(len); + parts[i] = { + value: await algorithms.crc64nvme.digest(buf), + length: len, + }; + ref.update(buf); + } + + const got = computeFullObjectMPUChecksum('crc64nvme', parts); + assert.strictEqual(got.error, null); + + const expected = await algorithms.crc64nvme.digestFromHash(ref); + assert.strictEqual(got.checksum, expected); + }); +}); diff --git a/tests/unit/api/apiUtils/integrity/crcCombine.js b/tests/unit/api/apiUtils/integrity/crcCombine.js new file mode 100644 index 0000000000..48f8239a21 --- /dev/null +++ b/tests/unit/api/apiUtils/integrity/crcCombine.js @@ -0,0 +1,159 @@ +const assert = require('assert'); +const crypto = require('crypto'); + +const { crcCombine, combineCrcs } = require('../../../../../lib/api/apiUtils/integrity/crcCombine'); +const { algorithms } = require('../../../../../lib/api/apiUtils/integrity/validateChecksums'); + +// Reversed polynomial + bit width for each algorithm we use the combine +// routine with. Same values that validateChecksums.js feeds in. 
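+// (crc32 is CRC-32/ISO-HDLC as used by zlib, crc32c is CRC-32C/Castagnoli,
+// and crc64nvme is CRC-64/NVME; each constant is the bit-reversed generator
+// polynomial of that CRC.)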
+const SPECS = [ + { algo: 'crc32', polyReversed: 0xedb88320n, dim: 32 }, + { algo: 'crc32c', polyReversed: 0x82f63b78n, dim: 32 }, + { algo: 'crc64nvme', polyReversed: 0x9a6c9329ac4bc9b5n, dim: 64 }, +]; + +function base64ToBigInt(b64) { + const buf = Buffer.from(b64, 'base64'); + let r = 0n; + for (let i = 0; i < buf.length; i += 1) { + r = (r << 8n) | BigInt(buf[i]); + } + return r; +} + +async function crcOf(algo, buf) { + return base64ToBigInt(await algorithms[algo].digest(buf)); +} + +describe('crcCombine', () => { + SPECS.forEach(({ algo, polyReversed, dim }) => { + const label = algo.toUpperCase(); + const mask = (1n << BigInt(dim)) - 1n; + + describe(`${label} (dim=${dim})`, () => { + it('should combine(crc1, crc2, len2) to crc(chunk1 ‖ chunk2) for random data', async () => { + const a = crypto.randomBytes(1024); + const b = crypto.randomBytes(1024); + const crc1 = await crcOf(algo, a); + const crc2 = await crcOf(algo, b); + const got = crcCombine(crc1, crc2, BigInt(b.length), polyReversed, dim); + const expected = await crcOf(algo, Buffer.concat([a, b])); + assert.strictEqual(got, expected); + }); + + it('should return crc1 unchanged when len2 = 0 (identity)', async () => { + const a = crypto.randomBytes(64); + const crc1 = await crcOf(algo, a); + const got = crcCombine(crc1, 0n, 0n, polyReversed, dim); + assert.strictEqual(got, crc1 & mask); + }); + + it('should equal the original CRC when combined with the CRC of empty', async () => { + // CRC of an empty chunk under the AWS implementations is 0. + const a = crypto.randomBytes(128); + const crc1 = await crcOf(algo, a); + const crcEmpty = await crcOf(algo, Buffer.alloc(0)); + const got = crcCombine(crc1, crcEmpty, 0n, polyReversed, dim); + assert.strictEqual(got, crc1 & mask); + }); + + it('should mask the result to `dim` bits', async () => { + const a = crypto.randomBytes(256); + const b = crypto.randomBytes(256); + const got = crcCombine(await crcOf(algo, a), await crcOf(algo, b), BigInt(b.length), polyReversed, dim); + assert.strictEqual(got & mask, got); + assert.strictEqual(got >> BigInt(dim), 0n); + }); + + it('should be associative across three chunks', async () => { + const a = crypto.randomBytes(300); + const b = crypto.randomBytes(400); + const c = crypto.randomBytes(500); + const crcA = await crcOf(algo, a); + const crcB = await crcOf(algo, b); + const crcC = await crcOf(algo, c); + + // Left-fold: combine(combine(A,B), C) + const ab = crcCombine(crcA, crcB, BigInt(b.length), polyReversed, dim); + const left = crcCombine(ab, crcC, BigInt(c.length), polyReversed, dim); + + // Right-fold: combine(A, combine(B, C), len(B)+len(C)) + const bc = crcCombine(crcB, crcC, BigInt(c.length), polyReversed, dim); + const right = crcCombine(crcA, bc, BigInt(b.length + c.length), polyReversed, dim); + + assert.strictEqual(left, right); + const expected = await crcOf(algo, Buffer.concat([a, b, c])); + assert.strictEqual(left, expected); + }); + + it('should handle single-byte chunks', async () => { + const a = crypto.randomBytes(1); + const b = crypto.randomBytes(1); + const got = crcCombine(await crcOf(algo, a), await crcOf(algo, b), 1n, polyReversed, dim); + const expected = await crcOf(algo, Buffer.concat([a, b])); + assert.strictEqual(got, expected); + }); + + it('should handle odd-length chunk2 sizes (not a multiple of 8 bytes)', async () => { + // Sizes chosen to exercise the matrix-squaring loop's + // odd/even alternation through both branches. 
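+ // 65537 = 2^16 + 1 exercises both ends of the len2 bit-walk in one
+ // call: byteOps[0] (1 zero byte) and byteOps[16] (65536 zero bytes).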
+ const sizes = [1, 7, 15, 33, 257, 1023, 65537]; + const a = crypto.randomBytes(64); + const crcA = await crcOf(algo, a); + for (const size of sizes) { + const b = crypto.randomBytes(size); + const got = crcCombine(crcA, await crcOf(algo, b), BigInt(size), polyReversed, dim); + const expected = await crcOf(algo, Buffer.concat([a, b])); + assert.strictEqual(got, expected, `failed at size=${size}`); + } + }); + }); + }); +}); + +describe('combineCrcs', () => { + SPECS.forEach(({ algo, polyReversed, dim }) => { + const label = algo.toUpperCase(); + + describe(`${label} (dim=${dim})`, () => { + it('should return the part CRC unchanged for a single-part input', async () => { + const buf = crypto.randomBytes(13); + const partCrc = await algorithms[algo].digest(buf); + const got = combineCrcs([{ value: partCrc, length: buf.length }], polyReversed, dim); + assert.strictEqual(got, partCrc); + }); + + it('should match crc(concat) for two parts — base64 in, base64 out', async () => { + const a = crypto.randomBytes(1024); + const b = crypto.randomBytes(2048); + const parts = [ + { value: await algorithms[algo].digest(a), length: a.length }, + { value: await algorithms[algo].digest(b), length: b.length }, + ]; + const got = combineCrcs(parts, polyReversed, dim); + const expected = await algorithms[algo].digest(Buffer.concat([a, b])); + assert.strictEqual(got, expected); + }); + + it('should match crc(concat) for N parts of varied sizes', async () => { + const bufs = [ + crypto.randomBytes(7), + crypto.randomBytes(513), + crypto.randomBytes(1024), + crypto.randomBytes(2049), + crypto.randomBytes(64), + ]; + const parts = []; + for (const buf of bufs) { + parts.push({ + value: await algorithms[algo].digest(buf), + length: buf.length, + }); + } + const got = combineCrcs(parts, polyReversed, dim); + const expected = await algorithms[algo].digest(Buffer.concat(bufs)); + assert.strictEqual(got, expected); + }); + }); + }); +});