Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions lib/api/apiUtils/integrity/crcCombine.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
'use strict';

// Combine two right-shift CRCs (zlib's gf2_matrix_* trick) without using BigInt
// inside the hot loops. Each GF(2) operator matrix is stored as a Uint32Array
// of `2 * dim` words, where row n is packed as [lo32, hi32]. For 32-bit CRCs
// the high halves stay zero and the per-row loop exits early; for the 64-bit
// CRC (crc64nvme) the pair-of-u32s representation lets every XOR/shift stay on
// 32-bit ints.
//
// References:
// zlib crc32_combine (canonical C implementation):
// https://github.com/madler/zlib/blob/master/crc32.c
// Mark Adler, "How does CRC32 work?" — derivation of the matrix trick:
// https://stackoverflow.com/a/23126768
// AWS S3 multipart upload full-object checksums:
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html

function gf2MatrixTimes(mat, vecLo, vecHi) {
    // Multiply the GF(2) operator `mat` by the (lo, hi) vector: XOR together
    // the matrix rows selected by the set bits of the vector. Rows are packed
    // as [lo32, hi32] pairs; the loop stops as soon as no vector bits remain,
    // so 32-bit CRCs never touch rows past index 31.
    let accLo = 0;
    let accHi = 0;
    for (let row = 0, bitsLo = vecLo, bitsHi = vecHi;
        (bitsLo | bitsHi) !== 0;
        row += 1) {
        if ((bitsLo & 1) !== 0) {
            accLo ^= mat[2 * row];
            accHi ^= mat[2 * row + 1];
        }
        // Shift the 64-bit vector right by one, carrying hi's low bit into lo.
        bitsLo = (bitsLo >>> 1) | ((bitsHi & 1) << 31);
        bitsHi >>>= 1;
    }
    return [accLo >>> 0, accHi >>> 0];
}

function gf2MatrixSquare(square, mat, dim) {
    // square = mat * mat over GF(2): column n of the product is `mat`
    // applied to column n of `mat` itself.
    for (let col = 0; col < dim; col += 1) {
        const prod = gf2MatrixTimes(mat, mat[2 * col], mat[2 * col + 1]);
        // Writing into the caller-provided scratch buffer is deliberate —
        // the callers (crcCombine, ensureChainLen) own `square` and recycle
        // it across squaring steps to avoid a fresh allocation per step.
        /* eslint-disable no-param-reassign */
        [square[2 * col], square[2 * col + 1]] = prod;
        /* eslint-enable no-param-reassign */
    }
}

// Per (polyReversed, dim), a lazily-grown chain of zero-byte operators.
// state.byteOps[j] is the GF(2) operator for prepending 2^j zero bytes
// (i.e. M^(8 * 2^j)). Building this chain is the dominant cost of crcCombine
// and depends only on the polynomial and width, so we cache it across calls.
const chainCache = new Map();

/**
 * Return (building and caching on first use) the squaring-chain state for a
 * given polynomial/width pair.
 *
 * @param {bigint} polyReversed - bit-reversed polynomial
 * @param {number} dim - CRC width in bits (32 or 64)
 * @returns {{ dim: number, byteOps: Uint32Array[] }} cached chain state
 */
function getOrInitChain(polyReversed, dim) {
    // Key on BOTH the polynomial and the width (as the cache comment above
    // promises): the same reversed polynomial requested at a different `dim`
    // must not silently reuse matrices of the wrong dimension.
    const cacheKey = `${polyReversed}:${dim}`;
    let state = chainCache.get(cacheKey);
    if (state !== undefined) {
        return state;
    }

    // M^1: one-zero-bit operator. Column 0 is the polynomial; column k>0 is
    // 1 << (k - 1) — what right-shifting a state with bit k set produces.
    const m1 = new Uint32Array(2 * dim);
    m1[0] = Number(polyReversed & 0xffffffffn);
    m1[1] = Number((polyReversed >> 32n) & 0xffffffffn);
    for (let k = 1; k < dim; k += 1) {
        const bit = k - 1;
        if (bit < 32) {
            m1[2 * k] = (1 << bit) >>> 0;
        } else {
            m1[2 * k + 1] = (1 << (bit - 32)) >>> 0;
        }
    }

    // Square three times: M^1 -> M^2 -> M^4 -> M^8, i.e. one whole zero byte.
    const m2 = new Uint32Array(2 * dim);
    gf2MatrixSquare(m2, m1, dim);
    const m4 = new Uint32Array(2 * dim);
    gf2MatrixSquare(m4, m2, dim);
    const m8 = new Uint32Array(2 * dim); // operator for 1 zero byte
    gf2MatrixSquare(m8, m4, dim);

    state = { dim, byteOps: [m8] };
    chainCache.set(cacheKey, state);
    return state;
}

function ensureChainLen(state, j) {
    // Grow the operator chain until index j exists. Each new entry is the
    // square of the previous one, doubling the zero-byte count it encodes.
    const ops = state.byteOps;
    while (ops.length <= j) {
        const last = ops[ops.length - 1];
        const squared = new Uint32Array(last.length);
        gf2MatrixSquare(squared, last, state.dim);
        ops.push(squared);
    }
}

/**
 * Combine two CRCs of adjacent byte chunks.
 *
 * crcCombine(crc(a), crc(b), len(b), polyReversed, dim) === crc(a ‖ b)
 *
 * Works for any right-shift CRC of width `dim` (32 or 64) given its
 * bit-reversed polynomial. The squaring chain for `polyReversed` is cached
 * across calls, so the per-call cost is just popcount(len2) cheap operator
 * applications plus the BigInt boundary conversions.
 *
 * @param {bigint} crc1 - CRC of the first chunk
 * @param {bigint} crc2 - CRC of the second chunk
 * @param {bigint} len2 - byte length of the second chunk
 * @param {bigint} polyReversed - bit-reversed polynomial
 * @param {number} dim - CRC width in bits (32 or 64)
 * @returns {bigint} CRC of the concatenated chunk, masked to `dim` bits
 */
function crcCombine(crc1, crc2, len2, polyReversed, dim) {
    const mask = (1n << BigInt(dim)) - 1n;
    if (len2 === 0n) {
        return crc1 & mask;
    }

    const state = getOrInitChain(polyReversed, dim);

    // Split crc1 into a pair of 32-bit words for the integer-only hot path.
    let curLo = Number(crc1 & 0xffffffffn);
    let curHi = Number((crc1 >> 32n) & 0xffffffffn);

    // Each set bit j of len2 stands for prepending 2^j zero bytes to the
    // second chunk; apply the cached operator for every such bit.
    for (let remaining = len2, j = 0; remaining !== 0n; remaining >>= 1n, j += 1) {
        if ((remaining & 1n) !== 0n) {
            ensureChainLen(state, j);
            [curLo, curHi] = gf2MatrixTimes(state.byteOps[j], curLo, curHi);
        }
    }

    // XOR in the second CRC, then reassemble a BigInt masked to `dim` bits.
    const outLo = (curLo ^ Number(crc2 & 0xffffffffn)) >>> 0;
    const outHi = (curHi ^ Number((crc2 >> 32n) & 0xffffffffn)) >>> 0;
    return ((BigInt(outHi) << 32n) | BigInt(outLo)) & mask;
}

// Decode a base64 string into an unsigned BigInt (big-endian byte order).
function base64ToBigInt(b64) {
    return Buffer.from(b64, 'base64')
        .reduce((acc, byte) => (acc << 8n) | BigInt(byte), 0n);
}

// Encode the low `dim` bits of a BigInt as big-endian bytes, base64-encoded.
// `dim` must be a multiple of 8 (32 or 64 for the CRCs handled here).
function bigIntToBase64(value, dim) {
    const nBytes = dim / 8;
    const bytes = [];
    let rest = value;
    for (let i = 0; i < nBytes; i += 1) {
        // Peel bytes off the low end; unshift keeps big-endian order.
        bytes.unshift(Number(rest & 0xffn));
        rest >>= 8n;
    }
    return Buffer.from(bytes).toString('base64');
}

/**
 * Combine N per-part CRCs into the full-object CRC, base64-encoded.
 *
 * @param {Array<{value: string, length: number}>} parts - per-part data in
 *   part order; `value` is the base64-encoded per-part CRC, `length` is the
 *   byte length of that part
 * @param {bigint} polyReversed - bit-reversed polynomial
 * @param {number} dim - CRC width in bits (32 or 64)
 * @returns {string} base64-encoded combined CRC
 */
function combineCrcs(parts, polyReversed, dim) {
    // Fold left-to-right: each step appends the next part's CRC to the
    // running CRC of everything before it.
    const [first, ...rest] = parts;
    const total = rest.reduce(
        (acc, part) => crcCombine(
            acc, base64ToBigInt(part.value), BigInt(part.length), polyReversed, dim),
        base64ToBigInt(first.value));
    return bigIntToBase64(total, dim);
}

module.exports = { combineCrcs, crcCombine };
118 changes: 118 additions & 0 deletions lib/api/apiUtils/integrity/validateChecksums.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const { Crc32c } = require('@aws-crypto/crc32c');
const { CrtCrc64Nvme } = require('@aws-sdk/crc64-nvme-crt');
const { errors: ArsenalErrors, errorInstances } = require('arsenal');
const { config } = require('../../../Config');
const { combineCrcs } = require('./crcCombine');

const defaultChecksumData = Object.freeze({ algorithm: 'crc64nvme', isTrailer: false, expected: undefined });

Expand Down Expand Up @@ -152,6 +153,20 @@ const algorithms = Object.freeze({
},
});

/**
* Validate body integrity for a buffered (non-chunked) request.
*
* The whole request body has already been read into memory. This function
* picks the single x-amz-checksum-<algo> header, recomputes the digest of
* `body` with that algorithm, and compares it to the header value. It is the
* authoritative body-checksum check for small APIs (e.g. multiObjectDelete,
* bucket configuration PUTs) where the body is not streamed.
*
* @param {object} headers - HTTP request headers (lowercased keys)
* @param {Buffer} body - the entire buffered request body
* @returns {Promise<null | { error: string, details: object }>} -
* null on success; otherwise a ChecksumError with details.
*/
async function validateXAmzChecksums(headers, body) {
const checksumHeaders = Object.keys(headers).filter(header => header.startsWith('x-amz-checksum-'));
const xAmzChecksumCnt = checksumHeaders.length;
Expand Down Expand Up @@ -205,6 +220,28 @@ async function validateXAmzChecksums(headers, body) {
return null;
}

/**
* Extract checksum intent from request headers for a streaming upload.
*
* Inspects x-amz-checksum-<algo>, x-amz-trailer, and x-amz-sdk-checksum-algorithm
* to decide which algorithm the streaming pipeline (e.g. ChecksumTransform,
* TrailingChecksumTransform) should compute over the body, and what digest
* value (if any) the body must match. Does NOT read the body — the actual
* comparison happens later as bytes flow through the stream.
*
* Used by streaming write paths: PutObject, UploadPart, replication writes
* via routeBackbeat.
*
* @param {object} headers - HTTP request headers (lowercased keys)
* @returns {null
* | { algorithm: string, isTrailer: boolean, expected: string|undefined }
* | { error: string, details: object }} -
* - null when no checksum metadata is present (caller decides default).
* - { algorithm, isTrailer, expected } describing what the stream must
* produce. `isTrailer` is true when the digest will arrive in the
* request trailer (`expected` is undefined until the trailer parses).
* - { error, details } on header mismatch.
*/
function getChecksumDataFromHeaders(headers) {
const checkSdk = algo => {
if (!('x-amz-sdk-checksum-algorithm' in headers)) {
Expand Down Expand Up @@ -489,6 +526,85 @@ function getChecksumDataFromMPUHeaders(headers) {
return { algorithm: algo, type: defaultChecksumType[algo], isDefault: false };
}

// =============================================================================
// MPU final-object checksum computation
// =============================================================================
//
// CompleteMultipartUpload composes a final-object checksum from the per-part
// checksums recorded at UploadPart time. AWS defines two modes:
//
// COMPOSITE : finalChecksum = base64(algo(decode(c1) || ... || decode(cN)))
// + "-N" suffix, where N is the number of parts.
// Supported on CRC32, CRC32C, SHA1, SHA256.
//
// FULL_OBJECT : finalChecksum is the CRC of the entire object's bytes,
// reconstructed by combining the per-part CRCs via CRC
// linearization. CRC-only: CRC32, CRC32C,
// CRC64NVME.

// Bit-reversed (reflected) polynomials used by the right-shift CRC
// implementations that the @aws-crypto/* and @aws-sdk/crc64-nvme-crt
// packages produce. `dim` is the CRC register width in bits; these pairs are
// fed straight into combineCrcs for FULL_OBJECT MPU checksum reconstruction.
const FULL_OBJECT_POLYS = Object.freeze({
    crc32: { polyReversed: 0xedb88320n, dim: 32 },
    crc32c: { polyReversed: 0x82f63b78n, dim: 32 },
    crc64nvme: { polyReversed: 0x9a6c9329ac4bc9b5n, dim: 64 },
});

// Algorithms whose digest is synchronous, which is the full set AWS allows
// for COMPOSITE MPUs. crc64nvme is excluded because (a) AWS does not allow
// COMPOSITE for CRC64NVME and (b) its CRT-backed digest is async.
const COMPOSITE_ALGOS = new Set(['crc32', 'crc32c', 'sha1', 'sha256']);

/**
 * Compute the COMPOSITE final-object checksum for a CompleteMultipartUpload.
 *
 * final = base64(algo(decode(c1) || decode(c2) || ... || decode(cN))) + "-N"
 *
 * Supported algorithms: crc32, crc32c, sha1, sha256. (crc64nvme is excluded —
 * AWS does not allow COMPOSITE for CRC64NVME.)
 *
 * @param {string} algorithm - lowercase algorithm name
 * @param {string[]} partChecksumsBase64 - per-part checksums in part order,
 * each base64-encoded (the format stored on MPU part metadata)
 * @returns {{ checksum: string, error: null }
 * | { checksum: null, error: { code: string, details: object } }}
 */
function computeCompositeMPUChecksum(algorithm, partChecksumsBase64) {
    if (!COMPOSITE_ALGOS.has(algorithm)) {
        return {
            checksum: null,
            error: { code: ChecksumError.MPUAlgoNotSupported, details: { algorithm } },
        };
    }

    // Digest the raw (decoded) per-part checksums, concatenated in order,
    // then append the "-N" part-count suffix AWS uses for COMPOSITE values.
    const decoded = partChecksumsBase64.map(c => Buffer.from(c, 'base64'));
    const digest = algorithms[algorithm].digest(Buffer.concat(decoded));
    return { checksum: `${digest}-${partChecksumsBase64.length}`, error: null };
}

/**
 * Compute the FULL_OBJECT final-object checksum for a CompleteMultipartUpload.
 *
 * Returns the CRC of the assembled object's bytes, derived purely from the
 * per-part CRCs and part lengths via CRC linearization.
 *
 * Supported algorithms: crc32, crc32c, crc64nvme.
 *
 * @param {string} algorithm - lowercase algorithm name
 * @param {Array<{value: string, length: number}>} parts - per-part data in
 * part order; `value` is the base64-encoded per-part CRC, `length` is the
 * byte length of that part
 * @returns {{ checksum: string, error: null }
 * | { checksum: null, error: { code: string, details: object } }}
 */
function computeFullObjectMPUChecksum(algorithm, parts) {
    const params = FULL_OBJECT_POLYS[algorithm];
    if (params === undefined) {
        // FULL_OBJECT is CRC-only; anything else is rejected up front.
        return {
            checksum: null,
            error: { code: ChecksumError.MPUAlgoNotSupported, details: { algorithm } },
        };
    }
    return {
        checksum: combineCrcs(parts, params.polyReversed, params.dim),
        error: null,
    };
}

module.exports = {
ChecksumError,
defaultChecksumData,
Expand All @@ -499,4 +615,6 @@ module.exports = {
algorithms,
checksumedMethods,
getChecksumDataFromMPUHeaders,
computeCompositeMPUChecksum,
computeFullObjectMPUChecksum,
};
Loading
Loading