Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions lib/api/apiUtils/integrity/crcCombine.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
'use strict';

// Combine two right-shift CRCs (zlib's gf2_matrix_* trick) without using BigInt
// inside the hot loops. Each GF(2) operator matrix is stored as a Uint32Array
// of `2 * dim` words, where row n is packed as [lo32, hi32]. For 32-bit CRCs
// the high halves stay zero and the per-row loop exits early; for the 64-bit
// CRC (crc64nvme) the pair-of-u32s representation lets every XOR/shift stay on
// 32-bit ints.
//
// References:
// zlib crc32_combine (canonical C implementation):
// https://github.com/madler/zlib/blob/master/crc32.c
// Mark Adler, "How does CRC32 work?" — derivation of the matrix trick:
// https://stackoverflow.com/a/23126768
// AWS S3 multipart upload full-object checksums:
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html

/**
 * Multiply a GF(2) operator matrix by a bit vector held as two 32-bit halves.
 *
 * Row i of `mat` lives at `mat[2 * i]` (low word) and `mat[2 * i + 1]`
 * (high word). Every set bit i of the vector XORs row i into the result.
 *
 * @param {Uint32Array} mat - packed operator matrix, two words per row
 * @param {number} vecLo - low 32 bits of the input vector
 * @param {number} vecHi - high 32 bits of the input vector
 * @returns {number[]} `[lo, hi]` unsigned 32-bit halves of the product
 */
function gf2MatrixTimes(mat, vecLo, vecHi) {
    let accLo = 0;
    let accHi = 0;
    let restLo = vecLo;
    let restHi = vecHi;
    // Stops as soon as no set bits remain, so a vector whose high half is
    // zero never visits more than 32 rows.
    for (let row = 0; (restLo | restHi) !== 0; row += 1) {
        if ((restLo & 1) !== 0) {
            const offset = 2 * row;
            accLo ^= mat[offset];
            accHi ^= mat[offset + 1];
        }
        // Logical right shift of the 64-bit pair by one: the lowest bit of
        // the high word becomes the top bit of the low word.
        restLo = (restLo >>> 1) | ((restHi & 1) << 31);
        restHi >>>= 1;
    }
    // XOR yields signed int32 values; normalise both halves to unsigned.
    return [accLo >>> 0, accHi >>> 0];
}

/**
 * Write the square of a GF(2) operator matrix into a caller-owned buffer.
 *
 * Row n of the result is `mat` applied to row n of `mat`.
 *
 * @param {Uint32Array} square - destination buffer, overwritten in place
 * @param {Uint32Array} mat - packed operator matrix to square
 * @param {number} dim - number of rows to compute
 * @returns {undefined}
 */
function gf2MatrixSquare(square, mat, dim) {
    // Writing into the caller's buffer is deliberate: callers allocate the
    // destination themselves, so the lint rule is disabled for the loop.
    /* eslint-disable no-param-reassign */
    for (let row = 0; row < dim; row += 1) {
        const offset = 2 * row;
        const [productLo, productHi] = gf2MatrixTimes(mat, mat[offset], mat[offset + 1]);
        square[offset] = productLo;
        square[offset + 1] = productHi;
    }
    /* eslint-enable no-param-reassign */
}

// Per (polyReversed, dim), a lazily-grown chain of zero-byte operators.
// state.byteOps[j] is the GF(2) operator for prepending 2^j zero bytes
// (i.e. M^(8 * 2^j)). Building this chain is the dominant cost of crcCombine
// and depends only on the polynomial and width, so we cache it across calls.
// Keys are `${dim}:${polyReversed}` strings.
const chainCache = new Map();

/**
 * Return the cached operator chain for a CRC, creating it on first use.
 *
 * A freshly created chain holds a single operator, the one-zero-byte matrix
 * M^8, obtained by squaring the one-zero-bit matrix three times.
 *
 * @param {bigint} polyReversed - bit-reversed polynomial
 * @param {number} dim - CRC width in bits (32 or 64)
 * @returns {{dim: number, byteOps: Uint32Array[]}} shared, mutable chain state
 */
function getOrInitChain(polyReversed, dim) {
    // The matrices hold 2 * dim words, so a chain built for one width must
    // never be served for another: the key carries both values.
    const cacheKey = `${dim}:${polyReversed}`;
    const cached = chainCache.get(cacheKey);
    if (cached !== undefined) {
        return cached;
    }

    // M^1: one-zero-bit operator. Row 0 is the polynomial; row k>0 is
    // 1 << (k - 1) — what right-shifting a state with bit k set produces.
    const m1 = new Uint32Array(2 * dim);
    m1[0] = Number(polyReversed & 0xffffffffn);
    m1[1] = Number((polyReversed >> 32n) & 0xffffffffn);
    for (let k = 1; k < dim; k += 1) {
        const bit = k - 1;
        if (bit < 32) {
            m1[2 * k] = (1 << bit) >>> 0;
        } else {
            m1[2 * k + 1] = (1 << (bit - 32)) >>> 0;
        }
    }

    const m2 = new Uint32Array(2 * dim);
    gf2MatrixSquare(m2, m1, dim);
    const m4 = new Uint32Array(2 * dim);
    gf2MatrixSquare(m4, m2, dim);
    const m8 = new Uint32Array(2 * dim); // operator for 1 zero byte
    gf2MatrixSquare(m8, m4, dim);

    const state = { dim, byteOps: [m8] };
    chainCache.set(cacheKey, state);
    return state;
}

/**
 * Grow a chain until `state.byteOps[j]` exists.
 *
 * Each new entry is the square of the previous last entry, so entry j is the
 * first entry squared j times. Does nothing when the chain is already long
 * enough.
 *
 * @param {{dim: number, byteOps: Uint32Array[]}} state - chain state, mutated
 *   in place by appending to `byteOps`
 * @param {number} j - highest index that must be present afterwards
 * @returns {undefined}
 */
function ensureChainLen(state, j) {
    while (state.byteOps.length <= j) {
        const prev = state.byteOps[state.byteOps.length - 1];
        // Every operator gets its own buffer: earlier entries stay valid
        // because they are read again by later calls.
        const next = new Uint32Array(prev.length);
        gf2MatrixSquare(next, prev, state.dim);
        state.byteOps.push(next);
    }
}

/**
 * Combine two CRCs of adjacent byte chunks.
 *
 *   crcCombine(crc(a), crc(b), len(b), polyReversed, dim) === crc(a ‖ b)
 *
 * Works for any right-shift CRC of width `dim` (32 or 64) given its
 * bit-reversed polynomial. The squaring chain for the polynomial is cached
 * across calls, so the per-call cost is just popcount(len2) cheap operator
 * applications plus the BigInt boundary conversions.
 *
 * @param {bigint} crc1 - CRC of the first chunk
 * @param {bigint} crc2 - CRC of the second chunk
 * @param {bigint} len2 - byte length of the second chunk, must be >= 0
 * @param {bigint} polyReversed - bit-reversed polynomial
 * @param {number} dim - CRC width in bits (32 or 64)
 * @returns {bigint} CRC of the concatenated chunk, masked to `dim` bits
 * @throws {RangeError} if `len2` is negative
 */
function crcCombine(crc1, crc2, len2, polyReversed, dim) {
    // A negative BigInt never reaches 0n under `>>= 1n` (it settles at -1n),
    // so without this guard the bit walk below would spin forever while
    // growing the cached chain without bound.
    if (len2 < 0n) {
        throw new RangeError(`len2 must be non-negative, got ${len2}`);
    }

    const mask = (1n << BigInt(dim)) - 1n;
    if (len2 === 0n) {
        // Appending nothing leaves the first CRC unchanged.
        return crc1 & mask;
    }

    const state = getOrInitChain(polyReversed, dim);

    let cLo = Number(crc1 & 0xffffffffn);
    let cHi = Number((crc1 >> 32n) & 0xffffffffn);

    // Walk the bits of len2 (each bit represents a power-of-two number of
    // zero bytes to prepend); apply the cached operator for every set bit.
    let n = len2;
    let j = 0;
    while (n !== 0n) {
        if ((n & 1n) === 1n) {
            ensureChainLen(state, j);
            const r = gf2MatrixTimes(state.byteOps[j], cLo, cHi);
            cLo = r[0];
            cHi = r[1];
        }
        n >>= 1n;
        j += 1;
    }

    // XOR in the second CRC; `>>> 0` brings the signed XOR result back to
    // an unsigned 32-bit value before it is widened to BigInt.
    const c2Lo = Number(crc2 & 0xffffffffn);
    const c2Hi = Number((crc2 >> 32n) & 0xffffffffn);
    cLo = (cLo ^ c2Lo) >>> 0;
    cHi = (cHi ^ c2Hi) >>> 0;

    return ((BigInt(cHi) << 32n) | BigInt(cLo)) & mask;
}

/**
 * Decode a base64 string and read its bytes as one big-endian unsigned
 * integer.
 *
 * @param {string} b64 - base64-encoded bytes
 * @returns {bigint} integer value of the decoded bytes (0n when empty)
 */
function base64ToBigInt(b64) {
    let acc = 0n;
    // Most significant byte first: shift the accumulator up one byte and
    // drop the next byte into the freed low bits.
    for (const byte of Buffer.from(b64, 'base64')) {
        acc = (acc << 8n) | BigInt(byte);
    }
    return acc;
}

/**
 * Encode the low `dim` bits of a BigInt as big-endian bytes in base64.
 *
 * @param {bigint} value - integer to encode; bits above `dim` are dropped
 * @param {number} dim - width in bits, giving `dim / 8` output bytes
 * @returns {string} base64 encoding of the `dim / 8` bytes
 */
function bigIntToBase64(value, dim) {
    const byteCount = dim / 8;
    const bytes = Buffer.alloc(byteCount);
    let remaining = value;
    let pos = byteCount;
    // Fill from the last byte backwards so the least significant byte of
    // `value` lands at the end of the buffer.
    while (pos >= 1) {
        pos -= 1;
        bytes[pos] = Number(remaining & 0xffn);
        remaining >>= 8n;
    }
    return bytes.toString('base64');
}

/**
 * Combine N per-part CRCs into the full-object CRC, base64-encoded.
 *
 * Parts are folded left to right: the running CRC is combined with each
 * following part's CRC and byte length in turn.
 *
 * @param {Array<{value: string, length: number}>} parts - per-part data in
 *   part order; `value` is the base64-encoded per-part CRC, `length` is the
 *   byte length of that part. Must contain at least one part.
 * @param {bigint} polyReversed - bit-reversed polynomial
 * @param {number} dim - CRC width in bits (32 or 64)
 * @returns {string} base64-encoded combined CRC
 * @throws {TypeError} if `parts` is empty
 */
function combineCrcs(parts, polyReversed, dim) {
    // There is no CRC to start from without a first part; fail with a clear
    // message instead of a property access on undefined.
    if (parts.length === 0) {
        throw new TypeError('combineCrcs requires a non-empty array of parts');
    }

    let combined = base64ToBigInt(parts[0].value);
    for (let i = 1; i < parts.length; i += 1) {
        combined = crcCombine(
            combined,
            base64ToBigInt(parts[i].value),
            BigInt(parts[i].length),
            polyReversed,
            dim,
        );
    }
    return bigIntToBase64(combined, dim);
}

module.exports = { combineCrcs, crcCombine };
Loading
Loading