From b0d0561afc41d20b5203c02bb9a4dbf59d18c214 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Tue, 26 May 2026 11:08:58 -0700 Subject: [PATCH] [world-vercel] Add /run-id sub-export with tagged ULID encode/decode (#1978) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [world-vercel] Add /run-id sub-export with tagged ULID encode/decode Encodes a tag bit, 5-bit version, and 6-bit Vercel region ID into a ULID-shaped string used for workflow run IDs. Tagged values remain valid 26-char Crockford-Base32 ULIDs so they still sort and round-trip through any system that accepts ULIDs. * [world-vercel] Add string-value assertions to run-id tests Add exact-string expectations for encoded outputs at known inputs, covering the default region/version pair, numeric region IDs, version overrides, boundary values (all-zero, all-max), the dirty-input overwrite case, and the lexicographic-order checks. Also adds an explicit byte-array expectation for the canonical ULID-spec example string and an additional first-char-range coverage test for isTagged. * [world-vercel] Remove internal-repo reference from regions doc comment * [world-vercel] Address PR review feedback on run-id sub-export - isTaggedString now fully validates the input as a 26-char Crockford Base32 ULID (delegating to ulidToBytes) instead of only inspecting the first character. This fixes false positives on inputs like '4UUUU...' that have a valid tag-bit position but invalid chars later in the string. - isTagged() now accepts `unknown` to match its documented behavior of safely rejecting non-string inputs without requiring callers to cast. - Introduce `RegionKey` for the full set of keys including 'unknown', and narrow `RegionCode` to `Exclude<RegionKey, 'unknown'>` so the return type of `lookupRegion` and the `DecodedRunId.region` field accurately reflect that 'unknown' is never produced. 
Updates `encode` to reject 'unknown' as a region code string at runtime (callers wanting the unknown sentinel should pass numeric 0). * [world-vercel] Move tagged-ULID metadata to the top of randomness Address review feedback on #1978: 1. **Metadata at top of randomness, not bottom.** Place `regionId` (6 bits) in the high bits of byte[6] and `version` (5 bits) straddling bytes 6 and 7, leaving the bottom 69 bits of randomness untouched by `encode`. This means a `monotonicFactory()`-style ULID generator's intra-millisecond bottom-bit increments survive encoding intact, so consecutive `encode(ulid(), region, { version })` calls with the same metadata produce strictly increasing strings. Previously the metadata sat in the bottom 11 bits — exactly the bits the monotonic factory uses — causing same-ms collisions/inversions. 2. **DecodedRunId is now a discriminated union.** When `tagged: false`, the `regionId`, `version`, and `region` fields are typed as `null` instead of being populated with garbage bits from arbitrary ULIDs. This forces callers to discriminate on `tagged` before reading metadata. 3. **regionIdFor: keep runtime backstop, mark as ignored for coverage.** The unreachable-in-TS branch stays as a defensive runtime check for callers crossing a JS/TS boundary; an istanbul/c8 ignore comment keeps coverage tools quiet. Doc strings and tests updated accordingly. The new layout adds a test verifying that a sequence of incrementing-bottom-bit ULIDs (simulating `monotonicFactory()`) round-trips through `encode` as a strictly increasing sequence. 108/108 world-vercel tests pass; typecheck clean. 
--- .changeset/tagged-run-id.md | 5 + packages/world-vercel/package.json | 4 + .../world-vercel/src/run-id/codec.test.ts | 157 ++++++++ packages/world-vercel/src/run-id/codec.ts | 182 +++++++++ .../world-vercel/src/run-id/index.test.ts | 376 ++++++++++++++++++ packages/world-vercel/src/run-id/index.ts | 269 +++++++++++++ packages/world-vercel/src/run-id/regions.ts | 90 +++++ 7 files changed, 1083 insertions(+) create mode 100644 .changeset/tagged-run-id.md create mode 100644 packages/world-vercel/src/run-id/codec.test.ts create mode 100644 packages/world-vercel/src/run-id/codec.ts create mode 100644 packages/world-vercel/src/run-id/index.test.ts create mode 100644 packages/world-vercel/src/run-id/index.ts create mode 100644 packages/world-vercel/src/run-id/regions.ts diff --git a/.changeset/tagged-run-id.md b/.changeset/tagged-run-id.md new file mode 100644 index 0000000000..956ea45717 --- /dev/null +++ b/.changeset/tagged-run-id.md @@ -0,0 +1,5 @@ +--- +"@workflow/world-vercel": minor +--- + +Add `@workflow/world-vercel/run-id` sub-export with `encode`/`decode` helpers that produce ULID-shaped workflow run IDs carrying a tag bit, a 5-bit version, and a 6-bit Vercel region ID. 
diff --git a/packages/world-vercel/package.json b/packages/world-vercel/package.json index d0f6329263..1d2aa6104c 100644 --- a/packages/world-vercel/package.json +++ b/packages/world-vercel/package.json @@ -20,6 +20,10 @@ ".": { "types": "./dist/index.d.ts", "default": "./dist/index.js" + }, + "./run-id": { + "types": "./dist/run-id/index.d.ts", + "default": "./dist/run-id/index.js" } }, "scripts": { diff --git a/packages/world-vercel/src/run-id/codec.test.ts b/packages/world-vercel/src/run-id/codec.test.ts new file mode 100644 index 0000000000..12e9333907 --- /dev/null +++ b/packages/world-vercel/src/run-id/codec.test.ts @@ -0,0 +1,157 @@ +import { describe, expect, it } from 'vitest'; +import { + bytesToUlid, + isTaggedString, + TAG_BIT_MASK, + ULID_BYTE_LENGTH, + ULID_LENGTH, + ulidToBytes, +} from './codec.js'; + +/** + * Reference ULID with all bytes = 0. Crockford encoding of 16 zero bytes is + * 26 '0' chars. + */ +const ZERO_ULID = '0'.repeat(ULID_LENGTH); + +/** + * Reference ULID with all bytes = 0xff. Crockford encoding of 16 0xff bytes + * is the 26-char value "7ZZZZZZZZZZZZZZZZZZZZZZZZZ" (the top char carries + * only 3 real bits, so its max is 7). + */ +const MAX_ULID = '7ZZZZZZZZZZZZZZZZZZZZZZZZZ'; + +describe('codec / ulidToBytes & bytesToUlid', () => { + it('round-trips the all-zero ULID', () => { + const bytes = ulidToBytes(ZERO_ULID); + expect(bytes).toEqual(new Uint8Array(ULID_BYTE_LENGTH)); + expect(bytesToUlid(bytes)).toBe(ZERO_ULID); + }); + + it('round-trips the all-ones ULID', () => { + const bytes = ulidToBytes(MAX_ULID); + expect(bytes).toEqual(new Uint8Array(ULID_BYTE_LENGTH).fill(0xff)); + expect(bytesToUlid(bytes)).toBe(MAX_ULID); + }); + + it('round-trips a typical ULID-shaped value', () => { + const ulid = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; + const bytes = ulidToBytes(ulid); + expect(bytes).toHaveLength(ULID_BYTE_LENGTH); + // Sanity-check the byte-level decoding of this ULID-spec example string. 
+ expect(Array.from(bytes)).toEqual([ + 0x01, 0x56, 0x3e, 0x3a, 0xb5, 0xd3, 0xd6, 0x76, 0x4c, 0x61, 0xef, 0xb9, + 0x93, 0x02, 0xbd, 0x5b, + ]); + expect(bytesToUlid(bytes)).toBe(ulid); + }); + + it('decodes lowercase Crockford characters and emits uppercase', () => { + const ulid = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; + expect(bytesToUlid(ulidToBytes(ulid.toLowerCase()))).toBe(ulid); + }); + + it('rejects strings of the wrong length', () => { + expect(() => ulidToBytes('')).toThrow(/Invalid ULID length/); + expect(() => ulidToBytes('0'.repeat(25))).toThrow(/Invalid ULID length/); + expect(() => ulidToBytes('0'.repeat(27))).toThrow(/Invalid ULID length/); + }); + + it('rejects strings with invalid Crockford characters', () => { + // 'U' is invalid in Crockford Base32. + const bad = `U${ZERO_ULID.slice(1)}`; + expect(() => ulidToBytes(bad)).toThrow( + /Invalid Crockford-Base32 character/ + ); + // 'L' is also invalid in Crockford (replaced by '1'). + const bad2 = `L${ZERO_ULID.slice(1)}`; + expect(() => ulidToBytes(bad2)).toThrow( + /Invalid Crockford-Base32 character/ + ); + // Non-ASCII. + const bad3 = `\u00ff${ZERO_ULID.slice(1)}`; + expect(() => ulidToBytes(bad3)).toThrow( + /Invalid Crockford-Base32 character/ + ); + }); + + it("rejects ULIDs whose first character is > '7'", () => { + // '8' has value 8 = 0b01000 in Crockford: its 3 real bits are zero, but + // the lower of the 2 pad bits is set. The codec checks both pad bits + // (values[0] & 0x18), and 8 & 0x18 = 0x08, which is nonzero. + const bad = `8${'0'.repeat(25)}`; + expect(() => ulidToBytes(bad)).toThrow(/top 2 bits must be zero/); + // 'Z' = 31 = 0b11111 → top 2 pad bits both set. 
+ const bad2 = `Z${'0'.repeat(25)}`; + expect(() => ulidToBytes(bad2)).toThrow(/top 2 bits must be zero/); + }); + + it('throws on non-string inputs', () => { + expect(() => ulidToBytes(undefined as unknown as string)).toThrow( + TypeError + ); + expect(() => ulidToBytes(null as unknown as string)).toThrow(TypeError); + expect(() => ulidToBytes(123 as unknown as string)).toThrow(TypeError); + }); + + it('rejects wrong-length byte arrays', () => { + expect(() => bytesToUlid(new Uint8Array(15))).toThrow( + /Invalid byte length/ + ); + expect(() => bytesToUlid(new Uint8Array(17))).toThrow( + /Invalid byte length/ + ); + }); +}); + +describe('codec / isTaggedString', () => { + it('returns false for the zero ULID', () => { + expect(isTaggedString(ZERO_ULID)).toBe(false); + }); + + it('returns true for a ULID with the tag bit manually set', () => { + const bytes = new Uint8Array(ULID_BYTE_LENGTH); + bytes[0] = TAG_BIT_MASK; + const tagged = bytesToUlid(bytes); + expect(tagged).toBe('40000000000000000000000000'); + expect(isTaggedString(tagged)).toBe(true); + // First char of a value with byte[0] = 0x80 should be '4' (0b100). 
+ expect(tagged[0]).toBe('4'); + }); + + it('returns true for any ULID whose first char is in [4..7]', () => { + expect(isTaggedString(`4${'0'.repeat(25)}`)).toBe(true); + expect(isTaggedString(`5${'0'.repeat(25)}`)).toBe(true); + expect(isTaggedString(`6${'0'.repeat(25)}`)).toBe(true); + expect(isTaggedString(`7${'Z'.repeat(25)}`)).toBe(true); + expect(isTaggedString(`0${'0'.repeat(25)}`)).toBe(false); + expect(isTaggedString(`3${'Z'.repeat(25)}`)).toBe(false); + }); + + it('returns false for non-strings, wrong lengths, and invalid chars', () => { + expect(isTaggedString('')).toBe(false); + expect(isTaggedString('0'.repeat(25))).toBe(false); + expect(isTaggedString(null)).toBe(false); + expect(isTaggedString(undefined)).toBe(false); + expect(isTaggedString(123)).toBe(false); + expect(isTaggedString({})).toBe(false); + // Invalid Crockford character at index 0. + expect(isTaggedString(`U${ZERO_ULID.slice(1)}`)).toBe(false); + }); + + it('rejects ULIDs with invalid Crockford characters after index 0', () => { + // First char '4' would otherwise set the tag bit, but the string is not + // a valid ULID because of the bad char further in. A naive + // implementation that only looked at the first char would incorrectly + // return true here. + expect(isTaggedString(`4${'U'.repeat(25)}`)).toBe(false); + expect(isTaggedString(`4${'0'.repeat(24)}L`)).toBe(false); + }); + + it("rejects ULIDs whose first char is > '7' (overflows 128 bits)", () => { + // First char '8'..'Z' has nonzero top 2 pad bits → not a valid ULID, + // regardless of whether the tag bit appears set. 
+ expect(isTaggedString(`8${'0'.repeat(25)}`)).toBe(false); + expect(isTaggedString(`Z${'0'.repeat(25)}`)).toBe(false); + }); +}); diff --git a/packages/world-vercel/src/run-id/codec.ts b/packages/world-vercel/src/run-id/codec.ts new file mode 100644 index 0000000000..c1f34e6782 --- /dev/null +++ b/packages/world-vercel/src/run-id/codec.ts @@ -0,0 +1,182 @@ +/** + * Low-level bit / Crockford-Base32 plumbing for tagged ULIDs. + * + * A ULID is a 128-bit value rendered as 26 Crockford-Base32 characters. Since + * 26 * 5 = 130 bits, the encoded representation has 2 leading zero pad bits + * — i.e. the top 2 bits of the first character must always be 0. This means + * the first character of any valid ULID lies in the range `0`..`7`. + * + * The tagged-ULID layout (see ./regions.ts and ./index.ts for context): + * + * byte[0] bit 7 TAG bit (1 = tagged run ID) + * byte[6] bits 2..7 `regionId` (6-bit field, MSB-first) + * byte[6] bits 0..1 high 2 bits of `version` (5-bit field) + * byte[7] bits 5..7 low 3 bits of `version` + * + * Encode sets the tag bit on byte[0] and overwrites the 11 metadata bits at + * the **top** of the randomness section (bytes[6..7]). Decode reads + clears + * only the tag bit, leaving the metadata bits intact in the returned + * "untagged" ULID. Placing the metadata at the top of randomness (rather + * than the bottom) leaves the low 69 bits free, so a `monotonicFactory()`- + * style ULID generator's bottom-bit increments survive encoding and + * intra-millisecond lexicographic order is preserved (provided callers + * don't change `(regionId, version)` mid-millisecond). + */ + +// Crockford Base32 alphabet (matches the `ulid` spec). +const ENCODING = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'; + +// Decode table: ASCII char code -> 5-bit value, or -1 if invalid. 
+const DECODE_TABLE: Int8Array = (() => { + const table = new Int8Array(128).fill(-1); + for (let i = 0; i < ENCODING.length; i++) { + table[ENCODING.charCodeAt(i)] = i; + } + // Crockford-Base32 case-insensitivity: also accept lowercase. + for (let i = 0; i < ENCODING.length; i++) { + const lower = ENCODING[i].toLowerCase(); + if (lower !== ENCODING[i]) { + table[lower.charCodeAt(0)] = i; + } + } + return table; +})(); + +export const ULID_LENGTH = 26; +export const ULID_BYTE_LENGTH = 16; + +/** Bit masks used by the tagged-ULID layout. */ +export const TAG_BIT_MASK = 0x80; // byte[0] bit 7 +/** `regionId` occupies the top 6 bits of byte[6]. */ +export const REGION_MASK = 0xfc; +/** High 2 bits of `version` occupy the bottom 2 bits of byte[6]. */ +export const VERSION_HIGH_MASK = 0x03; +/** Low 3 bits of `version` occupy the top 3 bits of byte[7]. */ +export const VERSION_LOW_MASK = 0xe0; +/** Byte indices of the metadata region within the 16-byte ULID buffer. */ +export const REGION_BYTE_INDEX = 6; +export const VERSION_LOW_BYTE_INDEX = 7; +export const VERSION_BIT_WIDTH = 5; +export const REGION_BIT_WIDTH = 6; +export const MAX_VERSION = (1 << VERSION_BIT_WIDTH) - 1; // 31 +export const MAX_REGION = (1 << REGION_BIT_WIDTH) - 1; // 63 + +/** + * Decode a 26-character Crockford-Base32 ULID string into 16 bytes. + * + * Throws if the string is not exactly 26 characters, contains an invalid + * Crockford character, or has nonzero top 2 pad bits (which would imply the + * value overflows 128 bits). + */ +export function ulidToBytes(ulid: string): Uint8Array { + if (typeof ulid !== 'string') { + throw new TypeError(`Expected ULID string, got ${typeof ulid}`); + } + if (ulid.length !== ULID_LENGTH) { + throw new Error( + `Invalid ULID length: expected ${ULID_LENGTH}, got ${ulid.length}` + ); + } + + // Validate and convert each char to its 5-bit value. 
+ const values = new Uint8Array(ULID_LENGTH); + for (let i = 0; i < ULID_LENGTH; i++) { + const code = ulid.charCodeAt(i); + const v = code < 128 ? DECODE_TABLE[code] : -1; + if (v < 0) { + throw new Error( + `Invalid Crockford-Base32 character at index ${i}: ${JSON.stringify(ulid[i])}` + ); + } + values[i] = v; + } + + // The first character carries only 3 real bits (the top 2 must be zero pad). + if ((values[0] & 0x18) !== 0) { + throw new Error( + `Invalid ULID: top 2 bits must be zero (first char > '7'): ${JSON.stringify(ulid[0])}` + ); + } + + // Pack 26 * 5 = 130 bits, with the top 2 bits being zero, into 16 bytes. + // Stream the values MSB-first into a bit buffer. + const out = new Uint8Array(ULID_BYTE_LENGTH); + // Skip the 2 leading zero pad bits: seed the buffer with only the 3 real bits of values[0]. + let bitBuf = values[0] & 0x07; + let bitCount = 3; + let outIdx = 0; + for (let i = 1; i < ULID_LENGTH; i++) { + bitBuf = (bitBuf << 5) | values[i]; + bitCount += 5; + while (bitCount >= 8) { + bitCount -= 8; + out[outIdx++] = (bitBuf >> bitCount) & 0xff; + } + } + // After consuming all 26 chars (130 bits) starting from a 3-bit prefix, + // bitCount should be exactly 0 and outIdx should be 16. + /* c8 ignore next 3 */ + if (outIdx !== ULID_BYTE_LENGTH || bitCount !== 0) { + throw new Error('Internal error: ULID bit packing did not consume cleanly'); + } + return out; +} + +/** + * Encode 16 bytes as a 26-character Crockford-Base32 ULID string. The output + * is always uppercase. + * + * Throws if `bytes.length !== 16`. + */ +export function bytesToUlid(bytes: Uint8Array): string { + if (bytes.length !== ULID_BYTE_LENGTH) { + throw new Error( + `Invalid byte length: expected ${ULID_BYTE_LENGTH}, got ${bytes.length}` + ); + } + + // Emit 26 chars from 128 bits, MSB-first. Since 26 * 5 = 130, the encoded + // value carries 2 leading zero pad bits. They are accounted for by setting + // the bit counter to 2 (with an all-zero buffer) before the loop, so the + // first 5-bit chunk is those 2 pad bits plus the top 3 bits of byte[0]. 
We then + // append bytes and pull 5-bit groups off the top. + let bitBuf = 0; + let bitCount = 0; + // Pre-load the 2 zero pad bits: with bitBuf = 0 and bitCount = 2, the + // first 5-bit chunk pulled out consists of those 2 zeros + the top 3 bits + // of byte[0], so the first output char is always in '0'..'7'. + // Equivalently, treat the value as a 130-bit number with the top 2 bits = 0. + bitBuf = 0; + bitCount = 2; // 2 zero pad bits already "in" the buffer at the top + let out = ''; + for (let i = 0; i < ULID_BYTE_LENGTH; i++) { + bitBuf = (bitBuf << 8) | bytes[i]; + bitCount += 8; + while (bitCount >= 5) { + bitCount -= 5; + out += ENCODING[(bitBuf >> bitCount) & 0x1f]; + } + } + /* c8 ignore next 3 */ + if (out.length !== ULID_LENGTH || bitCount !== 0) { + throw new Error('Internal error: ULID bit packing did not flush cleanly'); + } + return out; +} + +/** + * Test whether `s` is a fully valid 26-character Crockford-Base32 ULID with + * the tag bit set. Returns `false` for any input that is not a string, has + * the wrong length, contains an invalid character, or has nonzero top 2 + * padding bits. 
+ */ +export function isTaggedString(s: unknown): boolean { + if (typeof s !== 'string' || s.length !== ULID_LENGTH) return false; + let bytes: Uint8Array; + try { + bytes = ulidToBytes(s); + } catch { + return false; + } + return (bytes[0] & TAG_BIT_MASK) !== 0; +} diff --git a/packages/world-vercel/src/run-id/index.test.ts b/packages/world-vercel/src/run-id/index.test.ts new file mode 100644 index 0000000000..b7935a5f19 --- /dev/null +++ b/packages/world-vercel/src/run-id/index.test.ts @@ -0,0 +1,376 @@ +import { describe, expect, it } from 'vitest'; +import { + bytesToUlid, + REGION_BYTE_INDEX, + ULID_BYTE_LENGTH, + ulidToBytes, + VERSION_LOW_BYTE_INDEX, +} from './codec.js'; +import { + CURRENT_VERSION, + decode, + encode, + isTagged, + MAX_REGION_ID, + MAX_VERSION, + REGION_IDS, + type RegionCode, + type RegionKey, +} from './index.js'; + +const SAMPLE_ULID = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; + +describe('encode / decode round-trip', () => { + it('encodes with default version=1 and the iad1 region code', () => { + const tagged = encode(SAMPLE_ULID, 'iad1'); + expect(tagged).toBe('41ARZ3NDEK0GV4RRFFQ69G5FAV'); + expect(tagged).toHaveLength(26); + expect(isTagged(tagged)).toBe(true); + + const decoded = decode(tagged); + expect(decoded).toEqual({ + tagged: true, + ulid: '01ARZ3NDEK0GV4RRFFQ69G5FAV', + region: 'iad1', + regionId: REGION_IDS.iad1, + version: CURRENT_VERSION, + }); + }); + + it('accepts numeric region IDs', () => { + const tagged = encode(SAMPLE_ULID, 7); + expect(tagged).toBe('41ARZ3NDEK3GV4RRFFQ69G5FAV'); + const decoded = decode(tagged); + expect(decoded.regionId).toBe(7); + expect(decoded.region).toBe('dub1'); + expect(decoded.ulid).toBe('01ARZ3NDEK3GV4RRFFQ69G5FAV'); + }); + + it('returns region: null for unknown but in-range region IDs', () => { + const tagged = encode(SAMPLE_ULID, 63); + expect(tagged).toBe('41ARZ3NDEKZGV4RRFFQ69G5FAV'); + const decoded = decode(tagged); + expect(decoded.regionId).toBe(63); + expect(decoded.region).toBeNull(); + 
}); + + it('encodes regionId=0 as the "unknown" sentinel', () => { + const tagged = encode(SAMPLE_ULID, 0); + expect(tagged).toBe('41ARZ3NDEK00V4RRFFQ69G5FAV'); + const decoded = decode(tagged); + expect(decoded.regionId).toBe(0); + expect(decoded.region).toBeNull(); + }); + + it('accepts an explicit version override', () => { + const tagged = encode(SAMPLE_ULID, 'iad1', { version: 0 }); + expect(tagged).toBe('41ARZ3NDEK0GB4RRFFQ69G5FAV'); + expect(decode(tagged).version).toBe(0); + + const tagged2 = encode(SAMPLE_ULID, 'iad1', { version: MAX_VERSION }); + expect(tagged2).toBe('41ARZ3NDEK0ZV4RRFFQ69G5FAV'); + expect(decode(tagged2).version).toBe(MAX_VERSION); + }); + + it('preserves all metadata bits across encode → decode → encode', () => { + for (const regionId of [0, 1, 17, 31, 32, 63]) { + for (const version of [0, 1, 7, 16, 31]) { + const tagged = encode(SAMPLE_ULID, regionId, { version }); + const decoded = decode(tagged); + expect(decoded.regionId).toBe(regionId); + expect(decoded.version).toBe(version); + // Re-encoding the cleared ULID with the same metadata must reproduce + // the same tagged string. + const reTagged = encode(decoded.ulid, regionId, { version }); + expect(reTagged).toBe(tagged); + } + } + }); + + it('clears only the tag bit in the decoded ULID', () => { + const tagged = encode(SAMPLE_ULID, 'fra1', { version: 5 }); + expect(tagged).toBe('41ARZ3NDEK52V4RRFFQ69G5FAV'); + const decoded = decode(tagged); + expect(decoded.ulid).toBe('01ARZ3NDEK52V4RRFFQ69G5FAV'); + + // The decoded ulid must NOT have the tag bit set. + expect(isTagged(decoded.ulid)).toBe(false); + + // The metadata bytes (the top of the 80-bit randomness section) must be + // preserved in the decoded ULID, not zeroed. 
+ const taggedBytes = ulidToBytes(tagged); + const decodedBytes = ulidToBytes(decoded.ulid); + expect(decodedBytes[REGION_BYTE_INDEX]).toBe( + taggedBytes[REGION_BYTE_INDEX] + ); + expect(decodedBytes[VERSION_LOW_BYTE_INDEX]).toBe( + taggedBytes[VERSION_LOW_BYTE_INDEX] + ); + + // And byte[0] differs only in the top bit. + expect(decodedBytes[0]).toBe(taggedBytes[0] & 0x7f); + }); + + it('overwrites the tag bit and metadata bits even if the input has them set', () => { + // Synthesize a ULID with byte[0] = 0x40 (some non-tag bits set) and + // garbage in the metadata bytes (byte[6] + top of byte[7]). + const bytes = new Uint8Array(ULID_BYTE_LENGTH); + bytes[0] = 0x40; + bytes[REGION_BYTE_INDEX] = 0xff; + bytes[VERSION_LOW_BYTE_INDEX] = 0xff; + const dirty = bytesToUlid(bytes); + expect(dirty).toBe('2000000000ZZZG000000000000'); + + const tagged = encode(dirty, 'sfo1', { version: 3 }); + const decoded = decode(tagged); + expect(decoded.region).toBe('sfo1'); + expect(decoded.regionId).toBe(REGION_IDS.sfo1); + expect(decoded.version).toBe(3); + // Re-encoding the decoded.ulid with the same metadata must reproduce + // the same tagged string — sanity-check of the round-trip property. + expect(encode(decoded.ulid, 'sfo1', { version: 3 })).toBe(tagged); + }); + + it('encode emits an uppercase result for lowercase Crockford input', () => { + const tagged = encode(SAMPLE_ULID.toLowerCase(), 'iad1'); + expect(tagged).toBe('41ARZ3NDEK0GV4RRFFQ69G5FAV'); + expect(tagged).toBe(tagged.toUpperCase()); + }); + + it('encodes well-known boundary inputs to exact strings', () => { + // Zero ULID with zero metadata: only the tag bit is set, so byte[0] = 0x80. + // 0x80 → first 5-bit chunk (0b00100) → '4'; rest are all zero. 
+ expect(encode('0'.repeat(26), 0, { version: 0 })).toBe( + '40000000000000000000000000' + ); + // Zero ULID with region=1, version=1: regionId at the top of byte[6] + // and version straddling bytes 6/7 — the changed bits show up around + // base32 chars 11..13 ("0GG"). The low randomness bytes remain 0. + expect(encode('0'.repeat(26), 1, { version: 1 })).toBe( + '40000000000GG0000000000000' + ); + // Zero ULID with max region (63) and max version (31): the 11 metadata + // bits are all-ones, lighting up the high bits of bytes 6 and 7. + expect(encode('0'.repeat(26), 63, { version: 31 })).toBe( + '4000000000ZZG0000000000000' + ); + // Max ULID with zero metadata: the metadata bits are forced to 0 even + // though the source had them set, demonstrating overwrite semantics. + expect(encode('7ZZZZZZZZZZZZZZZZZZZZZZZZZ', 0, { version: 0 })).toBe( + '7ZZZZZZZZZ00FZZZZZZZZZZZZZ' + ); + }); +}); + +describe('decode on un-tagged input', () => { + it('returns tagged: false for a plain ULID', () => { + const decoded = decode(SAMPLE_ULID); + expect(decoded.tagged).toBe(false); + // Decoded ulid equals input (already had tag bit cleared). + expect(decoded.ulid).toBe(SAMPLE_ULID); + }); + + it('surfaces null metadata fields for un-tagged input', () => { + // Un-tagged decode results carry `null` in the metadata positions so + // callers must discriminate on `tagged` before reading them — the + // bits themselves are arbitrary randomness from the source ULID. + const decoded = decode(SAMPLE_ULID); + if (decoded.tagged) { + throw new Error('expected un-tagged result'); + } + expect(decoded.regionId).toBeNull(); + expect(decoded.version).toBeNull(); + expect(decoded.region).toBeNull(); + }); + + it('discriminated-union type narrows on the `tagged` check', () => { + // Type-level assertion: in the un-tagged branch, the metadata fields + // must type as `null`; in the tagged branch they must type as + // `number | RegionCode | null`. 
This is enforced at compile time by + // the conditional below — the test body itself just sanity-checks + // that the runtime values agree with what the types say. + const decoded = decode(SAMPLE_ULID); + if (decoded.tagged) { + // Within this branch, regionId is `number`, region is `RegionCode | null`. + expect(typeof decoded.regionId).toBe('number'); + expect(typeof decoded.version).toBe('number'); + } else { + // Within this branch, all three are typed as `null`. + const r: null = decoded.regionId; + const v: null = decoded.version; + const code: null = decoded.region; + expect(r).toBeNull(); + expect(v).toBeNull(); + expect(code).toBeNull(); + } + }); +}); + +describe('encode validation', () => { + it('rejects invalid ULID input', () => { + expect(() => encode('not-a-ulid', 'iad1')).toThrow(); + expect(() => encode('', 'iad1')).toThrow(/Invalid ULID length/); + expect(() => encode(SAMPLE_ULID.slice(1), 'iad1')).toThrow( + /Invalid ULID length/ + ); + }); + + it('rejects unknown region codes', () => { + expect(() => encode(SAMPLE_ULID, 'xxx1' as RegionCode)).toThrow( + /Unknown region/ + ); + }); + + it('rejects out-of-range numeric regions', () => { + expect(() => encode(SAMPLE_ULID, -1)).toThrow(RangeError); + expect(() => encode(SAMPLE_ULID, 64)).toThrow(RangeError); + expect(() => encode(SAMPLE_ULID, 1.5)).toThrow(RangeError); + expect(() => encode(SAMPLE_ULID, Number.NaN)).toThrow(RangeError); + }); + + it('rejects out-of-range versions', () => { + expect(() => encode(SAMPLE_ULID, 'iad1', { version: -1 })).toThrow( + RangeError + ); + expect(() => encode(SAMPLE_ULID, 'iad1', { version: 32 })).toThrow( + RangeError + ); + expect(() => encode(SAMPLE_ULID, 'iad1', { version: 1.5 })).toThrow( + RangeError + ); + }); +}); + +describe('region table coverage', () => { + it('covers all 21 known Vercel compute regions plus hel1/zrh1 + unknown', () => { + const expected: RegionKey[] = [ + 'unknown', + 'iad1', + 'sfo1', + 'pdx1', + 'cle1', + 'yul1', + 'gru1', + 
+ 'dub1', + 'lhr1', + 'cdg1', + 'fra1', + 'bru1', + 'arn1', + 'hel1', + 'zrh1', + 'cpt1', + 'dxb1', + 'bom1', + 'sin1', + 'hkg1', + 'hnd1', + 'icn1', + 'kix1', + 'syd1', + ]; + expect(Object.keys(REGION_IDS).sort()).toEqual([...expected].sort()); + }); + + it('assigns each region a unique ID in [0, 63]', () => { + const ids = Object.values(REGION_IDS); + expect(new Set(ids).size).toBe(ids.length); + for (const id of ids) { + expect(id).toBeGreaterThanOrEqual(0); + expect(id).toBeLessThanOrEqual(MAX_REGION_ID); + } + }); + + it('all known region codes round-trip through encode/decode', () => { + for (const key of Object.keys(REGION_IDS) as RegionKey[]) { + if (key === 'unknown') continue; + const code: RegionCode = key; + const tagged = encode(SAMPLE_ULID, code); + const decoded = decode(tagged); + expect(decoded.region).toBe(code); + expect(decoded.regionId).toBe(REGION_IDS[code]); + } + }); + + it('rejects the "unknown" sentinel string as a region code in encode', () => { + // encode(_, 'unknown') was previously silently accepted (resolving to + // regionId=0). It is now rejected at the type level and at runtime. + expect(() => encode(SAMPLE_ULID, 'unknown' as RegionCode)).toThrow( + /Unknown region/ + ); + }); +}); + +describe('lexicographic order', () => { + it('all tagged ULIDs sort above all untagged ULIDs', () => { + // Tag bit on byte[0] sets the first char to ≥ '4'. Any ULID without the + // tag bit has a first char in '0'..'3', so it sorts below every tagged one. + const minTagged = encode('0'.repeat(26), 0, { version: 0 }); + expect(minTagged).toBe('40000000000000000000000000'); + expect(minTagged > '3'.repeat(26)).toBe(true); + }); + + it('two tagged ULIDs with the same metadata preserve input ordering when they differ above the metadata bits', () => { + // Pick two ULIDs differing in the timestamp (char[9], the last timestamp character). The metadata + // bits (top 11 bits of randomness) get normalized to the same values, + // but the timestamp bits are preserved verbatim apart from the tag bit. 
+ const a = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; + const b = '01ARZ3NDEMTSV4RRFFQ69G5FAV'; + expect(a < b).toBe(true); + const ta = encode(a, 'iad1'); + const tb = encode(b, 'iad1'); + expect(ta).toBe('41ARZ3NDEK0GV4RRFFQ69G5FAV'); + expect(tb).toBe('41ARZ3NDEM0GV4RRFFQ69G5FAV'); + expect(ta < tb).toBe(true); + }); + + it('preserves intra-millisecond monotonicity (low 69 randomness bits untouched)', () => { + // The new layout puts metadata at the top of the randomness section, + // so a monotonic ULID factory's bottom-bit increments survive encoding + // intact. Simulate two consecutive monotonic-factory outputs that + // share a timestamp and differ only in the bottom of randomness, then + // verify the encoded forms still strictly increase. + const a = '01ARZ3NDEKTSV4RRFFQ69G5FAV'; + // Identical to `a` except for the very last char (LSB of randomness). + const aPlus1 = '01ARZ3NDEKTSV4RRFFQ69G5FAW'; + expect(a < aPlus1).toBe(true); + const ta = encode(a, 'iad1'); + const taPlus1 = encode(aPlus1, 'iad1'); + expect(ta < taPlus1).toBe(true); + // And both decode back to the same metadata. + expect(decode(ta).region).toBe('iad1'); + expect(decode(taPlus1).region).toBe('iad1'); + expect(decode(ta).version).toBe(CURRENT_VERSION); + expect(decode(taPlus1).version).toBe(CURRENT_VERSION); + }); + + it('preserves order across a sequence of incrementing bottom bits', () => { + // Stronger version of the previous test: synthesize a sequence of + // ULIDs that share a timestamp and increment by 1 in the bottom of + // randomness (the operation `monotonicFactory()` performs when called + // multiple times in the same millisecond), then verify the encoded + // sequence is strictly increasing. 
+ const ALPHABET = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'; + function incrementBottomChar(s: string): string { + const chars = s.split(''); + for (let i = chars.length - 1; i >= 0; i--) { + const v = ALPHABET.indexOf(chars[i]); + if (v < ALPHABET.length - 1) { + chars[i] = ALPHABET[v + 1]; + return chars.join(''); + } + chars[i] = '0'; + } + throw new Error('overflow'); + } + + let current = '01ARZ3NDEKTSV4RRFFQ69G5F00'; + let prevEncoded = encode(current, 'iad1'); + for (let i = 0; i < 64; i++) { + current = incrementBottomChar(current); + const encoded = encode(current, 'iad1'); + expect(encoded > prevEncoded).toBe(true); + prevEncoded = encoded; + } + }); +}); diff --git a/packages/world-vercel/src/run-id/index.ts b/packages/world-vercel/src/run-id/index.ts new file mode 100644 index 0000000000..1e1111e587 --- /dev/null +++ b/packages/world-vercel/src/run-id/index.ts @@ -0,0 +1,269 @@ +/** + * Region-tagged ULID encoding for Vercel workflow run IDs. + * + * A "tagged" run ID is a regular 26-character Crockford-Base32 ULID with: + * + * - **Tag bit**: the MSB of byte 0 (the most-significant bit of the 48-bit + * timestamp) is set to 1, distinguishing this scheme from a plain ULID. + * This shifts the first character into the range `4`..`7`. + * - **Region ID** (6 bits, 0–63): encoded at the **top** of the 80-bit + * randomness section, in the high 6 bits of byte 6. Region IDs are + * assigned in {@link REGION_IDS}. + * - **Version** (5 bits, 0–31): encoded immediately below the region ID + * (high 2 bits in the bottom of byte 6, low 3 bits in the top of byte 7). + * + * Net effect: 80 bits of ULID randomness become 69 bits (still ~5.9 × 10²⁰ + * distinct values per millisecond), and the maximum representable timestamp + * drops from year ~10889 down to year ~6429 — neither limit is practically + * relevant. + * + * Tagged ULIDs remain valid ULIDs. 
/**
 * Optional settings accepted by {@link encode}.
 */
export interface EncodeOptions {
  /**
   * Encoding format version to embed. Must be an integer in the range 0..31;
   * {@link encode} throws a `RangeError` for anything else. When omitted,
   * {@link encode} falls back to {@link CURRENT_VERSION} (1). Version 0 is
   * reserved as a sentinel meaning "no metadata encoded" — callers should
   * not normally emit it.
   */
  version?: number;
}
/**
 * Type guard: `true` only when `value` is a string that names a concrete
 * region in {@link REGION_IDS}.
 *
 * The `'unknown'` sentinel key is rejected even though it is present in the
 * table, and only own properties count, so inherited names such as
 * `'toString'` never match.
 */
function isRegionCode(value: unknown): value is RegionCode {
  if (typeof value !== 'string' || value === 'unknown') {
    return false;
  }
  return Object.hasOwn(REGION_IDS, value);
}
/**
 * Stamp a region ID and a format version onto a ULID, yielding a "tagged"
 * 26-character run ID.
 *
 * Three things are written into the decoded bytes: the tag bit in byte 0,
 * the region ID together with the upper version bits in byte
 * `REGION_BYTE_INDEX`, and the lowest three version bits in byte
 * `VERSION_LOW_BYTE_INDEX`. Whatever the input held in those positions is
 * overwritten; bits outside the masks are carried over as-is.
 *
 * Arguments are validated in a fixed order (region, then version, then the
 * ULID string), so the first invalid one in that order decides which error
 * is raised.
 *
 * @param ulid - A valid 26-character Crockford-Base32 ULID.
 * @param region - Either a numeric region ID (an integer in
 *   `[0, MAX_REGION]`) or a known {@link RegionCode} such as `'iad1'`. The
 *   string `'unknown'` is not accepted; pass the number `0` instead.
 * @param options - See {@link EncodeOptions}.
 * @returns The tagged ULID, always uppercase.
 *
 * @throws RangeError if a numeric `region` or the resolved version is not an
 *   integer inside its allowed range.
 * @throws Error if `region` is a string that is not a known region code, or
 *   if `ulid` is not a valid ULID string.
 */
export function encode(
  ulid: string,
  region: number | RegionCode,
  options: EncodeOptions = {}
): string {
  // Step 1: turn `region` into a validated numeric ID.
  let regionId: number;
  if (typeof region !== 'number') {
    if (!isRegionCode(region)) {
      throw new Error(`Unknown region: ${String(region)}`);
    }
    regionId = REGION_IDS[region];
  } else {
    const regionInRange =
      Number.isInteger(region) && region >= 0 && region <= MAX_REGION;
    if (!regionInRange) {
      throw new RangeError(
        `regionId must be an integer in [0, ${MAX_REGION}]; got ${region}`
      );
    }
    regionId = region;
  }

  // Step 2: settle on the version, defaulting when the caller gave none.
  const version = options.version ?? CURRENT_VERSION;
  const versionInRange =
    Number.isInteger(version) && version >= 0 && version <= MAX_VERSION;
  if (!versionInRange) {
    throw new RangeError(
      `version must be an integer in [0, ${MAX_VERSION}]; got ${version}`
    );
  }

  // Step 3: parse the ULID (this is where a malformed string is rejected).
  const bytes = ulidToBytes(ulid);

  // Step 4: flag the ID as tagged.
  bytes[0] |= TAG_BIT_MASK;

  // Step 5: lay out the metadata. The region ID is shifted up by two so the
  // two upper version bits fit beneath it in the same byte; the remaining
  // three version bits are shifted to the top of the following byte.
  const regionByteBits =
    ((regionId & MAX_REGION) << 2) | ((version >> 3) & VERSION_HIGH_MASK);
  const versionLowBits = (version & 0x07) << 5;

  // Step 6: clear the destination bits, then merge the metadata in.
  const keepInRegionByte = ~(REGION_MASK | VERSION_HIGH_MASK);
  bytes[REGION_BYTE_INDEX] =
    (bytes[REGION_BYTE_INDEX] & keepInRegionByte) | regionByteBits;
  bytes[VERSION_LOW_BYTE_INDEX] =
    (bytes[VERSION_LOW_BYTE_INDEX] & ~VERSION_LOW_MASK) | versionLowBits;

  return bytesToUlid(bytes);
}
/**
 * Split a ULID into its plain form plus any run-ID metadata it carries.
 *
 * Every syntactically valid ULID decodes successfully. Whether it was tagged
 * is reported through the `tagged` discriminant of the result rather than by
 * throwing; for untagged input the metadata fields are `null`.
 *
 * The `ulid` field of the result is the input re-encoded with the tag bit
 * cleared and nothing else changed. The region and version bits stay where
 * {@link encode} put them, so for a tagged ID this is not the ULID that was
 * originally handed to {@link encode}. It is stable, though: tagging it
 * again with the same region and decoding once more yields the same `ulid`.
 *
 * @param taggedUlid - The ULID string to inspect, tagged or not.
 * @returns A {@link DecodedRunId}; narrow on `tagged` before reading
 *   `version`, `regionId` or `region`.
 * @throws If the input is not a syntactically valid 26-character
 *   Crockford-Base32 ULID.
 */
export function decode(taggedUlid: string): DecodedRunId {
  const bytes = ulidToBytes(taggedUlid);

  // Remember the tag state, then strip the bit before re-encoding.
  const firstByte = bytes[0];
  const hasTag = (firstByte & TAG_BIT_MASK) !== 0;
  bytes[0] = firstByte & ~TAG_BIT_MASK;
  const ulid = bytesToUlid(bytes);

  if (hasTag) {
    const regionByte = bytes[REGION_BYTE_INDEX];
    const versionLowByte = bytes[VERSION_LOW_BYTE_INDEX];

    // Mirror of the layout written by `encode`: the region ID sits above the
    // two upper version bits in one byte, and the three lower version bits
    // occupy the top of the next byte.
    const regionId = (regionByte & REGION_MASK) >> 2;
    const versionUpper = (regionByte & VERSION_HIGH_MASK) << 3;
    const versionLower = (versionLowByte & VERSION_LOW_MASK) >> 5;

    return {
      tagged: true,
      ulid,
      version: versionUpper | versionLower,
      regionId,
      region: lookupRegion(regionId),
    };
  }

  // Untagged: the bits in the metadata positions are just randomness from
  // the source ULID, so they are reported as `null` instead of being parsed.
  return { tagged: false, ulid, version: null, regionId: null, region: null };
}
/**
 * Returns `true` if `value` is a 26-character Crockford-Base32 ULID with the
 * tag bit set (i.e. was produced by {@link encode}). Returns `false` for any
 * input that is not a syntactically valid ULID, including non-strings.
 *
 * This is a thin public wrapper: the check itself is performed by
 * `isTaggedString` from `./codec.js`, and its result is returned unchanged.
 *
 * The parameter is typed as `unknown` so this function can safely be used as
 * a guard on untrusted input without requiring callers to cast. The return
 * type is a plain `boolean`, so a `true` result does not narrow `value` for
 * the TypeScript checker.
 *
 * @param value - Any value; it is passed to `isTaggedString` as-is.
 * @returns Whether `value` is a tagged run ID.
 */
export function isTagged(value: unknown): boolean {
  return isTaggedString(value);
}
/**
 * Stable mapping between Vercel compute region codes (e.g. `iad1`) and the
 * 6-bit region IDs encoded into tagged workflow run IDs.
 *
 * **DO NOT REORDER OR REUSE IDS.** Once a region has been assigned an ID, that
 * ID is part of the on-the-wire encoding of every run ID ever issued for that
 * region. New regions must be appended with the next unused ID.
 *
 * `0` is reserved for "unknown" — encode functions may emit it when the
 * caller's region cannot be determined, and decode will surface it as
 * `region: null`.
 *
 * The list below covers the 21 currently-deployed Vercel compute regions plus
 * `hel1` and `zrh1`, which are reserved for future rollout so they can be
 * assigned without requiring a version bump.
 */
export const REGION_IDS = {
  unknown: 0,
  iad1: 1,
  sfo1: 2,
  pdx1: 3,
  cle1: 4,
  yul1: 5,
  gru1: 6,
  dub1: 7,
  lhr1: 8,
  cdg1: 9,
  fra1: 10,
  bru1: 11,
  arn1: 12,
  hel1: 13,
  zrh1: 14,
  cpt1: 15,
  dxb1: 16,
  bom1: 17,
  sin1: 18,
  hkg1: 19,
  hnd1: 20,
  icn1: 21,
  kix1: 22,
  syd1: 23,
} as const;

/**
 * Any key in {@link REGION_IDS}, including the `'unknown'` sentinel. Not
 * usually what callers want — see {@link RegionCode} for the "known region"
 * subset.
 */
export type RegionKey = keyof typeof REGION_IDS;

/**
 * A concrete Vercel compute region code (e.g. `'iad1'`, `'fra1'`). Excludes
 * the `'unknown'` sentinel since it does not correspond to any real region.
 */
export type RegionCode = Exclude<RegionKey, 'unknown'>;

/** Union of every numeric ID that appears as a value in {@link REGION_IDS}. */
export type RegionId = (typeof REGION_IDS)[RegionKey];

/**
 * Reverse map: numeric region ID → region code. Only populated for known
 * regions (i.e. excludes the `unknown`/0 sentinel); {@link lookupRegion}
 * returns `null` for any ID not present in this map.
 *
 * The `.map` callback declares its tuple return type explicitly so the
 * entries are typed as `[number, RegionCode]` pairs rather than widening to
 * an array of unions.
 */
const REGION_CODES_BY_ID: ReadonlyMap<number, RegionCode> = new Map<
  number,
  RegionCode
>(
  (Object.entries(REGION_IDS) as Array<[RegionKey, number]>)
    .filter((entry): entry is [RegionCode, number] => entry[0] !== 'unknown')
    .map(([code, id]): [number, RegionCode] => [id, code])
);

/**
 * Look up a region code by ID.
 *
 * @param regionId - Numeric region ID to resolve.
 * @returns The matching region code, or `null` for IDs not in
 *   {@link REGION_IDS} and for the `unknown`/0 sentinel.
 */
export function lookupRegion(regionId: number): RegionCode | null {
  return REGION_CODES_BY_ID.get(regionId) ?? null;
}
/**
 * Look up a numeric region ID by code.
 *
 * The TypeScript signature requires a known {@link RegionCode}, but the
 * function still validates at runtime for callers crossing a JS/TS boundary
 * where the input may be any string. The validation is an own-property
 * check: a plain `REGION_IDS[code] === undefined` test would let inherited
 * names such as `'toString'` or `'constructor'` through and hand back a
 * prototype member instead of a number.
 *
 * The `'unknown'` sentinel is not part of {@link RegionCode}, but it is an
 * own key of the table, so an untyped caller that passes it receives `0`.
 *
 * @param code - Region code to resolve.
 * @returns The numeric ID assigned to `code` in {@link REGION_IDS}.
 * @throws Error if `code` is not an own key of {@link REGION_IDS}.
 */
export function regionIdFor(code: RegionCode): RegionId {
  /* c8 ignore next 3 -- defensive runtime backstop; unreachable in well-typed TS */
  if (!Object.prototype.hasOwnProperty.call(REGION_IDS, code)) {
    throw new Error(`Unknown Vercel region code: ${String(code)}`);
  }
  return REGION_IDS[code];
}