bytes/fallback/base64.js at bb9a3c1bbfca5cf2a0461ef301cac5ad7f0102c8 · ExodusOSS/bytes · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import { assertUint8 } from '../assert.js'
import { nativeEncoder, nativeDecoder } from './_utils.js'
import { encodeAscii } from './ascii.js'

// See https://datatracker.ietf.org/doc/html/rfc4648

// Both alphabets share their first 62 symbols and differ only in the last two
const ALPHANUMERIC = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
const BASE64 = Array.from(`${ALPHANUMERIC}+/`)
const BASE64URL = Array.from(`${ALPHANUMERIC}-_`)

// Per-alphabet caches for lookup tables, filled lazily on first use
const BASE64_HELPERS = {}
const BASE64URL_HELPERS = {}

// Messages of the SyntaxError instances thrown while decoding
export const E_CHAR = 'Invalid character in base64 input'
export const E_PADDING = 'Invalid base64 padding'
export const E_LENGTH = 'Invalid base64 length'
export const E_LAST = 'Invalid last chunk'

// An alternative would be to map bytes 0-255 to char codes and call btoa(ascii),
// but that is _slower_ than the toBase64 implementation in this file, even on Hermes

// Template literals are faster on Hermes and JSC, but only Hermes needs this optimization
const useTemplates = Boolean(globalThis.HermesInternal)

/**
 * Encodes bytes as base64 or base64url text (RFC 4648).
 *
 * The result is assembled by string concatenation, which is fine enough on modern JS engines.
 *
 * @param {Uint8Array} arr - bytes to encode; passed through assertUint8 first
 * @param {boolean} [isURL] - truthy selects the `-_` alphabet instead of `+/`
 * @param {boolean} [padding] - truthy appends `=` / `==` after an incomplete final group
 * @returns {string} the encoded text
 */
export function toBase64(arr, isURL, padding) {
  assertUint8(arr)

  const alphabet = isURL ? BASE64URL : BASE64
  const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
  if (!helpers.pairs) buildEncodeTables(alphabet, helpers)

  const total = arr.length
  const tail = total % 3 // 0, 1 or 2 bytes left after the complete 3-byte groups
  const bulk = total - tail
  let out = ''
  let pos = 0

  // Fast path for complete 3-byte groups: every 12 bits are looked up as a ready-made pair of symbols
  if (nativeDecoder) {
    const { codepairs } = helpers
    const units = new Uint16Array((bulk / 3) * 2)
    let w = 0
    while (pos < bulk) {
      const mid = arr[pos + 1]
      units[w] = codepairs[(arr[pos] << 4) | (mid >> 4)]
      units[w + 1] = codepairs[((mid & 0x0f) << 8) | arr[pos + 2]]
      w += 2
      pos += 3
    }

    // Each 16-bit unit holds the two char codes of a pair in memory order, so the bytes decode straight to text
    out = nativeDecoder.decode(units)
  } else {
    const { pairs } = helpers

    // Templates are faster only on Hermes and JSC; four groups (12 bytes) are emitted per step
    if (useTemplates) {
      const stop = bulk - 9 // pos < stop guarantees 12 more bytes of complete groups
      while (pos < stop) {
        const a1 = arr[pos + 1]
        const b1 = arr[pos + 4]
        const c1 = arr[pos + 7]
        const d1 = arr[pos + 10]
        const s0 = pairs[(arr[pos] << 4) | (a1 >> 4)]
        const s1 = pairs[((a1 & 0x0f) << 8) | arr[pos + 2]]
        const s2 = pairs[(arr[pos + 3] << 4) | (b1 >> 4)]
        const s3 = pairs[((b1 & 0x0f) << 8) | arr[pos + 5]]
        const s4 = pairs[(arr[pos + 6] << 4) | (c1 >> 4)]
        const s5 = pairs[((c1 & 0x0f) << 8) | arr[pos + 8]]
        const s6 = pairs[(arr[pos + 9] << 4) | (d1 >> 4)]
        const s7 = pairs[((d1 & 0x0f) << 8) | arr[pos + 11]]
        out += `${s0}${s1}${s2}${s3}${s4}${s5}${s6}${s7}`
        pos += 12
      }
    }

    // Whatever complete groups remain, one at a time
    while (pos < bulk) {
      const mid = arr[pos + 1]
      out += pairs[(arr[pos] << 4) | (mid >> 4)] + pairs[((mid & 0x0f) << 8) | arr[pos + 2]]
      pos += 3
    }
  }

  // Incomplete final group: 1 byte yields 2 symbols, 2 bytes yield 3; unused low bits are zero
  if (tail === 1) {
    const x = arr[pos]
    out += `${alphabet[x >> 2]}${alphabet[(x << 4) & 0x3f]}`
    if (padding) out += '=='
  } else if (tail === 2) {
    const x = arr[pos]
    const y = arr[pos + 1]
    out += `${alphabet[x >> 2]}${alphabet[((x << 4) & 0x3f) | (y >> 4)]}${alphabet[(y << 2) & 0x3f]}`
    if (padding) out += '='
  }

  return out
}

// Builds the 12-bit -> two-symbol lookup tables for one alphabet and caches them on `helpers`.
// Only the table matching the available code path is filled, to save memory.
function buildEncodeTables(alphabet, helpers) {
  helpers.pairs = [] // always set: its presence marks the tables as initialized

  if (nativeDecoder) {
    const table = new Uint16Array(64 * 64)
    // Filled through a byte view so the in-memory order is (first char, second char) on both BE and LE
    const bytes = new Uint8Array(table.buffer, table.byteOffset, table.byteLength)
    for (let hi = 0; hi < 64; hi++) {
      const code = alphabet[hi].charCodeAt(0)
      for (let lo = 0; lo < 64; lo++) {
        bytes[(hi << 7) | (lo << 1)] = code // first char of pair (hi, lo)
        bytes[(lo << 7) | ((hi << 1) + 1)] = code // second char of pair (lo, hi)
      }
    }

    helpers.codepairs = table
    return
  }

  for (const first of alphabet) {
    for (const second of alphabet) helpers.pairs.push(first + second)
  }
}

// TODO: can this be optimized? This only affects non-Hermes barebone engines though
// Size of the single-char decode map: 128 entries when nativeEncoder is available, otherwise 65,536 entries
// (a 64 KiB map), because without decoding to a byte array we would have to recheck everything
const mapSize = nativeEncoder ? 1 << 7 : 1 << 16
// A pair of identical chars read as one 16-bit value is the char code repeated in both bytes (code * 0x0101)
const _min = 0x2b * 0x01_01 // '++' (0x2b2b), the minimal allowed pair value
const _AAm = 0x41 * 0x01_01 - _min // 'AA' (0x4141) offset by _min: the only pair allowed to decode to 12 zero bits
const _zz = 0x7a * 0x01_01 // 'zz' (0x7a7a), the maximal allowed pair value (not offset by _min)

/**
 * Decodes base64 or base64url text (RFC 4648) into bytes.
 *
 * Trailing `=` padding may be omitted; when present, the total length must be a multiple of 4.
 * Unused low bits of an incomplete final group must be zero.
 *
 * @param {string} str - encoded text
 * @param {boolean} [isURL] - truthy selects the `-_` alphabet instead of `+/`
 * @returns {Uint8Array} the decoded bytes
 * @throws {SyntaxError} with message E_LENGTH, E_PADDING, E_CHAR or E_LAST on malformed input
 */
export function fromBase64(str, isURL) {
  const fullLength = str.length
  let inputLength = fullLength
  while (str[inputLength - 1] === '=') inputLength--

  const paddingLength = fullLength - inputLength
  const tailLength = inputLength % 4
  if (tailLength === 1) throw new SyntaxError(E_LENGTH)
  if (paddingLength !== 0 && (paddingLength > 3 || fullLength % 4 !== 0)) {
    throw new SyntaxError(E_PADDING)
  }

  const mainLength = inputLength - tailLength // multiple of 4

  const alphabet = isURL ? BASE64URL : BASE64
  const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
  if (!helpers.fromMap) buildDecodeMap(alphabet, helpers)
  const lookup = helpers.fromMap

  const out = new Uint8Array(Math.floor((inputLength * 3) / 4))
  let w = 0 // write position in out
  let r = 0 // read position in str

  if (nativeEncoder) {
    if (!helpers.fromMap16) buildDecodePairMap(alphabet, helpers)
    const lookup16 = helpers.fromMap16

    const codes = encodeAscii(str, E_CHAR)
    const pairCount = mainLength >> 1
    const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, pairCount)
    let p = 0 // position in codes16, i.e. in char pairs

    // Optional fast loop: four pairs (8 chars -> 6 bytes) per step
    const fastEnd = pairCount - 2
    while (p < fastEnd) {
      const k0 = codes16[p] - _min
      const k1 = codes16[p + 1] - _min
      const k2 = codes16[p + 2] - _min
      const k3 = codes16[p + 3] - _min
      const v0 = lookup16[k0]
      const v1 = lookup16[k1]
      const v2 = lookup16[k2]
      const v3 = lookup16[k3]
      // A falsy value is either a miss (unset or out-of-range slot) or the legitimate 'AA' pair
      const bad =
        (!v0 && k0 !== _AAm) || (!v1 && k1 !== _AAm) || (!v2 && k2 !== _AAm) || (!v3 && k3 !== _AAm)
      if (bad) throw new SyntaxError(E_CHAR)
      out[w] = v0 >> 4
      out[w + 1] = ((v0 & 0xf) << 4) | (v1 >> 8)
      out[w + 2] = v1 & 0xff
      out[w + 3] = v2 >> 4
      out[w + 4] = ((v2 & 0xf) << 4) | (v3 >> 8)
      out[w + 5] = v3 & 0xff
      p += 4
      w += 6
    }

    // Remaining complete groups: two pairs (4 chars -> 3 bytes) per step
    while (p < pairCount) {
      const k0 = codes16[p] - _min
      const k1 = codes16[p + 1] - _min
      const v0 = lookup16[k0]
      const v1 = lookup16[k1]
      if ((!v0 && k0 !== _AAm) || (!v1 && k1 !== _AAm)) throw new SyntaxError(E_CHAR)
      out[w] = v0 >> 4
      out[w + 1] = ((v0 & 0xf) << 4) | (v1 >> 8)
      out[w + 2] = v1 & 0xff
      p += 2
      w += 3
    }

    r = p * 2 // back from pair units to char units
  } else {
    while (r < mainLength) {
      const quad =
        (lookup[str.charCodeAt(r)] << 18) |
        (lookup[str.charCodeAt(r + 1)] << 12) |
        (lookup[str.charCodeAt(r + 2)] << 6) |
        lookup[str.charCodeAt(r + 3)]
      if (quad < 0) throw new SyntaxError(E_CHAR) // any -1 entry makes the combined value negative
      out[w] = quad >> 16
      out[w + 1] = (quad >> 8) & 0xff
      out[w + 2] = quad & 0xff
      r += 4
      w += 3
    }
  }

  // The tail holds 0, 2 or 3 chars; a 1-char tail was rejected above
  if (tailLength < 2) return out

  const ab = (lookup[str.charCodeAt(r)] << 6) | lookup[str.charCodeAt(r + 1)]
  if (ab < 0) throw new SyntaxError(E_CHAR)
  out[w] = ab >> 4
  if (tailLength < 3) {
    if (ab & 0xf) throw new SyntaxError(E_LAST) // 2 chars carry 12 bits, the low 4 must be zero
    return out
  }

  const c = lookup[str.charCodeAt(r + 2)]
  if (c < 0) throw new SyntaxError(E_CHAR)
  if (c & 0x3) throw new SyntaxError(E_LAST) // 3 chars carry 18 bits, the low 2 must be zero
  out[w + 1] = ((ab << 4) & 0xff) | (c >> 2)
  return out
}

// Builds the char code -> 6-bit value map for one alphabet and caches it on `helpers`.
// There is no regex validation of the input, so all other codes map to -1 and callers check the sign.
function buildDecodeMap(alphabet, helpers) {
  const table = new Int8Array(mapSize).fill(-1)
  for (const [value, char] of alphabet.entries()) table[char.charCodeAt(0)] = value
  helpers.fromMap = table
}

// Builds the char pair -> 12-bit value map (about 40 KiB) and caches it on `helpers`.
// It is indexed by the pair read as one 16-bit value, minus _min; unset slots stay 0.
function buildDecodePairMap(alphabet, helpers) {
  const table = new Uint16Array(_zz - _min + 1)
  const pair8 = new Uint8Array(2)
  const pair16 = new Uint16Array(pair8.buffer, pair8.byteOffset, 1) // for an endianness-agnostic transform
  for (const [hi, first] of alphabet.entries()) {
    pair8[0] = first.charCodeAt(0)
    for (const [lo, second] of alphabet.entries()) {
      pair8[1] = second.charCodeAt(0)
      table[pair16[0] - _min] = (hi << 6) | lo
    }
  }

  helpers.fromMap16 = table
}