-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathbase64.js
More file actions
241 lines (214 loc) · 8.53 KB
/
base64.js
File metadata and controls
241 lines (214 loc) · 8.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import { assertUint8 } from '../assert.js'
import { nativeEncoder, nativeDecoder } from './_utils.js'
import { encodeAscii } from './ascii.js'
// RFC 4648 alphabets, see https://datatracker.ietf.org/doc/html/rfc4648
// Each is an array of 64 one-character strings indexed by 6-bit value.
// The URL-safe alphabet differs from the standard one only in its last two characters.
const BASE64 = Array.from('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/')
const BASE64URL = [...BASE64.slice(0, 62), '-', '_']

// Per-alphabet holders for lookup tables; they start empty and are filled on first use
const BASE64_HELPERS = {}
const BASE64URL_HELPERS = {}

// Messages of the SyntaxErrors thrown while decoding
export const E_CHAR = 'Invalid character in base64 input'
export const E_PADDING = 'Invalid base64 padding'
export const E_LENGTH = 'Invalid base64 length'
export const E_LAST = 'Invalid last chunk'

// An alternative would be mapping bytes 0-255 to char codes and calling btoa(ascii),
// but that was found to be _slower_ than the JS encoder in this file, even on Hermes.
// The template-literal fast path is faster on Hermes and JSC; it is only needed on Hermes.
const useTemplates = !!globalThis.HermesInternal
/**
 * Encode bytes as RFC 4648 base64 text.
 *
 * Output is produced either by string concatenation or, when `nativeDecoder` is
 * available, by filling a buffer of character codes and decoding it in one call.
 * String concatenation seems to be fine enough on modern JS engines.
 *
 * @param {Uint8Array} arr - bytes to encode; checked with `assertUint8` first
 * @param {boolean} [isURL] - truthy selects the URL-safe alphabet instead of the standard one
 * @param {boolean} [padding] - truthy appends `=` / `==` after a trailing partial group
 * @returns {string} the encoded text
 */
export function toBase64(arr, isURL, padding) {
  assertUint8(arr)
  const chars = isURL ? BASE64URL : BASE64
  const cache = isURL ? BASE64URL_HELPERS : BASE64_HELPERS

  // Lazily build the "12 bits -> two characters" table for this alphabet, in the form the active path needs
  if (!cache.pairs) {
    cache.pairs = []
    if (nativeDecoder) {
      // Two character codes packed per 16-bit slot; allocated lazily to save memory when unused
      const table = new Uint16Array(64 * 64)
      // Filled through a byte view so the character order in memory does not depend on BE/LE
      const bytes = new Uint8Array(table.buffer, table.byteOffset, table.byteLength)
      for (let pair = 0; pair < 64 * 64; pair++) {
        bytes[pair << 1] = chars[pair >> 6].charCodeAt(0)
        bytes[(pair << 1) + 1] = chars[pair & 0x3f].charCodeAt(0)
      }
      cache.codepairs = table
    } else {
      for (const first of chars) {
        for (const second of chars) cache.pairs.push(first + second)
      }
    }
  }

  const { pairs, codepairs } = cache
  const groups = Math.floor(arr.length / 3)
  const wholeBytes = groups * 3
  let text = ''
  let pos = 0

  // Complete 3-byte groups: each yields two table lookups (two characters each)
  if (nativeDecoder) {
    const units = new Uint16Array(groups * 2)
    let slot = 0
    while (pos < wholeBytes) {
      const mid = arr[pos + 1]
      units[slot] = codepairs[(arr[pos] << 4) | (mid >> 4)]
      units[slot + 1] = codepairs[((mid & 0x0f) << 8) | arr[pos + 2]]
      slot += 2
      pos += 3
    }
    text = nativeDecoder.decode(units)
  } else {
    // Templates are faster only on Hermes and JSC; other engines have a built-in toBase64 and don't need this
    if (useTemplates) {
      // Four groups (12 bytes) per iteration; `stop` guarantees all 12 bytes lie within complete groups
      for (const stop = wholeBytes - 9; pos < stop; pos += 12) {
        const a1 = arr[pos + 1]
        const b1 = arr[pos + 4]
        const c1 = arr[pos + 7]
        const d1 = arr[pos + 10]
        const s0 = pairs[(arr[pos] << 4) | (a1 >> 4)]
        const s1 = pairs[((a1 & 0x0f) << 8) | arr[pos + 2]]
        const s2 = pairs[(arr[pos + 3] << 4) | (b1 >> 4)]
        const s3 = pairs[((b1 & 0x0f) << 8) | arr[pos + 5]]
        const s4 = pairs[(arr[pos + 6] << 4) | (c1 >> 4)]
        const s5 = pairs[((c1 & 0x0f) << 8) | arr[pos + 8]]
        const s6 = pairs[(arr[pos + 9] << 4) | (d1 >> 4)]
        const s7 = pairs[((d1 & 0x0f) << 8) | arr[pos + 11]]
        text += `${s0}${s1}${s2}${s3}${s4}${s5}${s6}${s7}`
      }
    }
    while (pos < wholeBytes) {
      const mid = arr[pos + 1]
      text += pairs[(arr[pos] << 4) | (mid >> 4)] + pairs[((mid & 0x0f) << 8) | arr[pos + 2]]
      pos += 3
    }
  }

  // Every complete group is consumed above, so at most two bytes remain
  const rest = arr.length - wholeBytes
  if (rest === 0) return text

  const first = arr[pos]
  text += chars[first >> 2] // top 6 bits of the first leftover byte
  if (rest === 1) {
    text += chars[(first << 4) & 0x3f] // remaining 2 bits, zero-filled to 6
    return padding ? text + '==' : text
  }

  const second = arr[pos + 1]
  text += chars[((first << 4) & 0x3f) | (second >> 4)]
  text += chars[(second << 2) & 0x3f] // remaining 4 bits, zero-filled to 6
  return padding ? text + '=' : text
}
// Size of the per-character decode table used by fromBase64: 128 entries when nativeEncoder is
// available, otherwise one entry per possible UTF-16 code unit (a 64 KiB map), because without
// decoding to a byte array everything would have to be rechecked.
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
const mapSize = nativeEncoder ? 128 : 64 * 1024
// The constants below describe two characters read together as one 16-bit unit
const _min = 0x2b2b // '++', the smallest allowed pair; subtracted from a unit to get its table index
const _AAm = 0x4141 - _min // table index of 'AA', the only pair that legitimately decodes to 12 zero bits
const _zz = 0x7a7a // 'zz', the largest allowed pair (raw value, not yet offset by _min)
/**
 * Decode RFC 4648 base64 text into bytes.
 *
 * Trailing `=` padding is optional, but when present the total length must be a multiple of 4.
 * Unused low bits of the final partial group must be zero.
 *
 * @param {string} str - base64 text to decode
 * @param {boolean} [isURL] - truthy selects the URL-safe alphabet instead of the standard one
 * @returns {Uint8Array} the decoded bytes
 * @throws {SyntaxError} with message E_LENGTH, E_PADDING, E_CHAR or E_LAST on malformed input
 */
export function fromBase64(str, isURL) {
  const totalLength = str.length
  let dataLength = totalLength
  while (str[dataLength - 1] === '=') dataLength--
  const padCount = totalLength - dataLength
  const tail = dataLength % 4
  const body = dataLength - tail // length of the part made of complete 4-character groups
  if (tail === 1) throw new SyntaxError(E_LENGTH)
  const badPadding = padCount > 3 || (padCount !== 0 && totalLength % 4 !== 0)
  if (badPadding) throw new SyntaxError(E_PADDING)

  const chars = isURL ? BASE64URL : BASE64
  const cache = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
  if (!cache.fromMap) {
    // There is no regex validation of the input, so every other code maps to -1 and the sign is rechecked
    const table = new Int8Array(mapSize).fill(-1)
    for (let value = 0; value < chars.length; value++) table[chars[value].charCodeAt(0)] = value
    cache.fromMap = table
  }

  const lookup = cache.fromMap
  const out = new Uint8Array(Math.floor((dataLength * 3) / 4))
  let written = 0
  let pos = 0

  if (nativeEncoder) {
    if (!cache.fromMap16) {
      // Maps a 16-bit unit (two characters) minus _min to its 12-bit value. Warning: 40 KiB
      const table16 = new Uint16Array(_zz - _min + 1)
      // The two character codes are written as bytes and read back as one unit, whatever the endianness
      const pairBytes = new Uint8Array(2)
      const pairUnit = new Uint16Array(pairBytes.buffer, pairBytes.byteOffset, 1)
      for (let hi = 0; hi < chars.length; hi++) {
        pairBytes[0] = chars[hi].charCodeAt(0) // FIXME, we should avoid calling charCodeAt in a loop
        for (let lo = 0; lo < chars.length; lo++) {
          pairBytes[1] = chars[lo].charCodeAt(0)
          table16[pairUnit[0] - _min] = (hi << 6) | lo
        }
      }
      cache.fromMap16 = table16
    }

    const pairLookup = cache.fromMap16
    // NOTE(review): encodeAscii presumably returns the character codes as bytes and throws with the
    // given message on non-ASCII input; confirm in ./ascii.js
    const codes = encodeAscii(str, E_CHAR)
    const unitCount = body >> 1
    const units = new Uint16Array(codes.buffer, codes.byteOffset, unitCount)
    let unit = 0

    // A table value of 0 is only legitimate for the 'AA' pair; an undefined value means out of table range.
    // Optional fast loop: two 4-character groups per iteration
    for (const stop = unitCount - 2; unit < stop; unit += 4) {
      const k0 = units[unit] - _min
      const k1 = units[unit + 1] - _min
      const k2 = units[unit + 2] - _min
      const k3 = units[unit + 3] - _min
      const v0 = pairLookup[k0]
      const v1 = pairLookup[k1]
      const v2 = pairLookup[k2]
      const v3 = pairLookup[k3]
      const firstOk = (v0 || k0 === _AAm) && (v1 || k1 === _AAm)
      const secondOk = (v2 || k2 === _AAm) && (v3 || k3 === _AAm)
      if (!firstOk || !secondOk) throw new SyntaxError(E_CHAR)
      out[written++] = v0 >> 4
      out[written++] = ((v0 & 0xf) << 4) | (v1 >> 8)
      out[written++] = v1 & 0xff
      out[written++] = v2 >> 4
      out[written++] = ((v2 & 0xf) << 4) | (v3 >> 8)
      out[written++] = v3 & 0xff
    }

    // Remaining complete groups, one at a time
    for (; unit < unitCount; unit += 2) {
      const k0 = units[unit] - _min
      const k1 = units[unit + 1] - _min
      const v0 = pairLookup[k0]
      const v1 = pairLookup[k1]
      const groupOk = (v0 || k0 === _AAm) && (v1 || k1 === _AAm)
      if (!groupOk) throw new SyntaxError(E_CHAR)
      out[written++] = v0 >> 4
      out[written++] = ((v0 & 0xf) << 4) | (v1 >> 8)
      out[written++] = v1 & 0xff
    }

    pos = unit * 2 // back from 16-bit unit index to character index
  } else {
    for (; pos < body; pos += 4) {
      // Any -1 from the table makes the combined value negative
      const word =
        (lookup[str.charCodeAt(pos)] << 18) |
        (lookup[str.charCodeAt(pos + 1)] << 12) |
        (lookup[str.charCodeAt(pos + 2)] << 6) |
        lookup[str.charCodeAt(pos + 3)]
      if (word < 0) throw new SyntaxError(E_CHAR)
      out[written++] = word >> 16
      out[written++] = (word >> 8) & 0xff
      out[written++] = word & 0xff
    }
  }

  // The tail is 0, 2 or 3 characters here; 1 was rejected above
  if (tail < 2) return out

  const pair = (lookup[str.charCodeAt(pos)] << 6) | lookup[str.charCodeAt(pos + 1)]
  if (pair < 0) throw new SyntaxError(E_CHAR)
  out[written] = pair >> 4
  if (tail < 3) {
    // Two characters carry 12 bits for one byte; the 4 spare bits must be zero
    if (pair & 0xf) throw new SyntaxError(E_LAST)
    return out
  }

  const last = lookup[str.charCodeAt(pos + 2)]
  if (last < 0) throw new SyntaxError(E_CHAR)
  out[written + 1] = ((pair << 4) & 0xff) | (last >> 2)
  // Three characters carry 18 bits for two bytes; the 2 spare bits must be zero
  if (last & 0x3) throw new SyntaxError(E_LAST)
  return out
}