Skip to content

Commit 69efd0c

Browse files
committed
refactor: move mergePrefix encoding helper to utils
1 parent 6beec83 commit 69efd0c

2 files changed

Lines changed: 41 additions & 26 deletions

File tree

fallback/encoding.js

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/by
66
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
77
import labels from './encoding.labels.js'
88
import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
9-
import { unfinishedBytes } from './encoding.util.js'
9+
import { unfinishedBytes, mergePrefix } from './encoding.util.js'
1010

1111
export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
1212

@@ -68,32 +68,13 @@ export class TextDecoder {
6868
if (this.#unicode) {
6969
let prefix
7070
if (this.#chunk) {
71-
if (empty) {
72-
u = this.#chunk // process as final chunk to handle errors and state changes
73-
} else if (u.length < 3) {
74-
// No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
75-
const a = new Uint8Array(u.length + this.#chunk.length)
76-
a.set(this.#chunk)
77-
a.set(u, this.#chunk.length)
78-
u = a
71+
const merged = mergePrefix(u, this.#chunk, this.encoding)
72+
if (u.length < 3) {
73+
u = merged // might be unfinished, but fully consumed old u
7974
} else {
80-
// Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
81-
const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
82-
t.set(this.#chunk)
83-
t.set(u.subarray(0, 3), this.#chunk.length)
84-
85-
// Stop at the first offset where unfinished bytes reaches 0 or fits into u
86-
// If that doesn't happen (u too short), just concat chunk and u completely
87-
for (let i = 1; i <= 3; i++) {
88-
const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
89-
if (unfinished <= i) {
90-
// Always reachable at 3, but we still need 'unfinished' value for it
91-
const add = i - unfinished // 0-3
92-
prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
93-
if (add > 0) u = u.subarray(add)
94-
break
95-
}
96-
}
75+
prefix = merged // stops at complete chunk
76+
const add = prefix.length - this.#chunk.length
77+
if (add > 0) u = u.subarray(add)
9778
}
9879

9980
this.#chunk = null

fallback/encoding.util.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// Get the number of trailing bytes in a Uint8Array `u` (ending at `len`) that don't
2+
// form a codepoint yet, but could become part of a single codepoint given more data
13
export function unfinishedBytes(u, len, enc) {
24
switch (enc) {
35
case 'utf-8': {
@@ -32,3 +34,35 @@ export function unfinishedBytes(u, len, enc) {
3234

3335
throw new Error('Unsupported encoding')
3436
}
37+
38+
// Merge prefix `chunk` with `u` and return new combined prefix
39+
// For u.length < 3, fully consumes u and can return unfinished data,
40+
// otherwise returns a prefix with no unfinished bytes
41+
/**
 * Merge the pending prefix `chunk` with the beginning of the new input `u`.
 *
 * @param {Uint8Array} u - newly supplied bytes
 * @param {Uint8Array} chunk - pending bytes to be prepended (expected to be 1-3 bytes)
 * @param {string} enc - encoding name, forwarded as-is to `unfinishedBytes`
 * @returns {Uint8Array}
 *   - `chunk` itself when `u` is empty, or when no byte of `u` has to be added;
 *   - a newly allocated `chunk + u` when `u` has 1-2 bytes (may still be unfinished);
 *   - otherwise `chunk` followed by the first 1-3 bytes of `u`, cut where no
 *     unfinished bytes remain past what `u` itself contributes.
 * @throws {Error} if `unfinishedBytes` reports more than 3 unfinished bytes, which
 *   breaks the 0-3 contract this function relies on.
 */
export function mergePrefix(u, chunk, enc) {
  if (u.length === 0) return chunk
  if (u.length < 3) {
    // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
    const a = new Uint8Array(u.length + chunk.length)
    a.set(chunk)
    a.set(u, chunk.length)
    return a
  }

  // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
  const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
  t.set(chunk)
  t.set(u.subarray(0, 3), chunk.length)

  // Stop at the first offset where unfinished bytes reaches 0 or fits into u
  // (inputs shorter than 3 bytes were already handled by the plain concat above)
  for (let i = 1; i <= 3; i++) {
    const unfinished = unfinishedBytes(t, chunk.length + i, enc) // 0-3
    if (unfinished <= i) {
      // Always reachable at 3, but we still need 'unfinished' value for it
      const add = i - unfinished // 0-3
      return add > 0 ? t.subarray(0, chunk.length + add) : chunk
    }
  }

  // Unreachable while unfinishedBytes returns 0-3; fail loudly instead of returning undefined
  throw new Error('Unreachable')
}

0 commit comments

Comments
 (0)