refactor: move mergePrefix encoding helper to utils

ChALkeR · ChALkeR · commit 69efd0c470d3 · 2026-01-21T20:20:19.000+04:00
diff --git a/fallback/encoding.js b/fallback/encoding.js
@@ -6,7 +6,7 @@ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/by
 import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
 import labels from './encoding.labels.js'
 import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
-import { unfinishedBytes } from './encoding.util.js'
+import { unfinishedBytes, mergePrefix } from './encoding.util.js'
 
 export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
 
@@ -68,32 +68,13 @@ export class TextDecoder {
     if (this.#unicode) {
       let prefix
       if (this.#chunk) {
-        if (empty) {
-          u = this.#chunk // process as final chunk to handle errors and state changes
-        } else if (u.length < 3) {
-          // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
-          const a = new Uint8Array(u.length + this.#chunk.length)
-          a.set(this.#chunk)
-          a.set(u, this.#chunk.length)
-          u = a
+        const merged = mergePrefix(u, this.#chunk, this.encoding)
+        if (u.length < 3) {
+          u = merged // might be unfinished, but fully consumed old u
         } else {
-          // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
-          const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
-          t.set(this.#chunk)
-          t.set(u.subarray(0, 3), this.#chunk.length)
-
-          // Stop at the first offset where unfinished bytes reaches 0 or fits into u
-          // If that doesn't happen (u too short), just concat chunk and u completely
-          for (let i = 1; i <= 3; i++) {
-            const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
-            if (unfinished <= i) {
-              // Always reachable at 3, but we still need 'unfinished' value for it
-              const add = i - unfinished // 0-3
-              prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
-              if (add > 0) u = u.subarray(add)
-              break
-            }
-          }
+          prefix = merged // stops at complete chunk
+          const add = prefix.length - this.#chunk.length
+          if (add > 0) u = u.subarray(add)
         }
 
         this.#chunk = null
diff --git a/fallback/encoding.util.js b/fallback/encoding.util.js
@@ -1,3 +1,5 @@
+// Get the number of trailing bytes in a Uint8Array `u` (ending at `len`) that don't
+// form a codepoint yet, but could become part of a single codepoint given more data
 export function unfinishedBytes(u, len, enc) {
   switch (enc) {
     case 'utf-8': {
@@ -32,3 +34,35 @@ export function unfinishedBytes(u, len, enc) {
 
   throw new Error('Unsupported encoding')
 }
+
+// Merge pending bytes `chunk` with the start of `u` and return the combined prefix (`chunk` itself if nothing is added)
+// For u.length < 3, fully consumes u into a new array that may still end with unfinished data,
+// otherwise takes 0-3 leading bytes of u and returns a prefix with no unfinished bytes
+export function mergePrefix(u, chunk, enc) {
+  if (u.length === 0) return chunk
+  if (u.length < 3) {
+    // No reason to bruteforce offsets: u is tiny and might not end the sequence yet, so just concatenate
+    const a = new Uint8Array(u.length + chunk.length)
+    a.set(chunk)
+    a.set(u, chunk.length)
+    return a
+  }
+
+  // Copy only the first 3 bytes of u next to chunk, so the prefix can be decoded separately without copying all of u
+  const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
+  t.set(chunk)
+  t.set(u.subarray(0, 3), chunk.length)
+
+  // Stop at the first i where the unfinished tail is empty or lies entirely within the i bytes taken from u
+  // Only u.length >= 3 gets here; a shorter u was already concatenated with chunk completely (above)
+  for (let i = 1; i <= 3; i++) {
+    const unfinished = unfinishedBytes(t, chunk.length + i, enc) // 0-3
+    if (unfinished <= i) {
+      // Always reachable at i = 3 as unfinished is 0-3, but we still need the 'unfinished' value for it
+      const add = i - unfinished // 0-3 bytes of u to append; the unfinished tail stays in u
+      return add > 0 ? t.subarray(0, chunk.length + add) : chunk
+    }
+  }
+
+  // Unreachable as long as unfinishedBytes returns 0-3: the loop returns at i = 3 at the latest
+}