From 6226fac0197ea2f3e45d9a52ac39b41f849e7bde Mon Sep 17 00:00:00 2001
From: Thomas Steiner
Date: Fri, 15 May 2026 19:01:19 +0200
Subject: [PATCH] =?UTF-8?q?[Web][COS]=20Persist=20URL=E2=86=92hash=20mappi?= =?UTF-8?q?ng=20across=20page=20loads?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CrossOriginStorage class stored the URL→hash map only in the
module-level GLOBAL_HASH_CACHE. After a page reload that cache is empty,
and getFileHash() can only recover hashes for HuggingFace LFS files
(URLs containing /resolve/). This left several resource categories
uncacheable across sessions:

- JSON files not stored in LFS (mlc-chat-config.json, tokenizer.json,
  tensor-cache.json) — getFileHash() returns null for their /resolve/
  URLs because the raw "pointer" fetched for them is the actual file
  content, not an LFS pointer.
- .wasm files from GitHub raw URLs — no /resolve/ pattern at all.
- Any file whose hash was computed from blob content via getBlobHash().

Additionally, even for genuine LFS model shards, each page load
re-fetched every shard's LFS pointer file over the network just to
re-derive the SHA-256 hash.

Fix: persist the URL→hash mapping to a dedicated Cache API store
(tvmjs-cos-hash-meta). Two write sites:

1. put() — after a file is stored in COS, persist its blob-derived hash.
   This covers all non-LFS files and non-HuggingFace URLs.
2. resolveHashDescriptor() — after getFileHash() resolves a hash from
   the LFS pointer, persist it immediately. This eliminates repeated
   pointer-file network requests for model shards on subsequent visits.

One read site: resolveHashDescriptor() consults the persisted store
after the in-memory cache misses and before falling back to
getFileHash(); a hit is copied back into the in-memory cache.

Both write sites go through persistHashEntry(), which wraps the Cache
API calls in a best-effort try/catch so storage errors (e.g. quota
exhaustion) are silently ignored. loadPersistedHashEntry() similarly
swallows errors and returns null. The typeof caches === "undefined"
guard in both helpers keeps the code safe in environments without the
Cache API, such as Node.js test environments.
--- web/src/artifact_cache.ts | 47 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/web/src/artifact_cache.ts b/web/src/artifact_cache.ts index d36573ccccea..4dbaf7d657f2 100644 --- a/web/src/artifact_cache.ts +++ b/web/src/artifact_cache.ts @@ -133,6 +133,7 @@ declare global { const HASH_ALGORITHM = "SHA-256"; const DEFAULT_FETCH_OPTIONS: RequestInit = { method: "GET" }; +const COS_HASH_META_CACHE = "tvmjs-cos-hash-meta"; let crossOriginFallbackWarningLogged = false; const GLOBAL_HASH_CACHE = new Map< @@ -194,6 +195,7 @@ class CrossOriginStorage { await writableStream.write(blob); await writableStream.close(); this.hashCache.set(url, hash); + await this.persistHashEntry(url, hash); } async delete(_request: RequestLike): Promise { @@ -224,6 +226,39 @@ class CrossOriginStorage { throw new Error("CrossOriginStorage: Unsupported request type."); } + private async persistHashEntry( + url: string, + hash: CrossOriginHashDescriptor, + ): Promise { + try { + if (typeof caches === "undefined") { + return; + } + const store = await caches.open(COS_HASH_META_CACHE); + await store.put(url, new Response(JSON.stringify(hash))); + } catch { + // best-effort: ignore storage errors + } + } + + private async loadPersistedHashEntry( + url: string, + ): Promise { + try { + if (typeof caches === "undefined") { + return null; + } + const store = await caches.open(COS_HASH_META_CACHE); + const response = await store.match(url); + if (!response) { + return null; + } + return JSON.parse(await response.text()) as CrossOriginHashDescriptor; + } catch { + return null; + } + } + private async resolveHashDescriptor( url: string, ): Promise { @@ -231,6 +266,15 @@ class CrossOriginStorage { if (cached) { return cached; } + // Check persistent store before falling back to network-based hash extraction. + // This covers non-LFS files (JSON configs, tokenizers) and non-HuggingFace URLs + // (e.g. 
GitHub raw .wasm) whose hashes were computed from blob content on a + // previous visit and persisted to the Cache API. + const persisted = await this.loadPersistedHashEntry(url); + if (persisted) { + this.hashCache.set(url, persisted); + return persisted; + } const hashValue = await this.getFileHash(url); if (!hashValue) { return null; @@ -240,6 +284,9 @@ class CrossOriginStorage { value: hashValue, }; this.hashCache.set(url, descriptor); + // Persist pointer-derived hashes so subsequent visits skip the LFS pointer + // network request (especially important for models with many shards). + await this.persistHashEntry(url, descriptor); return descriptor; }