Skip to content

Commit 9885c8d

Browse files
committed
Re-create the algorithm for descrambling puzzleme
1 parent 82e5c9c commit 9885c8d

File tree

12 files changed

+1813
-94
lines changed

12 files changed

+1813
-94
lines changed

.yarn/install-state.gz

-5 Bytes
Binary file not shown.

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ This isn't a comprehensive doc because to our knowledge there are no OSS consume
22

33
### 12.1.0
44

5-
- Adds some functions for handling importing from a Puzzleme URL
5+
- Adds some functions for handling importing from a Puzzleme URL. Built on code found in https://github.com/thisisparker/xword-dl and https://github.com/jpd236/kotwords
66

77
### 12.0.0
88

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "xd-crossword-monorepo",
33
"private": true,
4-
"version": "12.1.1",
4+
"version": "12.1.2",
55
"description": "Tools for taking different crossword file formats and converting them to xd, and for converting an xd file to useful JSON",
66
"scripts": {
77
"build": "yarn workspaces foreach -At run build",

packages/xd-crossword-tools-parser/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "xd-crossword-tools-parser",
3-
"version": "12.1.1",
3+
"version": "12.1.2",
44
"main": "dist/index.js",
55
"module": "./dist/index.mjs",
66
"types": "./dist/index.d.ts",

packages/xd-crossword-tools/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "xd-crossword-tools",
3-
"version": "12.1.1",
3+
"version": "12.1.2",
44
"main": "dist/index.js",
55
"module": "./dist/index.mjs",
66
"types": "./dist/index.d.ts",

packages/xd-crossword-tools/src/puzzleMeDecode.ts

Lines changed: 224 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,196 @@
11
import type { AmuseTopLevel, AmuseData } from "./amuseJSONToXD.types"
22

3+
// The deobfuscation algorithm is adapted from:
4+
// - https://github.com/thisisparker/xword-dl (xword-dl by Parker Higgins)
5+
// https://github.com/thisisparker/xword-dl/blob/main/src/xword_dl/downloader/amuselabsdownloader.py
6+
// - https://github.com/jpd236/kotwords (kotwords by jpd236)
7+
// https://github.com/jpd236/kotwords/blob/master/LICENSE
8+
//
9+
// Used here under the terms of their respective licenses (Apache 2.0 / MIT).
10+
//
11+
// PuzzleMe scrambles the rawc field by reversing successive chunks of the string,
12+
// using a repeating key of 7 digits (each 2-20) as chunk lengths. This implementation
13+
// uses brute-force key discovery with BFS to find the correct key.
14+
//
15+
// Known keys (for fast-path optimization):
16+
// - V1: [8, 11, 7, 17, 11, 7, 11] - older puzzles
17+
// - V2: [15, 14, 9, 8, 17, 11, 4] - current as of late 2024
18+
// - V3: [18, 15, 7, 16, 14, 17, 12] - Billboard variant
19+
20+
/**
 * Known descramble keys for fast-path decoding.
 *
 * Each entry is one complete key of seven segment lengths.
 * decodePuzzleMeRawc() tries the entries in array order and returns on the
 * first key that decodes successfully, so the key expected to match most
 * often is listed first.
 */
21+
const KNOWN_KEYS: number[][] = [
22+
[15, 14, 9, 8, 17, 11, 4], // V2 - most common currently
23+
[18, 15, 7, 16, 14, 17, 12], // V3 - Billboard variant
24+
[8, 11, 7, 17, 11, 7, 11], // V1 - legacy
25+
]
26+
327
/**
 * Descramble a string using a known key.
 *
 * Walks the string from left to right and reverses successive segments in
 * place. Segment lengths are taken from `key` cyclically; the final segment is
 * truncated to whatever remains of the string. Because reversing a segment
 * twice restores it, applying the same key twice returns the original input.
 *
 * @param rawc - The scrambled string
 * @param key - Segment lengths, used cyclically (typically 7 values, each 2-20).
 *   An empty key leaves the input unchanged.
 * @returns The descrambled string
 * @throws RangeError if any key entry is not a positive integer. Such entries
 *   would either never advance through the string (0), walk backwards
 *   (negative), or silently stop the descramble early (NaN / fractional).
 */
function descrambleWithKey(rawc: string, key: readonly number[]): string {
  // No segment lengths means there is nothing to reverse.
  if (key.length === 0) {
    return rawc
  }

  // Validate up front so a bad key fails loudly instead of hanging the loop below.
  for (const length of key) {
    if (!Number.isInteger(length) || length < 1) {
      throw new RangeError(`Invalid descramble key entry: ${length}`)
    }
  }

  const buffer = rawc.split("")
  let start = 0
  let segmentCount = 0

  // A lone trailing character is its own reversal, so the loop can stop one short.
  while (start < buffer.length - 1) {
    const segmentLength = Math.min(key[segmentCount % key.length], buffer.length - start)
    segmentCount++

    // Reverse buffer[start .. start + segmentLength - 1] in place.
    let left = start
    let right = start + segmentLength - 1
    while (left < right) {
      ;[buffer[left], buffer[right]] = [buffer[right], buffer[left]]
      left++
      right--
    }

    start += segmentLength
  }

  return buffer.join("")
}
59+
60+
/**
 * Check whether a partial key is still plausible for the given scrambled text.
 *
 * The text is processed in windows. In each window the segments named by
 * `keyPrefix` are reversed and concatenated, the part of that result lying on
 * 4-character Base64 boundaries (relative to the whole string) is decoded, and
 * the decoded bytes are inspected. After each window, `spacing` characters are
 * skipped to stand in for the key entries that are not known yet.
 *
 * @param rawc - The scrambled string
 * @param keyPrefix - Leading entries of the candidate key
 * @param spacing - Number of characters to skip after each window (the assumed
 *   total length of the remaining, unknown key entries)
 * @returns false as soon as a window fails to decode or contains a rejected
 *   byte; true if every window passes (including when no window is decodable)
 */
function isValidKeyPrefix(rawc: string, keyPrefix: number[], spacing: number): boolean {
  const total = rawc.length

  try {
    let cursor = 0

    while (cursor < total) {
      const windowStart = cursor
      let assembled = ""

      // Undo the reversal for each known segment of this window.
      for (let k = 0; k < keyPrefix.length && cursor < total; k++) {
        const take = Math.min(keyPrefix[k], total - cursor)
        const segment = rawc.slice(cursor, cursor + take)
        assembled += segment.split("").reverse().join("")
        cursor += take
      }

      // Offsets (relative to windowStart) of the first and last 4-char Base64
      // boundary that fall inside the window.
      const alignedFrom = Math.floor((windowStart + 3) / 4) * 4 - windowStart
      const alignedTo = Math.floor(cursor / 4) * 4 - windowStart
      const hasAlignedSlice = alignedFrom < assembled.length && alignedTo > alignedFrom

      if (hasAlignedSlice) {
        try {
          const bytes = base64Decode(assembled.slice(alignedFrom, alignedTo))
          for (const byte of bytes) {
            const isAllowedControl = byte === 0x09 || byte === 0x0a || byte === 0x0d
            // Reject control characters other than tab/LF/CR and byte values
            // that can never occur in UTF-8 (0xC0, 0xC1, 0xF5 and above).
            if ((byte < 32 && !isAllowedControl) || byte === 0xc0 || byte === 0xc1 || byte >= 0xf5) {
              return false
            }
          }
        } catch {
          return false
        }
      }

      // Jump over the part of the key cycle that is not known yet.
      cursor += spacing
    }

    return true
  } catch {
    return false
  }
}
52127

53-
// Pass 4: start at 26, step 37, reverse 17 then 11 then 7
54-
for (n = 26; n < length; n += 37) {
55-
r = n
128+
/**
 * Brute-force discover the descramble key using BFS.
 *
 * Candidate keys are grown one entry at a time. A partial key is kept only if
 * isValidKeyPrefix() accepts it for at least one plausible total length of the
 * still-unknown entries; complete keys are confirmed with tryDecodeWithKey().
 *
 * The search is seeded with a heuristic for the first entry: Base64-encoded
 * JSON starts with "ey" (for `{"`) or "ew" (for `{\n`), and the first segment
 * is reversed, so "ye" / "we" marks the end of the first segment. Every such
 * marker within the allowed range is used as a seed, earliest first, so a
 * coincidental early match cannot hide the real one. If there is no marker the
 * first entry is brute-forced as well.
 *
 * @param rawc - The scrambled string
 * @returns The discovered 7-digit key, or null if not found
 */
function discoverKey(rawc: string): number[] | null {
  const KEY_LENGTH = 7
  const MIN_DIGIT = 2
  const MAX_DIGIT = 20
  // The smallest JSON text tryDecodeWithKey() accepts is `{"w":1,"h":1}`
  // (13 bytes), which needs at least 18 Base64 characters - assuming
  // base64Decode yields at most 6 bits per input character.
  const MIN_RAWC_LENGTH = 18

  // Very short inputs cannot succeed, and isValidKeyPrefix() has almost nothing
  // to reject on them, so the search would enumerate close to the whole key
  // space before giving up.
  if (rawc.length < MIN_RAWC_LENGTH) {
    return null
  }

  // Seed the queue with every first-entry candidate suggested by the heuristic.
  const queue: number[][] = []
  for (let end = MIN_DIGIT; end <= MAX_DIGIT; end++) {
    const marker = rawc.slice(end - 2, end)
    if (marker === "ye" || marker === "we") {
      queue.push([end])
    }
  }
  if (queue.length === 0) {
    queue.push([])
  }

  // Walk the queue with a head index rather than shift(), which is O(n) per pop.
  for (let head = 0; head < queue.length; head++) {
    const candidateKeyPrefix = queue[head]

    if (candidateKeyPrefix.length === KEY_LENGTH) {
      // Complete key: accept it only if it decodes to plausible puzzle JSON.
      if (tryDecodeWithKey(rawc, candidateKeyPrefix) !== null) {
        return candidateKeyPrefix
      }
      continue
    }

    // Expand by trying every possible next entry.
    for (let nextDigit = MIN_DIGIT; nextDigit <= MAX_DIGIT; nextDigit++) {
      const newCandidate = [...candidateKeyPrefix, nextDigit]
      const remainingDigits = KEY_LENGTH - newCandidate.length
      const minSpacing = MIN_DIGIT * remainingDigits
      const maxSpacing = MAX_DIGIT * remainingDigits

      // Keep the candidate if any total length of the unknown remainder
      // produces valid output.
      let viable = false
      for (let spacing = minSpacing; spacing <= maxSpacing; spacing++) {
        if (isValidKeyPrefix(rawc, newCandidate, spacing)) {
          viable = true
          break
        }
      }

      if (viable) {
        queue.push(newCandidate)
      }
    }
  }

  return null
}
99195

100196
/**
@@ -160,32 +256,71 @@ function utf8Decode(bytes: Uint8Array): string {
160256
return result
161257
}
162258

259+
/**
 * Attempt the full decode chain (descramble, Base64, UTF-8, JSON) with one key.
 *
 * Any failure along the way - an exception from a decode step, text that does
 * not start with "{", invalid JSON, or a result without truthy `w` and `h`
 * fields - is reported as null rather than thrown.
 *
 * @param rawc - The encoded rawc string
 * @param key - The descramble key to use
 * @returns The parsed puzzle data, or null if decoding failed
 */
function tryDecodeWithKey(rawc: string, key: number[]): AmuseData | null {
  try {
    const json = utf8Decode(base64Decode(descrambleWithKey(rawc, key)))

    // Cheap rejection before paying for JSON.parse: puzzle data is a JSON object.
    if (json.startsWith("{")) {
      const candidate = JSON.parse(json) as AmuseData

      // A real puzzle always carries its grid dimensions.
      const hasDimensions = Boolean(candidate.w) && Boolean(candidate.h)
      return hasDimensions ? candidate : null
    }

    return null
  } catch {
    return null
  }
}
289+
163290
/**
164291
* Decode the rawc field from PuzzleMe to get puzzle data.
165292
*
166293
* The decode chain is:
167-
* 1. descramble() - Reverses chunks of the string at specific intervals
294+
 * 1. descrambleWithKey() - Reverses chunks of the string using a key
168295
* 2. base64Decode() - Standard base64 decoding to bytes
169296
* 3. utf8Decode() - Converts bytes to UTF-8 string
170297
* 4. JSON.parse() - Parses the JSON puzzle data
171298
*
299+
* This function first tries known keys for fast decoding, then falls back
300+
* to brute-force key discovery if none work.
301+
*
172302
* @param rawc - The encoded rawc string from PuzzleMe HTML
173303
* @returns The decoded puzzle data as AmuseData
174304
*/
175305
export function decodePuzzleMeRawc(rawc: string): AmuseData {
  const failureMessage = "Failed to decode PuzzleMe rawc data with any known algorithm"

  // An empty payload can never decode to a puzzle. Fail fast: with nothing to
  // check, isValidKeyPrefix() accepts every candidate, so the brute-force
  // search below would enumerate the whole key space before giving up.
  if (rawc.length === 0) {
    throw new Error(failureMessage)
  }

  // Fast path: try known keys first, in their listed order.
  for (const key of KNOWN_KEYS) {
    const puzzleData = tryDecodeWithKey(rawc, key)
    if (puzzleData) {
      return puzzleData
    }
  }

  // Slow path: brute-force key discovery.
  const discoveredKey = discoverKey(rawc)
  if (discoveredKey) {
    const puzzleData = tryDecodeWithKey(rawc, discoveredKey)
    if (puzzleData) {
      return puzzleData
    }
  }

  // Neither a known key nor a discovered key produced valid puzzle JSON.
  throw new Error(failureMessage)
}
190325

191326
/**

0 commit comments

Comments
 (0)