Skip to content

Commit 6c87651

Browse files
committed
git commit -m "Add support for cmap format 12 (Unicode > U+FFFF)"
1 parent a6e9a68 commit 6c87651

2 files changed

Lines changed: 126 additions & 37 deletions

File tree

src/FontLib/Table/Type/cmap.php

Lines changed: 113 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -149,56 +149,56 @@ protected function _parse() {
149149

150150
$segCount = $subtable["segCountX2"] / 2;
151151
$subtable["segCount"] = $segCount;
152-
152+
153153
$endCode = $font->readUInt16Many($segCount);
154-
154+
155155
$font->readUInt16(); // reservedPad
156-
156+
157157
$startCode = $font->readUInt16Many($segCount);
158158
$idDelta = $font->readInt16Many($segCount);
159-
159+
160160
$ro_start = $font->pos();
161161
$idRangeOffset = $font->readUInt16Many($segCount);
162-
162+
163163
$glyphIndexArray = array();
164164
for ($i = 0; $i < $segCount; $i++) {
165165
$c1 = $startCode[$i];
166166
$c2 = $endCode[$i];
167167
$d = $idDelta[$i];
168168
$ro = $idRangeOffset[$i];
169-
169+
170170
if ($ro > 0) {
171171
$font->seek($subtable["offset"] + 2 * $i + $ro);
172172
}
173-
173+
174174
for ($c = $c1; $c <= $c2; $c++) {
175175
if ($c === 0xFFFF) {
176176
continue;
177177
}
178-
178+
179179
if ($ro == 0) {
180180
$gid = ($c + $d) & 0xFFFF;
181181
}
182182
else {
183183
$offset = ($c - $c1) * 2 + $ro;
184184
$offset = $ro_start + 2 * $i + $offset;
185-
185+
186186
$gid = 0;
187187
if ($font->seek($offset) === true) {
188188
$gid = $font->readUInt16();
189189
}
190-
190+
191191
if ($gid != 0) {
192192
$gid = ($gid + $d) & 0xFFFF;
193193
}
194194
}
195-
195+
196196
if ($gid >= 0) {
197197
$glyphIndexArray[$c] = $gid;
198198
}
199199
}
200200
}
201-
201+
202202
$subtable += array(
203203
"endCode" => $endCode,
204204
"startCode" => $startCode,
@@ -257,13 +257,36 @@ function _encode() {
257257

258258
ksort($newGlyphIndexArray); // Sort by char code
259259

260+
// Check if there are any supplementary-plane characters (code points > 0xFFFF, i.e. outside the BMP)
261+
$hasSIP = false;
262+
$maxCode = 0;
263+
foreach ($newGlyphIndexArray as $code => $gid) {
264+
if ($code > 0xFFFF) {
265+
$hasSIP = true;
266+
}
267+
if ($code > $maxCode) {
268+
$maxCode = $code;
269+
}
270+
}
271+
272+
// Split BMP and SIP characters
273+
$bmpGlyphIndexArray = array();
274+
$sipGlyphIndexArray = array();
275+
foreach ($newGlyphIndexArray as $code => $gid) {
276+
if ($code <= 0xFFFF) {
277+
$bmpGlyphIndexArray[$code] = $gid;
278+
} else {
279+
$sipGlyphIndexArray[$code] = $gid;
280+
}
281+
}
282+
260283
$segments = array();
261284

262285
$i = -1;
263286
$prevCode = 0xFFFF;
264287
$prevGid = 0xFFFF;
265288

266-
foreach ($newGlyphIndexArray as $code => $gid) {
289+
foreach ($bmpGlyphIndexArray as $code => $gid) {
267290
if (
268291
$prevCode + 1 != $code ||
269292
$prevGid + 1 != $gid
@@ -325,10 +348,21 @@ function _encode() {
325348
"endCode" => $endCode,
326349
"idDelta" => $idDelta,
327350
"idRangeOffset" => $idRangeOffset,
328-
"glyphIndexArray" => $newGlyphIndexArray,
351+
"glyphIndexArray" => $bmpGlyphIndexArray,
329352
)
330353
);
331354

355+
// Add Format 12 subtable if SIP characters exist
356+
if ($hasSIP) {
357+
$subtables[] = array(
358+
"platformID" => 3,
359+
"platformSpecificID" => 10,
360+
"offset" => null,
361+
"format" => 12,
362+
"glyphIndexArray" => $newGlyphIndexArray, // All chars including SIP
363+
);
364+
}
365+
332366
$data = array(
333367
"version" => 0,
334368
"numberSubtables" => count($subtables),
@@ -347,26 +381,71 @@ function _encode() {
347381
$length_before = $length;
348382
$data["subtables"][$i]["offset"] = $length;
349383

350-
$length += $font->writeUInt16($subtable["format"]);
351-
352-
$before_subheader = $font->pos();
353-
$length += $font->pack(self::$subtable_v4_format, $subtable);
354-
355-
$segCount = $subtable["segCount"];
356-
$length += $font->w(array(self::uint16, $segCount), $subtable["endCode"]);
357-
$length += $font->writeUInt16(0); // reservedPad
358-
$length += $font->w(array(self::uint16, $segCount), $subtable["startCode"]);
359-
$length += $font->w(array(self::int16, $segCount), $subtable["idDelta"]);
360-
$length += $font->w(array(self::uint16, $segCount), $subtable["idRangeOffset"]);
361-
$length += $font->w(array(self::uint16, $segCount), array_values($subtable["glyphIndexArray"]));
362-
363-
$after_subtable = $font->pos();
364-
365-
$subtable["length"] = $length - $length_before;
366-
$font->seek($before_subheader);
367-
$font->pack(self::$subtable_v4_format, $subtable);
368-
369-
$font->seek($after_subtable);
384+
if ($subtable["format"] == 12) {
385+
// Write Format 12 subtable
386+
$length += $font->writeUInt16(12); // format
387+
$length += $font->writeUInt16(0); // reserved
388+
// Build groups for Format 12
389+
$fmt12Groups = array();
390+
$fmt12Glyphs = $subtable["glyphIndexArray"];
391+
ksort($fmt12Glyphs);
392+
$groupStart = null;
393+
$groupEnd = null;
394+
$groupGidStart = null;
395+
$prevCode = -2;
396+
$prevGid = -2;
397+
foreach ($fmt12Glyphs as $code => $gid) {
398+
if ($code === $prevCode + 1 && $gid === $prevGid + 1) {
399+
$groupEnd = $code;
400+
$prevCode = $code;
401+
$prevGid = $gid;
402+
} else {
403+
if ($groupStart !== null) {
404+
$fmt12Groups[] = array($groupStart, $groupEnd, $groupGidStart);
405+
}
406+
$groupStart = $code;
407+
$groupEnd = $code;
408+
$groupGidStart = $gid;
409+
$prevCode = $code;
410+
$prevGid = $gid;
411+
}
412+
}
413+
if ($groupStart !== null) {
414+
$fmt12Groups[] = array($groupStart, $groupEnd, $groupGidStart);
415+
}
416+
$ngroups = count($fmt12Groups);
417+
$fmt12Length = 16 + $ngroups * 12; // 16 bytes header + 12 bytes per group
418+
$length += $font->writeUInt32($fmt12Length); // length
419+
$length += $font->writeUInt32(0); // language
420+
$length += $font->writeUInt32($ngroups); // ngroups
421+
foreach ($fmt12Groups as $group) {
422+
$length += $font->writeUInt32($group[0]); // startCharCode
423+
$length += $font->writeUInt32($group[1]); // endCharCode
424+
$length += $font->writeUInt32($group[2]); // startGlyphID
425+
}
426+
} else {
427+
// Write Format 4 subtable (original code)
428+
$length += $font->writeUInt16($subtable["format"]);
429+
430+
$before_subheader = $font->pos();
431+
$length += $font->pack(self::$subtable_v4_format, $subtable);
432+
433+
$segCount = $subtable["segCount"];
434+
$length += $font->w(array(self::uint16, $segCount), $subtable["endCode"]);
435+
$length += $font->writeUInt16(0); // reservedPad
436+
$length += $font->w(array(self::uint16, $segCount), $subtable["startCode"]);
437+
$length += $font->w(array(self::int16, $segCount), $subtable["idDelta"]);
438+
$length += $font->w(array(self::uint16, $segCount), $subtable["idRangeOffset"]);
439+
$length += $font->w(array(self::uint16, $segCount), array_values($subtable["glyphIndexArray"]));
440+
441+
$after_subtable = $font->pos();
442+
443+
$subtable["length"] = $length - $length_before;
444+
$font->seek($before_subheader);
445+
$font->pack(self::$subtable_v4_format, $subtable);
446+
447+
$font->seek($after_subtable);
448+
}
370449
}
371450

372451
// write subtables headers

src/FontLib/TrueType/File.php

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,23 @@ function utf8toUnicode($str) {
221221

222222
function getUnicodeCharMap() {
223223
$subtable = null;
224+
$subtableFmt12 = null;
224225
foreach ($this->getData("cmap", "subtables") as $_subtable) {
225-
if ($_subtable["platformID"] == 0 || ($_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 1)) {
226+
// Prefer Format 12 (full Unicode, including the supplementary planes) over Format 4 (BMP only)
227+
if (isset($_subtable["format"]) && $_subtable["format"] == 12 &&
228+
($_subtable["platformID"] == 0 || ($_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 10))) {
229+
$subtableFmt12 = $_subtable;
230+
}
231+
if ($subtable === null && ($_subtable["platformID"] == 0 || ($_subtable["platformID"] == 3 && $_subtable["platformSpecificID"] == 1))) {
226232
$subtable = $_subtable;
227-
break;
228233
}
229234
}
230235

236+
// Use Format 12 if available (supports supplementary-plane characters, U+10000 and above)
237+
if ($subtableFmt12 && isset($subtableFmt12["glyphIndexArray"])) {
238+
return $subtableFmt12["glyphIndexArray"];
239+
}
240+
231241
if ($subtable) {
232242
return $subtable["glyphIndexArray"];
233243
}
@@ -276,7 +286,7 @@ function getUnicodeCharMap() {
276286
return $glyphIndexArray;
277287
}
278288
}
279-
289+
280290
return null;
281291
}
282292

0 commit comments

Comments
 (0)