-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathToUnicodeInterval.java
More file actions
111 lines (101 loc) · 3.9 KB
/
ToUnicodeInterval.java
File metadata and controls
111 lines (101 loc) · 3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/**
* This file is part of veraPDF Parser, a module of the veraPDF project.
* Copyright (c) 2015-2025, veraPDF Consortium <info@verapdf.org>
* All rights reserved.
*
* veraPDF Parser is free software: you can redistribute it and/or modify
* it under the terms of either:
*
* The GNU General public license GPLv3+.
* You should have received a copy of the GNU General Public License
* along with veraPDF Parser as the LICENSE.GPL file in the root of the source
* tree. If not, see http://www.gnu.org/licenses/ or
* https://www.gnu.org/licenses/gpl-3.0.en.html.
*
* The Mozilla Public License MPLv2+.
* You should have received a copy of the Mozilla Public License along with
* veraPDF Parser as the LICENSE.MPL file in the root of the source tree.
* If a copy of the MPL was not distributed with this file, you can obtain one at
* http://mozilla.org/MPL/2.0/.
*/
package org.verapdf.pd.font.cmap;
import java.nio.charset.StandardCharsets;
import java.util.logging.Logger;
/**
* Represents interval of mappings to Unicode.
*
* @author Sergey Shemyakov
*/
public class ToUnicodeInterval {

    private static final Logger LOGGER = Logger.getLogger(ToUnicodeInterval.class.getCanonicalName());

    /** String holding the single code unit U+FFFE. */
    private static final String CHAR_FFFE = String.valueOf((char) 0xFFFE);

    /** String holding the single code unit U+FEFF. */
    private static final String CHAR_FEFF = String.valueOf((char) 0xFEFF);

    private final long intervalBegin;
    private final long intervalEnd;
    /** Number of bytes in the starting value; every mapped value is emitted with this width. */
    private final int length;
    private final long startingValue;

    /**
     * Creates a mapping interval from character codes to Unicode values.
     *
     * @param intervalBegin is the first code of mapping interval.
     * @param intervalEnd   is the last code of mapping interval (inclusive).
     * @param startingValue is the byte representation of the Unicode value
     *                      that the first code of the interval maps to. It
     *                      is converted to a number with
     *                      {@code CMapParser.numberFromBytes}, and its length
     *                      defines the byte width of every value produced by
     *                      {@link #toUnicode(int)}.
     */
    public ToUnicodeInterval(long intervalBegin, long intervalEnd, byte[] startingValue) {
        this.intervalBegin = intervalBegin;
        this.intervalEnd = intervalEnd;
        this.length = startingValue.length;
        this.startingValue = CMapParser.numberFromBytes(startingValue);
    }

    /**
     * Checks if given code lies inside this mapping interval, i.e. whether
     * it can be mapped to a Unicode value by this interval.
     *
     * @param code is character code to be checked.
     * @return true if {@code intervalBegin <= code <= intervalEnd}.
     */
    public boolean containsCode(long code) {
        return code >= intervalBegin && code <= intervalEnd;
    }

    /**
     * If code is in mapping interval, this method performs mapping of given
     * code to Unicode value. If code is not in interval, return value is
     * undefined.
     *
     * The mapped number is {@code code - intervalBegin + startingValue}; it
     * is written big-endian into as many bytes as the starting value had
     * (higher-order bytes that do not fit are dropped) and then converted to
     * a String.
     *
     * @param code is a character code.
     * @return Unicode value for character code as a String object; an empty
     * string if the starting value of this interval was empty.
     */
    public String toUnicode(int code) {
        byte[] unicode = new byte[length];
        long unicodeNumber = code - intervalBegin + startingValue;
        // Fill from the least significant byte backwards so the result is big-endian.
        for (int index = unicode.length - 1; index >= 0; index--) {
            unicode[index] = (byte) (unicodeNumber & 0xFF);
            unicodeNumber >>= 8;
        }
        return bytesToUnicodeString(unicode);
    }

    /**
     * Converts the big-endian byte representation of a mapped value into a
     * String.
     *
     * @param unicode bytes of the mapped value.
     * @return U+FFFE or U+FEFF as a one-character string if the matching byte
     * pair occurs anywhere in the array; otherwise the decoded value.
     */
    private static String bytesToUnicodeString(byte[] unicode) {
        if (unicode.length == 0) {
            // Nothing to decode; previously this case failed with
            // ArrayIndexOutOfBoundsException on unicode[0].
            return "";
        }
        // FF FE is checked before FE FF, so FF FE wins when both are present.
        if (containsBytePair(unicode, (byte) 0xFF, (byte) 0xFE)) {
            return CHAR_FFFE;
        }
        if (containsBytePair(unicode, (byte) 0xFE, (byte) 0xFF)) {
            return CHAR_FEFF;
        }
        if (unicode[0] == 0) {
            // NOTE(review): with a leading zero byte only the last byte is
            // used, whatever the array length; bytes in between are ignored.
            // Kept as is - confirm this is intended for values wider than 2 bytes.
            return String.valueOf((char) (unicode[unicode.length - 1] & 0xFF));
        }
        return new String(unicode, StandardCharsets.UTF_16BE);
    }

    /**
     * Tells whether two given bytes occur next to each other in the array.
     * Every byte offset is examined, not only even ones.
     *
     * @param bytes  array to scan.
     * @param first  expected byte at position i.
     * @param second expected byte at position i + 1.
     * @return true if the pair is found at some position.
     */
    private static boolean containsBytePair(byte[] bytes, byte first, byte second) {
        for (int i = 0; i + 1 < bytes.length; ++i) {
            if (bytes[i] == first && bytes[i + 1] == second) {
                return true;
            }
        }
        return false;
    }
}