Skip to content

Commit 468a912

Browse files
committed
chore: moved private utility methods to Unicode
1 parent c37bf6f commit 468a912

2 files changed

Lines changed: 100 additions & 86 deletions

File tree

twinkle-ansi/src/main/java/org/codejive/twinkle/util/SequenceIterator.java

Lines changed: 16 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -58,55 +58,22 @@ abstract class BaseSequenceIterator implements SequenceIterator {
5858
protected int currentWidth = 0;
5959

6060
protected boolean shouldBreak(int prev, int curr, int riCount) {
61-
if (isL(prev) && (isL(curr) || isV(curr) || isLV(curr) || isLVT(curr))) return false;
62-
if ((isLV(prev) || isV(prev)) && (isV(curr) || isT(curr))) return false;
63-
if ((isLVT(prev) || isT(prev)) && isT(curr)) return false;
61+
if (Unicode.isL(prev)
62+
&& (Unicode.isL(curr)
63+
|| Unicode.isV(curr)
64+
|| Unicode.isLV(curr)
65+
|| Unicode.isLVT(curr))) return false;
66+
if ((Unicode.isLV(prev) || Unicode.isV(prev)) && (Unicode.isV(curr) || Unicode.isT(curr)))
67+
return false;
68+
if ((Unicode.isLVT(prev) || Unicode.isT(prev)) && Unicode.isT(curr)) return false;
6469
int type = Character.getType(curr);
6570
if (type == Character.NON_SPACING_MARK
6671
|| type == Character.COMBINING_SPACING_MARK
6772
|| curr == 0x200D
6873
|| prev == 0x200D) return false;
69-
if (isRegionalIndicator(prev) && isRegionalIndicator(curr)) return (riCount % 2 == 0);
70-
return !(isPrepend(prev) || isVirama(prev));
71-
}
72-
73-
protected static boolean isRegionalIndicator(int cp) {
74-
return cp >= 0x1F1E6 && cp <= 0x1F1FF;
75-
}
76-
77-
protected static boolean isL(int cp) {
78-
return (cp >= 0x1100 && cp <= 0x115F);
79-
}
80-
81-
protected static boolean isV(int cp) {
82-
return (cp >= 0x1160 && cp <= 0x11A7);
83-
}
84-
85-
protected static boolean isT(int cp) {
86-
return (cp >= 0x11A8 && cp <= 0x11FF);
87-
}
88-
89-
protected static boolean isLV(int cp) {
90-
return (cp >= 0xAC00 && cp <= 0xD7A3 && (cp - 0xAC00) % 28 == 0);
91-
}
92-
93-
protected static boolean isLVT(int cp) {
94-
return (cp >= 0xAC00 && cp <= 0xD7A3 && (cp - 0xAC00) % 28 != 0);
95-
}
96-
97-
protected static boolean isVirama(int cp) {
98-
return (cp >= 0x094D && cp <= 0x0D4D && (cp & 0xFF) == 0x4D) || cp == 0x0D4D;
99-
}
100-
101-
protected static boolean isPrepend(int cp) {
102-
return cp == 0x0600
103-
|| cp == 0x0601
104-
|| cp == 0x0602
105-
|| cp == 0x0603
106-
|| cp == 0x0604
107-
|| cp == 0x0605
108-
|| cp == 0x06DD
109-
|| cp == 0x070F;
74+
if (Unicode.isRegionalIndicator(prev) && Unicode.isRegionalIndicator(curr))
75+
return (riCount % 2 == 0);
76+
return !(Unicode.isPrepend(prev) || Unicode.isVirama(prev));
11077
}
11178

11279
@Override
@@ -122,49 +89,12 @@ protected int calculateWidth(int cp) {
12289
return 0;
12390
}
12491

125-
if (isWide(cp)) {
92+
if (Unicode.isWide(cp)) {
12693
return 2;
12794
}
12895

12996
return 1;
13097
}
131-
132-
private boolean isWide(int cp) {
133-
// East Asian Wide (W) and Fullwidth (F)
134-
if ((cp >= 0x1100 && cp <= 0x115F)
135-
|| // Hangul Jamo
136-
(cp >= 0x2E80 && cp <= 0xA4CF && cp != 0x303F)
137-
|| // CJK Radicals, Symbols, Han
138-
(cp >= 0xAC00 && cp <= 0xD7A3)
139-
|| // Hangul Syllables
140-
(cp >= 0xF900 && cp <= 0xFAFF)
141-
|| // CJK Compatibility Ideographs
142-
(cp >= 0xFE10 && cp <= 0xFE19)
143-
|| // Vertical forms
144-
(cp >= 0xFE30 && cp <= 0xFE6F)
145-
|| // CJK Compatibility Forms
146-
(cp >= 0xFF00 && cp <= 0xFF60)
147-
|| // Fullwidth Forms
148-
(cp >= 0xFFE0 && cp <= 0xFFE6)) {
149-
return true;
150-
}
151-
152-
// Plane 2 and 3 (SIP/TIP) are almost entirely CJK Ideographs (Wide)
153-
if (cp >= 0x20000 && cp <= 0x3FFFD) {
154-
return true;
155-
}
156-
157-
// Common Emoji Presentation ranges (Simplified)
158-
// Includes Miscellaneous Symbols and Pictographs, Emoticons, Transport, etc.
159-
if ((cp >= 0x1F300 && cp <= 0x1F64F)
160-
|| (cp >= 0x1F680 && cp <= 0x1F6FF)
161-
|| (cp >= 0x1F900 && cp <= 0x1F9FF)
162-
|| (cp >= 0x1F200 && cp <= 0x1F2FF)) {
163-
return true;
164-
}
165-
166-
return false;
167-
}
16898
}
16999

170100
/**
@@ -243,7 +173,7 @@ private void primeNext() {
243173
}
244174
nextLeadCodePoint = '\n';
245175
} else {
246-
int riCount = isRegionalIndicator(cp) ? 1 : 0;
176+
int riCount = Unicode.isRegionalIndicator(cp) ? 1 : 0;
247177
int prevCp = cp;
248178

249179
while (cursor < length) {
@@ -257,7 +187,7 @@ private void primeNext() {
257187
}
258188

259189
cursor += Character.charCount(curr);
260-
riCount = isRegionalIndicator(curr) ? riCount + 1 : 0;
190+
riCount = Unicode.isRegionalIndicator(curr) ? riCount + 1 : 0;
261191
prevCp = curr;
262192
}
263193
}
@@ -382,7 +312,7 @@ private void primeNext() {
382312
}
383313
nextLeadCodePoint = NEWLINE;
384314
} else {
385-
int riCount = isRegionalIndicator(cp) ? 1 : 0;
315+
int riCount = Unicode.isRegionalIndicator(cp) ? 1 : 0;
386316
int prevCp = cp;
387317
while (true) {
388318
int curr = readCodePoint();
@@ -395,7 +325,7 @@ private void primeNext() {
395325
break;
396326
}
397327
currentSequence.append(Character.toChars(curr));
398-
riCount = isRegionalIndicator(curr) ? riCount + 1 : 0;
328+
riCount = Unicode.isRegionalIndicator(curr) ? riCount + 1 : 0;
399329
prevCp = curr;
400330
}
401331
}

twinkle-ansi/src/main/java/org/codejive/twinkle/util/Unicode.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,88 @@ public static int visibleWidth(CharSequence text) {
1010
}
1111
return width;
1212
}
13+
14+
/**
 * Reports whether a code point falls in one of the ranges this class treats as wide:
 * the {@link #isEmoji(int)} ranges, the {@link #isHangulWide(int)} ranges, the
 * CJK / fullwidth ranges listed below, or planes 2 and 3.
 *
 * @param cp the Unicode code point to test
 * @return {@code true} if {@code cp} lies in any of those ranges, {@code false} otherwise
 */
public static boolean isWide(int cp) {
15+
// Modern Pictographs and Enclosed Ideographs
16+
if (isEmoji(cp)) {
17+
return true;
18+
}
19+
20+
// Hangul (Leading Jamo and full Syllables)
21+
if (isHangulWide(cp)) {
22+
return true;
23+
}
24+
25+
// East Asian Wide (W) and Fullwidth (F)
26+
if ((cp >= 0x2E80 && cp <= 0xA4CF && cp != 0x303F)
27+
|| // CJK Radicals, Symbols, Han
28+
(cp >= 0xF900 && cp <= 0xFAFF)
29+
|| // CJK Compatibility Ideographs
30+
(cp >= 0xFE10 && cp <= 0xFE19)
31+
|| // Vertical forms
32+
(cp >= 0xFE30 && cp <= 0xFE6F)
33+
|| // CJK Compatibility Forms
34+
(cp >= 0xFF00 && cp <= 0xFF60)
35+
|| // Fullwidth Forms
36+
(cp >= 0xFFE0 && cp <= 0xFFE6)) {
37+
return true;
38+
}
39+
40+
// Plane 2 and 3 (SIP/TIP) are almost entirely CJK Ideographs (Wide)
41+
if (cp >= 0x20000 && cp <= 0x3FFFD) {
42+
return true;
43+
}
44+
45+
return false;
46+
}
47+
48+
public static boolean isEmoji(int cp) {
49+
return (cp >= 0x1F300 && cp <= 0x1F64F) // Misc Symbols and Pictographs, Emoticons
50+
|| (cp >= 0x1F680 && cp <= 0x1F6FF) // Transport and Map
51+
|| (cp >= 0x1F900 && cp <= 0x1F9FF) // Supplemental Symbols/Pictographs
52+
|| (cp >= 0x1F200 && cp <= 0x1F2FF); // Enclosed Ideographic Supplement
53+
}
54+
55+
public static boolean isHangulWide(int cp) {
56+
return isL(cp) || isLV(cp) || isLVT(cp);
57+
}
58+
59+
public static boolean isRegionalIndicator(int cp) {
60+
return cp >= 0x1F1E6 && cp <= 0x1F1FF;
61+
}
62+
63+
public static boolean isL(int cp) {
64+
return (cp >= 0x1100 && cp <= 0x115F);
65+
}
66+
67+
public static boolean isV(int cp) {
68+
return (cp >= 0x1160 && cp <= 0x11A7);
69+
}
70+
71+
public static boolean isT(int cp) {
72+
return (cp >= 0x11A8 && cp <= 0x11FF);
73+
}
74+
75+
public static boolean isLV(int cp) {
76+
return (cp >= 0xAC00 && cp <= 0xD7A3 && (cp - 0xAC00) % 28 == 0);
77+
}
78+
79+
public static boolean isLVT(int cp) {
80+
return (cp >= 0xAC00 && cp <= 0xD7A3 && (cp - 0xAC00) % 28 != 0);
81+
}
82+
83+
/**
 * Tests whether a code point is one of the virama signs recognised by this class.
 *
 * <p>The range-and-mask test accepts exactly U+094D, U+0A4D, U+0B4D, U+0C4D and U+0D4D
 * (code points in U+094D..U+0D4D whose low byte is 0x4D).
 *
 * <p>NOTE(review): the trailing {@code cp == 0x0D4D} clause is already covered by the
 * range-and-mask test, so it looks redundant. Viramas whose low byte is not 0x4D (for
 * example Bengali U+09CD) are not matched — confirm whether that is intended.
 *
 * @param cp the Unicode code point to test
 * @return {@code true} if {@code cp} is one of the code points listed above
 */
public static boolean isVirama(int cp) {
84+
return (cp >= 0x094D && cp <= 0x0D4D && (cp & 0xFF) == 0x4D) || cp == 0x0D4D;
85+
}
86+
87+
public static boolean isPrepend(int cp) {
88+
return cp == 0x0600
89+
|| cp == 0x0601
90+
|| cp == 0x0602
91+
|| cp == 0x0603
92+
|| cp == 0x0604
93+
|| cp == 0x0605
94+
|| cp == 0x06DD
95+
|| cp == 0x070F;
96+
}
1397
}

0 commit comments

Comments
 (0)