-
Notifications
You must be signed in to change notification settings - Fork 234
Expand file tree
/
Copy pathTrie.java
More file actions
194 lines (176 loc) · 6.07 KB
/
Trie.java
File metadata and controls
194 lines (176 loc) · 6.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.annotation.Nullable;
/**
 * A trie used to separate punctuation tokens in a run of non-whitespace
 * characters by preferring the longest punctuation string possible in a
 * greedy left-to-right scan.
 *
 * <p>Each node keeps its outgoing edges as two parallel arrays: the edge
 * characters in ascending order, and the child reached via each of them, so
 * that a child can be found with a binary search.
 *
 * @param <T> the type of the value associated with each complete string.
 * @author Mike Samuel (mikesamuel@gmail.com)
 */
final class Trie<T> {
  /** Edge characters leading to {@link #children}, in ascending order. */
  private final char[] childMap;
  /** {@code children[i]} is the subtree reached via {@code childMap[i]}. */
  private final Trie<T>[] children;
  /** True iff the path from the root to this node spells a complete key. */
  private final boolean terminal;
  /** The value mapped to this node's key; always null for non-terminals. */
  private final @Nullable T value;

  /**
   * Builds a trie that contains every key of {@code elements} and associates
   * the node for each key with the corresponding map value.
   *
   * @param elements non null.  An empty map yields a non-terminal trie that
   *     has no children.
   */
  public Trie(Map<String, T> elements) {
    this(sortedUniqEntries(elements), 0);
  }

  /** Builds the node at the given depth that covers all of the entries. */
  private Trie(List<Map.Entry<String, T>> elements, int depth) {
    this(elements, depth, 0, elements.size());
  }

  /**
   * Builds the node for the entries in {@code [start, end)}.  Those entries
   * must be sorted by key and their keys must agree on the first
   * {@code depth} characters.
   *
   * @param elements non null.  Not modified.
   * @param depth the depth in the tree.
   * @param start an index into elements of the first string in this
   *   subtree.
   * @param end an index into elements past the last string in this
   *   subtree.
   */
  @SuppressWarnings("unchecked")
  private Trie(
      List<Map.Entry<String, T>> elements, int depth,
      int start, int end) {
    if (start >= end) {
      // An empty range has no first entry to inspect, so build a
      // non-terminal node without children instead of indexing out of bounds.
      this.terminal = false;
      this.value = null;
      this.childMap = ZERO_CHARS;
      this.children = (Trie<T>[]) ZERO_TRIES;
      return;
    }

    int pos = start;
    // The entries are sorted, so a key that ends exactly at this depth (it
    // equals the shared prefix) can only be the first entry of the range.
    this.terminal = depth == elements.get(pos).getKey().length();
    if (this.terminal) {
      this.value = elements.get(pos).getValue();
      if (pos + 1 == end) {  // base case: a leaf
        this.childMap = ZERO_CHARS;
        this.children = (Trie<T>[]) ZERO_TRIES;
        return;
      }
      ++pos;  // The remaining entries all extend past this node.
    } else {
      this.value = null;
    }

    // First pass: count the distinct characters at this depth so that the
    // parallel arrays can be allocated with their exact size.
    int childCount = 0;
    {
      int last = -1;  // Not a char value, so the first character counts.
      for (int i = pos; i < end; ++i) {
        char ch = elements.get(i).getKey().charAt(depth);
        if (ch != last) {
          ++childCount;
          last = ch;
        }
      }
    }
    this.childMap = new char[childCount];
    this.children = new Trie[childCount];

    // Second pass: each run of entries that share the character at this depth
    // becomes one child subtree.
    int childStart = pos;
    int childIndex = 0;
    char lastCh = elements.get(pos).getKey().charAt(depth);
    for (int i = pos + 1; i < end; ++i) {
      char ch = elements.get(i).getKey().charAt(depth);
      if (ch != lastCh) {
        childMap[childIndex] = lastCh;
        children[childIndex++] = new Trie<>(
            elements, depth + 1, childStart, i);
        childStart = i;
        lastCh = ch;
      }
    }
    // The last run is not followed by a differing character, so close it here.
    childMap[childIndex] = lastCh;
    children[childIndex] = new Trie<>(elements, depth + 1, childStart, end);
  }

  /** Does this node correspond to a complete string in the input set. */
  public boolean isTerminal() { return terminal; }

  /**
   * The value the input map associated with the string for this node.
   * @return null if this node is not terminal, or if the input map's value
   *     for the string was null.
   */
  public @Nullable T getValue() { return value; }

  /**
   * The child corresponding to the given character.
   * @return null if no such trie.
   */
  public @Nullable Trie<T> lookup(char ch) {
    int i = Arrays.binarySearch(childMap, ch);
    return i >= 0 ? children[i] : null;
  }

  /**
   * The descendant of this trie corresponding to the string for this trie
   * appended with s.
   * @param s non null.
   * @return null if no such trie.
   */
  public @Nullable Trie<T> lookup(CharSequence s) {
    Trie<T> t = this;
    for (int i = 0, n = s.length(); i < n; ++i) {
      t = t.lookup(s.charAt(i));
      if (t == null) { return null; }
    }
    return t;
  }

  /** True iff this node has a child for the given character. */
  public boolean contains(char ch) {
    return Arrays.binarySearch(childMap, ch) >= 0;
  }

  /**
   * The entries of m ordered by key.  The copy into a {@link TreeMap} sorts
   * the keys, and a map cannot hold duplicate keys.
   */
  private static <U> List<Map.Entry<String, U>> sortedUniqEntries(
      Map<String, U> m) {
    return new ArrayList<>(
        new TreeMap<>(m).entrySet());
  }

  /** Shared by all nodes that have no children. */
  private static final char[] ZERO_CHARS = new char[0];
  /** Shared by all nodes that have no children. */
  private static final Trie<?>[] ZERO_TRIES = new Trie<?>[0];

  /**
   * Append all strings s such that {@code this.lookup(s).isTerminal()} to the
   * given list in lexical order.
   */
  public void toStringList(List<String> strings) {
    toStringList("", strings);
  }

  /**
   * Appends prefix if this node is terminal, and then the strings of each
   * child subtree, visiting the children in {@code childMap} order.
   */
  private void toStringList(String prefix, List<String> strings) {
    if (terminal) { strings.add(prefix); }
    for (int i = 0, n = childMap.length; i < n; ++i) {
      children[i].toStringList(prefix + childMap[i], strings);
    }
  }

  /**
   * A multi-line dump of the tree with one line per node.  Each child is
   * indented by one tab per level and labelled with its quoted edge character.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    toStringBuilder(0, sb);
    return sb.toString();
  }

  /** Appends the dump of this node, which is at the given depth, to sb. */
  private void toStringBuilder(int depth, StringBuilder sb) {
    sb.append(terminal ? "terminal" : "nonterminal");
    int childDepth = depth + 1;
    for (int i = 0; i < childMap.length; ++i) {
      sb.append('\n');
      for (int d = 0; d < childDepth; ++d) {
        sb.append('\t');
      }
      sb.append('\'').append(childMap[i]).append("' ");
      children[i].toStringBuilder(childDepth, sb);
    }
  }
}