Skip to content

Commit 7ad961a

Browse files
committed
A faster implementation with regex.
b/478848482
1 parent ab8874f commit 7ad961a

5 files changed

Lines changed: 858 additions & 238 deletions

File tree

core/src/main/java/google/registry/flows/FeeExtensionXmlTagNormalizer.java

Lines changed: 64 additions & 224 deletions
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,19 @@
1414

1515
package google.registry.flows;
1616

17-
import static com.google.common.collect.ImmutableList.toImmutableList;
18-
import static com.google.common.collect.Streams.stream;
19-
import static java.nio.charset.StandardCharsets.UTF_8;
17+
import static google.registry.model.eppcommon.ProtocolDefinition.ServiceExtension.FEE_0_11;
18+
import static google.registry.model.eppcommon.ProtocolDefinition.ServiceExtension.FEE_0_12;
19+
import static google.registry.model.eppcommon.ProtocolDefinition.ServiceExtension.FEE_0_6;
20+
import static google.registry.model.eppcommon.ProtocolDefinition.ServiceExtension.FEE_1_00;
2021

21-
import com.google.common.base.CharMatcher;
22-
import com.google.common.collect.ImmutableList;
22+
import com.google.common.annotations.VisibleForTesting;
2323
import com.google.common.collect.ImmutableSet;
24-
import com.google.common.collect.Iterables;
25-
import com.google.common.flogger.FluentLogger;
26-
import java.io.ByteArrayInputStream;
27-
import java.io.ByteArrayOutputStream;
28-
import java.io.UnsupportedEncodingException;
29-
import java.util.Base64;
30-
import java.util.Iterator;
24+
import google.registry.model.eppcommon.EppXmlTransformer;
25+
import google.registry.model.eppcommon.ProtocolDefinition.ServiceExtension;
3126
import java.util.Optional;
32-
import java.util.Set;
27+
import java.util.regex.Matcher;
28+
import java.util.regex.Pattern;
3329
import java.util.stream.Collectors;
34-
import javax.xml.namespace.QName;
35-
import javax.xml.stream.XMLEventFactory;
36-
import javax.xml.stream.XMLEventReader;
37-
import javax.xml.stream.XMLEventWriter;
38-
import javax.xml.stream.XMLInputFactory;
39-
import javax.xml.stream.XMLOutputFactory;
40-
import javax.xml.stream.XMLStreamException;
41-
import javax.xml.stream.events.Attribute;
42-
import javax.xml.stream.events.EndElement;
43-
import javax.xml.stream.events.Namespace;
44-
import javax.xml.stream.events.StartElement;
45-
import javax.xml.stream.events.XMLEvent;
4630

4731
/**
4832
* Normalizes Fee extension namespace tags in EPP XML response messages.
@@ -53,236 +37,92 @@
5337
*
5438
* <p>Some registrars are not XML namespace-aware and rely on the XML tags being specific literals.
5539
* This makes it difficult to perform seamless rollout of new versions: if Nomulus reassigns a tag
56-
* literal to a different version, it effectively forces all these registrars to upgrade.
40+
* literal to a different version, it effectively forces all these registrars to upgrade at the time
41+
* of the deployment.
5742
*
5843
* <p>This class can be used to normalize the namespace tag in EPP responses. Since every response
5944
* message may use at most one version of the Fee extension, we can remove declared but unused
6045
* versions from the message, thus freeing up the canonical tag ('fee') for the active version.
6146
*/
6247
public class FeeExtensionXmlTagNormalizer {
6348

64-
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
65-
6649
// So far we only have Fee extensions to process
6750
private static final String CANONICAL_FEE_TAG = "fee";
68-
private static final ImmutableSet FEE_EXTENSIONS =
69-
ImmutableSet.of(
70-
"urn:ietf:params:xml:ns:fee-0.6",
71-
"urn:ietf:params:xml:ns:fee-0.11",
72-
"urn:ietf:params:xml:ns:fee-0.12",
73-
"urn:ietf:params:xml:ns:epp:fee-1.0");
74-
75-
private static final XMLInputFactory XML_INPUT_FACTORY = createXmlInputFactory();
76-
private static final XMLOutputFactory XML_OUTPUT_FACTORY = XMLOutputFactory.newFactory();
77-
private static final XMLEventFactory XML_EVENT_FACTORY = XMLEventFactory.newFactory();
78-
79-
/**
80-
* Returns an EPP XML message with normalized Fee extension tags.
81-
*
82-
* <p>The output always begins with version and encoding declarations no matter if the input
83-
* includes them. If encoding is not declared by input, UTF-8 will be used according to XML
84-
* standard.
85-
*/
86-
public static String normalizeFeeExtensionTag(byte[] inputXmlBytes) {
87-
try {
88-
// Keep exactly one newline at end of sanitized string.
89-
return CharMatcher.whitespace().trimTrailingFrom(normalize(inputXmlBytes)) + "\n";
90-
} catch (XMLStreamException | UnsupportedEncodingException e) {
91-
logger.atWarning().withCause(e).log("Failed to sanitize EPP XML message.");
92-
return Base64.getMimeEncoder().encodeToString(inputXmlBytes);
93-
}
94-
}
95-
96-
private static String normalize(byte[] inputXmlBytes)
97-
throws XMLStreamException, UnsupportedEncodingException {
98-
ParseResults parseResults = findFeeExtensionInUse(inputXmlBytes);
9951

100-
if (parseResults.feeExtensionInUse.isEmpty()) {
101-
// Fee extension not present. Return as is.
102-
return new String(inputXmlBytes, UTF_8);
103-
}
104-
105-
ByteArrayOutputStream outputXmlBytes = new ByteArrayOutputStream();
106-
XMLEventWriter xmlEventWriter =
107-
XML_OUTPUT_FACTORY.createXMLEventWriter(outputXmlBytes, UTF_8.name());
52+
private static final ImmutableSet<ServiceExtension> FEE_EXTENSIONS =
53+
ImmutableSet.of(FEE_0_6, FEE_0_11, FEE_0_12, FEE_1_00);
10854

109-
for (XMLEvent event : parseResults.xmlEvents()) {
110-
xmlEventWriter.add(normalizeXmlEvent(event, parseResults.feeExtensionInUse));
111-
// Most standard Java StAX implementations omit the content between the XML header and the
112-
// root element. Add a "\n" between them to improve readability.
113-
if (event.isStartDocument()) {
114-
xmlEventWriter.add(XML_EVENT_FACTORY.createCharacters("\n"));
115-
}
116-
}
55+
private static final Pattern FEE_EXTENSION_IN_USE_PATTERN =
56+
Pattern.compile(feeExtensionInUseRegex());
11757

118-
xmlEventWriter.flush();
119-
return outputXmlBytes.toString(UTF_8);
58+
@VisibleForTesting
59+
static String feeExtensionInUseRegex() {
60+
return FEE_EXTENSIONS.stream()
61+
.map(ServiceExtension::getXmlTag)
62+
.map(tag -> String.format("\\b(%s):", tag))
63+
.collect(Collectors.joining("|"));
12064
}
12165

12266
/**
123-
* Holds intermediate results during XML processing.
124-
*
125-
* @param feeExtensionInUse The fee extension namespace URI in the EPP response, if found
126-
* @param xmlEvents The parsed XML objects found in a pass, saved for reuse
127-
*/
128-
private record ParseResults(
129-
Optional<String> feeExtensionInUse, ImmutableList<XMLEvent> xmlEvents) {}
130-
131-
/**
132-
* Makes one pass of the input XML and returns the parsed data and the Fee extension in use.
67+
* Returns an EPP response that uses the canonical tag ({@code fee}) for the fee extension.
13368
*
134-
* <p>Each XML message should use at most one Fee extension. This method returns it if found. The
135-
* {@link XMLEvent} objects returned by the parser are also saved for reuse.
69+
* <p>This method replaces any versioned tag, e.g., {@code fee12} with the canonical tag. It also
70+
* removes unused namespace declarations and updates the tag in the remaining declaration.
13671
*
137-
* @throws IllegalArgumentException if more than one Fee extension version is found
72+
* <p>The input {@code xml} must be an EPP response message generated by the {@link
73+
* EppXmlTransformer}. With this assumption, we can use regular expressions, which are 10X faster
74+
* than XML stream parsers.
13875
*/
139-
private static ParseResults findFeeExtensionInUse(byte[] inputXmlBytes)
140-
throws XMLStreamException {
141-
XMLEventReader xmlEventReader =
142-
XML_INPUT_FACTORY.createXMLEventReader(new ByteArrayInputStream(inputXmlBytes));
143-
144-
ImmutableList.Builder<XMLEvent> eventBuffer = new ImmutableList.Builder<>();
145-
Optional<String> feeExtensionInUse = Optional.empty();
146-
147-
// Make one pass through the message to identify the Fee extension in use.
148-
while (xmlEventReader.hasNext()) {
149-
XMLEvent xmlEvent = xmlEventReader.nextEvent();
150-
Optional<String> eventFeeExtensionUri = getXmlEventFeeExtensionUri(xmlEvent);
151-
152-
if (feeExtensionInUse.isEmpty()) {
153-
feeExtensionInUse = eventFeeExtensionUri;
154-
} else if (eventFeeExtensionUri.isPresent()
155-
&& !feeExtensionInUse.equals(eventFeeExtensionUri)) {
156-
throw new IllegalArgumentException(
157-
String.format(
158-
"Expecting one Fee extension, found two: %s -- %s",
159-
feeExtensionInUse, eventFeeExtensionUri.get()));
160-
}
161-
eventBuffer.add(xmlEvent);
162-
}
163-
return new ParseResults(feeExtensionInUse, eventBuffer.build());
164-
}
165-
166-
private static XMLEvent normalizeXmlEvent(XMLEvent xmlEvent, Optional<String> feeExtensionInUse) {
167-
if (xmlEvent.isStartElement()) {
168-
return normalizeStartElement(xmlEvent.asStartElement(), feeExtensionInUse);
169-
} else if (xmlEvent.isEndElement()) {
170-
return normalizeEndElement(xmlEvent.asEndElement(), feeExtensionInUse);
171-
} else {
172-
return xmlEvent;
76+
public static String normalize(String xml) {
77+
Optional<String> maybeFeeTagInUse = findFeeExtensionInUse(xml);
78+
if (maybeFeeTagInUse.isEmpty()) {
79+
return xml;
17380
}
174-
}
175-
176-
private static Optional<String> getXmlEventFeeExtensionUri(XMLEvent xmlEvent) {
177-
if (xmlEvent.isStartElement()) {
178-
return getFeeExtensionUri(xmlEvent.asStartElement());
179-
}
180-
if (xmlEvent.isEndElement()) {
181-
String extension = xmlEvent.asEndElement().getName().getNamespaceURI();
182-
if (FEE_EXTENSIONS.contains(extension)) {
183-
return Optional.of(extension);
81+
String feeTagInUse = maybeFeeTagInUse.get();
82+
String normalized = xml;
83+
for (ServiceExtension serviceExtension : FEE_EXTENSIONS) {
84+
if (serviceExtension.getXmlTag().equals(feeTagInUse)) {
85+
normalized = normalizeExtensionInUse(feeTagInUse, serviceExtension.getUri(), normalized);
86+
} else {
87+
normalized =
88+
removeUnusedExtension(
89+
serviceExtension.getXmlTag(), serviceExtension.getUri(), normalized);
18490
}
18591
}
186-
return Optional.empty();
187-
}
188-
189-
private static Optional<String> getFeeExtensionUri(StartElement startElement) {
190-
Set<String> attrs =
191-
stream(startElement.asStartElement().getAttributes())
192-
.map(Attribute::getName)
193-
.map(FeeExtensionXmlTagNormalizer::getFeeExtensionUri)
194-
.flatMap(Optional::stream)
195-
.collect(Collectors.toSet());
196-
var qName = startElement.asStartElement().getName();
197-
if (FEE_EXTENSIONS.contains(qName.getNamespaceURI())) {
198-
attrs.add(qName.getNamespaceURI());
199-
}
200-
if (attrs.size() > 1) {
201-
throw new IllegalArgumentException("Multiple Fee extension in use: " + attrs);
202-
}
203-
if (attrs.isEmpty()) {
204-
return Optional.empty();
205-
}
206-
// attrs.size == 1
207-
return Optional.of(Iterables.getOnlyElement(attrs));
208-
}
209-
210-
private static Optional<String> getFeeExtensionUri(QName name) {
211-
String extensionUri = name.getNamespaceURI();
212-
if (FEE_EXTENSIONS.contains(extensionUri)) {
213-
return Optional.of(extensionUri);
214-
}
215-
return Optional.empty();
92+
return normalized;
21693
}
21794

218-
private static XMLEvent normalizeStartElement(
219-
StartElement startElement, Optional<String> feeExtensionInUse) {
220-
QName name = normalizeName(startElement.getName());
221-
ImmutableList<Namespace> namespaces =
222-
normalizeNamespaces(startElement.getNamespaces(), feeExtensionInUse);
223-
ImmutableList<Attribute> attributes = normalizeAttributes(startElement.getAttributes());
224-
225-
return XML_EVENT_FACTORY.createStartElement(name, attributes.iterator(), namespaces.iterator());
226-
}
227-
228-
private static XMLEvent normalizeEndElement(
229-
EndElement endElement, Optional<String> feeExtensionInUse) {
230-
QName name = normalizeName(endElement.getName());
231-
ImmutableList<Namespace> namespaces =
232-
normalizeNamespaces(endElement.getNamespaces(), feeExtensionInUse);
233-
234-
return XML_EVENT_FACTORY.createEndElement(name, namespaces.iterator());
95+
static String removeUnusedExtension(String tag, String uri, String xml) {
96+
String declaration = String.format("xmlns:%s=\"%s\"", tag, uri);
97+
// There must be a leading whitespace, and it can be safely removed with the declaration.
98+
return xml.replaceAll(String.format("\\s%s", declaration), "");
23599
}
236100

237-
private static QName normalizeName(QName name) {
238-
if (!FEE_EXTENSIONS.contains(name.getNamespaceURI())
239-
|| name.getPrefix().equals(CANONICAL_FEE_TAG)) {
240-
return name;
101+
static String normalizeExtensionInUse(String tagInUse, String uriInUse, String xml) {
102+
if (tagInUse.equals(CANONICAL_FEE_TAG)) {
103+
return xml;
241104
}
242-
return new QName(name.getNamespaceURI(), name.getLocalPart(), CANONICAL_FEE_TAG);
105+
// Change the tag in the namespace declaration:
106+
String currentDeclaration = String.format("xmlns:%s=\"%s\"", tagInUse, uriInUse);
107+
String desiredDeclaraion = String.format("xmlns:fee=\"%s\"", uriInUse);
108+
// The new tag at each site of use, with trailing colon:
109+
String newTagWithColon = CANONICAL_FEE_TAG + ":";
110+
return xml.replaceAll(String.format("\\b%s:", tagInUse), newTagWithColon)
111+
.replaceAll(currentDeclaration, desiredDeclaraion);
243112
}
244113

245-
private static Attribute normalizeAttribute(Attribute attribute) {
246-
QName name = normalizeName(attribute.getName());
247-
return XML_EVENT_FACTORY.createAttribute(name, attribute.getValue());
248-
}
114+
static Optional<String> findFeeExtensionInUse(String xml) {
115+
Matcher matcher = FEE_EXTENSION_IN_USE_PATTERN.matcher(xml);
249116

250-
private static Optional<Namespace> normalizeNamespace(
251-
Namespace namespace, Optional<String> feeExtensionInUse) {
252-
var extension = namespace.getNamespaceURI();
253-
if (!FEE_EXTENSIONS.contains(extension)) {
254-
return Optional.of(namespace);
117+
if (!matcher.find()) {
118+
return Optional.empty();
255119
}
256-
if (feeExtensionInUse.isPresent() && extension.equals(feeExtensionInUse.get())) {
257-
if (namespace.getPrefix().equals(CANONICAL_FEE_TAG)) {
258-
return Optional.of(namespace);
120+
// We know only one extension is in use, so we can return on the first match
121+
for (int i = 1; i <= matcher.groupCount(); i++) {
122+
if (matcher.group(i) != null) {
123+
return Optional.of(matcher.group(i));
259124
}
260-
return Optional.of(XML_EVENT_FACTORY.createNamespace(CANONICAL_FEE_TAG, extension));
261125
}
262-
return Optional.empty();
263-
}
264-
265-
private static ImmutableList<Attribute> normalizeAttributes(Iterator<Attribute> attributes) {
266-
return stream(attributes).map(attr -> normalizeAttribute(attr)).collect(toImmutableList());
267-
}
268-
269-
private static ImmutableList<Namespace> normalizeNamespaces(
270-
Iterator<Namespace> namespaces, Optional<String> feeExtensionInUse) {
271-
return stream(namespaces)
272-
.map(namespace -> normalizeNamespace(namespace, feeExtensionInUse))
273-
.flatMap(Optional::stream)
274-
.collect(toImmutableList());
275-
}
276-
277-
private static XMLInputFactory createXmlInputFactory() {
278-
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
279-
// Coalesce adjacent data, so that all chars in a string will be grouped as one item.
280-
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, true);
281-
// Preserve Name Space information.
282-
xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, true);
283-
// Prevent XXE attacks.
284-
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
285-
xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
286-
return xmlInputFactory;
126+
throw new IllegalStateException("Should not reach here. Bad FEE_EXTENSION_IN_USE_PATTERN?");
287127
}
288128
}

core/src/main/java/google/registry/model/eppcommon/ProtocolDefinition.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import static com.google.common.collect.Maps.uniqueIndex;
1919

2020
import com.google.common.annotations.VisibleForTesting;
21+
import com.google.common.base.VerifyException;
2122
import com.google.common.collect.ImmutableMap;
2223
import com.google.common.collect.ImmutableSet;
2324
import google.registry.model.domain.fee06.FeeCheckCommandExtensionV06;
@@ -87,6 +88,7 @@ public enum ServiceExtension {
8788
private final Class<? extends CommandExtension> commandExtensionClass;
8889
private final Class<? extends ResponseExtension> responseExtensionClass;
8990
private final String uri;
91+
private final String xmlTag;
9092
private final ServiceExtensionVisibility visibility;
9193

9294
ServiceExtension(
@@ -96,6 +98,7 @@ public enum ServiceExtension {
9698
this.commandExtensionClass = commandExtensionClass;
9799
this.responseExtensionClass = responseExtensionClass;
98100
this.uri = getCommandExtensionUri(commandExtensionClass);
101+
this.xmlTag = getCommandExtensionXmlTag(commandExtensionClass);
99102
this.visibility = visibility;
100103
}
101104

@@ -111,11 +114,27 @@ public String getUri() {
111114
return uri;
112115
}
113116

117+
public String getXmlTag() {
118+
return xmlTag;
119+
}
120+
114121
/** Returns the namespace URI of the command extension class. */
115122
public static String getCommandExtensionUri(Class<? extends CommandExtension> clazz) {
116123
return clazz.getPackage().getAnnotation(XmlSchema.class).namespace();
117124
}
118125

126+
/** Returns the XML tag for this extension in the response message. */
127+
public static String getCommandExtensionXmlTag(Class<? extends CommandExtension> clazz) {
128+
var xmlSchema = clazz.getPackage().getAnnotation(XmlSchema.class);
129+
var xmlns = xmlSchema.xmlns();
130+
if (xmlns == null || xmlns.length != 1) {
131+
throw new VerifyException(
132+
String.format(
133+
"Expecting exactly one NS declaration in %s", clazz.getPackage().getName()));
134+
}
135+
return xmlns[0].prefix();
136+
}
137+
119138
public boolean isVisible() {
120139
return switch (visibility) {
121140
case ALL -> true;

0 commit comments

Comments
 (0)