Skip to content

Commit f4f3e75

Browse files
authored
Add a cache for compiled regex patterns (#459)
cel-java does not yet have cel-go's optimization of compiling the regular expression passed to the 'matches' function once rather than on every invocation. This change overrides cel-java's built-in matches functions with versions backed by a pattern compilation cache (scoped to a Validator).
1 parent 9bcd060 commit f4f3e75

10 files changed

Lines changed: 136 additions & 43 deletions

File tree

benchmarks/src/jmh/java/build/buf/protovalidate/benchmarks/ValidationBenchmark.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import build.buf.protovalidate.Validator;
1818
import build.buf.protovalidate.ValidatorFactory;
1919
import build.buf.protovalidate.benchmarks.gen.ManyUnruledFieldsMessage;
20+
import build.buf.protovalidate.benchmarks.gen.RegexPatternMessage;
2021
import build.buf.protovalidate.benchmarks.gen.RepeatedRuleMessage;
2122
import build.buf.protovalidate.benchmarks.gen.SimpleStringMessage;
2223
import build.buf.protovalidate.exceptions.ValidationException;
@@ -40,6 +41,7 @@ public class ValidationBenchmark {
4041
private SimpleStringMessage simple;
4142
private ManyUnruledFieldsMessage manyUnruled;
4243
private RepeatedRuleMessage repeatedRule;
44+
private RegexPatternMessage regexPattern;
4345

4446
@Setup
4547
public void setup() throws ValidationException {
@@ -67,10 +69,13 @@ public void setup() throws ValidationException {
6769
}
6870
repeatedRule = repeatedRuleBuilder.build();
6971

72+
regexPattern = RegexPatternMessage.newBuilder().setName("Alice Example").build();
73+
7074
// Warm evaluator cache for steady-state benchmarks.
7175
validator.validate(simple);
7276
validator.validate(manyUnruled);
7377
validator.validate(repeatedRule);
78+
validator.validate(regexPattern);
7479
}
7580

7681
// Steady-state validate() benchmarks. These exercise the hot path after the
@@ -90,4 +95,9 @@ public void validateManyUnruled(Blackhole bh) throws ValidationException {
9095
public void validateRepeatedRule(Blackhole bh) throws ValidationException {
9196
bh.consume(validator.validate(repeatedRule));
9297
}
98+
99+
@Benchmark
100+
public void validateRegexPattern(Blackhole bh) throws ValidationException {
101+
bh.consume(validator.validate(regexPattern));
102+
}
93103
}

benchmarks/src/jmh/proto/bench/v1/bench.proto

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,13 @@ message RepeatedRuleMessage {
6969
string f19 = 19 [(buf.validate.field).string.min_len = 1];
7070
string f20 = 20 [(buf.validate.field).string.min_len = 1];
7171
}
72+
73+
// Single string field with a string.pattern rule. Targets the regex
74+
// recompile-per-evaluation cost: the stock CEL runtime's matches() calls
75+
// Pattern.compile on every invocation.
76+
message RegexPatternMessage {
77+
string name = 1 [(buf.validate.field).string = {
78+
pattern: "^[[:alpha:]]+( [[:alpha:]]+)*$"
79+
max_bytes: 256
80+
}];
81+
}

build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ dependencies {
398398
api(libs.jspecify)
399399
api(libs.protobuf.java)
400400
implementation(libs.cel)
401+
implementation(libs.re2j)
401402

402403
buf("build.buf:buf:${libs.versions.buf.get()}:${osdetector.classifier}@exe")
403404

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ error-prone = "2.49.0"
66
junit = "5.14.3"
77
maven-publish = "0.36.0"
88
protobuf = "4.34.1"
9+
re2j = "1.8"
910

1011
[libraries]
1112
assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" }
@@ -19,6 +20,7 @@ junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" }
1920
maven-plugin = { module = "com.vanniktech:gradle-maven-publish-plugin", version.ref = "maven-publish" }
2021
nullaway = { module = "com.uber.nullaway:nullaway", version = "0.13.3" }
2122
protobuf-java = { module = "com.google.protobuf:protobuf-java", version.ref = "protobuf" }
23+
re2j = { module = "com.google.re2j:re2j", version.ref = "re2j" }
2224
spotless = { module = "com.diffplug.spotless:spotless-plugin-gradle", version = "8.4.0" }
2325

2426
[plugins]

src/main/java/build/buf/protovalidate/CustomDeclarations.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,17 @@ static List<CelFunctionDecl> create() {
104104
newMemberOverload(
105105
"is_hostname", SimpleType.BOOL, Collections.singletonList(SimpleType.STRING))));
106106

107+
// Redeclare 'matches' with the same overload ids as the stdlib.
108+
decls.add(
109+
newFunctionDeclaration(
110+
"matches",
111+
newGlobalOverload(
112+
"matches", SimpleType.BOOL, Arrays.asList(SimpleType.STRING, SimpleType.STRING)),
113+
newMemberOverload(
114+
"matches_string",
115+
SimpleType.BOOL,
116+
Arrays.asList(SimpleType.STRING, SimpleType.STRING))));
117+
107118
decls.add(
108119
newFunctionDeclaration(
109120
"isHostAndPort",

src/main/java/build/buf/protovalidate/CustomOverload.java

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
import com.google.protobuf.Descriptors;
1818
import com.google.protobuf.Message;
19+
import com.google.re2j.Matcher;
20+
import com.google.re2j.Pattern;
21+
import com.google.re2j.PatternSyntaxException;
22+
import dev.cel.common.CelOptions;
1923
import dev.cel.common.types.CelType;
2024
import dev.cel.common.types.SimpleType;
2125
import dev.cel.common.values.CelByteString;
@@ -28,7 +32,7 @@
2832
import java.util.List;
2933
import java.util.Locale;
3034
import java.util.Set;
31-
import java.util.regex.Pattern;
35+
import java.util.concurrent.ConcurrentMap;
3236

3337
/** Defines custom function overloads (the implementation). */
3438
final class CustomOverload {
@@ -39,11 +43,14 @@ final class CustomOverload {
3943
"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$");
4044

4145
/**
42-
* Create custom function overload list.
46+
* Create a list of custom function overloads.
4347
*
48+
* @param patternCache cache used by the {@code matches}/{@code matches_string} overrides.
49+
* @param celOptions CEL options the enclosing runtime is built with.
4450
* @return a list of overloaded functions.
4551
*/
46-
static List<CelFunctionBinding> create() {
52+
static List<CelFunctionBinding> create(
53+
ConcurrentMap<String, Pattern> patternCache, CelOptions celOptions) {
4754
ArrayList<CelFunctionBinding> bindings = new ArrayList<>();
4855
bindings.addAll(
4956
Arrays.asList(
@@ -65,7 +72,9 @@ static List<CelFunctionBinding> create() {
6572
celIsNan(),
6673
celIsInfUnary(),
6774
celIsInfBinary(),
68-
celIsHostAndPort()));
75+
celIsHostAndPort(),
76+
celMatches(patternCache, celOptions),
77+
celMatchesString(patternCache, celOptions)));
6978
bindings.addAll(celUnique());
7079
return Collections.unmodifiableList(bindings);
7180
}
@@ -356,6 +365,41 @@ private static CelFunctionBinding celIsHostAndPort() {
356365
CustomOverload::isHostAndPort);
357366
}
358367

368+
/** Caching replacement for CEL's global {@code matches(string, string)}. */
369+
@SuppressWarnings("Immutable")
370+
private static CelFunctionBinding celMatches(
371+
ConcurrentMap<String, Pattern> patternCache, CelOptions celOptions) {
372+
return CelFunctionBinding.from(
373+
"matches",
374+
String.class,
375+
String.class,
376+
(value, regex) -> matches(patternCache, celOptions, value, regex));
377+
}
378+
379+
/** Caching replacement for CEL's member-style {@code string.matches(string)}. */
380+
@SuppressWarnings("Immutable")
381+
private static CelFunctionBinding celMatchesString(
382+
ConcurrentMap<String, Pattern> patternCache, CelOptions celOptions) {
383+
return CelFunctionBinding.from(
384+
"matches_string",
385+
String.class,
386+
String.class,
387+
(value, regex) -> matches(patternCache, celOptions, value, regex));
388+
}
389+
390+
private static boolean matches(
391+
ConcurrentMap<String, Pattern> cache, CelOptions celOptions, String value, String regex)
392+
throws CelEvaluationException {
393+
Pattern pattern;
394+
try {
395+
pattern = cache.computeIfAbsent(regex, Pattern::compile);
396+
} catch (PatternSyntaxException e) {
397+
throw new CelEvaluationException("failed to compile regex: " + e.getMessage(), e);
398+
}
399+
Matcher matcher = pattern.matcher(value);
400+
return celOptions.enableRegexPartialMatch() ? matcher.find() : matcher.matches();
401+
}
402+
359403
/**
360404
* Returns true if the string is a valid host/port pair, for example "example.com:8080".
361405
*

src/main/java/build/buf/protovalidate/ValidateLibrary.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,59 @@
1414

1515
package build.buf.protovalidate;
1616

17+
import com.google.re2j.Pattern;
18+
import dev.cel.bundle.Cel;
19+
import dev.cel.bundle.CelFactory;
1720
import dev.cel.checker.CelCheckerBuilder;
21+
import dev.cel.checker.CelStandardDeclarations;
22+
import dev.cel.common.CelOptions;
1823
import dev.cel.common.CelVarDecl;
1924
import dev.cel.common.types.SimpleType;
2025
import dev.cel.compiler.CelCompilerLibrary;
26+
import dev.cel.extensions.CelExtensions;
2127
import dev.cel.parser.CelParserBuilder;
2228
import dev.cel.parser.CelStandardMacro;
2329
import dev.cel.runtime.CelRuntimeBuilder;
2430
import dev.cel.runtime.CelRuntimeLibrary;
31+
import dev.cel.runtime.CelStandardFunctions;
32+
import java.util.concurrent.ConcurrentHashMap;
33+
import java.util.concurrent.ConcurrentMap;
2534

2635
/**
2736
* Custom {@link CelCompilerLibrary} and {@link CelRuntimeLibrary}. Provides all the custom
2837
* extension function definitions and overloads.
2938
*/
3039
final class ValidateLibrary implements CelCompilerLibrary, CelRuntimeLibrary {
3140

41+
private static final CelOptions CEL_OPTIONS = CelOptions.DEFAULT;
42+
43+
private final ConcurrentMap<String, Pattern> patternCache = new ConcurrentHashMap<>();
44+
3245
/** Creates a ValidateLibrary with all custom declarations and overloads. */
3346
ValidateLibrary() {}
3447

48+
static Cel newCel() {
49+
ValidateLibrary validateLibrary = new ValidateLibrary();
50+
// NOTE: CelExtensions.strings() does not implement string.reverse() or strings.quote() which
51+
// are available in protovalidate-go. Fixed in https://github.com/google/cel-java/pull/998.
52+
return CelFactory.standardCelBuilder()
53+
.setOptions(CEL_OPTIONS)
54+
// Drop stdlib matches; CustomOverload provides a caching replacement.
55+
// Ref: https://github.com/google/cel-java/issues/1038
56+
.setStandardEnvironmentEnabled(false)
57+
.setStandardDeclarations(
58+
CelStandardDeclarations.newBuilder()
59+
.excludeFunctions(CelStandardDeclarations.StandardFunction.MATCHES)
60+
.build())
61+
.setStandardFunctions(
62+
CelStandardFunctions.newBuilder()
63+
.excludeFunctions(CelStandardFunctions.StandardFunction.MATCHES)
64+
.build())
65+
.addCompilerLibraries(validateLibrary, CelExtensions.strings())
66+
.addRuntimeLibraries(validateLibrary, CelExtensions.strings())
67+
.build();
68+
}
69+
3570
@Override
3671
public void setParserOptions(CelParserBuilder parserBuilder) {
3772
parserBuilder.setStandardMacros(
@@ -54,6 +89,6 @@ public void setCheckerOptions(CelCheckerBuilder checkerBuilder) {
5489

5590
@Override
5691
public void setRuntimeOptions(CelRuntimeBuilder runtimeBuilder) {
57-
runtimeBuilder.addFunctionBindings(CustomOverload.create());
92+
runtimeBuilder.addFunctionBindings(CustomOverload.create(patternCache, CEL_OPTIONS));
5893
}
5994
}

src/main/java/build/buf/protovalidate/ValidatorImpl.java

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@
1818
import build.buf.protovalidate.exceptions.ValidationException;
1919
import com.google.protobuf.Descriptors.Descriptor;
2020
import com.google.protobuf.Message;
21-
import dev.cel.bundle.Cel;
22-
import dev.cel.bundle.CelFactory;
23-
import dev.cel.common.CelOptions;
24-
import dev.cel.extensions.CelExtensions;
2521
import java.util.ArrayList;
2622
import java.util.List;
2723

@@ -36,13 +32,14 @@ final class ValidatorImpl implements Validator {
3632
private final boolean failFast;
3733

3834
ValidatorImpl(Config config) {
39-
this.evaluatorBuilder = new EvaluatorBuilder(newCel(), config);
35+
this.evaluatorBuilder = new EvaluatorBuilder(ValidateLibrary.newCel(), config);
4036
this.failFast = config.isFailFast();
4137
}
4238

4339
ValidatorImpl(Config config, List<Descriptor> descriptors, boolean disableLazy)
4440
throws CompilationException {
45-
this.evaluatorBuilder = new EvaluatorBuilder(newCel(), config, descriptors, disableLazy);
41+
this.evaluatorBuilder =
42+
new EvaluatorBuilder(ValidateLibrary.newCel(), config, descriptors, disableLazy);
4643
this.failFast = config.isFailFast();
4744
}
4845

@@ -63,16 +60,4 @@ public ValidationResult validate(Message msg) throws ValidationException {
6360
}
6461
return new ValidationResult(violations);
6562
}
66-
67-
private static Cel newCel() {
68-
ValidateLibrary validateLibrary = new ValidateLibrary();
69-
// NOTE: CelExtensions.strings() does not implement string.reverse() or strings.quote() which
70-
// are available in protovalidate-go.
71-
return CelFactory.standardCelBuilder()
72-
.addCompilerLibraries(validateLibrary, CelExtensions.strings())
73-
.addRuntimeLibraries(validateLibrary, CelExtensions.strings())
74-
.setOptions(
75-
CelOptions.DEFAULT.toBuilder().evaluateCanonicalTypesToNativeValues(true).build())
76-
.build();
77-
}
7863
}

src/test/java/build/buf/protovalidate/CustomOverloadTest.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
import static org.assertj.core.api.Assertions.assertThatThrownBy;
1919

2020
import dev.cel.bundle.Cel;
21-
import dev.cel.bundle.CelFactory;
2221
import dev.cel.common.CelAbstractSyntaxTree;
23-
import dev.cel.common.CelOptions;
2422
import dev.cel.common.CelValidationException;
2523
import dev.cel.common.CelValidationResult;
2624
import dev.cel.runtime.CelEvaluationException;
@@ -31,14 +29,7 @@
3129

3230
public class CustomOverloadTest {
3331

34-
private final ValidateLibrary validateLibrary = new ValidateLibrary();
35-
private final Cel cel =
36-
CelFactory.standardCelBuilder()
37-
.addCompilerLibraries(validateLibrary)
38-
.addRuntimeLibraries(validateLibrary)
39-
.setOptions(
40-
CelOptions.DEFAULT.toBuilder().evaluateCanonicalTypesToNativeValues(true).build())
41-
.build();
32+
private final Cel cel = ValidateLibrary.newCel();
4233

4334
@Test
4435
public void testIsInf() throws Exception {
@@ -173,6 +164,19 @@ public void testBytesContains() throws Exception {
173164
assertThat(evalToBool("bytes('12345').contains(bytes('123456'))")).isFalse();
174165
}
175166

167+
@Test
168+
public void testMatchesPartialMatch() throws Exception {
169+
// CelOptions.DEFAULT sets enableRegexPartialMatch(true), so an unanchored regex should
170+
// match anywhere in the input (find()), not require a full-string match.
171+
assertThat(evalToBool("'hello world'.matches('world')")).isTrue();
172+
assertThat(evalToBool("'hello world'.matches('ell')")).isTrue();
173+
// Anchored patterns still behave the same.
174+
assertThat(evalToBool("'hello'.matches('^hello$')")).isTrue();
175+
assertThat(evalToBool("'hello world'.matches('^hello$')")).isFalse();
176+
// Global form.
177+
assertThat(evalToBool("matches('hello world', 'world')")).isTrue();
178+
}
179+
176180
private Object eval(String source) throws Exception {
177181
return eval(source, Collections.emptyMap());
178182
}

src/test/java/build/buf/protovalidate/FormatTest.java

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626
import com.google.protobuf.TextFormat;
2727
import dev.cel.bundle.Cel;
2828
import dev.cel.bundle.CelBuilder;
29-
import dev.cel.bundle.CelFactory;
30-
import dev.cel.common.CelOptions;
3129
import dev.cel.common.CelValidationException;
3230
import dev.cel.common.CelValidationResult;
3331
import dev.cel.common.types.SimpleType;
@@ -87,14 +85,7 @@ public static void setUp() throws Exception {
8785
.flatMap(s -> s.getTestList().stream())
8886
.collect(Collectors.toList());
8987

90-
ValidateLibrary validateLibrary = new ValidateLibrary();
91-
cel =
92-
CelFactory.standardCelBuilder()
93-
.addCompilerLibraries(validateLibrary)
94-
.addRuntimeLibraries(validateLibrary)
95-
.setOptions(
96-
CelOptions.DEFAULT.toBuilder().evaluateCanonicalTypesToNativeValues(true).build())
97-
.build();
88+
cel = ValidateLibrary.newCel();
9889
}
9990

10091
@ParameterizedTest

0 commit comments

Comments
 (0)