Skip to content

Commit c320c60

Browse files
committed
Address review comments
1 parent b136f4c commit c320c60

5 files changed

Lines changed: 85 additions & 52 deletions

File tree

api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
3838
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
3939
import org.apache.iceberg.transforms.Transforms;
40-
import org.apache.iceberg.types.Type;
4140
import org.apache.iceberg.types.Types;
4241
import org.apache.iceberg.util.DateTimeUtil;
4342
import org.apache.iceberg.variants.PhysicalType;
@@ -306,15 +305,13 @@ public <T> Expression predicate(BoundPredicate<T> pred) {
306305
} else if (pred.isLiteralPredicate()) {
307306
BoundLiteralPredicate<T> bound = (BoundLiteralPredicate<T>) pred;
308307
return new UnboundPredicate<>(
309-
pred.op(),
310-
unbind(pred.term()),
311-
(T) sanitize(bound.term().type(), bound.literal(), now, today));
308+
pred.op(), unbind(pred.term()), (T) sanitize(bound.literal(), now, today));
312309
} else if (pred.isSetPredicate()) {
313310
BoundSetPredicate<T> bound = (BoundSetPredicate<T>) pred;
314311
Iterable<T> iter =
315312
() ->
316313
bound.literalSet().stream()
317-
.map(lit -> (T) sanitize(bound.term().type(), lit, now, today))
314+
.map(lit -> (T) sanitize((Literal<?>) lit, now, today))
318315
.iterator();
319316
return new UnboundPredicate<>(pred.op(), unbind(pred.term()), iter);
320317
}
@@ -393,7 +390,8 @@ public String or(String leftResult, String rightResult) {
393390
}
394391

395392
private String value(BoundLiteralPredicate<?> pred) {
396-
return sanitize(pred.term().type(), pred.literal().value(), nowMicros, today);
393+
// return sanitize(pred.term().type(), pred.literal().value(), nowMicros, today);
394+
return sanitize(pred.literal(), nowMicros, today);
397395
}
398396

399397
@Override
@@ -425,7 +423,7 @@ public <T> String predicate(BoundPredicate<T> pred) {
425423
+ " IN "
426424
+ abbreviateValues(
427425
pred.asSetPredicate().literalSet().stream()
428-
.map(lit -> sanitize(pred.term().type(), lit, nowMicros, today))
426+
.map(lit -> sanitize((Literal<?>) lit, nowMicros, today))
429427
.collect(Collectors.toList()))
430428
.stream()
431429
.collect(Collectors.joining(", ", "(", ")"));
@@ -434,7 +432,7 @@ public <T> String predicate(BoundPredicate<T> pred) {
434432
+ " NOT IN "
435433
+ abbreviateValues(
436434
pred.asSetPredicate().literalSet().stream()
437-
.map(lit -> sanitize(pred.term().type(), lit, nowMicros, today))
435+
.map(lit -> sanitize((Literal<?>) lit, nowMicros, today))
438436
.collect(Collectors.toList()))
439437
.stream()
440438
.collect(Collectors.joining(", ", "(", ")"));
@@ -529,47 +527,6 @@ private static List<String> abbreviateValues(List<String> sanitizedValues) {
529527
return sanitizedValues;
530528
}
531529

532-
private static String sanitize(Type type, Literal<?> lit, long now, int today) {
533-
return sanitize(type, lit.value(), now, today);
534-
}
535-
536-
private static String sanitize(Type type, Object value, long now, int today) {
537-
switch (type.typeId()) {
538-
case INTEGER:
539-
case LONG:
540-
return sanitizeNumber((Number) value, "int");
541-
case FLOAT:
542-
case DOUBLE:
543-
return sanitizeNumber((Number) value, "float");
544-
case DATE:
545-
return sanitizeDate((int) value, today);
546-
case TIME:
547-
return "(time)";
548-
case TIMESTAMP:
549-
return sanitizeTimestamp((long) value, now);
550-
case TIMESTAMP_NANO:
551-
return sanitizeTimestamp(DateTimeUtil.nanosToMicros((long) value / 1000), now);
552-
case STRING:
553-
return sanitizeString((CharSequence) value, now, today);
554-
case VARIANT:
555-
return sanitizeVariant((Variant) value, now, today);
556-
case UNKNOWN:
557-
return "(unknown)";
558-
case BOOLEAN:
559-
case UUID:
560-
case DECIMAL:
561-
case FIXED:
562-
case BINARY:
563-
// for boolean, uuid, decimal, fixed, unknown, and binary, match the string result
564-
return sanitizeSimpleString(value.toString());
565-
case GEOMETRY:
566-
case GEOGRAPHY:
567-
return "(geospatial)";
568-
}
569-
throw new UnsupportedOperationException(
570-
String.format("Cannot sanitize value for unsupported type %s: %s", type, value));
571-
}
572-
573530
private static String sanitize(Literal<?> literal, long now, int today) {
574531
if (literal instanceof Literals.StringLiteral) {
575532
return sanitizeString(((Literals.StringLiteral) literal).value(), now, today);
@@ -593,7 +550,7 @@ private static String sanitize(Literal<?> literal, long now, int today) {
593550
} else if (literal instanceof Literals.VariantLiteral) {
594551
return sanitizeVariant(((Literals.VariantLiteral) literal).value(), now, today);
595552
} else if (literal instanceof BoundingBoxLiteral) {
596-
return "(geospatial)";
553+
return "(boundingbox)";
597554
} else {
598555
// for uuid, decimal, fixed and binary, match the string result
599556
return sanitizeSimpleString(literal.value().toString());

api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ public <T> R predicate(BoundPredicate<T> pred) {
350350
"Invalid operation for BoundSetPredicate: " + pred.op());
351351
}
352352
}
353+
353354
throw new IllegalStateException("Unsupported bound predicate: " + pred.getClass().getName());
354355
}
355356

api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,13 +1347,13 @@ public void testSanitizeGeospatialPredicates(Expression.Operation operation, Str
13471347

13481348
UnboundPredicate<ByteBuffer> geoPredicate =
13491349
Expressions.geospatialPredicate(operation, columnName, bbox);
1350-
Expression predicateSanitized = Expressions.predicate(operation, columnName, "(geospatial)");
1350+
Expression predicateSanitized = Expressions.predicate(operation, columnName, "(boundingbox)");
13511351
assertEquals(predicateSanitized, ExpressionUtil.sanitize(geoPredicate));
13521352
assertEquals(predicateSanitized, ExpressionUtil.sanitize(geoStruct, geoPredicate, true));
13531353

13541354
String opString = operation.name();
13551355
String expectedSanitizedString =
1356-
opString.toLowerCase(Locale.ROOT) + "(" + columnName + ", (geospatial))";
1356+
opString.toLowerCase(Locale.ROOT) + "(" + columnName + ", (boundingbox))";
13571357

13581358
assertThat(ExpressionUtil.toSanitizedString(geoPredicate))
13591359
.as("Sanitized string should be identical for geospatial predicates")

open-api/rest-catalog-open-api.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ class ExpressionType(BaseModel):
134134
'not-null',
135135
'is-nan',
136136
'not-nan',
137+
'st-intersects',
138+
'st-disjoint',
137139
],
138140
)
139141

@@ -150,6 +152,11 @@ class FalseExpression(BaseModel):
150152
)
151153

152154

155+
class CoordinateRange(BaseModel):
156+
min: float
157+
max: float
158+
159+
153160
class Reference(BaseModel):
154161
__root__: str = Field(..., example=['column-name'])
155162

@@ -1029,6 +1036,13 @@ class RenameTableRequest(BaseModel):
10291036
destination: TableIdentifier
10301037

10311038

1039+
class BoundingBox(BaseModel):
1040+
x: CoordinateRange
1041+
y: CoordinateRange
1042+
z: Optional[CoordinateRange] = None
1043+
m: Optional[CoordinateRange] = None
1044+
1045+
10321046
class TransformTerm(BaseModel):
10331047
type: str = Field('transform', const=True)
10341048
transform: Transform
@@ -1189,6 +1203,12 @@ class SetExpression(BaseModel):
11891203
values: List[PrimitiveTypeValue]
11901204

11911205

1206+
class SpatialExpression(BaseModel):
1207+
type: ExpressionType
1208+
term: Term
1209+
value: BoundingBox
1210+
1211+
11921212
class ResidualFilter6(SetExpression, ResidualFilter1):
11931213
"""
11941214
An optional filter to be applied to rows in this file scan task.
@@ -1210,6 +1230,13 @@ class ResidualFilter8(UnaryExpression, ResidualFilter1):
12101230
"""
12111231

12121232

1233+
class ResidualFilter9(SpatialExpression, ResidualFilter1):
1234+
"""
1235+
An optional filter to be applied to rows in this file scan task.
1236+
If the residual is not present, the client must produce the residual or use the original filter.
1237+
"""
1238+
1239+
12131240
class StructField(BaseModel):
12141241
id: int
12151242
name: str
@@ -1254,6 +1281,7 @@ class Expression(BaseModel):
12541281
SetExpression,
12551282
LiteralExpression,
12561283
UnaryExpression,
1284+
SpatialExpression,
12571285
]
12581286

12591287

@@ -1612,6 +1640,7 @@ class ResidualFilter(BaseModel):
16121640
ResidualFilter6,
16131641
ResidualFilter7,
16141642
ResidualFilter8,
1643+
ResidualFilter9,
16151644
] = Field(
16161645
...,
16171646
description='An optional filter to be applied to rows in this file scan task.\nIf the residual is not present, the client must produce the residual or use the original filter.',

open-api/rest-catalog-open-api.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2249,6 +2249,7 @@ components:
22492249
- $ref: '#/components/schemas/SetExpression'
22502250
- $ref: '#/components/schemas/LiteralExpression'
22512251
- $ref: '#/components/schemas/UnaryExpression'
2252+
- $ref: '#/components/schemas/SpatialExpression'
22522253

22532254
ExpressionType:
22542255
type: string
@@ -2272,6 +2273,8 @@ components:
22722273
- "not-null"
22732274
- "is-nan"
22742275
- "not-nan"
2276+
- "st-intersects"
2277+
- "st-disjoint"
22752278

22762279
TrueExpression:
22772280
type: object
@@ -2362,6 +2365,49 @@ components:
23622365
items:
23632366
$ref: '#/components/schemas/PrimitiveTypeValue'
23642367

2368+
SpatialExpression:
2369+
type: object
2370+
required:
2371+
- type
2372+
- term
2373+
- value
2374+
properties:
2375+
type:
2376+
$ref: '#/components/schemas/ExpressionType'
2377+
enum: ["st-intersects", "st-disjoint"]
2378+
term:
2379+
$ref: '#/components/schemas/Term'
2380+
value:
2381+
$ref: '#/components/schemas/BoundingBox'
2382+
2383+
BoundingBox:
2384+
type: object
2385+
required:
2386+
- x
2387+
- y
2388+
properties:
2389+
x:
2390+
$ref: '#/components/schemas/CoordinateRange'
2391+
y:
2392+
$ref: '#/components/schemas/CoordinateRange'
2393+
z:
2394+
$ref: '#/components/schemas/CoordinateRange'
2395+
m:
2396+
$ref: '#/components/schemas/CoordinateRange'
2397+
2398+
CoordinateRange:
2399+
type: object
2400+
required:
2401+
- min
2402+
- max
2403+
properties:
2404+
min:
2405+
type: number
2406+
format: double
2407+
max:
2408+
type: number
2409+
format: double
2410+
23652411
Term:
23662412
oneOf:
23672413
- $ref: '#/components/schemas/Reference'

0 commit comments

Comments
 (0)