Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ enum Operation {
OR,
STARTS_WITH,
NOT_STARTS_WITH,
ST_INTERSECTS,
ST_DISJOINT,
COUNT,
COUNT_NULL,
COUNT_STAR,
Expand Down Expand Up @@ -91,6 +93,10 @@ public Operation negate() {
return Operation.NOT_STARTS_WITH;
case NOT_STARTS_WITH:
return Operation.STARTS_WITH;
case ST_INTERSECTS:
return Operation.ST_DISJOINT;
case ST_DISJOINT:
return Operation.ST_INTERSECTS;
default:
throw new IllegalArgumentException("No negation for operation: " + this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Literals.BoundingBoxLiteral;
import org.apache.iceberg.geospatial.BoundingBox;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.transforms.Transforms;
Expand Down Expand Up @@ -340,6 +342,8 @@ public <T> Expression predicate(UnboundPredicate<T> pred) {
case NOT_EQ:
case STARTS_WITH:
case NOT_STARTS_WITH:
case ST_INTERSECTS:
case ST_DISJOINT:
return new UnboundPredicate<>(
pred.op(), pred.term(), (T) sanitize(pred.literal(), now, today));
case IN:
Expand Down Expand Up @@ -440,6 +444,10 @@ public <T> String predicate(BoundPredicate<T> pred) {
return term + " STARTS WITH " + value((BoundLiteralPredicate<?>) pred);
case NOT_STARTS_WITH:
return term + " NOT STARTS WITH " + value((BoundLiteralPredicate<?>) pred);
case ST_INTERSECTS:
return "st_intersects(" + term + ", " + value((BoundLiteralPredicate<?>) pred) + ")";
case ST_DISJOINT:
return "st_disjoint(" + term + ", " + value((BoundLiteralPredicate<?>) pred) + ")";
default:
throw new UnsupportedOperationException(
"Cannot sanitize unsupported predicate type: " + pred.op());
Expand Down Expand Up @@ -492,6 +500,10 @@ public <T> String predicate(UnboundPredicate<T> pred) {
return term + " STARTS WITH " + sanitize(pred.literal(), nowMicros, today);
case NOT_STARTS_WITH:
return term + " NOT STARTS WITH " + sanitize(pred.literal(), nowMicros, today);
case ST_INTERSECTS:
return "st_intersects(" + term + ", " + sanitize(pred.literal(), nowMicros, today) + ")";
case ST_DISJOINT:
return "st_disjoint(" + term + ", " + sanitize(pred.literal(), nowMicros, today) + ")";
default:
throw new UnsupportedOperationException(
"Cannot sanitize unsupported predicate type: " + pred.op());
Expand Down Expand Up @@ -541,6 +553,9 @@ private static String sanitize(Literal<?> literal, long now, int today) {
return sanitizeNumber(((Literals.DoubleLiteral) literal).value(), "float");
} else if (literal instanceof Literals.VariantLiteral) {
return sanitizeVariant(((Literals.VariantLiteral) literal).value(), now, today);
} else if (literal instanceof BoundingBoxLiteral) {
BoundingBox bbox = BoundingBox.fromByteBuffer(((BoundingBoxLiteral) literal).value());
return sanitizeBoundingBox(bbox);
} else {
// for uuid, decimal, fixed and binary, match the string result
return sanitizeSimpleString(literal.value().toString());
Expand Down Expand Up @@ -619,6 +634,20 @@ private static String sanitizeSimpleString(CharSequence value) {
return String.format(Locale.ROOT, "(hash-%08x)", HASH_FUNC.apply(value));
}

private static String sanitizeBoundingBox(BoundingBox bbox) {
boolean hasZ = bbox.min().hasZ() && bbox.max().hasZ();
boolean hasM = bbox.min().hasM() && bbox.max().hasM();
if (hasZ && hasM) {
return "(boundingbox-xyzm)";
} else if (hasZ) {
return "(boundingbox-xyz)";
} else if (hasM) {
return "(boundingbox-xym)";
} else {
return "(boundingbox-xy)";
}
}

private static String sanitizeVariant(Variant value, long now, int today) {
return sanitizeVariant(value.value(), now, today);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.iceberg.expressions;

import java.nio.ByteBuffer;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.iceberg.exceptions.ValidationException;
Expand Down Expand Up @@ -126,6 +127,16 @@ public <T> R notStartsWith(BoundReference<T> ref, Literal<T> lit) {
"notStartsWith expression is not supported by the visitor");
}

public <T> R stIntersects(BoundReference<T> ref, Literal<ByteBuffer> lit) {
throw new UnsupportedOperationException(
"stIntersects expression is not supported by the visitor");
}

public <T> R stDisjoint(BoundReference<T> ref, Literal<ByteBuffer> lit) {
throw new UnsupportedOperationException(
"stDisjoint expression is not supported by the visitor");
}

/**
* Handle a non-reference value in this visitor.
*
Expand All @@ -141,6 +152,7 @@ public <T> R handleNonReference(Bound<T> term) {
throw new ValidationException("Visitor %s does not support non-reference: %s", this, term);
}

@SuppressWarnings("unchecked")
@Override
public <T> R predicate(BoundPredicate<T> pred) {
if (!(pred.term() instanceof BoundReference)) {
Expand All @@ -166,6 +178,12 @@ public <T> R predicate(BoundPredicate<T> pred) {
return startsWith((BoundReference<T>) pred.term(), literalPred.literal());
case NOT_STARTS_WITH:
return notStartsWith((BoundReference<T>) pred.term(), literalPred.literal());
case ST_INTERSECTS:
return stIntersects(
(BoundReference<T>) pred.term(), (Literal<ByteBuffer>) literalPred.literal());
case ST_DISJOINT:
return stDisjoint(
(BoundReference<T>) pred.term(), (Literal<ByteBuffer>) literalPred.literal());
default:
throw new IllegalStateException(
"Invalid operation for BoundLiteralPredicate: " + pred.op());
Expand Down Expand Up @@ -266,6 +284,16 @@ public <T> R notStartsWith(Bound<T> expr, Literal<T> lit) {
throw new UnsupportedOperationException("Unsupported operation.");
}

public <T> R stIntersects(Bound<T> term, Literal<ByteBuffer> literal) {
throw new UnsupportedOperationException(
"stIntersects operator is not supported by the visitor");
}

public <T> R stDisjoint(Bound<T> term, Literal<ByteBuffer> literal) {
throw new UnsupportedOperationException(
"stDisjoint operator is not supported by the visitor");
}

@Override
public <T> R predicate(BoundPredicate<T> pred) {
if (pred.isLiteralPredicate()) {
Expand All @@ -287,6 +315,10 @@ public <T> R predicate(BoundPredicate<T> pred) {
return startsWith(pred.term(), literalPred.literal());
case NOT_STARTS_WITH:
return notStartsWith(pred.term(), literalPred.literal());
case ST_INTERSECTS:
return stIntersects(pred.term(), (Literal<ByteBuffer>) literalPred.literal());
case ST_DISJOINT:
return stDisjoint(pred.term(), (Literal<ByteBuffer>) literalPred.literal());
default:
throw new IllegalStateException(
"Invalid operation for BoundLiteralPredicate: " + pred.op());
Expand Down Expand Up @@ -465,6 +497,10 @@ public <T> R predicate(BoundPredicate<T> pred) {
return startsWith(pred.term(), literalPred.literal());
case NOT_STARTS_WITH:
return notStartsWith(pred.term(), literalPred.literal());
case ST_INTERSECTS:
return stIntersects(pred.term(), (Literal<ByteBuffer>) literalPred.literal());
case ST_DISJOINT:
return stDisjoint(pred.term(), (Literal<ByteBuffer>) literalPred.literal());
default:
throw new IllegalStateException(
"Invalid operation for BoundLiteralPredicate: " + pred.op());
Expand Down Expand Up @@ -555,6 +591,14 @@ public <T> R startsWith(BoundTerm<T> term, Literal<T> lit) {
public <T> R notStartsWith(BoundTerm<T> term, Literal<T> lit) {
return null;
}

public <T> R stIntersects(BoundTerm<T> term, Literal<ByteBuffer> lit) {
return null;
}

public <T> R stDisjoint(BoundTerm<T> term, Literal<ByteBuffer> lit) {
return null;
}
}

/**
Expand Down
30 changes: 30 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Expressions.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
*/
package org.apache.iceberg.expressions;

import java.nio.ByteBuffer;
import java.util.stream.Stream;
import org.apache.iceberg.expressions.Expression.Operation;
import org.apache.iceberg.geospatial.BoundingBox;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.transforms.Transform;
Expand Down Expand Up @@ -202,6 +204,24 @@ public static UnboundPredicate<String> notStartsWith(UnboundTerm<String> expr, S
return new UnboundPredicate<>(Expression.Operation.NOT_STARTS_WITH, expr, value);
}

public static UnboundPredicate<ByteBuffer> stIntersects(String name, BoundingBox value) {
return geospatialPredicate(Operation.ST_INTERSECTS, name, value);
}

public static UnboundPredicate<ByteBuffer> stIntersects(
UnboundTerm<ByteBuffer> expr, BoundingBox value) {
return geospatialPredicate(Operation.ST_INTERSECTS, expr, value);
}

public static UnboundPredicate<ByteBuffer> stDisjoint(String name, BoundingBox value) {
return geospatialPredicate(Operation.ST_DISJOINT, name, value);
}

public static UnboundPredicate<ByteBuffer> stDisjoint(
UnboundTerm<ByteBuffer> expr, BoundingBox value) {
return geospatialPredicate(Operation.ST_DISJOINT, expr, value);
}

public static <T> UnboundPredicate<T> in(String name, T... values) {
return predicate(Operation.IN, name, Lists.newArrayList(values));
}
Expand Down Expand Up @@ -280,6 +300,16 @@ public static <T> UnboundPredicate<T> predicate(Operation op, UnboundTerm<T> exp
return new UnboundPredicate<>(op, expr);
}

static UnboundPredicate<ByteBuffer> geospatialPredicate(
Operation op, String name, BoundingBox value) {
return geospatialPredicate(op, ref(name), value);
}

static UnboundPredicate<ByteBuffer> geospatialPredicate(
Operation op, UnboundTerm<ByteBuffer> expr, BoundingBox value) {
return new UnboundPredicate<>(op, expr, Literal.of(value));
}

public static True alwaysTrue() {
return True.INSTANCE;
}
Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Literal.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.UUID;
import org.apache.iceberg.geospatial.BoundingBox;
import org.apache.iceberg.types.Type;

/**
Expand Down Expand Up @@ -71,6 +72,10 @@ static Literal<BigDecimal> of(BigDecimal value) {
return new Literals.DecimalLiteral(value);
}

static Literal<ByteBuffer> of(BoundingBox value) {
return new Literals.BoundingBoxLiteral(value);
}

/** Returns the value wrapped by this literal. */
T value();

Expand Down
71 changes: 71 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Literals.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;
Expand All @@ -32,6 +33,7 @@
import java.util.Comparator;
import java.util.Objects;
import java.util.UUID;
import org.apache.iceberg.geospatial.BoundingBox;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.io.BaseEncoding;
import org.apache.iceberg.types.Comparators;
Expand Down Expand Up @@ -85,6 +87,8 @@ static <T> Literal<T> from(T value) {
return (Literal<T>) new Literals.DecimalLiteral((BigDecimal) value);
} else if (value instanceof Variant) {
return (Literal<T>) new Literals.VariantLiteral((Variant) value);
} else if (value instanceof BoundingBox) {
return (Literal<T>) new Literals.BoundingBoxLiteral((BoundingBox) value);
}

throw new IllegalArgumentException(
Expand Down Expand Up @@ -719,4 +723,71 @@ public String toString() {
return "X'" + BaseEncoding.base16().encode(bytes) + "'";
}
}

static class BoundingBoxLiteral implements Literal<ByteBuffer> {
private static final Comparator<ByteBuffer> CMP =
Comparators.<ByteBuffer>nullsFirst().thenComparing(Comparators.unsignedBytes());

private final ByteBuffer value;

BoundingBoxLiteral(BoundingBox value) {
this.value = value.toByteBuffer();
}

BoundingBoxLiteral(ByteBuffer value) {
Preconditions.checkNotNull(value, "Bounding box buffer cannot be null");
this.value = value.slice().order(ByteOrder.LITTLE_ENDIAN);
}

@Override
public ByteBuffer value() {
return value;
}

@Override
public ByteBuffer toByteBuffer() {
return value;
}

@Override
public <T> Literal<T> to(Type type) {
if (type.typeId() != Type.TypeID.GEOMETRY && type.typeId() != Type.TypeID.GEOGRAPHY) {
return null;
}

return (Literal<T>) this;
}

@Override
public Comparator<ByteBuffer> comparator() {
return CMP;
}

Object writeReplace() throws ObjectStreamException {
return new SerializationProxies.BoundingBoxLiteralProxy(value());
}

@Override
public String toString() {
return BoundingBox.fromByteBuffer(value()).toString();
}

@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (!(other instanceof BoundingBoxLiteral)) {
return false;
}

BoundingBoxLiteral that = (BoundingBoxLiteral) other;
return comparator().compare(value(), that.value()) == 0;
}

@Override
public int hashCode() {
return Objects.hashCode(value());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.ObjectStreamException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
* Stand-in classes for expression classes in Java Serialization.
Expand Down Expand Up @@ -81,4 +82,19 @@ protected byte[] bytes() {
return bytes;
}
}

static class BoundingBoxLiteralProxy extends FixedLiteralProxy {
/** Constructor for Java serialization. */
BoundingBoxLiteralProxy() {}

BoundingBoxLiteralProxy(ByteBuffer buffer) {
super(buffer);
}

@Override
Object readResolve() throws ObjectStreamException {
return new Literals.BoundingBoxLiteral(
ByteBuffer.wrap(bytes()).order(ByteOrder.LITTLE_ENDIAN));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,10 @@ public String toString() {
return term() + " startsWith \"" + literal() + "\"";
case NOT_STARTS_WITH:
return term() + " notStartsWith \"" + literal() + "\"";
case ST_INTERSECTS:
return "st_intersects(" + term() + ", " + literal() + ")";
case ST_DISJOINT:
return "st_disjoint(" + term() + ", " + literal() + ")";
case IN:
return term() + " in (" + COMMA.join(literals()) + ")";
case NOT_IN:
Expand Down
Loading