diff --git a/arrow-variant/pom.xml b/arrow-variant/pom.xml new file mode 100644 index 000000000..3a842178a --- /dev/null +++ b/arrow-variant/pom.xml @@ -0,0 +1,51 @@ + + + + 4.0.0 + + org.apache.arrow + arrow-java-root + 19.0.0-SNAPSHOT + + arrow-variant + Arrow Variant + Arrow Variant type support. + + + + org.apache.arrow + arrow-memory-core + + + org.apache.arrow + arrow-vector + + + org.apache.parquet + parquet-variant + ${dep.parquet.version} + + + org.apache.arrow + arrow-memory-unsafe + test + + + diff --git a/arrow-variant/src/main/java/module-info.java b/arrow-variant/src/main/java/module-info.java new file mode 100644 index 000000000..da9417396 --- /dev/null +++ b/arrow-variant/src/main/java/module-info.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@SuppressWarnings("requires-automatic") +module org.apache.arrow.variant { + exports org.apache.arrow.variant; + exports org.apache.arrow.variant.extension; + exports org.apache.arrow.variant.impl; + exports org.apache.arrow.variant.holders; + + requires org.apache.arrow.memory.core; + requires org.apache.arrow.vector; + requires parquet.variant; +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java b/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java new file mode 100644 index 000000000..fa05cdd93 --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.Objects; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.variant.holders.NullableVariantHolder; + +/** + * Wrapper around parquet-variant's Variant implementation. + * + *

This wrapper exists to isolate the parquet-variant dependency from Arrow's public API, + * allowing the vector module to expose variant functionality without requiring users to depend on + * parquet-variant directly. It also ensures that nested variant values (from arrays and objects) + * are consistently wrapped. + */ +public class Variant { + + private final org.apache.parquet.variant.Variant delegate; + + /** Creates a Variant from raw metadata and value byte arrays. */ + public Variant(byte[] metadata, byte[] value) { + this.delegate = new org.apache.parquet.variant.Variant(value, metadata); + } + + /** Creates a Variant by copying data from ArrowBuf instances. */ + public Variant( + ArrowBuf metadataBuffer, + int metadataStart, + int metadataEnd, + ArrowBuf valueBuffer, + int valueStart, + int valueEnd) { + byte[] metadata = new byte[metadataEnd - metadataStart]; + byte[] value = new byte[valueEnd - valueStart]; + metadataBuffer.getBytes(metadataStart, metadata); + valueBuffer.getBytes(valueStart, value); + this.delegate = new org.apache.parquet.variant.Variant(value, metadata); + } + + private Variant(org.apache.parquet.variant.Variant delegate) { + this.delegate = delegate; + } + + /** Constructs a Variant from a NullableVariantHolder. */ + public Variant(NullableVariantHolder holder) { + this( + holder.metadataBuffer, + holder.metadataStart, + holder.metadataEnd, + holder.valueBuffer, + holder.valueStart, + holder.valueEnd); + } + + public ByteBuffer getValueBuffer() { + return delegate.getValueBuffer(); + } + + public ByteBuffer getMetadataBuffer() { + return delegate.getMetadataBuffer(); + } + + public boolean getBoolean() { + return delegate.getBoolean(); + } + + public byte getByte() { + return delegate.getByte(); + } + + public short getShort() { + return delegate.getShort(); + } + + public int getInt() { + return delegate.getInt(); + } + + public long getLong() { + return delegate.getLong(); + } + + public double getDouble() { + return delegate.getDouble(); + } + + public BigDecimal getDecimal() { + return delegate.getDecimal(); + } + + public float getFloat() { + return delegate.getFloat(); + } + + public ByteBuffer getBinary() { + return delegate.getBinary(); + } + + public UUID getUUID() { + return delegate.getUUID(); + } + + public String getString() { + return delegate.getString(); + } + + public Type getType() { + return Type.fromParquet(delegate.getType()); + } + + public int numObjectElements() { + return delegate.numObjectElements(); + } + + public Variant getFieldByKey(String key) { + org.apache.parquet.variant.Variant result = delegate.getFieldByKey(key); + return result != null ? wrap(result) : null; + } + + public ObjectField getFieldAtIndex(int idx) { + org.apache.parquet.variant.Variant.ObjectField field = delegate.getFieldAtIndex(idx); + return new ObjectField(field.key, wrap(field.value)); + } + + public int numArrayElements() { + return delegate.numArrayElements(); + } + + public Variant getElementAtIndex(int index) { + org.apache.parquet.variant.Variant result = delegate.getElementAtIndex(index); + return result != null ? wrap(result) : null; + } + + private static Variant wrap(org.apache.parquet.variant.Variant parquetVariant) { + return new Variant(parquetVariant); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Variant variant = (Variant) o; + return delegate.getMetadataBuffer().equals(variant.delegate.getMetadataBuffer()) + && delegate.getValueBuffer().equals(variant.delegate.getValueBuffer()); + } + + @Override + public int hashCode() { + return Objects.hash(delegate.getMetadataBuffer(), delegate.getValueBuffer()); + } + + @Override + public String toString() { + return "Variant{type=" + getType() + '}'; + } + + public enum Type { + OBJECT, + ARRAY, + NULL, + BOOLEAN, + BYTE, + SHORT, + INT, + LONG, + STRING, + DOUBLE, + DECIMAL4, + DECIMAL8, + DECIMAL16, + DATE, + TIMESTAMP_TZ, + TIMESTAMP_NTZ, + FLOAT, + BINARY, + TIME, + TIMESTAMP_NANOS_TZ, + TIMESTAMP_NANOS_NTZ, + UUID; + + static Type fromParquet(org.apache.parquet.variant.Variant.Type parquetType) { + return Type.valueOf(parquetType.name()); + } + } + + public static final class ObjectField { + public final String key; + public final Variant value; + + public ObjectField(String key, Variant value) { + this.key = key; + this.value = value; + } + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java new file mode 100644 index 000000000..3deb70cdc --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.variant.impl.VariantWriterImpl; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.FieldType; + +/** + * Arrow extension type for Parquet + * Variant binary encoding. The type itself does not support shredded variant data. + */ +public final class VariantType extends ExtensionType { + + public static final VariantType INSTANCE = new VariantType(); + + public static final String EXTENSION_NAME = "parquet.variant"; + + static { + ExtensionTypeRegistry.register(INSTANCE); + } + + private VariantType() {} + + @Override + public ArrowType storageType() { + return ArrowType.Struct.INSTANCE; + } + + @Override + public String extensionName() { + return EXTENSION_NAME; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof VariantType; + } + + @Override + public String serialize() { + return ""; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(this.storageType())) { + throw new UnsupportedOperationException( + "Cannot construct VariantType from underlying type " + storageType); + } + return INSTANCE; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new VariantVector(name, allocator); + } + + @Override + public boolean isComplex() { + // The type itself is not complex meaning we need separate functions to convert/extract + // different types. + // Meanwhile, the containing vector is complex in terms of containing multiple values (metadata + // and value) + return false; + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new VariantWriterImpl((VariantVector) vector); + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantVector.java b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantVector.java new file mode 100644 index 000000000..1bbf1a6bd --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantVector.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import java.nio.ByteBuffer; +import java.util.List; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.complex.AbstractStructVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; + +/** + * Arrow vector for storing {@link VariantType} values. + * + *

Stores semi-structured data (like JSON) as metadata + value binary pairs, allowing + * type-flexible columnar storage within Arrow's type system. + */ +public class VariantVector extends ExtensionTypeVector { + + public static final String METADATA_VECTOR_NAME = "metadata"; + public static final String VALUE_VECTOR_NAME = "value"; + + private final Field rootField; + + /** + * Constructs a new VariantVector with the given name and allocator. + * + * @param name the name of the vector + * @param allocator the buffer allocator for memory management + */ + public VariantVector(String name, BufferAllocator allocator) { + super( + name, + allocator, + new StructVector( + name, + allocator, + FieldType.nullable(ArrowType.Struct.INSTANCE), + null, + AbstractStructVector.ConflictPolicy.CONFLICT_ERROR, + false)); + rootField = createVariantField(name); + ((FieldVector) this.getUnderlyingVector()) + .initializeChildrenFromFields(rootField.getChildren()); + } + + /** + * Creates a new VariantVector with the given name. The Variant Field schema has to be the same + * everywhere, otherwise ArrowBuffer loading might fail during serialization/deserialization and + * schema mismatches can occur. This includes CompleteType's VARIANT and VARIANT_REQUIRED types. + */ + public static Field createVariantField(String name) { + return new Field( + name, new FieldType(true, VariantType.INSTANCE, null), createVariantChildFields()); + } + + /** + * Creates the child fields for the VariantVector. Metadata vector will be index 0 and value + * vector will be index 1. + */ + public static List createVariantChildFields() { + return List.of( + new Field(METADATA_VECTOR_NAME, new FieldType(false, Binary.INSTANCE, null), null), + new Field(VALUE_VECTOR_NAME, new FieldType(false, Binary.INSTANCE, null), null)); + } + + @Override + public void initializeChildrenFromFields(List children) { + // No-op, as children are initialized in the constructor + } + + @Override + public Field getField() { + return rootField; + } + + public VarBinaryVector getMetadataVector() { + return getUnderlyingVector().getChild(METADATA_VECTOR_NAME, VarBinaryVector.class); + } + + public VarBinaryVector getValueVector() { + return getUnderlyingVector().getChild(VALUE_VECTOR_NAME, VarBinaryVector.class); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + return new VariantTransferPair(this, (VariantVector) target); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new VariantTransferPair(this, new VariantVector(field.getName(), allocator)); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new VariantTransferPair(this, new VariantVector(ref, allocator)); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(ref, allocator); + } + + @Override + public TransferPair getTransferPair(BufferAllocator allocator) { + return getTransferPair(this.getField().getName(), allocator); + } + + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFrom(fromIndex, thisIndex, ((VariantVector) from).getUnderlyingVector()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((VariantVector) from).getUnderlyingVector()); + } + + @Override + public Object getObject(int index) { + if (isNull(index)) { + return null; + } + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + + int metadataStart = metadataVector.getStartOffset(index); + int metadataEnd = metadataVector.getEndOffset(index); + int valueStart = valueVector.getStartOffset(index); + int valueEnd = valueVector.getEndOffset(index); + + return new Variant( + metadataVector.getDataBuffer(), + metadataStart, + metadataEnd, + valueVector.getDataBuffer(), + valueStart, + valueEnd); + } + + /** + * Retrieves the variant value at the specified index into the provided holder. + * + * @param index the index of the value to retrieve + * @param holder the holder to populate with the variant data + */ + public void get(int index, NullableVariantHolder holder) { + if (isNull(index)) { + holder.isSet = 0; + } else { + holder.isSet = 1; + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + assert !metadataVector.isNull(index) && !valueVector.isNull(index); + + holder.metadataStart = metadataVector.getStartOffset(index); + holder.metadataEnd = metadataVector.getEndOffset(index); + holder.metadataBuffer = metadataVector.getDataBuffer(); + holder.valueStart = valueVector.getStartOffset(index); + holder.valueEnd = valueVector.getEndOffset(index); + holder.valueBuffer = valueVector.getDataBuffer(); + } + } + + /** + * Retrieves the variant value at the specified index into the provided non-nullable holder. + * + * @param index the index of the value to retrieve + * @param holder the holder to populate with the variant data + */ + public void get(int index, VariantHolder holder) { + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + assert !metadataVector.isNull(index) && !valueVector.isNull(index); + + holder.metadataStart = metadataVector.getStartOffset(index); + holder.metadataEnd = metadataVector.getEndOffset(index); + holder.metadataBuffer = metadataVector.getDataBuffer(); + holder.valueStart = valueVector.getStartOffset(index); + holder.valueEnd = valueVector.getEndOffset(index); + holder.valueBuffer = valueVector.getDataBuffer(); + } + + /** + * Sets the variant value at the specified index from the provided holder. + * + * @param index the index at which to set the value + * @param holder the holder containing the variant data to set + */ + public void set(int index, VariantHolder holder) { + BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); + getMetadataVector() + .set(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().set(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided nullable holder. + * + * @param index the index at which to set the value + * @param holder the nullable holder containing the variant data to set + */ + public void set(int index, NullableVariantHolder holder) { + BitVectorHelper.setValidityBit(getUnderlyingVector().getValidityBuffer(), index, holder.isSet); + if (holder.isSet == 0) { + return; + } + getMetadataVector() + .set(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().set(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided holder, with bounds checking. + * + * @param index the index at which to set the value + * @param holder the holder containing the variant data to set + */ + public void setSafe(int index, VariantHolder holder) { + getUnderlyingVector().setIndexDefined(index); + getMetadataVector() + .setSafe(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided nullable holder, with bounds + * checking. + * + * @param index the index at which to set the value + * @param holder the nullable holder containing the variant data to set + */ + public void setSafe(int index, NullableVariantHolder holder) { + if (holder.isSet == 0) { + getUnderlyingVector().setNull(index); + return; + } + getUnderlyingVector().setIndexDefined(index); + getMetadataVector() + .setSafe(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** Sets the value at the given index from the provided Variant. */ + public void setSafe(int index, Variant variant) { + ByteBuffer metadataBuffer = variant.getMetadataBuffer(); + ByteBuffer valueBuffer = variant.getValueBuffer(); + int metadataLength = metadataBuffer.remaining(); + int valueLength = valueBuffer.remaining(); + try (ArrowBuf metaBuf = getAllocator().buffer(metadataLength); + ArrowBuf valBuf = getAllocator().buffer(valueLength)) { + metaBuf.setBytes(0, metadataBuffer.duplicate()); + valBuf.setBytes(0, valueBuffer.duplicate()); + getUnderlyingVector().setIndexDefined(index); + getMetadataVector().setSafe(index, 1, 0, metadataLength, metaBuf); + getValueVector().setSafe(index, 1, 0, valueLength, valBuf); + } + } + + @Override + protected FieldReader getReaderImpl() { + return new org.apache.arrow.variant.impl.VariantReaderImpl(this); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + /** + * VariantTransferPair is a transfer pair for VariantVector. It transfers the metadata and value + * together using the underlyingVector's transfer pair. + */ + protected static class VariantTransferPair implements TransferPair { + private final TransferPair pair; + private final VariantVector from; + private final VariantVector to; + + public VariantTransferPair(VariantVector from, VariantVector to) { + this.from = from; + this.to = to; + this.pair = from.getUnderlyingVector().makeTransferPair((to).getUnderlyingVector()); + } + + @Override + public void transfer() { + pair.transfer(); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + pair.splitAndTransfer(startIndex, length); + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + pair.copyValueSafe(from, to); + } + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/holders/NullableVariantHolder.java b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/NullableVariantHolder.java new file mode 100644 index 000000000..b78d4a201 --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/NullableVariantHolder.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.holders; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.variant.extension.VariantType; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; + +@SuppressWarnings("checkstyle:VisibilityModifier") +public final class NullableVariantHolder extends ExtensionHolder { + + public int isSet; + public int metadataStart; + public int metadataEnd; + public ArrowBuf metadataBuffer; + public int valueStart; + public int valueEnd; + public ArrowBuf valueBuffer; + + public NullableVariantHolder() {} + + @Override + public boolean equals(Object obj) { + throw new UnsupportedOperationException(); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrowType type() { + return VariantType.INSTANCE; + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/holders/VariantHolder.java b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/VariantHolder.java new file mode 100644 index 000000000..e3947ac43 --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/VariantHolder.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.holders; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.variant.extension.VariantType; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; + +@SuppressWarnings("checkstyle:VisibilityModifier") +public final class VariantHolder extends ExtensionHolder { + + public final int isSet = 1; + public int metadataStart; + public int metadataEnd; + public ArrowBuf metadataBuffer; + public int valueStart; + public int valueEnd; + public ArrowBuf valueBuffer; + + public VariantHolder() {} + + @Override + public boolean equals(Object obj) { + throw new UnsupportedOperationException(); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrowType type() { + return VariantType.INSTANCE; + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/impl/NullableVariantHolderReaderImpl.java b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/NullableVariantHolderReaderImpl.java new file mode 100644 index 000000000..1645529c0 --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/NullableVariantHolderReaderImpl.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.impl; + +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.complex.impl.AbstractFieldReader; +import org.apache.arrow.vector.types.Types; + +public class NullableVariantHolderReaderImpl extends AbstractFieldReader { + private final NullableVariantHolder holder; + + public NullableVariantHolderReaderImpl(NullableVariantHolder holder) { + this.holder = holder; + } + + @Override + public int size() { + throw new UnsupportedOperationException("You can't call size on a Holder value reader."); + } + + @Override + public boolean next() { + throw new UnsupportedOperationException("You can't call next on a single value reader."); + } + + @Override + public void setPosition(int index) { + throw new UnsupportedOperationException("You can't call setPosition on a single value reader."); + } + + @Override + public Types.MinorType getMinorType() { + return Types.MinorType.EXTENSIONTYPE; + } + + @Override + public boolean isSet() { + return holder.isSet == 1; + } + + /** + * Reads the variant holder data into the provided holder. + * + * @param h the holder to read into + */ + public void read(NullableVariantHolder h) { + h.metadataStart = this.holder.metadataStart; + h.metadataEnd = this.holder.metadataEnd; + h.metadataBuffer = this.holder.metadataBuffer; + h.valueStart = this.holder.valueStart; + h.valueEnd = this.holder.valueEnd; + h.valueBuffer = this.holder.valueBuffer; + h.isSet = this.isSet() ? 1 : 0; + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantReaderImpl.java b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantReaderImpl.java new file mode 100644 index 000000000..670104b7d --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantReaderImpl.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.impl; + +import org.apache.arrow.variant.extension.VariantVector; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.vector.complex.impl.AbstractFieldReader; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; + +public class VariantReaderImpl extends AbstractFieldReader { + private final VariantVector vector; + + public VariantReaderImpl(VariantVector vector) { + this.vector = vector; + } + + @Override + public Types.MinorType getMinorType() { + return this.vector.getMinorType(); + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public boolean isSet() { + return !this.vector.isNull(this.idx()); + } + + @Override + public void read(ExtensionHolder holder) { + if (holder instanceof VariantHolder) { + vector.get(idx(), (VariantHolder) holder); + } else if (holder instanceof NullableVariantHolder) { + vector.get(idx(), (NullableVariantHolder) holder); + } else { + throw new IllegalArgumentException( + "Unsupported holder type for VariantReader: " + holder.getClass()); + } + } + + public void read(VariantHolder h) { + this.vector.get(this.idx(), h); + } + + public void read(NullableVariantHolder h) { + this.vector.get(this.idx(), h); + } + + @Override + public Object readObject() { + return this.vector.getObject(this.idx()); + } +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantWriterImpl.java b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantWriterImpl.java new file mode 100644 index 000000000..266ddb75d --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantWriterImpl.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.impl; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.extension.VariantVector; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.vector.complex.impl.AbstractExtensionTypeWriter; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; + +/** + * Writer implementation for VARIANT extension type vectors. + * + *

This writer handles writing variant data to a {@link VariantVector}. It accepts both {@link + * VariantHolder} and {@link NullableVariantHolder} objects containing metadata and value buffers + * and writes them to the appropriate position in the vector. + */ +public class VariantWriterImpl extends AbstractExtensionTypeWriter { + + private static final String UNSUPPORTED_TYPE_TEMPLATE = "Unsupported type for Variant: %s"; + + /** + * Constructs a new VariantWriterImpl for the given vector. + * + * @param vector the variant vector to write to + */ + public VariantWriterImpl(VariantVector vector) { + super(vector); + } + + /** + * Writes an extension type or variant value to the vector. + * + *

This method handles {@link ExtensionHolder} by delegating to {@link #write(ExtensionHolder)} + * and {@link Variant} by delegating to {@link #writeVariant(Variant)}. + * + * @param object the object to write, must be an {@link ExtensionHolder} or {@link Variant} + * @throws IllegalArgumentException if the object is not an {@link ExtensionHolder} or {@link + * Variant} + */ + @Override + public void writeExtension(Object object) { + if (object instanceof ExtensionHolder) { + write((ExtensionHolder) object); + } else if (object instanceof Variant) { + writeVariant((Variant) object); + } else { + throw new IllegalArgumentException( + String.format(UNSUPPORTED_TYPE_TEMPLATE, object.getClass().getName())); + } + } + + private void writeVariant(Variant variant) { + java.nio.ByteBuffer metadataBuffer = variant.getMetadataBuffer(); + java.nio.ByteBuffer valueBuffer = variant.getValueBuffer(); + int metadataLength = metadataBuffer.remaining(); + int valueLength = valueBuffer.remaining(); + try (ArrowBuf metadataBuf = vector.getAllocator().buffer(metadataLength); + ArrowBuf valueBuf = vector.getAllocator().buffer(valueLength)) { + metadataBuf.setBytes(0, metadataBuffer.duplicate()); + valueBuf.setBytes(0, valueBuffer.duplicate()); + NullableVariantHolder holder = new NullableVariantHolder(); + holder.isSet = 1; + holder.metadataBuffer = metadataBuf; + holder.metadataStart = 0; + holder.metadataEnd = metadataLength; + holder.valueBuffer = valueBuf; + holder.valueStart = 0; + holder.valueEnd = valueLength; + vector.setSafe(getPosition(), holder); + vector.setValueCount(getPosition() + 1); + } + } + + @Override + public void writeExtension(Object value, ArrowType type) { + writeExtension(value); + } + + /** + * Writes a variant holder to the vector at the current position. + * + *

The holder can be either a {@link VariantHolder} (non-nullable, always set) or a {@link + * NullableVariantHolder} (nullable, may be null). The data is written using {@link + * VariantVector#setSafe(int, NullableVariantHolder)} which handles buffer allocation and copying. + * + * @param extensionHolder the variant holder to write, must be a {@link VariantHolder} or {@link + * NullableVariantHolder} + * @throws IllegalArgumentException if the holder is neither a {@link VariantHolder} nor a {@link + * NullableVariantHolder} + */ + @Override + public void write(ExtensionHolder extensionHolder) { + if (extensionHolder instanceof VariantHolder) { + vector.setSafe(getPosition(), (VariantHolder) extensionHolder); + } else if (extensionHolder instanceof NullableVariantHolder) { + vector.setSafe(getPosition(), (NullableVariantHolder) extensionHolder); + } else { + throw new IllegalArgumentException( + String.format(UNSUPPORTED_TYPE_TEMPLATE, extensionHolder.getClass().getName())); + } + vector.setValueCount(getPosition() + 1); + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/TestVariant.java b/arrow-variant/src/test/java/org/apache/arrow/variant/TestVariant.java new file mode 100644 index 000000000..bc46a6861 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/TestVariant.java @@ -0,0 +1,439 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.parquet.variant.VariantBuilder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestVariant { + + private BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + static Variant buildVariant(VariantBuilder builder) { + org.apache.parquet.variant.Variant parquetVariant = builder.build(); + ByteBuffer valueBuf = parquetVariant.getValueBuffer(); + ByteBuffer metaBuf = parquetVariant.getMetadataBuffer(); + byte[] valueBytes = new byte[valueBuf.remaining()]; + byte[] metaBytes = new byte[metaBuf.remaining()]; + valueBuf.get(valueBytes); + metaBuf.get(metaBytes); + return new Variant(metaBytes, valueBytes); + } + + public static Variant variantString(String value) { + VariantBuilder builder = new VariantBuilder(); + builder.appendString(value); + return buildVariant(builder); + } + + @Test + void testConstructionWithArrowBuf() { + VariantBuilder builder = new VariantBuilder(); + builder.appendInt(42); + Variant source = buildVariant(builder); + int metaLen = source.getMetadataBuffer().remaining(); + int valueLen = source.getValueBuffer().remaining(); + + try (ArrowBuf metadataArrowBuf = allocator.buffer(metaLen + 2); + ArrowBuf valueArrowBuf = allocator.buffer(valueLen + 3)) { + metadataArrowBuf.setBytes(2, source.getMetadataBuffer()); + valueArrowBuf.setBytes(3, source.getValueBuffer()); + + Variant variant = + new Variant(metadataArrowBuf, 2, 2 + metaLen, valueArrowBuf, 3, 3 + valueLen); + + assertEquals(Variant.Type.INT, variant.getType()); + assertEquals(42, variant.getInt()); + } + } + + @Test + void testNullType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendNull(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.NULL, variant.getType()); + } + + @Test + void testBooleanType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendBoolean(true); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.BOOLEAN, variant.getType()); + assertTrue(variant.getBoolean()); + + builder = new VariantBuilder(); + builder.appendBoolean(false); + variant = buildVariant(builder); + + assertEquals(Variant.Type.BOOLEAN, variant.getType()); + assertFalse(variant.getBoolean()); + } + + @Test + void testByteType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendByte((byte) 42); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.BYTE, variant.getType()); + assertEquals((byte) 42, variant.getByte()); + } + + @Test + void testShortType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendShort((short) 1234); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.SHORT, variant.getType()); + assertEquals((short) 1234, variant.getShort()); + } + + @Test + void testIntType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendInt(123456); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.INT, variant.getType()); + assertEquals(123456, variant.getInt()); + } + + @Test + void testLongType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendLong(9876543210L); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.LONG, variant.getType()); + assertEquals(9876543210L, variant.getLong()); + } + + @Test + void testFloatType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendFloat(3.14f); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.FLOAT, variant.getType()); + assertEquals(3.14f, variant.getFloat(), 0.001f); + } + + @Test + void testDoubleType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendDouble(3.14159265359); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.DOUBLE, variant.getType()); + assertEquals(3.14159265359, variant.getDouble(), 0.0000001); + } + + @Test + void testStringType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendString("hello world"); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.STRING, variant.getType()); + assertEquals("hello world", variant.getString()); + } + + @Test + void testDecimalType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendDecimal(new BigDecimal("123.456")); + Variant variant = buildVariant(builder); + + assertTrue( + variant.getType() == Variant.Type.DECIMAL4 + || variant.getType() == Variant.Type.DECIMAL8 + || variant.getType() == Variant.Type.DECIMAL16); + assertEquals(new BigDecimal("123.456"), variant.getDecimal()); + } + + @Test + void testBinaryType() { + VariantBuilder builder = new VariantBuilder(); + byte[] data = new byte[] {1, 2, 3, 4, 5}; + builder.appendBinary(ByteBuffer.wrap(data)); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.BINARY, variant.getType()); + ByteBuffer result = variant.getBinary(); + byte[] resultBytes = new byte[result.remaining()]; + result.get(resultBytes); + assertArrayEquals(data, resultBytes); + } + + @Test + void testUuidType() { + VariantBuilder builder = new VariantBuilder(); + UUID uuid = UUID.randomUUID(); + builder.appendUUID(uuid); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.UUID, variant.getType()); + assertEquals(uuid, variant.getUUID()); + } + + @Test + void testDateType() { + VariantBuilder builder = new VariantBuilder(); + int daysSinceEpoch = 19000; + builder.appendDate(daysSinceEpoch); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.DATE, variant.getType()); + } + + @Test + void testTimestampTzType() { + VariantBuilder builder = new VariantBuilder(); + long micros = System.currentTimeMillis() * 1000; + builder.appendTimestampTz(micros); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.TIMESTAMP_TZ, variant.getType()); + } + + @Test + void testTimestampNtzType() { + VariantBuilder builder = new VariantBuilder(); + long micros = System.currentTimeMillis() * 1000; + builder.appendTimestampNtz(micros); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.TIMESTAMP_NTZ, variant.getType()); + } + + @Test + void testTimeType() { + VariantBuilder builder = new VariantBuilder(); + long micros = 12345678L; + builder.appendTime(micros); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.TIME, variant.getType()); + } + + @Test + void testObjectType() { + VariantBuilder builder = new VariantBuilder(); + var objBuilder = builder.startObject(); + objBuilder.appendKey("name"); + objBuilder.appendString("test"); + objBuilder.appendKey("value"); + objBuilder.appendInt(42); + builder.endObject(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.OBJECT, variant.getType()); + assertEquals(2, variant.numObjectElements()); + + Variant nameField = variant.getFieldByKey("name"); + assertNotNull(nameField); + assertEquals(Variant.Type.STRING, nameField.getType()); + assertEquals("test", nameField.getString()); + + Variant valueField = variant.getFieldByKey("value"); + assertNotNull(valueField); + assertEquals(Variant.Type.INT, valueField.getType()); + assertEquals(42, valueField.getInt()); + + assertNull(variant.getFieldByKey("nonexistent")); + + // Empty object + builder = new VariantBuilder(); + builder.startObject(); + builder.endObject(); + Variant emptyObj = buildVariant(builder); + assertEquals(Variant.Type.OBJECT, emptyObj.getType()); + assertEquals(0, emptyObj.numObjectElements()); + } + + @Test + void testObjectFieldAtIndex() { + VariantBuilder builder = new VariantBuilder(); + var objBuilder = builder.startObject(); + objBuilder.appendKey("alpha"); + objBuilder.appendInt(1); + objBuilder.appendKey("beta"); + objBuilder.appendInt(2); + builder.endObject(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.OBJECT, variant.getType()); + assertEquals(2, variant.numObjectElements()); + + Variant.ObjectField field0 = variant.getFieldAtIndex(0); + assertNotNull(field0); + assertNotNull(field0.key); + assertNotNull(field0.value); + + Variant.ObjectField field1 = variant.getFieldAtIndex(1); + assertNotNull(field1); + assertNotNull(field1.key); + assertNotNull(field1.value); + } + + @Test + void testArrayType() { + VariantBuilder builder = new VariantBuilder(); + var arrayBuilder = builder.startArray(); + arrayBuilder.appendInt(1); + arrayBuilder.appendInt(2); + arrayBuilder.appendInt(3); + builder.endArray(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.ARRAY, variant.getType()); + assertEquals(3, variant.numArrayElements()); + + Variant elem0 = variant.getElementAtIndex(0); + assertNotNull(elem0); + assertEquals(Variant.Type.INT, elem0.getType()); + assertEquals(1, elem0.getInt()); + + Variant elem1 = variant.getElementAtIndex(1); + assertEquals(2, elem1.getInt()); + + Variant elem2 = variant.getElementAtIndex(2); + assertEquals(3, elem2.getInt()); + + assertNull(variant.getElementAtIndex(-1)); + assertNull(variant.getElementAtIndex(3)); + + // Empty array + builder = new VariantBuilder(); + builder.startArray(); + builder.endArray(); + Variant emptyArr = buildVariant(builder); + assertEquals(Variant.Type.ARRAY, emptyArr.getType()); + assertEquals(0, emptyArr.numArrayElements()); + } + + @Test + void testNestedStructure() { + VariantBuilder builder = new VariantBuilder(); + var objBuilder = builder.startObject(); + objBuilder.appendKey("items"); + var arrayBuilder = objBuilder.startArray(); + arrayBuilder.appendString("a"); + arrayBuilder.appendString("b"); + objBuilder.endArray(); + builder.endObject(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.OBJECT, variant.getType()); + Variant items = variant.getFieldByKey("items"); + assertNotNull(items); + assertEquals(Variant.Type.ARRAY, items.getType()); + assertEquals(2, items.numArrayElements()); + assertEquals("a", items.getElementAtIndex(0).getString()); + assertEquals("b", items.getElementAtIndex(1).getString()); + } + + @Test + void testEquals() { + VariantBuilder builder1 = new VariantBuilder(); + builder1.appendString("test"); + Variant variant1 = buildVariant(builder1); + + VariantBuilder builder2 = new VariantBuilder(); + builder2.appendString("test"); + Variant variant2 = buildVariant(builder2); + + VariantBuilder builder3 = new VariantBuilder(); + builder3.appendString("different"); + Variant variant3 = buildVariant(builder3); + + assertEquals(variant1, variant1); + assertEquals(variant1, variant2); + assertNotEquals(variant1, variant3); + assertNotEquals(variant1, null); + assertNotEquals(variant1, "not a variant"); + } + + @Test + void testHashCode() { + VariantBuilder builder1 = new VariantBuilder(); + builder1.appendInt(42); + Variant variant1 = buildVariant(builder1); + + VariantBuilder builder2 = new VariantBuilder(); + builder2.appendInt(42); + Variant variant2 = buildVariant(builder2); + + assertEquals(variant1.hashCode(), variant2.hashCode()); + } + + @Test + void testToString() { + VariantBuilder builder = new VariantBuilder(); + builder.appendString("test"); + Variant variant = buildVariant(builder); + + String str = variant.toString(); + assertNotNull(str); + assertTrue(str.contains("type=")); + } + + @Test + void testTypeEnumsMatch() { + for (Variant.Type arrowType : Variant.Type.values()) { + org.apache.parquet.variant.Variant.Type parquetType = + org.apache.parquet.variant.Variant.Type.valueOf(arrowType.name()); + assertEquals(arrowType, Variant.Type.fromParquet(parquetType)); + } + for (org.apache.parquet.variant.Variant.Type parquetType : + org.apache.parquet.variant.Variant.Type.values()) { + Variant.Type arrowType = Variant.Type.valueOf(parquetType.name()); + assertEquals(parquetType.name(), arrowType.name()); + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java new file mode 100644 index 000000000..f3213d523 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Collections; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.TestVariant; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compare.Range; +import org.apache.arrow.vector.compare.RangeEqualsVisitor; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.writer.BaseWriter; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.VectorBatchAppender; +import org.apache.arrow.vector.validate.ValidateVectorVisitor; +import org.junit.jupiter.api.Test; + +public class TestVariantExtensionType { + + private static void ensureRegistered(ArrowType.ExtensionType type) { + if (ExtensionTypeRegistry.lookup(type.extensionName()) == null) { + ExtensionTypeRegistry.register(type); + } + } + + @Test + public void roundtripVariant() throws IOException { + ensureRegistered(VariantType.INSTANCE); + final Schema schema = + new Schema(Collections.singletonList(Field.nullable("a", VariantType.INSTANCE))); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + VariantVector vector = (VariantVector) root.getVector("a"); + vector.allocateNew(); + + vector.setSafe(0, TestVariant.variantString("hello")); + vector.setSafe(1, TestVariant.variantString("world")); + vector.setValueCount(2); + root.setRowCount(2); + + final File file = File.createTempFile("varianttest", ".arrow"); + try (final WritableByteChannel channel = + FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); + final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + + try (final SeekableByteChannel channel = + Files.newByteChannel(Paths.get(file.getAbsolutePath())); + final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { + reader.loadNextBatch(); + final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); + assertEquals(root.getSchema(), readerRoot.getSchema()); + + final Field field = readerRoot.getSchema().getFields().get(0); + final VariantType expectedType = VariantType.INSTANCE; + assertEquals( + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), + expectedType.extensionName()); + assertEquals( + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), + expectedType.serialize()); + + final ExtensionTypeVector deserialized = + (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(vector.isNull(i), deserialized.isNull(i)); + if (!vector.isNull(i)) { + assertEquals(vector.getObject(i), deserialized.getObject(i)); + } + } + } + } + } + + @Test + public void readVariantAsUnderlyingType() throws IOException { + ensureRegistered(VariantType.INSTANCE); + final Schema schema = + new Schema(Collections.singletonList(VariantVector.createVariantField("a"))); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + VariantVector vector = (VariantVector) root.getVector("a"); + vector.allocateNew(); + + vector.setSafe(0, TestVariant.variantString("hello")); + vector.setValueCount(1); + root.setRowCount(1); + + final File file = File.createTempFile("varianttest", ".arrow"); + try (final WritableByteChannel channel = + FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); + final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + + ExtensionTypeRegistry.unregister(VariantType.INSTANCE); + + try (final SeekableByteChannel channel = + Files.newByteChannel(Paths.get(file.getAbsolutePath())); + final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { + reader.loadNextBatch(); + VectorSchemaRoot readRoot = reader.getVectorSchemaRoot(); + + // Verify schema properties + assertEquals(1, readRoot.getSchema().getFields().size()); + assertEquals("a", readRoot.getSchema().getFields().get(0).getName()); + assertTrue(readRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.Struct); + + // Verify extension metadata is preserved + final Field field = readRoot.getSchema().getFields().get(0); + assertEquals( + VariantType.EXTENSION_NAME, + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME)); + assertEquals("", field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA)); + + // Verify vector type and row count + assertEquals(1, readRoot.getRowCount()); + FieldVector readVector = readRoot.getVector("a"); + assertEquals(StructVector.class, readVector.getClass()); + + // Verify value count matches + StructVector structVector = (StructVector) readVector; + assertEquals(vector.getValueCount(), structVector.getValueCount()); + + // Verify the underlying data can be accessed from child vectors + VarBinaryVector metadataVector = + structVector.getChild(VariantVector.METADATA_VECTOR_NAME, VarBinaryVector.class); + VarBinaryVector valueVector = + structVector.getChild(VariantVector.VALUE_VECTOR_NAME, VarBinaryVector.class); + assertNotNull(metadataVector); + assertNotNull(valueVector); + assertEquals(1, metadataVector.getValueCount()); + assertEquals(1, valueVector.getValueCount()); + } + } + } + + @Test + public void testVariantVectorCompare() { + VariantType variantType = VariantType.INSTANCE; + ExtensionTypeRegistry.register(variantType); + Variant hello = TestVariant.variantString("hello"); + Variant world = TestVariant.variantString("world"); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + VariantVector a1 = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator); + VariantVector a2 = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator); + VariantVector bb = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator)) { + + ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor(); + validateVisitor.visit(a1, null); + + a1.allocateNew(); + a2.allocateNew(); + bb.allocateNew(); + + a1.setSafe(0, hello); + a1.setSafe(1, world); + a1.setValueCount(2); + + a2.setSafe(0, hello); + a2.setSafe(1, world); + a2.setValueCount(2); + + bb.setSafe(0, world); + bb.setSafe(1, hello); + bb.setValueCount(2); + + Range range = new Range(0, 0, a1.getValueCount()); + RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2); + assertTrue(visitor.rangeEquals(range)); + + visitor = new RangeEqualsVisitor(a1, bb); + assertFalse(visitor.rangeEquals(range)); + + VectorBatchAppender.batchAppend(a1, a2, bb); + assertEquals(6, a1.getValueCount()); + validateVisitor.visit(a1, null); + } + } + + @Test + public void testVariantCopyAsValueThrowsException() { + ensureRegistered(VariantType.INSTANCE); + try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + VariantVector vector = new VariantVector("variant", allocator)) { + vector.allocateNew(); + vector.setSafe(0, TestVariant.variantString("hello")); + vector.setValueCount(1); + + var reader = vector.getReader(); + reader.setPosition(0); + + assertThrows( + IllegalArgumentException.class, () -> reader.copyAsValue((BaseWriter.StructWriter) null)); + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java new file mode 100644 index 000000000..8b6000bc4 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.TestVariant; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestVariantInListVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testListVectorWithVariantExtensionType() { + final FieldType type = FieldType.nullable(VariantType.INSTANCE); + try (ListVector inVector = new ListVector("input", allocator, type, null)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + extensionWriter.writeExtension(variant2); + writer.endList(); + inVector.setValueCount(1); + + ArrayList resultSet = (ArrayList) inVector.getObject(0); + assertEquals(2, resultSet.size()); + assertEquals(variant1, resultSet.get(0)); + assertEquals(variant2, resultSet.get(1)); + } + } + + @Test + public void testListVectorReaderForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + UnionListReader reader = inVector.getReader(); + reader.setPosition(0); + assertTrue(reader.next()); + FieldReader variantReader = reader.reader(); + NullableVariantHolder resultHolder = new NullableVariantHolder(); + variantReader.read(resultHolder); + assertEquals(variant1, new Variant(resultHolder)); + + reader.setPosition(1); + assertTrue(reader.next()); + variantReader = reader.reader(); + variantReader.read(resultHolder); + assertEquals(variant2, new Variant(resultHolder)); + + assertTrue(reader.next()); + variantReader = reader.reader(); + variantReader.read(resultHolder); + assertEquals(variant2, new Variant(resultHolder)); + } + } + + @Test + public void testCopyFromForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator); + ListVector outVector = ListVector.empty("output", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector); + outVector.copyFrom(1, 1, inVector); + outVector.setValueCount(2); + + ArrayList resultSet0 = (ArrayList) outVector.getObject(0); + assertEquals(1, resultSet0.size()); + assertEquals(variant1, resultSet0.get(0)); + + ArrayList resultSet1 = (ArrayList) outVector.getObject(1); + assertEquals(2, resultSet1.size()); + assertEquals(variant2, resultSet1.get(0)); + assertEquals(variant2, resultSet1.get(1)); + } + } + + @Test + public void testCopyValueSafeForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + try (ListVector outVector = (ListVector) inVector.getTransferPair(allocator).getTo()) { + TransferPair tp = inVector.makeTransferPair(outVector); + tp.copyValueSafe(0, 0); + tp.copyValueSafe(1, 1); + outVector.setValueCount(2); + + ArrayList resultSet0 = (ArrayList) outVector.getObject(0); + assertEquals(1, resultSet0.size()); + assertEquals(variant1, resultSet0.get(0)); + + ArrayList resultSet1 = (ArrayList) outVector.getObject(1); + assertEquals(2, resultSet1.size()); + assertEquals(variant2, resultSet1.get(0)); + assertEquals(variant2, resultSet1.get(1)); + } + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java new file mode 100644 index 000000000..dd925810d --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.TestVariant; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapReader; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestVariantInMapVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void terminate() { + allocator.close(); + } + + @Test + public void testMapVectorWithVariantExtensionType() { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("world"); + try (final MapVector inVector = MapVector.empty("map", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + + UnionMapReader mapReader = inVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader variantReader = mapReader.value(); + NullableVariantHolder holder = new NullableVariantHolder(); + variantReader.read(holder); + assertEquals(variant1, new Variant(holder)); + + mapReader.next(); + variantReader = mapReader.value(); + variantReader.read(holder); + assertEquals(variant2, new Variant(holder)); + } + } + + @Test + public void testCopyFromForVariantExtensionType() { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("world"); + try (final MapVector inVector = MapVector.empty("in", allocator, false); + final MapVector outVector = MapVector.empty("out", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector); + outVector.setValueCount(1); + + UnionMapReader mapReader = outVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader variantReader = mapReader.value(); + NullableVariantHolder holder = new NullableVariantHolder(); + variantReader.read(holder); + assertEquals(variant1, new Variant(holder)); + + mapReader.next(); + variantReader = mapReader.value(); + variantReader.read(holder); + assertEquals(variant2, new Variant(holder)); + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantType.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantType.java new file mode 100644 index 000000000..017e71224 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantType.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class TestVariantType { + BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + @Test + void testConstants() { + assertNotNull(VariantType.INSTANCE); + } + + @Test + void testStorageType() { + VariantType type = VariantType.INSTANCE; + assertEquals(ArrowType.Struct.INSTANCE, type.storageType()); + assertInstanceOf(ArrowType.Struct.class, type.storageType()); + } + + @Test + void testExtensionName() { + VariantType type = VariantType.INSTANCE; + assertEquals("parquet.variant", type.extensionName()); + } + + @Test + void testExtensionEquals() { + VariantType type1 = VariantType.INSTANCE; + VariantType type2 = VariantType.INSTANCE; + + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testIsComplex() { + VariantType type = VariantType.INSTANCE; + assertFalse(type.isComplex()); + } + + @Test + void testSerialize() { + VariantType type = VariantType.INSTANCE; + String serialized = type.serialize(); + assertEquals("", serialized); + } + + @Test + void testDeserializeValid() { + VariantType type = VariantType.INSTANCE; + ArrowType storageType = ArrowType.Struct.INSTANCE; + + ArrowType deserialized = assertDoesNotThrow(() -> type.deserialize(storageType, "")); + assertInstanceOf(VariantType.class, deserialized); + assertEquals(VariantType.INSTANCE, deserialized); + } + + @Test + void testDeserializeInvalidStorageType() { + VariantType type = VariantType.INSTANCE; + ArrowType wrongStorageType = ArrowType.Utf8.INSTANCE; + + assertThrows(UnsupportedOperationException.class, () -> type.deserialize(wrongStorageType, "")); + } + + @Test + void testGetNewVector() { + VariantType type = VariantType.INSTANCE; + try (FieldVector vector = + type.getNewVector("variant_field", FieldType.nullable(type), allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("variant_field", vector.getField().getName()); + assertEquals(type, vector.getField().getType()); + } + } + + @Test + void testGetNewVectorWithNullableFieldType() { + VariantType type = VariantType.INSTANCE; + FieldType nullableFieldType = FieldType.nullable(type); + + try (FieldVector vector = type.getNewVector("nullable_variant", nullableFieldType, allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("nullable_variant", vector.getField().getName()); + assertTrue(vector.getField().isNullable()); + } + } + + @Test + void testGetNewVectorWithNonNullableFieldType() { + VariantType type = VariantType.INSTANCE; + FieldType nonNullableFieldType = FieldType.notNullable(type); + + try (FieldVector vector = + type.getNewVector("non_nullable_variant", nonNullableFieldType, allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("non_nullable_variant", vector.getField().getName()); + } + } + + @Test + void testIpcRoundTrip() { + VariantType type = VariantType.INSTANCE; + + Schema schema = new Schema(Collections.singletonList(Field.nullable("variant", type))); + byte[] serialized = schema.serializeAsMessage(); + Schema deserialized = Schema.deserializeMessage(ByteBuffer.wrap(serialized)); + assertEquals(schema, deserialized); + } + + @Test + void testVectorIpcRoundTrip() throws IOException { + VariantType type = VariantType.INSTANCE; + + try (FieldVector vector = type.getNewVector("field", FieldType.nullable(type), allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + VariantVector variantVector = (VariantVector) vector; + + byte[] metadata1 = new byte[] {1, 2, 3}; + byte[] value1 = new byte[] {4, 5, 6, 7}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + byte[] metadata2 = new byte[] {8, 9}; + byte[] value2 = new byte[] {10, 11, 12}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder1 = new NullableVariantHolder(); + holder1.isSet = 1; + holder1.metadataStart = 0; + holder1.metadataEnd = metadata1.length; + holder1.metadataBuffer = metadataBuf1; + holder1.valueStart = 0; + holder1.valueEnd = value1.length; + holder1.valueBuffer = valueBuf1; + + NullableVariantHolder holder2 = new NullableVariantHolder(); + holder2.isSet = 1; + holder2.metadataStart = 0; + holder2.metadataEnd = metadata2.length; + holder2.metadataBuffer = metadataBuf2; + holder2.valueStart = 0; + holder2.valueEnd = value2.length; + holder2.valueBuffer = valueBuf2; + + variantVector.setSafe(0, holder1); + variantVector.setNull(1); + variantVector.setSafe(2, holder2); + variantVector.setValueCount(3); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(variantVector)); + ArrowStreamWriter writer = + new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), baos)) { + writer.start(); + writer.writeBatch(); + } + + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), allocator)) { + assertTrue(reader.loadNextBatch()); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + assertEquals(3, root.getRowCount()); + assertEquals( + new Schema(Collections.singletonList(variantVector.getField())), root.getSchema()); + + VariantVector actual = assertInstanceOf(VariantVector.class, root.getVector("field")); + assertFalse(actual.isNull(0)); + assertTrue(actual.isNull(1)); + assertFalse(actual.isNull(2)); + + NullableVariantHolder result1 = new NullableVariantHolder(); + actual.get(0, result1); + assertEquals(1, result1.isSet); + assertEquals(metadata1.length, result1.metadataEnd - result1.metadataStart); + assertEquals(value1.length, result1.valueEnd - result1.valueStart); + + assertNull(actual.getObject(1)); + + NullableVariantHolder result2 = new NullableVariantHolder(); + actual.get(2, result2); + assertEquals(1, result2.isSet); + assertEquals(metadata2.length, result2.metadataEnd - result2.metadataStart); + assertEquals(value2.length, result2.valueEnd - result2.valueStart); + } + } + } + + @Test + void testSingleton() { + VariantType type1 = VariantType.INSTANCE; + VariantType type2 = VariantType.INSTANCE; + + // Same instance + assertSame(type1, type2); + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testExtensionTypeRegistry() { + // VariantType should be automatically registered via static initializer + ArrowType.ExtensionType registeredType = + ExtensionTypeRegistry.lookup(VariantType.EXTENSION_NAME); + assertNotNull(registeredType); + assertInstanceOf(VariantType.class, registeredType); + assertEquals(VariantType.INSTANCE, registeredType); + } + + @Test + void testFieldMetadata() { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + + FieldType fieldType = new FieldType(true, VariantType.INSTANCE, null, metadata); + try (VariantVector vector = new VariantVector("test", allocator)) { + Field field = new Field("test", fieldType, VariantVector.createVariantChildFields()); + + // Field metadata includes both custom metadata and extension type metadata + Map fieldMetadata = field.getMetadata(); + assertEquals("value1", fieldMetadata.get("key1")); + assertEquals("value2", fieldMetadata.get("key2")); + // Extension type metadata is also present + assertTrue(fieldMetadata.containsKey("ARROW:extension:name")); + assertTrue(fieldMetadata.containsKey("ARROW:extension:metadata")); + } + } + + @Test + void testFieldChildren() { + try (VariantVector vector = new VariantVector("test", allocator)) { + Field field = vector.getField(); + + assertNotNull(field.getChildren()); + assertEquals(2, field.getChildren().size()); + + Field metadataField = field.getChildren().get(0); + assertEquals(VariantVector.METADATA_VECTOR_NAME, metadataField.getName()); + assertEquals(ArrowType.Binary.INSTANCE, metadataField.getType()); + + Field valueField = field.getChildren().get(1); + assertEquals(VariantVector.VALUE_VECTOR_NAME, valueField.getName()); + assertEquals(ArrowType.Binary.INSTANCE, valueField.getType()); + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantVector.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantVector.java new file mode 100644 index 000000000..1c172e304 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantVector.java @@ -0,0 +1,844 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.variant.impl.VariantReaderImpl; +import org.apache.arrow.variant.impl.VariantWriterImpl; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for VariantVector, VariantWriterImpl, and VariantReaderImpl. */ +class TestVariantVector { + + private BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + private VariantHolder createHolder( + ArrowBuf metadataBuf, byte[] metadata, ArrowBuf valueBuf, byte[] value) { + VariantHolder holder = new VariantHolder(); + holder.metadataStart = 0; + holder.metadataEnd = metadata.length; + holder.metadataBuffer = metadataBuf; + holder.valueStart = 0; + holder.valueEnd = value.length; + holder.valueBuffer = valueBuf; + return holder; + } + + private NullableVariantHolder createNullableHolder( + ArrowBuf metadataBuf, byte[] metadata, ArrowBuf valueBuf, byte[] value) { + NullableVariantHolder holder = new NullableVariantHolder(); + holder.isSet = 1; + holder.metadataStart = 0; + holder.metadataEnd = metadata.length; + holder.metadataBuffer = metadataBuf; + holder.valueStart = 0; + holder.valueEnd = value.length; + holder.valueBuffer = valueBuf; + return holder; + } + + private NullableVariantHolder createNullHolder() { + NullableVariantHolder holder = new NullableVariantHolder(); + holder.isSet = 0; + return holder; + } + + // ========== Basic Vector Tests ========== + + @Test + void testVectorCreation() { + try (VariantVector vector = new VariantVector("test", allocator)) { + assertNotNull(vector); + assertEquals("test", vector.getField().getName()); + assertNotNull(vector.getMetadataVector()); + assertNotNull(vector.getValueVector()); + } + } + + @Test + void testSetAndGet() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6, 7}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + // Retrieve and verify + NullableVariantHolder result = new NullableVariantHolder(); + vector.get(0, result); + + assertEquals(1, result.isSet); + assertEquals(metadata.length, result.metadataEnd - result.metadataStart); + assertEquals(value.length, result.valueEnd - result.valueStart); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + } + } + + @Test + void testSetNull() { + try (VariantVector vector = new VariantVector("test", allocator)) { + NullableVariantHolder holder = createNullHolder(); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertTrue(vector.isNull(0)); + + NullableVariantHolder result = new NullableVariantHolder(); + vector.get(0, result); + assertEquals(0, result.isSet); + } + } + + @Test + void testMultipleValues() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + + byte[] metadata1 = new byte[] {1, 2}; + byte[] value1 = new byte[] {3, 4, 5}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + NullableVariantHolder holder1 = + createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1); + + byte[] metadata2 = new byte[] {6, 7, 8}; + byte[] value2 = new byte[] {9, 10}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder2 = + createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2); + + vector.setSafe(0, holder1); + vector.setSafe(1, holder2); + vector.setValueCount(2); + + // Verify first value + NullableVariantHolder result1 = new NullableVariantHolder(); + vector.get(0, result1); + assertEquals(1, result1.isSet); + + byte[] actualMetadata1 = new byte[metadata1.length]; + byte[] actualValue1 = new byte[value1.length]; + result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1); + result1.valueBuffer.getBytes(result1.valueStart, actualValue1); + assertArrayEquals(metadata1, actualMetadata1); + assertArrayEquals(value1, actualValue1); + + // Verify second value + NullableVariantHolder result2 = new NullableVariantHolder(); + vector.get(1, result2); + assertEquals(1, result2.isSet); + + byte[] actualMetadata2 = new byte[metadata2.length]; + byte[] actualValue2 = new byte[value2.length]; + result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2); + result2.valueBuffer.getBytes(result2.valueStart, actualValue2); + assertArrayEquals(metadata2, actualMetadata2); + assertArrayEquals(value2, actualValue2); + } + } + + @Test + void testNonNullableHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + VariantHolder holder = createHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertFalse(vector.isNull(0)); + + NullableVariantHolder result = new NullableVariantHolder(); + vector.get(0, result); + assertEquals(1, result.isSet); + } + } + + // ========== Writer Tests ========== + + @Test + void testWriteWithVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + VariantHolder holder = createHolder(metadataBuf, metadata, valueBuf, value); + + writer.setPosition(0); + writer.write(holder); + + assertEquals(1, vector.getValueCount()); + assertFalse(vector.isNull(0)); + } + } + + @Test + void testWriteWithNullableVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + writer.setPosition(0); + writer.write(holder); + + assertEquals(1, vector.getValueCount()); + assertFalse(vector.isNull(0)); + } + } + + @Test + void testWriteWithNullableVariantHolderNull() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector)) { + + NullableVariantHolder holder = createNullHolder(); + + writer.setPosition(0); + writer.write(holder); + + assertEquals(1, vector.getValueCount()); + assertTrue(vector.isNull(0)); + } + } + + @Test + void testWriteExtensionWithUnsupportedType() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector)) { + + writer.setPosition(0); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); + + assertTrue(exception.getMessage().contains("Unsupported type for Variant")); + } + } + + @Test + void testWriteWithUnsupportedHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector)) { + + ExtensionHolder unsupportedHolder = + new ExtensionHolder() { + @Override + public ArrowType type() { + return VariantType.INSTANCE; + } + }; + + writer.setPosition(0); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> writer.write(unsupportedHolder)); + + assertTrue(exception.getMessage().contains("Unsupported type for Variant")); + } + } + + // ========== Reader Tests ========== + + @Test + void testReaderReadWithNullableVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + reader.setPosition(0); + + NullableVariantHolder result = new NullableVariantHolder(); + reader.read(result); + + assertEquals(1, result.isSet); + assertEquals(metadata.length, result.metadataEnd - result.metadataStart); + assertEquals(value.length, result.valueEnd - result.valueStart); + } + } + + @Test + void testReaderReadWithNullableVariantHolderNull() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + reader.setPosition(0); + + NullableVariantHolder holder = new NullableVariantHolder(); + reader.read(holder); + + assertEquals(0, holder.isSet); + } + } + + @Test + void testReaderIsSet() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setNull(1); + vector.setValueCount(2); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + + reader.setPosition(0); + assertTrue(reader.isSet()); + + reader.setPosition(1); + assertFalse(reader.isSet()); + } + } + + @Test + void testReaderGetMinorType() { + try (VariantVector vector = new VariantVector("test", allocator)) { + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + assertEquals(vector.getMinorType(), reader.getMinorType()); + } + } + + @Test + void testReaderGetField() { + try (VariantVector vector = new VariantVector("test", allocator)) { + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + assertEquals(vector.getField(), reader.getField()); + assertEquals("test", reader.getField().getName()); + } + } + + @Test + void testReaderReadWithNonNullableVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + reader.setPosition(0); + + VariantHolder result = new VariantHolder(); + reader.read(result); + + // Verify the data was read correctly + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + assertEquals(1, result.isSet); + } + } + + // ========== Transfer Pair Tests ========== + + @Test + void testTransferPair() { + try (VariantVector fromVector = new VariantVector("from", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6, 7}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.getTransferPair(allocator); + VariantVector toVector = (VariantVector) transferPair.getTo(); + + transferPair.transfer(); + + assertEquals(0, fromVector.getValueCount()); + assertEquals(1, toVector.getValueCount()); + + NullableVariantHolder result = new NullableVariantHolder(); + toVector.get(0, result); + assertEquals(1, result.isSet); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + + toVector.close(); + } + } + + @Test + void testSplitAndTransfer() { + try (VariantVector fromVector = new VariantVector("from", allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10); + ArrowBuf metadataBuf3 = allocator.buffer(10); + ArrowBuf valueBuf3 = allocator.buffer(10)) { + + byte[] metadata1 = new byte[] {1}; + byte[] value1 = new byte[] {2, 3}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + byte[] metadata2 = new byte[] {4, 5}; + byte[] value2 = new byte[] {6}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + byte[] metadata3 = new byte[] {7, 8, 9}; + byte[] value3 = new byte[] {10, 11, 12}; + metadataBuf3.setBytes(0, metadata3); + valueBuf3.setBytes(0, value3); + + NullableVariantHolder holder1 = + createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1); + NullableVariantHolder holder2 = + createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2); + NullableVariantHolder holder3 = + createNullableHolder(metadataBuf3, metadata3, valueBuf3, value3); + + fromVector.setSafe(0, holder1); + fromVector.setSafe(1, holder2); + fromVector.setSafe(2, holder3); + fromVector.setValueCount(3); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.getTransferPair(allocator); + VariantVector toVector = (VariantVector) transferPair.getTo(); + + // Split and transfer indices 1-2 (middle and last) + transferPair.splitAndTransfer(1, 2); + + assertEquals(2, toVector.getValueCount()); + + // Verify transferred values + NullableVariantHolder result1 = new NullableVariantHolder(); + toVector.get(0, result1); + assertEquals(1, result1.isSet); + + byte[] actualMetadata1 = new byte[metadata2.length]; + byte[] actualValue1 = new byte[value2.length]; + result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1); + result1.valueBuffer.getBytes(result1.valueStart, actualValue1); + assertArrayEquals(metadata2, actualMetadata1); + assertArrayEquals(value2, actualValue1); + + NullableVariantHolder result2 = new NullableVariantHolder(); + toVector.get(1, result2); + assertEquals(1, result2.isSet); + + byte[] actualMetadata2 = new byte[metadata3.length]; + byte[] actualValue2 = new byte[value3.length]; + result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2); + result2.valueBuffer.getBytes(result2.valueStart, actualValue2); + assertArrayEquals(metadata3, actualMetadata2); + assertArrayEquals(value3, actualValue2); + + toVector.close(); + } + } + + @Test + void testCopyValueSafe() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.makeTransferPair(toVector); + + transferPair.copyValueSafe(0, 0); + toVector.setValueCount(1); + + // Verify the value was copied + NullableVariantHolder result = new NullableVariantHolder(); + toVector.get(0, result); + assertEquals(1, result.isSet); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + + // Original vector should still have the value + NullableVariantHolder originalResult = new NullableVariantHolder(); + fromVector.get(0, originalResult); + assertEquals(1, originalResult.isSet); + } + } + + @Test + void testGetTransferPairWithField() { + try (VariantVector fromVector = new VariantVector("from", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.getTransferPair(fromVector.getField(), allocator); + VariantVector toVector = (VariantVector) transferPair.getTo(); + + transferPair.transfer(); + + assertEquals(1, toVector.getValueCount()); + assertEquals(fromVector.getField().getName(), toVector.getField().getName()); + + toVector.close(); + } + } + + // ========== Copy Operations Tests ========== + + @Test + void testCopyFrom() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + toVector.allocateNew(); + toVector.copyFrom(0, 0, fromVector); + toVector.setValueCount(1); + + NullableVariantHolder result = new NullableVariantHolder(); + toVector.get(0, result); + assertEquals(1, result.isSet); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + } + } + + @Test + void testCopyFromSafe() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + + byte[] metadata1 = new byte[] {1}; + byte[] value1 = new byte[] {2, 3}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + NullableVariantHolder holder1 = + createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1); + + byte[] metadata2 = new byte[] {4, 5}; + byte[] value2 = new byte[] {6}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder2 = + createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2); + + fromVector.setSafe(0, holder1); + fromVector.setSafe(1, holder2); + fromVector.setValueCount(2); + + // Copy without pre-allocating toVector + for (int i = 0; i < 2; i++) { + toVector.copyFromSafe(i, i, fromVector); + } + toVector.setValueCount(2); + + // Verify both values + NullableVariantHolder result1 = new NullableVariantHolder(); + toVector.get(0, result1); + assertEquals(1, result1.isSet); + + byte[] actualMetadata1 = new byte[metadata1.length]; + byte[] actualValue1 = new byte[value1.length]; + result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1); + result1.valueBuffer.getBytes(result1.valueStart, actualValue1); + assertArrayEquals(metadata1, actualMetadata1); + assertArrayEquals(value1, actualValue1); + + NullableVariantHolder result2 = new NullableVariantHolder(); + toVector.get(1, result2); + assertEquals(1, result2.isSet); + + byte[] actualMetadata2 = new byte[metadata2.length]; + byte[] actualValue2 = new byte[value2.length]; + result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2); + result2.valueBuffer.getBytes(result2.valueStart, actualValue2); + assertArrayEquals(metadata2, actualMetadata2); + assertArrayEquals(value2, actualValue2); + } + } + + @Test + void testCopyFromWithNulls() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setNull(1); + fromVector.setSafe(2, holder); + fromVector.setValueCount(3); + + toVector.allocateNew(); + for (int i = 0; i < 3; i++) { + toVector.copyFromSafe(i, i, fromVector); + } + toVector.setValueCount(3); + + assertFalse(toVector.isNull(0)); + assertTrue(toVector.isNull(1)); + assertFalse(toVector.isNull(2)); + } + } + + // ========== GetObject Tests ========== + + @Test + void testGetObject() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + Object obj = vector.getObject(0); + assertNotNull(obj); + assertTrue(obj instanceof Variant); + assertEquals(new Variant(metadata, value), obj); + } + } + + @Test + void testGetObjectNull() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + Object obj = vector.getObject(0); + assertNull(obj); + } + } + + // ========== Allocate and Capacity Tests ========== + + @Test + void testAllocateNew() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.allocateNew(); + assertTrue(vector.getValueCapacity() > 0); + } + } + + @Test + void testSetInitialCapacity() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setInitialCapacity(100); + vector.allocateNew(); + assertTrue(vector.getValueCapacity() >= 100); + } + } + + @Test + void testClearAndReuse() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertFalse(vector.isNull(0)); + + vector.clear(); + vector.allocateNew(); + + // After clear, vector should be empty + assertEquals(0, vector.getValueCount()); + } + } +} diff --git a/bom/pom.xml b/bom/pom.xml index 9efde5324..b631f9366 100644 --- a/bom/pom.xml +++ b/bom/pom.xml @@ -194,6 +194,11 @@ under the License. arrow-tools ${project.version} + + org.apache.arrow + arrow-variant + ${project.version} + diff --git a/pom.xml b/pom.xml index d64df1ade..9ffaf6b60 100644 --- a/pom.xml +++ b/pom.xml @@ -68,6 +68,7 @@ under the License. bom format memory + arrow-variant vector tools adapter/jdbc @@ -104,6 +105,7 @@ under the License. 3.4.2 25.2.10 1.12.1 + 1.17.0 5.17.0 2 diff --git a/vector/src/main/codegen/templates/AbstractFieldReader.java b/vector/src/main/codegen/templates/AbstractFieldReader.java index 556fb576c..789295e95 100644 --- a/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -29,9 +29,9 @@ * Source code generated using FreeMarker template ${.template_name} */ @SuppressWarnings("unused") -abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{ +public abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{ - AbstractFieldReader(){ + protected AbstractFieldReader(){ super(); }