From 479ea37ee8a0e7eb79014d331aea1aed4cbb29a8 Mon Sep 17 00:00:00 2001 From: Tamas Mate Date: Wed, 7 Jan 2026 09:49:59 +0100 Subject: [PATCH 1/5] GH-946: Add Variant extension type support Implements VariantType extension type with VariantVector for storing variant data with metadata and value buffers. Includes reader/writer implementations and comprehensive test coverage. --- .../impl/NullableVariantHolderReaderImpl.java | 68 ++ .../complex/impl/VariantReaderImpl.java | 72 ++ .../complex/impl/VariantWriterFactory.java | 45 + .../complex/impl/VariantWriterImpl.java | 87 ++ .../arrow/vector/extension/VariantType.java | 80 ++ .../arrow/vector/extension/VariantVector.java | 306 +++++++ .../vector/holders/NullableVariantHolder.java | 48 + .../arrow/vector/holders/VariantHolder.java | 48 + .../apache/arrow/vector/TestVariantType.java | 308 +++++++ .../arrow/vector/TestVariantVector.java | 861 ++++++++++++++++++ 10 files changed, 1923 insertions(+) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/TestVariantType.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java new file mode 100644 index 000000000..0aa6246fb --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.types.Types; + +public class NullableVariantHolderReaderImpl extends AbstractFieldReader { + private final NullableVariantHolder holder; + + public NullableVariantHolderReaderImpl(NullableVariantHolder holder) { + this.holder = holder; + } + + @Override + public int size() { + throw new UnsupportedOperationException("You can't call size on a Holder value reader."); + } + + @Override + public boolean next() { + throw new UnsupportedOperationException("You can't call next on a single value reader."); + } + + @Override + public void setPosition(int index) { + throw new UnsupportedOperationException("You can't call setPosition on a single value reader."); + } + + @Override + public Types.MinorType getMinorType() { + return Types.MinorType.EXTENSIONTYPE; + } + + @Override + public boolean isSet() { + return holder.isSet == 1; + } + + /** + * Reads the variant holder data into the provided holder. + * + * @param h the holder to read into + */ + public void read(NullableVariantHolder h) { + h.metadataStart = this.holder.metadataStart; + h.metadataEnd = this.holder.metadataEnd; + h.metadataBuffer = this.holder.metadataBuffer; + h.valueStart = this.holder.valueStart; + h.valueEnd = this.holder.valueEnd; + h.valueBuffer = this.holder.valueBuffer; + h.isSet = this.isSet() ? 1 : 0; + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java new file mode 100644 index 000000000..cf5509792 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; + +public class VariantReaderImpl extends AbstractFieldReader { + private final VariantVector vector; + + public VariantReaderImpl(VariantVector vector) { + this.vector = vector; + } + + @Override + public Types.MinorType getMinorType() { + return this.vector.getMinorType(); + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public boolean isSet() { + return !this.vector.isNull(this.idx()); + } + + @Override + public void read(ExtensionHolder holder) { + if (holder instanceof VariantHolder) { + vector.get(idx(), (VariantHolder) holder); + } else if (holder instanceof NullableVariantHolder) { + vector.get(idx(), (NullableVariantHolder) holder); + } else { + throw new IllegalArgumentException( + "Unsupported holder type for VariantReader: " + holder.getClass()); + } + } + + public void read(VariantHolder h) { + this.vector.get(this.idx(), h); + } + + public void read(NullableVariantHolder h) { + this.vector.get(this.idx(), h); + } + + @Override + public Object readObject() { + return this.vector.getObject(this.idx()); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java new file mode 100644 index 000000000..6d41b890e --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.extension.VariantVector; + +/** + * Factory for creating {@link VariantWriterImpl} instances. + * + *

This factory is used to create writers for Variant extension type vectors. + * + * @see VariantWriterImpl + * @see org.apache.arrow.vector.extension.VariantType + */ +public class VariantWriterFactory implements ExtensionTypeWriterFactory { + + /** + * Creates a writer implementation for the given extension type vector. + * + * @param extensionTypeVector the vector to create a writer for + * @return a {@link VariantWriterImpl} if the vector is a {@link VariantVector}, null otherwise + */ + @Override + public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { + if (extensionTypeVector instanceof VariantVector) { + return new VariantWriterImpl((VariantVector) extensionTypeVector); + } + return null; + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java new file mode 100644 index 000000000..ac5250283 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; + +/** + * Writer implementation for VARIANT extension type vectors. + * + *

This writer handles writing variant data to a {@link VariantVector}. It accepts both {@link + * VariantHolder} and {@link NullableVariantHolder} objects containing metadata and value buffers + * and writes them to the appropriate position in the vector. + */ +public class VariantWriterImpl extends AbstractExtensionTypeWriter { + + private static final String UNSUPPORTED_TYPE_TEMPLATE = "Unsupported type for Variant: %s"; + + /** + * Constructs a new VariantWriterImpl for the given vector. + * + * @param vector the variant vector to write to + */ + public VariantWriterImpl(VariantVector vector) { + super(vector); + } + + /** + * Writes an extension type value to the vector. + * + *

This method validates that the object is an {@link ExtensionHolder} and delegates to {@link + * #write(ExtensionHolder)}. + * + * @param object the object to write, must be an {@link ExtensionHolder} + * @throws IllegalArgumentException if the object is not an {@link ExtensionHolder} + */ + @Override + public void writeExtension(Object object) { + if (object instanceof ExtensionHolder) { + write((ExtensionHolder) object); + } else { + throw new IllegalArgumentException( + String.format(UNSUPPORTED_TYPE_TEMPLATE, object.getClass().getName())); + } + } + + /** + * Writes a variant holder to the vector at the current position. + * + *

The holder can be either a {@link VariantHolder} (non-nullable, always set) or a {@link + * NullableVariantHolder} (nullable, may be null). The data is written using {@link + * VariantVector#setSafe(int, NullableVariantHolder)} which handles buffer allocation and copying. + * + * @param extensionHolder the variant holder to write, must be a {@link VariantHolder} or {@link + * NullableVariantHolder} + * @throws IllegalArgumentException if the holder is neither a {@link VariantHolder} nor a {@link + * NullableVariantHolder} + */ + @Override + public void write(ExtensionHolder extensionHolder) { + if (extensionHolder instanceof VariantHolder) { + vector.setSafe(getPosition(), (VariantHolder) extensionHolder); + } else if (extensionHolder instanceof NullableVariantHolder) { + vector.setSafe(getPosition(), (NullableVariantHolder) extensionHolder); + } else { + throw new IllegalArgumentException( + String.format(UNSUPPORTED_TYPE_TEMPLATE, extensionHolder.getClass().getName())); + } + vector.setValueCount(getPosition() + 1); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java b/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java new file mode 100644 index 000000000..cf083fc49 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.extension; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.FieldType; + +public final class VariantType extends ExtensionType { + + public static final VariantType INSTANCE = new VariantType(); + + public static final String EXTENSION_NAME = "parquet.variant"; + + static { + ExtensionTypeRegistry.register(INSTANCE); + } + + private VariantType() {} + + @Override + public ArrowType storageType() { + return ArrowType.Struct.INSTANCE; + } + + @Override + public String extensionName() { + return EXTENSION_NAME; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof VariantType; + } + + @Override + public String serialize() { + return ""; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(this.storageType())) { + throw new UnsupportedOperationException( + "Cannot construct VariantType from underlying type " + storageType); + } + return INSTANCE; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new VariantVector(name, allocator); + } + + @Override + public boolean isComplex() { + // The type itself is not complex meaning we need separate functions to convert/extract + // different types. + // Meanwhile, the containing vector is complex in terms of containing multiple values (metadata + // and value) + return false; + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java b/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java new file mode 100644 index 000000000..cc087c4f8 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.extension; + +import java.util.List; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.complex.AbstractStructVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; + +public class VariantVector extends ExtensionTypeVector { + + public static final String METADATA_VECTOR_NAME = "metadata"; + public static final String VALUE_VECTOR_NAME = "value"; + + private final Field rootField; + + /** + * Constructs a new VariantVector with the given name and allocator. + * + * @param name the name of the vector + * @param allocator the buffer allocator for memory management + */ + public VariantVector(String name, BufferAllocator allocator) { + super( + name, + allocator, + new StructVector( + name, + allocator, + FieldType.nullable(ArrowType.Struct.INSTANCE), + null, + AbstractStructVector.ConflictPolicy.CONFLICT_ERROR, + false)); + rootField = createVariantField(name); + ((FieldVector) this.getUnderlyingVector()) + .initializeChildrenFromFields(rootField.getChildren()); + } + + /** + * Creates a new VariantVector with the given name. The Variant Field schema has to be the same + * everywhere, otherwise ArrowBuffer loading might fail during serialization/deserialization and + * schema mismatches can occur. This includes CompleteType's VARIANT and VARIANT_REQUIRED types. + */ + public static Field createVariantField(String name) { + return new Field( + name, new FieldType(true, VariantType.INSTANCE, null), createVariantChildFields()); + } + + /** + * Creates the child fields for the VariantVector. Metadata vector will be index 0 and value + * vector will be index 1. + */ + public static List createVariantChildFields() { + return List.of( + new Field(METADATA_VECTOR_NAME, new FieldType(false, Binary.INSTANCE, null), null), + new Field(VALUE_VECTOR_NAME, new FieldType(false, Binary.INSTANCE, null), null)); + } + + @Override + public void initializeChildrenFromFields(List children) { + // No-op, as children are initialized in the constructor + } + + @Override + public Field getField() { + return rootField; + } + + public VarBinaryVector getMetadataVector() { + return getUnderlyingVector().getChild(METADATA_VECTOR_NAME, VarBinaryVector.class); + } + + public VarBinaryVector getValueVector() { + return getUnderlyingVector().getChild(VALUE_VECTOR_NAME, VarBinaryVector.class); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + return new VariantTransferPair(this, (VariantVector) target); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new VariantTransferPair(this, new VariantVector(field.getName(), allocator)); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new VariantTransferPair(this, new VariantVector(ref, allocator)); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(ref, allocator); + } + + @Override + public TransferPair getTransferPair(BufferAllocator allocator) { + return getTransferPair(this.getField().getName(), allocator); + } + + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFrom(fromIndex, thisIndex, ((VariantVector) from).getUnderlyingVector()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((VariantVector) from).getUnderlyingVector()); + } + + @Override + public Object getObject(int index) { + return getUnderlyingVector().getObject(index); + } + + /** + * Retrieves the variant value at the specified index into the provided holder. + * + * @param index the index of the value to retrieve + * @param holder the holder to populate with the variant data + */ + public void get(int index, NullableVariantHolder holder) { + if (isNull(index)) { + holder.isSet = 0; + } else { + holder.isSet = 1; + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + assert !metadataVector.isNull(index) && !valueVector.isNull(index); + + holder.metadataStart = metadataVector.getStartOffset(index); + holder.metadataEnd = metadataVector.getEndOffset(index); + holder.metadataBuffer = metadataVector.getDataBuffer(); + holder.valueStart = valueVector.getStartOffset(index); + holder.valueEnd = valueVector.getEndOffset(index); + holder.valueBuffer = valueVector.getDataBuffer(); + } + } + + /** + * Retrieves the variant value at the specified index into the provided non-nullable holder. + * + * @param index the index of the value to retrieve + * @param holder the holder to populate with the variant data + */ + public void get(int index, VariantHolder holder) { + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + assert !metadataVector.isNull(index) && !valueVector.isNull(index); + + holder.metadataStart = metadataVector.getStartOffset(index); + holder.metadataEnd = metadataVector.getEndOffset(index); + holder.metadataBuffer = metadataVector.getDataBuffer(); + holder.valueStart = valueVector.getStartOffset(index); + holder.valueEnd = valueVector.getEndOffset(index); + holder.valueBuffer = valueVector.getDataBuffer(); + } + + /** + * Sets the variant value at the specified index from the provided holder. + * + * @param index the index at which to set the value + * @param holder the holder containing the variant data to set + */ + public void set(int index, VariantHolder holder) { + BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); + getMetadataVector() + .set(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().set(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided nullable holder. + * + * @param index the index at which to set the value + * @param holder the nullable holder containing the variant data to set + */ + public void set(int index, NullableVariantHolder holder) { + BitVectorHelper.setValidityBit(getUnderlyingVector().getValidityBuffer(), index, holder.isSet); + if (holder.isSet == 0) { + return; + } + getMetadataVector() + .set(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().set(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided holder, with bounds checking. + * + * @param index the index at which to set the value + * @param holder the holder containing the variant data to set + */ + public void setSafe(int index, VariantHolder holder) { + getUnderlyingVector().setIndexDefined(index); + getMetadataVector() + .setSafe(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided nullable holder, with bounds + * checking. + * + * @param index the index at which to set the value + * @param holder the nullable holder containing the variant data to set + */ + public void setSafe(int index, NullableVariantHolder holder) { + if (holder.isSet == 0) { + getUnderlyingVector().setNull(index); + return; + } + getUnderlyingVector().setIndexDefined(index); + getMetadataVector() + .setSafe(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + @Override + protected FieldReader getReaderImpl() { + return new org.apache.arrow.vector.complex.impl.VariantReaderImpl(this); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + /** + * VariantTransferPair is a transfer pair for VariantVector. It transfers the metadata and value + * together using the underlyingVector's transfer pair. + */ + protected static class VariantTransferPair implements TransferPair { + private final TransferPair pair; + private final VariantVector from; + private final VariantVector to; + + public VariantTransferPair(VariantVector from, VariantVector to) { + this.from = from; + this.to = to; + this.pair = from.getUnderlyingVector().makeTransferPair((to).getUnderlyingVector()); + } + + @Override + public void transfer() { + pair.transfer(); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + pair.splitAndTransfer(startIndex, length); + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + pair.copyValueSafe(from, to); + } + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java new file mode 100644 index 000000000..6e80ce341 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holders; + +import org.apache.arrow.memory.ArrowBuf; + +@SuppressWarnings("checkstyle:VisibilityModifier") +public final class NullableVariantHolder extends ExtensionHolder { + + public int isSet; + public int metadataStart; + public int metadataEnd; + public ArrowBuf metadataBuffer; + public int valueStart; + public int valueEnd; + public ArrowBuf valueBuffer; + + public NullableVariantHolder() {} + + @Override + public boolean equals(Object obj) { + throw new UnsupportedOperationException(); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + throw new UnsupportedOperationException(); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java new file mode 100644 index 000000000..1cf90f9c8 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holders; + +import org.apache.arrow.memory.ArrowBuf; + +@SuppressWarnings("checkstyle:VisibilityModifier") +public final class VariantHolder extends ExtensionHolder { + + public final int isSet = 1; + public int metadataStart; + public int metadataEnd; + public ArrowBuf metadataBuffer; + public int valueStart; + public int valueEnd; + public ArrowBuf valueBuffer; + + public VariantHolder() {} + + @Override + public boolean equals(Object obj) { + throw new UnsupportedOperationException(); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + throw new UnsupportedOperationException(); + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java b/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java new file mode 100644 index 000000000..115d9f46d --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class TestVariantType { + BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + @Test + void testConstants() { + assertNotNull(VariantType.INSTANCE); + } + + @Test + void testStorageType() { + VariantType type = VariantType.INSTANCE; + assertEquals(ArrowType.Struct.INSTANCE, type.storageType()); + assertInstanceOf(ArrowType.Struct.class, type.storageType()); + } + + @Test + void testExtensionName() { + VariantType type = VariantType.INSTANCE; + assertEquals("parquet.variant", type.extensionName()); + } + + @Test + void testExtensionEquals() { + VariantType type1 = VariantType.INSTANCE; + VariantType type2 = VariantType.INSTANCE; + + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testIsComplex() { + VariantType type = VariantType.INSTANCE; + assertFalse(type.isComplex()); + } + + @Test + void testSerialize() { + VariantType type = VariantType.INSTANCE; + String serialized = type.serialize(); + assertEquals("", serialized); + } + + @Test + void testDeserializeValid() { + VariantType type = VariantType.INSTANCE; + ArrowType storageType = ArrowType.Struct.INSTANCE; + + ArrowType deserialized = assertDoesNotThrow(() -> type.deserialize(storageType, "")); + assertInstanceOf(VariantType.class, deserialized); + assertEquals(VariantType.INSTANCE, deserialized); + } + + @Test + void testDeserializeInvalidStorageType() { + VariantType type = VariantType.INSTANCE; + ArrowType wrongStorageType = ArrowType.Utf8.INSTANCE; + + assertThrows(UnsupportedOperationException.class, () -> type.deserialize(wrongStorageType, "")); + } + + @Test + void testGetNewVector() { + VariantType type = VariantType.INSTANCE; + try (FieldVector vector = + type.getNewVector("variant_field", FieldType.nullable(type), allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("variant_field", vector.getField().getName()); + assertEquals(type, vector.getField().getType()); + } + } + + @Test + void testGetNewVectorWithNullableFieldType() { + VariantType type = VariantType.INSTANCE; + FieldType nullableFieldType = FieldType.nullable(type); + + try (FieldVector vector = type.getNewVector("nullable_variant", nullableFieldType, allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("nullable_variant", vector.getField().getName()); + assertTrue(vector.getField().isNullable()); + } + } + + @Test + void testGetNewVectorWithNonNullableFieldType() { + VariantType type = VariantType.INSTANCE; + FieldType nonNullableFieldType = FieldType.notNullable(type); + + try (FieldVector vector = + type.getNewVector("non_nullable_variant", nonNullableFieldType, allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("non_nullable_variant", vector.getField().getName()); + } + } + + @Test + void testIpcRoundTrip() { + VariantType type = VariantType.INSTANCE; + + Schema schema = new Schema(Collections.singletonList(Field.nullable("variant", type))); + byte[] serialized = schema.serializeAsMessage(); + Schema deserialized = Schema.deserializeMessage(ByteBuffer.wrap(serialized)); + assertEquals(schema, deserialized); + } + + @Test + void testVectorIpcRoundTrip() throws IOException { + VariantType type = VariantType.INSTANCE; + + try (FieldVector vector = type.getNewVector("field", FieldType.nullable(type), allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + VariantVector variantVector = (VariantVector) vector; + + byte[] metadata1 = new byte[] {1, 2, 3}; + byte[] value1 = new byte[] {4, 5, 6, 7}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + byte[] metadata2 = new byte[] {8, 9}; + byte[] value2 = new byte[] {10, 11, 12}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder1 = new NullableVariantHolder(); + holder1.isSet = 1; + holder1.metadataStart = 0; + holder1.metadataEnd = metadata1.length; + holder1.metadataBuffer = metadataBuf1; + holder1.valueStart = 0; + holder1.valueEnd = value1.length; + holder1.valueBuffer = valueBuf1; + + NullableVariantHolder holder2 = new NullableVariantHolder(); + holder2.isSet = 1; + holder2.metadataStart = 0; + holder2.metadataEnd = metadata2.length; + holder2.metadataBuffer = metadataBuf2; + holder2.valueStart = 0; + holder2.valueEnd = value2.length; + holder2.valueBuffer = valueBuf2; + + variantVector.setSafe(0, holder1); + variantVector.setNull(1); + variantVector.setSafe(2, holder2); + variantVector.setValueCount(3); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(variantVector)); + ArrowStreamWriter writer = + new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), baos)) { + writer.start(); + writer.writeBatch(); + } + + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), allocator)) { + assertTrue(reader.loadNextBatch()); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + assertEquals(3, root.getRowCount()); + assertEquals( + new Schema(Collections.singletonList(variantVector.getField())), root.getSchema()); + + VariantVector actual = assertInstanceOf(VariantVector.class, root.getVector("field")); + assertFalse(actual.isNull(0)); + assertTrue(actual.isNull(1)); + assertFalse(actual.isNull(2)); + + NullableVariantHolder result1 = new NullableVariantHolder(); + actual.get(0, result1); + assertEquals(1, result1.isSet); + assertEquals(metadata1.length, result1.metadataEnd - result1.metadataStart); + assertEquals(value1.length, result1.valueEnd - result1.valueStart); + + assertNull(actual.getObject(1)); + + NullableVariantHolder result2 = new NullableVariantHolder(); + actual.get(2, result2); + assertEquals(1, result2.isSet); + assertEquals(metadata2.length, result2.metadataEnd - result2.metadataStart); + assertEquals(value2.length, result2.valueEnd - result2.valueStart); + } + } + } + + @Test + void testSingleton() { + VariantType type1 = VariantType.INSTANCE; + VariantType type2 = VariantType.INSTANCE; + + // Same instance + assertSame(type1, type2); + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testExtensionTypeRegistry() { + // VariantType should be automatically registered via static initializer + ArrowType.ExtensionType registeredType = + ExtensionTypeRegistry.lookup(VariantType.EXTENSION_NAME); + assertNotNull(registeredType); + assertInstanceOf(VariantType.class, registeredType); + assertEquals(VariantType.INSTANCE, registeredType); + } + + @Test + void testFieldMetadata() { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + + FieldType fieldType = new FieldType(true, VariantType.INSTANCE, null, metadata); + try (VariantVector vector = new VariantVector("test", allocator)) { + Field field = new Field("test", fieldType, VariantVector.createVariantChildFields()); + + // Field metadata includes both custom metadata and extension type metadata + Map fieldMetadata = field.getMetadata(); + assertEquals("value1", fieldMetadata.get("key1")); + assertEquals("value2", fieldMetadata.get("key2")); + // Extension type metadata is also present + assertTrue(fieldMetadata.containsKey("ARROW:extension:name")); + assertTrue(fieldMetadata.containsKey("ARROW:extension:metadata")); + } + } + + @Test + void testFieldChildren() { + try (VariantVector vector = new VariantVector("test", allocator)) { + Field field = vector.getField(); + + assertNotNull(field.getChildren()); + assertEquals(2, field.getChildren().size()); + + Field metadataField = field.getChildren().get(0); + assertEquals(VariantVector.METADATA_VECTOR_NAME, metadataField.getName()); + assertEquals(ArrowType.Binary.INSTANCE, metadataField.getType()); + + Field valueField = field.getChildren().get(1); + assertEquals(VariantVector.VALUE_VECTOR_NAME, valueField.getName()); + assertEquals(ArrowType.Binary.INSTANCE, valueField.getType()); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java new file mode 100644 index 000000000..5774e67c6 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java @@ -0,0 +1,861 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.VariantReaderImpl; +import org.apache.arrow.vector.complex.impl.VariantWriterFactory; +import org.apache.arrow.vector.complex.impl.VariantWriterImpl; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for VariantVector, VariantWriterImpl, and VariantReaderImpl. */ +class TestVariantVector { + + private BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + private VariantHolder createHolder( + ArrowBuf metadataBuf, byte[] metadata, ArrowBuf valueBuf, byte[] value) { + VariantHolder holder = new VariantHolder(); + holder.metadataStart = 0; + holder.metadataEnd = metadata.length; + holder.metadataBuffer = metadataBuf; + holder.valueStart = 0; + holder.valueEnd = value.length; + holder.valueBuffer = valueBuf; + return holder; + } + + private NullableVariantHolder createNullableHolder( + ArrowBuf metadataBuf, byte[] metadata, ArrowBuf valueBuf, byte[] value) { + NullableVariantHolder holder = new NullableVariantHolder(); + holder.isSet = 1; + holder.metadataStart = 0; + holder.metadataEnd = metadata.length; + holder.metadataBuffer = metadataBuf; + holder.valueStart = 0; + holder.valueEnd = value.length; + holder.valueBuffer = valueBuf; + return holder; + } + + private NullableVariantHolder createNullHolder() { + NullableVariantHolder holder = new NullableVariantHolder(); + holder.isSet = 0; + return holder; + } + + // ========== Basic Vector Tests ========== + + @Test + void testVectorCreation() { + try (VariantVector vector = new VariantVector("test", allocator)) { + assertNotNull(vector); + assertEquals("test", vector.getField().getName()); + assertNotNull(vector.getMetadataVector()); + assertNotNull(vector.getValueVector()); + } + } + + @Test + void testSetAndGet() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6, 7}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + // Retrieve and verify + NullableVariantHolder result = new NullableVariantHolder(); + vector.get(0, result); + + assertEquals(1, result.isSet); + assertEquals(metadata.length, result.metadataEnd - result.metadataStart); + assertEquals(value.length, result.valueEnd - result.valueStart); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + } + } + + @Test + void testSetNull() { + try (VariantVector vector = new VariantVector("test", allocator)) { + NullableVariantHolder holder = createNullHolder(); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertTrue(vector.isNull(0)); + + NullableVariantHolder result = new NullableVariantHolder(); + vector.get(0, result); + assertEquals(0, result.isSet); + } + } + + @Test + void testMultipleValues() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + + byte[] metadata1 = new byte[] {1, 2}; + byte[] value1 = new byte[] {3, 4, 5}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + NullableVariantHolder holder1 = + createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1); + + byte[] metadata2 = new byte[] {6, 7, 8}; + byte[] value2 = new byte[] {9, 10}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder2 = + createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2); + + vector.setSafe(0, holder1); + vector.setSafe(1, holder2); + vector.setValueCount(2); + + // Verify first value + NullableVariantHolder result1 = new NullableVariantHolder(); + vector.get(0, result1); + assertEquals(1, result1.isSet); + + byte[] actualMetadata1 = new byte[metadata1.length]; + byte[] actualValue1 = new byte[value1.length]; + result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1); + result1.valueBuffer.getBytes(result1.valueStart, actualValue1); + assertArrayEquals(metadata1, actualMetadata1); + assertArrayEquals(value1, actualValue1); + + // Verify second value + NullableVariantHolder result2 = new NullableVariantHolder(); + vector.get(1, result2); + assertEquals(1, result2.isSet); + + byte[] actualMetadata2 = new byte[metadata2.length]; + byte[] actualValue2 = new byte[value2.length]; + result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2); + result2.valueBuffer.getBytes(result2.valueStart, actualValue2); + assertArrayEquals(metadata2, actualMetadata2); + assertArrayEquals(value2, actualValue2); + } + } + + @Test + void testNonNullableHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + VariantHolder holder = createHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertFalse(vector.isNull(0)); + + NullableVariantHolder result = new NullableVariantHolder(); + vector.get(0, result); + assertEquals(1, result.isSet); + } + } + + // ========== Writer Tests ========== + + @Test + void testWriteWithVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + VariantHolder holder = createHolder(metadataBuf, metadata, valueBuf, value); + + writer.setPosition(0); + writer.write(holder); + + assertEquals(1, vector.getValueCount()); + assertFalse(vector.isNull(0)); + } + } + + @Test + void testWriteWithNullableVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + writer.setPosition(0); + writer.write(holder); + + assertEquals(1, vector.getValueCount()); + assertFalse(vector.isNull(0)); + } + } + + @Test + void testWriteWithNullableVariantHolderNull() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector)) { + + NullableVariantHolder holder = createNullHolder(); + + writer.setPosition(0); + writer.write(holder); + + assertEquals(1, vector.getValueCount()); + assertTrue(vector.isNull(0)); + } + } + + @Test + void testWriteExtensionWithUnsupportedType() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector)) { + + writer.setPosition(0); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); + + assertTrue(exception.getMessage().contains("Unsupported type for Variant")); + } + } + + @Test + void testWriteWithUnsupportedHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + VariantWriterImpl writer = new VariantWriterImpl(vector)) { + + ExtensionHolder unsupportedHolder = new ExtensionHolder() {}; + + writer.setPosition(0); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> writer.write(unsupportedHolder)); + + assertTrue(exception.getMessage().contains("Unsupported type for Variant")); + } + } + + // ========== Reader Tests ========== + + @Test + void testReaderReadWithNullableVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + reader.setPosition(0); + + NullableVariantHolder result = new NullableVariantHolder(); + reader.read(result); + + assertEquals(1, result.isSet); + assertEquals(metadata.length, result.metadataEnd - result.metadataStart); + assertEquals(value.length, result.valueEnd - result.valueStart); + } + } + + @Test + void testReaderReadWithNullableVariantHolderNull() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + reader.setPosition(0); + + NullableVariantHolder holder = new NullableVariantHolder(); + reader.read(holder); + + assertEquals(0, holder.isSet); + } + } + + @Test + void testReaderIsSet() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setNull(1); + vector.setValueCount(2); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + + reader.setPosition(0); + assertTrue(reader.isSet()); + + reader.setPosition(1); + assertFalse(reader.isSet()); + } + } + + @Test + void testReaderGetMinorType() { + try (VariantVector vector = new VariantVector("test", allocator)) { + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + assertEquals(vector.getMinorType(), reader.getMinorType()); + } + } + + @Test + void testReaderGetField() { + try (VariantVector vector = new VariantVector("test", allocator)) { + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + assertEquals(vector.getField(), reader.getField()); + assertEquals("test", reader.getField().getName()); + } + } + + @Test + void testReaderReadWithNonNullableVariantHolder() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + VariantReaderImpl reader = (VariantReaderImpl) vector.getReader(); + reader.setPosition(0); + + VariantHolder result = new VariantHolder(); + reader.read(result); + + // Verify the data was read correctly + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + assertEquals(1, result.isSet); + } + } + + // ========== Factory Tests ========== + + @Test + void testVariantWriterFactory() { + VariantWriterFactory factory = new VariantWriterFactory(); + + try (VariantVector vector = new VariantVector("test", allocator)) { + FieldWriter writer = factory.getWriterImpl(vector); + assertNotNull(writer); + assertTrue(writer instanceof VariantWriterImpl); + } + } + + @Test + void testVariantWriterFactoryWithNonVariantVector() { + VariantWriterFactory factory = new VariantWriterFactory(); + + // Use UuidVector as a different extension type + try (UuidVector uuidVector = new UuidVector("uuid", allocator)) { + FieldWriter writer = factory.getWriterImpl(uuidVector); + assertNull(writer); + } + } + + // ========== Transfer Pair Tests ========== + + @Test + void testTransferPair() { + try (VariantVector fromVector = new VariantVector("from", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5, 6, 7}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.getTransferPair(allocator); + VariantVector toVector = (VariantVector) transferPair.getTo(); + + transferPair.transfer(); + + assertEquals(0, fromVector.getValueCount()); + assertEquals(1, toVector.getValueCount()); + + NullableVariantHolder result = new NullableVariantHolder(); + toVector.get(0, result); + assertEquals(1, result.isSet); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + + toVector.close(); + } + } + + @Test + void testSplitAndTransfer() { + try (VariantVector fromVector = new VariantVector("from", allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10); + ArrowBuf metadataBuf3 = allocator.buffer(10); + ArrowBuf valueBuf3 = allocator.buffer(10)) { + + byte[] metadata1 = new byte[] {1}; + byte[] value1 = new byte[] {2, 3}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + byte[] metadata2 = new byte[] {4, 5}; + byte[] value2 = new byte[] {6}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + byte[] metadata3 = new byte[] {7, 8, 9}; + byte[] value3 = new byte[] {10, 11, 12}; + metadataBuf3.setBytes(0, metadata3); + valueBuf3.setBytes(0, value3); + + NullableVariantHolder holder1 = + createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1); + NullableVariantHolder holder2 = + createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2); + NullableVariantHolder holder3 = + createNullableHolder(metadataBuf3, metadata3, valueBuf3, value3); + + fromVector.setSafe(0, holder1); + fromVector.setSafe(1, holder2); + fromVector.setSafe(2, holder3); + fromVector.setValueCount(3); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.getTransferPair(allocator); + VariantVector toVector = (VariantVector) transferPair.getTo(); + + // Split and transfer indices 1-2 (middle and last) + transferPair.splitAndTransfer(1, 2); + + assertEquals(2, toVector.getValueCount()); + + // Verify transferred values + NullableVariantHolder result1 = new NullableVariantHolder(); + toVector.get(0, result1); + assertEquals(1, result1.isSet); + + byte[] actualMetadata1 = new byte[metadata2.length]; + byte[] actualValue1 = new byte[value2.length]; + result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1); + result1.valueBuffer.getBytes(result1.valueStart, actualValue1); + assertArrayEquals(metadata2, actualMetadata1); + assertArrayEquals(value2, actualValue1); + + NullableVariantHolder result2 = new NullableVariantHolder(); + toVector.get(1, result2); + assertEquals(1, result2.isSet); + + byte[] actualMetadata2 = new byte[metadata3.length]; + byte[] actualValue2 = new byte[value3.length]; + result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2); + result2.valueBuffer.getBytes(result2.valueStart, actualValue2); + assertArrayEquals(metadata3, actualMetadata2); + assertArrayEquals(value3, actualValue2); + + toVector.close(); + } + } + + @Test + void testCopyValueSafe() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.makeTransferPair(toVector); + + transferPair.copyValueSafe(0, 0); + toVector.setValueCount(1); + + // Verify the value was copied + NullableVariantHolder result = new NullableVariantHolder(); + toVector.get(0, result); + assertEquals(1, result.isSet); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + + // Original vector should still have the value + NullableVariantHolder originalResult = new NullableVariantHolder(); + fromVector.get(0, originalResult); + assertEquals(1, originalResult.isSet); + } + } + + @Test + void testGetTransferPairWithField() { + try (VariantVector fromVector = new VariantVector("from", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + org.apache.arrow.vector.util.TransferPair transferPair = + fromVector.getTransferPair(fromVector.getField(), allocator); + VariantVector toVector = (VariantVector) transferPair.getTo(); + + transferPair.transfer(); + + assertEquals(1, toVector.getValueCount()); + assertEquals(fromVector.getField().getName(), toVector.getField().getName()); + + toVector.close(); + } + } + + // ========== Copy Operations Tests ========== + + @Test + void testCopyFrom() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2, 3}; + byte[] value = new byte[] {4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setValueCount(1); + + toVector.allocateNew(); + toVector.copyFrom(0, 0, fromVector); + toVector.setValueCount(1); + + NullableVariantHolder result = new NullableVariantHolder(); + toVector.get(0, result); + assertEquals(1, result.isSet); + + byte[] actualMetadata = new byte[metadata.length]; + byte[] actualValue = new byte[value.length]; + result.metadataBuffer.getBytes(result.metadataStart, actualMetadata); + result.valueBuffer.getBytes(result.valueStart, actualValue); + + assertArrayEquals(metadata, actualMetadata); + assertArrayEquals(value, actualValue); + } + } + + @Test + void testCopyFromSafe() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + + byte[] metadata1 = new byte[] {1}; + byte[] value1 = new byte[] {2, 3}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + NullableVariantHolder holder1 = + createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1); + + byte[] metadata2 = new byte[] {4, 5}; + byte[] value2 = new byte[] {6}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder2 = + createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2); + + fromVector.setSafe(0, holder1); + fromVector.setSafe(1, holder2); + fromVector.setValueCount(2); + + // Copy without pre-allocating toVector + for (int i = 0; i < 2; i++) { + toVector.copyFromSafe(i, i, fromVector); + } + toVector.setValueCount(2); + + // Verify both values + NullableVariantHolder result1 = new NullableVariantHolder(); + toVector.get(0, result1); + assertEquals(1, result1.isSet); + + byte[] actualMetadata1 = new byte[metadata1.length]; + byte[] actualValue1 = new byte[value1.length]; + result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1); + result1.valueBuffer.getBytes(result1.valueStart, actualValue1); + assertArrayEquals(metadata1, actualMetadata1); + assertArrayEquals(value1, actualValue1); + + NullableVariantHolder result2 = new NullableVariantHolder(); + toVector.get(1, result2); + assertEquals(1, result2.isSet); + + byte[] actualMetadata2 = new byte[metadata2.length]; + byte[] actualValue2 = new byte[value2.length]; + result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2); + result2.valueBuffer.getBytes(result2.valueStart, actualValue2); + assertArrayEquals(metadata2, actualMetadata2); + assertArrayEquals(value2, actualValue2); + } + } + + @Test + void testCopyFromWithNulls() { + try (VariantVector fromVector = new VariantVector("from", allocator); + VariantVector toVector = new VariantVector("to", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + fromVector.setSafe(0, holder); + fromVector.setNull(1); + fromVector.setSafe(2, holder); + fromVector.setValueCount(3); + + toVector.allocateNew(); + for (int i = 0; i < 3; i++) { + toVector.copyFromSafe(i, i, fromVector); + } + toVector.setValueCount(3); + + assertFalse(toVector.isNull(0)); + assertTrue(toVector.isNull(1)); + assertFalse(toVector.isNull(2)); + } + } + + // ========== GetObject Tests ========== + + @Test + void testGetObject() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1, 2}; + byte[] value = new byte[] {3, 4, 5}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + Object obj = vector.getObject(0); + assertNotNull(obj); + } + } + + @Test + void testGetObjectNull() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setNull(0); + vector.setValueCount(1); + + Object obj = vector.getObject(0); + assertNull(obj); + } + } + + // ========== Allocate and Capacity Tests ========== + + @Test + void testAllocateNew() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.allocateNew(); + assertTrue(vector.getValueCapacity() > 0); + } + } + + @Test + void testSetInitialCapacity() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setInitialCapacity(100); + vector.allocateNew(); + assertTrue(vector.getValueCapacity() >= 100); + } + } + + @Test + void testClearAndReuse() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertFalse(vector.isNull(0)); + + vector.clear(); + vector.allocateNew(); + + // After clear, vector should be empty + assertEquals(0, vector.getValueCount()); + } + } +} From 27ee8463f72c747ead413c87ee6365b9292175a0 Mon Sep 17 00:00:00 2001 From: Tamas Mate Date: Thu, 8 Jan 2026 13:45:28 +0100 Subject: [PATCH 2/5] rebase on Add ExtensionTypeWriterFactory to TransferPair --- .../complex/impl/VariantWriterFactory.java | 45 ------------------- .../arrow/vector/extension/VariantType.java | 8 ++++ .../vector/holders/NullableVariantHolder.java | 7 +++ .../arrow/vector/holders/VariantHolder.java | 7 +++ .../arrow/vector/TestVariantVector.java | 36 ++++----------- 5 files changed, 31 insertions(+), 72 deletions(-) delete mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java deleted file mode 100644 index 6d41b890e..000000000 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.extension.VariantVector; - -/** - * Factory for creating {@link VariantWriterImpl} instances. - * - *

This factory is used to create writers for Variant extension type vectors. - * - * @see VariantWriterImpl - * @see org.apache.arrow.vector.extension.VariantType - */ -public class VariantWriterFactory implements ExtensionTypeWriterFactory { - - /** - * Creates a writer implementation for the given extension type vector. - * - * @param extensionTypeVector the vector to create a writer for - * @return a {@link VariantWriterImpl} if the vector is a {@link VariantVector}, null otherwise - */ - @Override - public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { - if (extensionTypeVector instanceof VariantVector) { - return new VariantWriterImpl((VariantVector) extensionTypeVector); - } - return null; - } -} diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java b/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java index cf083fc49..d8e8b93b6 100644 --- a/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java +++ b/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java @@ -18,6 +18,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.impl.VariantWriterImpl; +import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; @@ -77,4 +80,9 @@ public boolean isComplex() { // and value) return false; } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new VariantWriterImpl((VariantVector) vector); + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java index 6e80ce341..7076f587e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java @@ -17,6 +17,8 @@ package org.apache.arrow.vector.holders; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.vector.types.pojo.ArrowType; @SuppressWarnings("checkstyle:VisibilityModifier") public final class NullableVariantHolder extends ExtensionHolder { @@ -45,4 +47,9 @@ public int hashCode() { public String toString() { throw new UnsupportedOperationException(); } + + @Override + public ArrowType type() { + return VariantType.INSTANCE; + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java index 1cf90f9c8..e82b49e08 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java @@ -17,6 +17,8 @@ package org.apache.arrow.vector.holders; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.vector.types.pojo.ArrowType; @SuppressWarnings("checkstyle:VisibilityModifier") public final class VariantHolder extends ExtensionHolder { @@ -45,4 +47,9 @@ public int hashCode() { public String toString() { throw new UnsupportedOperationException(); } + + @Override + public ArrowType type() { + return VariantType.INSTANCE; + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java index 5774e67c6..2539d9311 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java @@ -28,13 +28,13 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.complex.impl.VariantReaderImpl; -import org.apache.arrow.vector.complex.impl.VariantWriterFactory; import org.apache.arrow.vector.complex.impl.VariantWriterImpl; -import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.extension.VariantType; import org.apache.arrow.vector.extension.VariantVector; import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.holders.VariantHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -305,7 +305,13 @@ void testWriteWithUnsupportedHolder() { try (VariantVector vector = new VariantVector("test", allocator); VariantWriterImpl writer = new VariantWriterImpl(vector)) { - ExtensionHolder unsupportedHolder = new ExtensionHolder() {}; + ExtensionHolder unsupportedHolder = + new ExtensionHolder() { + @Override + public ArrowType type() { + return VariantType.INSTANCE; + } + }; writer.setPosition(0); @@ -440,30 +446,6 @@ void testReaderReadWithNonNullableVariantHolder() { } } - // ========== Factory Tests ========== - - @Test - void testVariantWriterFactory() { - VariantWriterFactory factory = new VariantWriterFactory(); - - try (VariantVector vector = new VariantVector("test", allocator)) { - FieldWriter writer = factory.getWriterImpl(vector); - assertNotNull(writer); - assertTrue(writer instanceof VariantWriterImpl); - } - } - - @Test - void testVariantWriterFactoryWithNonVariantVector() { - VariantWriterFactory factory = new VariantWriterFactory(); - - // Use UuidVector as a different extension type - try (UuidVector uuidVector = new UuidVector("uuid", allocator)) { - FieldWriter writer = factory.getWriterImpl(uuidVector); - assertNull(writer); - } - } - // ========== Transfer Pair Tests ========== @Test From 3a0d92747cf3ee2f012f4e1efe7a073795545c6e Mon Sep 17 00:00:00 2001 From: Tamas Mate Date: Tue, 27 Jan 2026 14:31:28 +0100 Subject: [PATCH 3/5] rebase, introduce arrow-variant module, add complex tests --- arrow-variant/pom.xml | 62 +++ arrow-variant/src/main/java/module-info.java | 24 + .../apache/arrow/vector/variant/Variant.java | 205 ++++++++ .../arrow/vector/variant/TestVariant.java | 439 ++++++++++++++++++ bom/pom.xml | 5 + flight/flight-sql-jdbc-driver/pom.xml | 5 + pom.xml | 7 + vector/pom.xml | 11 + vector/src/main/java/module-info.java | 1 + .../complex/impl/VariantWriterImpl.java | 43 +- .../arrow/vector/extension/VariantVector.java | 38 +- .../apache/arrow/vector/TestListVector.java | 164 +++++++ .../apache/arrow/vector/TestMapVector.java | 92 ++++ .../arrow/vector/TestVariantVector.java | 3 + .../vector/types/pojo/TestExtensionType.java | 194 ++++++++ 15 files changed, 1287 insertions(+), 6 deletions(-) create mode 100644 arrow-variant/pom.xml create mode 100644 arrow-variant/src/main/java/module-info.java create mode 100644 arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java create mode 100644 arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java diff --git a/arrow-variant/pom.xml b/arrow-variant/pom.xml new file mode 100644 index 000000000..5169e9bdc --- /dev/null +++ b/arrow-variant/pom.xml @@ -0,0 +1,62 @@ + + + + 4.0.0 + + org.apache.arrow + arrow-java-root + 19.0.0-SNAPSHOT + + arrow-variant + Arrow Variant + Arrow Variant type support. + + + + org.apache.arrow + arrow-memory-core + + + org.apache.parquet + parquet-variant + + + org.apache.arrow + arrow-memory-unsafe + test + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/arrow-variant/src/main/java/module-info.java b/arrow-variant/src/main/java/module-info.java new file mode 100644 index 000000000..1df19e507 --- /dev/null +++ b/arrow-variant/src/main/java/module-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@SuppressWarnings("requires-automatic") +module org.apache.arrow.variant { + exports org.apache.arrow.vector.variant; + + requires org.apache.arrow.memory.core; + requires parquet.variant; +} diff --git a/arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java b/arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java new file mode 100644 index 000000000..895ce5ae3 --- /dev/null +++ b/arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.variant; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.Objects; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; + +/** + * Wrapper around parquet-variant's Variant implementation. + * + *

This wrapper exists to isolate the parquet-variant dependency from Arrow's public API, + * allowing the vector module to expose variant functionality without requiring users to depend on + * parquet-variant directly. It also ensures that nested variant values (from arrays and objects) + * are consistently wrapped. + */ +public class Variant { + + private final org.apache.parquet.variant.Variant delegate; + + /** Creates a Variant from raw metadata and value byte arrays. */ + public Variant(byte[] metadata, byte[] value) { + this.delegate = new org.apache.parquet.variant.Variant(value, metadata); + } + + /** Creates a Variant by copying data from ArrowBuf instances. */ + public Variant( + ArrowBuf metadataBuffer, + int metadataStart, + int metadataEnd, + ArrowBuf valueBuffer, + int valueStart, + int valueEnd) { + byte[] metadata = new byte[metadataEnd - metadataStart]; + byte[] value = new byte[valueEnd - valueStart]; + metadataBuffer.getBytes(metadataStart, metadata); + valueBuffer.getBytes(valueStart, value); + this.delegate = new org.apache.parquet.variant.Variant(value, metadata); + } + + private Variant(org.apache.parquet.variant.Variant delegate) { + this.delegate = delegate; + } + + public ByteBuffer getValueBuffer() { + return delegate.getValueBuffer(); + } + + public ByteBuffer getMetadataBuffer() { + return delegate.getMetadataBuffer(); + } + + public boolean getBoolean() { + return delegate.getBoolean(); + } + + public byte getByte() { + return delegate.getByte(); + } + + public short getShort() { + return delegate.getShort(); + } + + public int getInt() { + return delegate.getInt(); + } + + public long getLong() { + return delegate.getLong(); + } + + public double getDouble() { + return delegate.getDouble(); + } + + public BigDecimal getDecimal() { + return delegate.getDecimal(); + } + + public float getFloat() { + return delegate.getFloat(); + } + + public ByteBuffer getBinary() { + return delegate.getBinary(); + } + + public UUID getUUID() { + return delegate.getUUID(); + } + + public String getString() { + return delegate.getString(); + } + + public Type getType() { + return Type.fromParquet(delegate.getType()); + } + + public int numObjectElements() { + return delegate.numObjectElements(); + } + + public Variant getFieldByKey(String key) { + org.apache.parquet.variant.Variant result = delegate.getFieldByKey(key); + return result != null ? wrap(result) : null; + } + + public ObjectField getFieldAtIndex(int idx) { + org.apache.parquet.variant.Variant.ObjectField field = delegate.getFieldAtIndex(idx); + return new ObjectField(field.key, wrap(field.value)); + } + + public int numArrayElements() { + return delegate.numArrayElements(); + } + + public Variant getElementAtIndex(int index) { + org.apache.parquet.variant.Variant result = delegate.getElementAtIndex(index); + return result != null ? wrap(result) : null; + } + + private static Variant wrap(org.apache.parquet.variant.Variant parquetVariant) { + return new Variant(parquetVariant); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Variant variant = (Variant) o; + return delegate.getMetadataBuffer().equals(variant.delegate.getMetadataBuffer()) + && delegate.getValueBuffer().equals(variant.delegate.getValueBuffer()); + } + + @Override + public int hashCode() { + return Objects.hash(delegate.getMetadataBuffer(), delegate.getValueBuffer()); + } + + @Override + public String toString() { + return "Variant{type=" + getType() + '}'; + } + + public enum Type { + OBJECT, + ARRAY, + NULL, + BOOLEAN, + BYTE, + SHORT, + INT, + LONG, + STRING, + DOUBLE, + DECIMAL4, + DECIMAL8, + DECIMAL16, + DATE, + TIMESTAMP_TZ, + TIMESTAMP_NTZ, + FLOAT, + BINARY, + TIME, + TIMESTAMP_NANOS_TZ, + TIMESTAMP_NANOS_NTZ, + UUID; + + static Type fromParquet(org.apache.parquet.variant.Variant.Type parquetType) { + return Type.valueOf(parquetType.name()); + } + } + + public static final class ObjectField { + public final String key; + public final Variant value; + + public ObjectField(String key, Variant value) { + this.key = key; + this.value = value; + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java b/arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java new file mode 100644 index 000000000..9c66242cc --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java @@ -0,0 +1,439 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.variant; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.parquet.variant.VariantBuilder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestVariant { + + private BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + static Variant buildVariant(VariantBuilder builder) { + org.apache.parquet.variant.Variant parquetVariant = builder.build(); + ByteBuffer valueBuf = parquetVariant.getValueBuffer(); + ByteBuffer metaBuf = parquetVariant.getMetadataBuffer(); + byte[] valueBytes = new byte[valueBuf.remaining()]; + byte[] metaBytes = new byte[metaBuf.remaining()]; + valueBuf.get(valueBytes); + metaBuf.get(metaBytes); + return new Variant(metaBytes, valueBytes); + } + + public static Variant variantString(String value) { + VariantBuilder builder = new VariantBuilder(); + builder.appendString(value); + return buildVariant(builder); + } + + @Test + void testConstructionWithArrowBuf() { + VariantBuilder builder = new VariantBuilder(); + builder.appendInt(42); + Variant source = buildVariant(builder); + int metaLen = source.getMetadataBuffer().remaining(); + int valueLen = source.getValueBuffer().remaining(); + + try (ArrowBuf metadataArrowBuf = allocator.buffer(metaLen + 2); + ArrowBuf valueArrowBuf = allocator.buffer(valueLen + 3)) { + metadataArrowBuf.setBytes(2, source.getMetadataBuffer()); + valueArrowBuf.setBytes(3, source.getValueBuffer()); + + Variant variant = + new Variant(metadataArrowBuf, 2, 2 + metaLen, valueArrowBuf, 3, 3 + valueLen); + + assertEquals(Variant.Type.INT, variant.getType()); + assertEquals(42, variant.getInt()); + } + } + + @Test + void testNullType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendNull(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.NULL, variant.getType()); + } + + @Test + void testBooleanType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendBoolean(true); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.BOOLEAN, variant.getType()); + assertTrue(variant.getBoolean()); + + builder = new VariantBuilder(); + builder.appendBoolean(false); + variant = buildVariant(builder); + + assertEquals(Variant.Type.BOOLEAN, variant.getType()); + assertFalse(variant.getBoolean()); + } + + @Test + void testByteType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendByte((byte) 42); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.BYTE, variant.getType()); + assertEquals((byte) 42, variant.getByte()); + } + + @Test + void testShortType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendShort((short) 1234); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.SHORT, variant.getType()); + assertEquals((short) 1234, variant.getShort()); + } + + @Test + void testIntType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendInt(123456); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.INT, variant.getType()); + assertEquals(123456, variant.getInt()); + } + + @Test + void testLongType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendLong(9876543210L); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.LONG, variant.getType()); + assertEquals(9876543210L, variant.getLong()); + } + + @Test + void testFloatType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendFloat(3.14f); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.FLOAT, variant.getType()); + assertEquals(3.14f, variant.getFloat(), 0.001f); + } + + @Test + void testDoubleType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendDouble(3.14159265359); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.DOUBLE, variant.getType()); + assertEquals(3.14159265359, variant.getDouble(), 0.0000001); + } + + @Test + void testStringType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendString("hello world"); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.STRING, variant.getType()); + assertEquals("hello world", variant.getString()); + } + + @Test + void testDecimalType() { + VariantBuilder builder = new VariantBuilder(); + builder.appendDecimal(new BigDecimal("123.456")); + Variant variant = buildVariant(builder); + + assertTrue( + variant.getType() == Variant.Type.DECIMAL4 + || variant.getType() == Variant.Type.DECIMAL8 + || variant.getType() == Variant.Type.DECIMAL16); + assertEquals(new BigDecimal("123.456"), variant.getDecimal()); + } + + @Test + void testBinaryType() { + VariantBuilder builder = new VariantBuilder(); + byte[] data = new byte[] {1, 2, 3, 4, 5}; + builder.appendBinary(ByteBuffer.wrap(data)); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.BINARY, variant.getType()); + ByteBuffer result = variant.getBinary(); + byte[] resultBytes = new byte[result.remaining()]; + result.get(resultBytes); + assertArrayEquals(data, resultBytes); + } + + @Test + void testUuidType() { + VariantBuilder builder = new VariantBuilder(); + UUID uuid = UUID.randomUUID(); + builder.appendUUID(uuid); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.UUID, variant.getType()); + assertEquals(uuid, variant.getUUID()); + } + + @Test + void testDateType() { + VariantBuilder builder = new VariantBuilder(); + int daysSinceEpoch = 19000; + builder.appendDate(daysSinceEpoch); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.DATE, variant.getType()); + } + + @Test + void testTimestampTzType() { + VariantBuilder builder = new VariantBuilder(); + long micros = System.currentTimeMillis() * 1000; + builder.appendTimestampTz(micros); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.TIMESTAMP_TZ, variant.getType()); + } + + @Test + void testTimestampNtzType() { + VariantBuilder builder = new VariantBuilder(); + long micros = System.currentTimeMillis() * 1000; + builder.appendTimestampNtz(micros); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.TIMESTAMP_NTZ, variant.getType()); + } + + @Test + void testTimeType() { + VariantBuilder builder = new VariantBuilder(); + long micros = 12345678L; + builder.appendTime(micros); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.TIME, variant.getType()); + } + + @Test + void testObjectType() { + VariantBuilder builder = new VariantBuilder(); + var objBuilder = builder.startObject(); + objBuilder.appendKey("name"); + objBuilder.appendString("test"); + objBuilder.appendKey("value"); + objBuilder.appendInt(42); + builder.endObject(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.OBJECT, variant.getType()); + assertEquals(2, variant.numObjectElements()); + + Variant nameField = variant.getFieldByKey("name"); + assertNotNull(nameField); + assertEquals(Variant.Type.STRING, nameField.getType()); + assertEquals("test", nameField.getString()); + + Variant valueField = variant.getFieldByKey("value"); + assertNotNull(valueField); + assertEquals(Variant.Type.INT, valueField.getType()); + assertEquals(42, valueField.getInt()); + + assertNull(variant.getFieldByKey("nonexistent")); + + // Empty object + builder = new VariantBuilder(); + builder.startObject(); + builder.endObject(); + Variant emptyObj = buildVariant(builder); + assertEquals(Variant.Type.OBJECT, emptyObj.getType()); + assertEquals(0, emptyObj.numObjectElements()); + } + + @Test + void testObjectFieldAtIndex() { + VariantBuilder builder = new VariantBuilder(); + var objBuilder = builder.startObject(); + objBuilder.appendKey("alpha"); + objBuilder.appendInt(1); + objBuilder.appendKey("beta"); + objBuilder.appendInt(2); + builder.endObject(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.OBJECT, variant.getType()); + assertEquals(2, variant.numObjectElements()); + + Variant.ObjectField field0 = variant.getFieldAtIndex(0); + assertNotNull(field0); + assertNotNull(field0.key); + assertNotNull(field0.value); + + Variant.ObjectField field1 = variant.getFieldAtIndex(1); + assertNotNull(field1); + assertNotNull(field1.key); + assertNotNull(field1.value); + } + + @Test + void testArrayType() { + VariantBuilder builder = new VariantBuilder(); + var arrayBuilder = builder.startArray(); + arrayBuilder.appendInt(1); + arrayBuilder.appendInt(2); + arrayBuilder.appendInt(3); + builder.endArray(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.ARRAY, variant.getType()); + assertEquals(3, variant.numArrayElements()); + + Variant elem0 = variant.getElementAtIndex(0); + assertNotNull(elem0); + assertEquals(Variant.Type.INT, elem0.getType()); + assertEquals(1, elem0.getInt()); + + Variant elem1 = variant.getElementAtIndex(1); + assertEquals(2, elem1.getInt()); + + Variant elem2 = variant.getElementAtIndex(2); + assertEquals(3, elem2.getInt()); + + assertNull(variant.getElementAtIndex(-1)); + assertNull(variant.getElementAtIndex(3)); + + // Empty array + builder = new VariantBuilder(); + builder.startArray(); + builder.endArray(); + Variant emptyArr = buildVariant(builder); + assertEquals(Variant.Type.ARRAY, emptyArr.getType()); + assertEquals(0, emptyArr.numArrayElements()); + } + + @Test + void testNestedStructure() { + VariantBuilder builder = new VariantBuilder(); + var objBuilder = builder.startObject(); + objBuilder.appendKey("items"); + var arrayBuilder = objBuilder.startArray(); + arrayBuilder.appendString("a"); + arrayBuilder.appendString("b"); + objBuilder.endArray(); + builder.endObject(); + Variant variant = buildVariant(builder); + + assertEquals(Variant.Type.OBJECT, variant.getType()); + Variant items = variant.getFieldByKey("items"); + assertNotNull(items); + assertEquals(Variant.Type.ARRAY, items.getType()); + assertEquals(2, items.numArrayElements()); + assertEquals("a", items.getElementAtIndex(0).getString()); + assertEquals("b", items.getElementAtIndex(1).getString()); + } + + @Test + void testEquals() { + VariantBuilder builder1 = new VariantBuilder(); + builder1.appendString("test"); + Variant variant1 = buildVariant(builder1); + + VariantBuilder builder2 = new VariantBuilder(); + builder2.appendString("test"); + Variant variant2 = buildVariant(builder2); + + VariantBuilder builder3 = new VariantBuilder(); + builder3.appendString("different"); + Variant variant3 = buildVariant(builder3); + + assertEquals(variant1, variant1); + assertEquals(variant1, variant2); + assertNotEquals(variant1, variant3); + assertNotEquals(variant1, null); + assertNotEquals(variant1, "not a variant"); + } + + @Test + void testHashCode() { + VariantBuilder builder1 = new VariantBuilder(); + builder1.appendInt(42); + Variant variant1 = buildVariant(builder1); + + VariantBuilder builder2 = new VariantBuilder(); + builder2.appendInt(42); + Variant variant2 = buildVariant(builder2); + + assertEquals(variant1.hashCode(), variant2.hashCode()); + } + + @Test + void testToString() { + VariantBuilder builder = new VariantBuilder(); + builder.appendString("test"); + Variant variant = buildVariant(builder); + + String str = variant.toString(); + assertNotNull(str); + assertTrue(str.contains("type=")); + } + + @Test + void testTypeEnumsMatch() { + for (Variant.Type arrowType : Variant.Type.values()) { + org.apache.parquet.variant.Variant.Type parquetType = + org.apache.parquet.variant.Variant.Type.valueOf(arrowType.name()); + assertEquals(arrowType, Variant.Type.fromParquet(parquetType)); + } + for (org.apache.parquet.variant.Variant.Type parquetType : + org.apache.parquet.variant.Variant.Type.values()) { + Variant.Type arrowType = Variant.Type.valueOf(parquetType.name()); + assertEquals(parquetType.name(), arrowType.name()); + } + } +} diff --git a/bom/pom.xml b/bom/pom.xml index 9efde5324..b631f9366 100644 --- a/bom/pom.xml +++ b/bom/pom.xml @@ -194,6 +194,11 @@ under the License. arrow-tools ${project.version} + + org.apache.arrow + arrow-variant + ${project.version} + diff --git a/flight/flight-sql-jdbc-driver/pom.xml b/flight/flight-sql-jdbc-driver/pom.xml index 559c42597..6036bde2d 100644 --- a/flight/flight-sql-jdbc-driver/pom.xml +++ b/flight/flight-sql-jdbc-driver/pom.xml @@ -93,6 +93,11 @@ under the License. com.google.errorprone:error_prone_annotations com.google.code.findbugs:jsr305 com.google.j2objc:j2objc-annotations + + org.apache.parquet:parquet-common + org.apache.parquet:parquet-column + org.apache.parquet:parquet-encoding + org.apache.parquet:parquet-format-structures diff --git a/pom.xml b/pom.xml index d64df1ade..38186e6e7 100644 --- a/pom.xml +++ b/pom.xml @@ -68,6 +68,7 @@ under the License. bom format memory + arrow-variant vector tools adapter/jdbc @@ -104,6 +105,7 @@ under the License. 3.4.2 25.2.10 1.12.1 + 1.17.0 5.17.0 2 @@ -241,6 +243,11 @@ under the License. logback-core ${logback.version} + + org.apache.parquet + parquet-variant + ${dep.parquet.version} + diff --git a/vector/pom.xml b/vector/pom.xml index 89e977900..8172802d1 100644 --- a/vector/pom.xml +++ b/vector/pom.xml @@ -37,6 +37,17 @@ under the License. org.apache.arrow arrow-memory-core + + org.apache.arrow + arrow-variant + + + org.apache.arrow + arrow-variant + ${project.version} + test-jar + test + org.immutables value-annotations diff --git a/vector/src/main/java/module-info.java b/vector/src/main/java/module-info.java index 8ba1b3579..46e95ff61 100644 --- a/vector/src/main/java/module-info.java +++ b/vector/src/main/java/module-info.java @@ -46,6 +46,7 @@ requires jdk.unsupported; requires org.apache.arrow.format; requires org.apache.arrow.memory.core; + requires org.apache.arrow.variant; requires org.apache.commons.codec; requires org.slf4j; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java index ac5250283..89f393ad8 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java @@ -16,10 +16,13 @@ */ package org.apache.arrow.vector.complex.impl; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.extension.VariantVector; import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.holders.VariantHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.variant.Variant; /** * Writer implementation for VARIANT extension type vectors. @@ -42,24 +45,54 @@ public VariantWriterImpl(VariantVector vector) { } /** - * Writes an extension type value to the vector. + * Writes an extension type or variant value to the vector. * - *

This method validates that the object is an {@link ExtensionHolder} and delegates to {@link - * #write(ExtensionHolder)}. + *

This method handles {@link ExtensionHolder} by delegating to {@link #write(ExtensionHolder)} + * and {@link Variant} by delegating to {@link #writeVariant(Variant)}. * - * @param object the object to write, must be an {@link ExtensionHolder} - * @throws IllegalArgumentException if the object is not an {@link ExtensionHolder} + * @param object the object to write, must be an {@link ExtensionHolder} or {@link Variant} + * @throws IllegalArgumentException if the object is not an {@link ExtensionHolder} or {@link + * Variant} */ @Override public void writeExtension(Object object) { if (object instanceof ExtensionHolder) { write((ExtensionHolder) object); + } else if (object instanceof Variant) { + writeVariant((Variant) object); } else { throw new IllegalArgumentException( String.format(UNSUPPORTED_TYPE_TEMPLATE, object.getClass().getName())); } } + private void writeVariant(Variant variant) { + java.nio.ByteBuffer metadataBuffer = variant.getMetadataBuffer(); + java.nio.ByteBuffer valueBuffer = variant.getValueBuffer(); + int metadataLength = metadataBuffer.remaining(); + int valueLength = valueBuffer.remaining(); + try (ArrowBuf metadataBuf = vector.getAllocator().buffer(metadataLength); + ArrowBuf valueBuf = vector.getAllocator().buffer(valueLength)) { + metadataBuf.setBytes(0, metadataBuffer.duplicate()); + valueBuf.setBytes(0, valueBuffer.duplicate()); + NullableVariantHolder holder = new NullableVariantHolder(); + holder.isSet = 1; + holder.metadataBuffer = metadataBuf; + holder.metadataStart = 0; + holder.metadataEnd = metadataLength; + holder.valueBuffer = valueBuf; + holder.valueStart = 0; + holder.valueEnd = valueLength; + vector.setSafe(getPosition(), holder); + vector.setValueCount(getPosition() + 1); + } + } + + @Override + public void writeExtension(Object value, ArrowType type) { + writeExtension(value); + } + /** * Writes a variant holder to the vector at the current position. * diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java b/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java index cc087c4f8..2b2c181fa 100644 --- a/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java @@ -16,7 +16,9 @@ */ package org.apache.arrow.vector.extension; +import java.nio.ByteBuffer; import java.util.List; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.BitVectorHelper; @@ -35,6 +37,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.TransferPair; +import org.apache.arrow.vector.variant.Variant; public class VariantVector extends ExtensionTypeVector { @@ -147,7 +150,24 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { @Override public Object getObject(int index) { - return getUnderlyingVector().getObject(index); + if (isNull(index)) { + return null; + } + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + + int metadataStart = metadataVector.getStartOffset(index); + int metadataEnd = metadataVector.getEndOffset(index); + int valueStart = valueVector.getStartOffset(index); + int valueEnd = valueVector.getEndOffset(index); + + return new Variant( + metadataVector.getDataBuffer(), + metadataStart, + metadataEnd, + valueVector.getDataBuffer(), + valueStart, + valueEnd); } /** @@ -253,6 +273,22 @@ public void setSafe(int index, NullableVariantHolder holder) { getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); } + /** Sets the value at the given index from the provided Variant. */ + public void setSafe(int index, Variant variant) { + ByteBuffer metadataBuffer = variant.getMetadataBuffer(); + ByteBuffer valueBuffer = variant.getValueBuffer(); + int metadataLength = metadataBuffer.remaining(); + int valueLength = valueBuffer.remaining(); + try (ArrowBuf metaBuf = getAllocator().buffer(metadataLength); + ArrowBuf valBuf = getAllocator().buffer(valueLength)) { + metaBuf.setBytes(0, metadataBuffer.duplicate()); + valBuf.setBytes(0, valueBuffer.duplicate()); + getUnderlyingVector().setIndexDefined(index); + getMetadataVector().setSafe(index, 1, 0, metadataLength, metaBuf); + getValueVector().setSafe(index, 1, 0, valueLength, valBuf); + } + } + @Override protected FieldReader getReaderImpl() { return new org.apache.arrow.vector.complex.impl.VariantReaderImpl(this); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 0c90b32ab..96100e042 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -38,9 +38,11 @@ import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.extension.VariantType; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; @@ -49,6 +51,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.util.UuidUtility; +import org.apache.arrow.vector.variant.TestVariant; +import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -57,6 +61,16 @@ public class TestListVector { private BufferAllocator allocator; + private static Variant toVariant(NullableVariantHolder holder) { + return new Variant( + holder.metadataBuffer, + holder.metadataStart, + holder.metadataEnd, + holder.valueBuffer, + holder.valueStart, + holder.valueEnd); + } + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); @@ -1379,6 +1393,156 @@ public void testCopyValueSafeForExtensionType() throws Exception { } } + @Test + public void testListVectorWithVariantExtensionType() { + final FieldType type = FieldType.nullable(VariantType.INSTANCE); + try (ListVector inVector = new ListVector("input", allocator, type, null)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + extensionWriter.writeExtension(variant2); + writer.endList(); + inVector.setValueCount(1); + + ArrayList resultSet = (ArrayList) inVector.getObject(0); + assertEquals(2, resultSet.size()); + assertEquals(variant1, resultSet.get(0)); + assertEquals(variant2, resultSet.get(1)); + } + } + + @Test + public void testListVectorReaderForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + UnionListReader reader = inVector.getReader(); + reader.setPosition(0); + assertTrue(reader.next()); + FieldReader variantReader = reader.reader(); + NullableVariantHolder resultHolder = new NullableVariantHolder(); + variantReader.read(resultHolder); + assertEquals(variant1, toVariant(resultHolder)); + + reader.setPosition(1); + assertTrue(reader.next()); + variantReader = reader.reader(); + variantReader.read(resultHolder); + assertEquals(variant2, toVariant(resultHolder)); + + assertTrue(reader.next()); + variantReader = reader.reader(); + variantReader.read(resultHolder); + assertEquals(variant2, toVariant(resultHolder)); + } + } + + @Test + public void testCopyFromForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator); + ListVector outVector = ListVector.empty("output", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector); + outVector.copyFrom(1, 1, inVector); + outVector.setValueCount(2); + + ArrayList resultSet0 = (ArrayList) outVector.getObject(0); + assertEquals(1, resultSet0.size()); + assertEquals(variant1, resultSet0.get(0)); + + ArrayList resultSet1 = (ArrayList) outVector.getObject(1); + assertEquals(2, resultSet1.size()); + assertEquals(variant2, resultSet1.get(0)); + assertEquals(variant2, resultSet1.get(1)); + } + } + + @Test + public void testCopyValueSafeForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + try (ListVector outVector = (ListVector) inVector.getTransferPair(allocator).getTo()) { + TransferPair tp = inVector.makeTransferPair(outVector); + tp.copyValueSafe(0, 0); + tp.copyValueSafe(1, 1); + outVector.setValueCount(2); + + ArrayList resultSet0 = (ArrayList) outVector.getObject(0); + assertEquals(1, resultSet0.size()); + assertEquals(variant1, resultSet0.get(0)); + + ArrayList resultSet1 = (ArrayList) outVector.getObject(1); + assertEquals(2, resultSet1.size()); + assertEquals(variant2, resultSet1.get(0)); + assertEquals(variant2, resultSet1.get(1)); + } + } + } + @Test public void testEmptyListOffsetBuffer() { // Test that ListVector has correct readableBytes after allocation. diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 274d2973b..b5ea0ffd3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -41,8 +41,10 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.extension.VariantType; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -50,6 +52,8 @@ import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.util.UuidUtility; +import org.apache.arrow.vector.variant.TestVariant; +import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1669,4 +1673,92 @@ public void testMapWithUuidKeyAndListUuidValue() throws Exception { assertEquals(value2c, actualValue2c); } } + + @Test + public void testMapVectorWithVariantExtensionType() throws Exception { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("world"); + try (final MapVector inVector = MapVector.empty("map", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + + UnionMapReader mapReader = inVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader variantReader = mapReader.value(); + NullableVariantHolder holder = new NullableVariantHolder(); + variantReader.read(holder); + assertEquals(variant1, toVariant(holder)); + + mapReader.next(); + variantReader = mapReader.value(); + variantReader.read(holder); + assertEquals(variant2, toVariant(holder)); + } + } + + @Test + public void testCopyFromForVariantExtensionType() throws Exception { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("world"); + try (final MapVector inVector = MapVector.empty("in", allocator, false); + final MapVector outVector = MapVector.empty("out", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector); + outVector.setValueCount(1); + + UnionMapReader mapReader = outVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader variantReader = mapReader.value(); + NullableVariantHolder holder = new NullableVariantHolder(); + variantReader.read(holder); + assertEquals(variant1, toVariant(holder)); + + mapReader.next(); + variantReader = mapReader.value(); + variantReader.read(holder); + assertEquals(variant2, toVariant(holder)); + } + } + + private static Variant toVariant(NullableVariantHolder holder) { + return new Variant( + holder.metadataBuffer, + holder.metadataStart, + holder.metadataEnd, + holder.valueBuffer, + holder.valueStart, + holder.valueEnd); + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java index 2539d9311..38e33d6e1 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java @@ -35,6 +35,7 @@ import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.holders.VariantHolder; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -782,6 +783,8 @@ void testGetObject() { Object obj = vector.getObject(0); assertNotNull(obj); + assertTrue(obj instanceof Variant); + assertEquals(new Variant(metadata, value), obj); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index ae5ac0726..e7c1d2ecb 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -45,18 +45,24 @@ import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.writer.BaseWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.extension.UuidType; +import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.vector.extension.VariantVector; import org.apache.arrow.vector.ipc.ArrowFileReader; import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.util.VectorBatchAppender; import org.apache.arrow.vector.validate.ValidateVectorVisitor; +import org.apache.arrow.vector.variant.TestVariant; +import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.Test; public class TestExtensionType { @@ -300,6 +306,194 @@ public void testVectorCompare() { } } + @Test + public void roundtripVariant() throws IOException { + ensureRegistered(VariantType.INSTANCE); + final Schema schema = + new Schema(Collections.singletonList(Field.nullable("a", VariantType.INSTANCE))); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + VariantVector vector = (VariantVector) root.getVector("a"); + vector.allocateNew(); + + setVariant(vector, 0, TestVariant.variantString("hello")); + setVariant(vector, 1, TestVariant.variantString("world")); + vector.setValueCount(2); + root.setRowCount(2); + + final File file = File.createTempFile("varianttest", ".arrow"); + try (final WritableByteChannel channel = + FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); + final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + + try (final SeekableByteChannel channel = + Files.newByteChannel(Paths.get(file.getAbsolutePath())); + final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { + reader.loadNextBatch(); + final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); + assertEquals(root.getSchema(), readerRoot.getSchema()); + + final Field field = readerRoot.getSchema().getFields().get(0); + final VariantType expectedType = VariantType.INSTANCE; + assertEquals( + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), + expectedType.extensionName()); + assertEquals( + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), + expectedType.serialize()); + + final ExtensionTypeVector deserialized = + (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(vector.isNull(i), deserialized.isNull(i)); + if (!vector.isNull(i)) { + assertEquals(vector.getObject(i), deserialized.getObject(i)); + } + } + } + } + } + + @Test + public void readVariantAsUnderlyingType() throws IOException { + ensureRegistered(VariantType.INSTANCE); + final Schema schema = + new Schema(Collections.singletonList(VariantVector.createVariantField("a"))); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + VariantVector vector = (VariantVector) root.getVector("a"); + vector.allocateNew(); + + setVariant(vector, 0, TestVariant.variantString("hello")); + vector.setValueCount(1); + root.setRowCount(1); + + final File file = File.createTempFile("varianttest", ".arrow"); + try (final WritableByteChannel channel = + FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); + final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + + ExtensionTypeRegistry.unregister(VariantType.INSTANCE); + + try (final SeekableByteChannel channel = + Files.newByteChannel(Paths.get(file.getAbsolutePath())); + final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { + reader.loadNextBatch(); + VectorSchemaRoot readRoot = reader.getVectorSchemaRoot(); + + // Verify schema properties + assertEquals(1, readRoot.getSchema().getFields().size()); + assertEquals("a", readRoot.getSchema().getFields().get(0).getName()); + assertTrue(readRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.Struct); + + // Verify extension metadata is preserved + final Field field = readRoot.getSchema().getFields().get(0); + assertEquals( + VariantType.EXTENSION_NAME, + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME)); + assertEquals("", field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA)); + + // Verify vector type and row count + assertEquals(1, readRoot.getRowCount()); + FieldVector readVector = readRoot.getVector("a"); + assertEquals(StructVector.class, readVector.getClass()); + + // Verify value count matches + StructVector structVector = (StructVector) readVector; + assertEquals(vector.getValueCount(), structVector.getValueCount()); + + // Verify the underlying data can be accessed from child vectors + VarBinaryVector metadataVector = + structVector.getChild(VariantVector.METADATA_VECTOR_NAME, VarBinaryVector.class); + VarBinaryVector valueVector = + structVector.getChild(VariantVector.VALUE_VECTOR_NAME, VarBinaryVector.class); + assertNotNull(metadataVector); + assertNotNull(valueVector); + assertEquals(1, metadataVector.getValueCount()); + assertEquals(1, valueVector.getValueCount()); + } + } + } + + @Test + public void testVariantVectorCompare() { + VariantType variantType = VariantType.INSTANCE; + ExtensionTypeRegistry.register(variantType); + Variant hello = TestVariant.variantString("hello"); + Variant world = TestVariant.variantString("world"); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + VariantVector a1 = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator); + VariantVector a2 = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator); + VariantVector bb = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator)) { + + ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor(); + validateVisitor.visit(a1, null); + + a1.allocateNew(); + a2.allocateNew(); + bb.allocateNew(); + + setVariant(a1, 0, hello); + setVariant(a1, 1, world); + a1.setValueCount(2); + + setVariant(a2, 0, hello); + setVariant(a2, 1, world); + a2.setValueCount(2); + + setVariant(bb, 0, world); + setVariant(bb, 1, hello); + bb.setValueCount(2); + + Range range = new Range(0, 0, a1.getValueCount()); + RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2); + assertTrue(visitor.rangeEquals(range)); + + visitor = new RangeEqualsVisitor(a1, bb); + assertFalse(visitor.rangeEquals(range)); + + VectorBatchAppender.batchAppend(a1, a2, bb); + assertEquals(6, a1.getValueCount()); + validateVisitor.visit(a1, null); + } + } + + @Test + public void testVariantCopyAsValueThrowsException() { + ensureRegistered(VariantType.INSTANCE); + try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + VariantVector vector = new VariantVector("variant", allocator)) { + vector.allocateNew(); + setVariant(vector, 0, TestVariant.variantString("hello")); + vector.setValueCount(1); + + var reader = vector.getReader(); + reader.setPosition(0); + + assertThrows( + IllegalArgumentException.class, () -> reader.copyAsValue((BaseWriter.StructWriter) null)); + } + } + + private static void setVariant(VariantVector vector, int index, Variant variant) { + vector.setSafe(index, variant); + } + static class LocationType extends ExtensionType { @Override From b76a0ea97788ea21da0eadd45ad5e47e7b562f29 Mon Sep 17 00:00:00 2001 From: Tamas Mate Date: Wed, 28 Jan 2026 14:05:09 +0100 Subject: [PATCH 4/5] Move Variant classes from arrow-vector to arrow-variant module --- arrow-variant/pom.xml | 21 +- arrow-variant/src/main/java/module-info.java | 6 +- .../arrow/{vector => }/variant/Variant.java | 13 +- .../arrow/variant}/extension/VariantType.java | 8 +- .../variant}/extension/VariantVector.java | 16 +- .../holders/NullableVariantHolder.java | 5 +- .../arrow/variant}/holders/VariantHolder.java | 5 +- .../impl/NullableVariantHolderReaderImpl.java | 5 +- .../variant}/impl/VariantReaderImpl.java | 9 +- .../variant}/impl/VariantWriterImpl.java | 11 +- .../{vector => }/variant/TestVariant.java | 2 +- .../extension/TestVariantExtensionType.java | 249 ++++++++++++++++++ .../extension/TestVariantInListVector.java | 202 ++++++++++++++ .../extension/TestVariantInMapVector.java | 125 +++++++++ .../variant/extension}/TestVariantType.java | 8 +- .../variant/extension}/TestVariantVector.java | 14 +- flight/flight-sql-jdbc-driver/pom.xml | 5 - pom.xml | 5 - vector/pom.xml | 11 - .../templates/AbstractFieldReader.java | 4 +- vector/src/main/java/module-info.java | 1 - .../apache/arrow/vector/TestListVector.java | 164 ------------ .../apache/arrow/vector/TestMapVector.java | 92 ------- .../vector/types/pojo/TestExtensionType.java | 194 -------------- 24 files changed, 648 insertions(+), 527 deletions(-) rename arrow-variant/src/main/java/org/apache/arrow/{vector => }/variant/Variant.java (94%) rename {vector/src/main/java/org/apache/arrow/vector => arrow-variant/src/main/java/org/apache/arrow/variant}/extension/VariantType.java (89%) rename {vector/src/main/java/org/apache/arrow/vector => arrow-variant/src/main/java/org/apache/arrow/variant}/extension/VariantVector.java (96%) rename {vector/src/main/java/org/apache/arrow/vector => arrow-variant/src/main/java/org/apache/arrow/variant}/holders/NullableVariantHolder.java (91%) rename {vector/src/main/java/org/apache/arrow/vector => arrow-variant/src/main/java/org/apache/arrow/variant}/holders/VariantHolder.java (91%) rename {vector/src/main/java/org/apache/arrow/vector/complex => arrow-variant/src/main/java/org/apache/arrow/variant}/impl/NullableVariantHolderReaderImpl.java (92%) rename {vector/src/main/java/org/apache/arrow/vector/complex => arrow-variant/src/main/java/org/apache/arrow/variant}/impl/VariantReaderImpl.java (88%) rename {vector/src/main/java/org/apache/arrow/vector/complex => arrow-variant/src/main/java/org/apache/arrow/variant}/impl/VariantWriterImpl.java (93%) rename arrow-variant/src/test/java/org/apache/arrow/{vector => }/variant/TestVariant.java (99%) create mode 100644 arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java create mode 100644 arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java create mode 100644 arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java rename {vector/src/test/java/org/apache/arrow/vector => arrow-variant/src/test/java/org/apache/arrow/variant/extension}/TestVariantType.java (98%) rename {vector/src/test/java/org/apache/arrow/vector => arrow-variant/src/test/java/org/apache/arrow/variant/extension}/TestVariantVector.java (98%) diff --git a/arrow-variant/pom.xml b/arrow-variant/pom.xml index 5169e9bdc..3a842178a 100644 --- a/arrow-variant/pom.xml +++ b/arrow-variant/pom.xml @@ -33,9 +33,14 @@ under the License. org.apache.arrow arrow-memory-core + + org.apache.arrow + arrow-vector + org.apache.parquet parquet-variant + ${dep.parquet.version} org.apache.arrow @@ -43,20 +48,4 @@ under the License. test - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - - - - - diff --git a/arrow-variant/src/main/java/module-info.java b/arrow-variant/src/main/java/module-info.java index 1df19e507..da9417396 100644 --- a/arrow-variant/src/main/java/module-info.java +++ b/arrow-variant/src/main/java/module-info.java @@ -17,8 +17,12 @@ @SuppressWarnings("requires-automatic") module org.apache.arrow.variant { - exports org.apache.arrow.vector.variant; + exports org.apache.arrow.variant; + exports org.apache.arrow.variant.extension; + exports org.apache.arrow.variant.impl; + exports org.apache.arrow.variant.holders; requires org.apache.arrow.memory.core; + requires org.apache.arrow.vector; requires parquet.variant; } diff --git a/arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java b/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java similarity index 94% rename from arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java index 895ce5ae3..99f4c5e6d 100644 --- a/arrow-variant/src/main/java/org/apache/arrow/vector/variant/Variant.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java @@ -14,13 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.variant; +package org.apache.arrow.variant; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.Objects; import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.variant.holders.NullableVariantHolder; /** * Wrapper around parquet-variant's Variant implementation. @@ -58,6 +59,16 @@ private Variant(org.apache.parquet.variant.Variant delegate) { this.delegate = delegate; } + public Variant(NullableVariantHolder holder) { + this( + holder.metadataBuffer, + holder.metadataStart, + holder.metadataEnd, + holder.valueBuffer, + holder.valueStart, + holder.valueEnd); + } + public ByteBuffer getValueBuffer() { return delegate.getValueBuffer(); } diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java similarity index 89% rename from vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java index d8e8b93b6..dc9f7a7a5 100644 --- a/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java @@ -14,18 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.extension; +package org.apache.arrow.variant.extension; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.variant.impl.VariantWriterImpl; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.impl.VariantWriterImpl; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; import org.apache.arrow.vector.types.pojo.FieldType; +/** + * Arrow extension type for + * Parquet Variant binary encoding. The type itself does not support shredded variant data. + */ public final class VariantType extends ExtensionType { public static final VariantType INSTANCE = new VariantType(); diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantVector.java similarity index 96% rename from vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantVector.java index 2b2c181fa..1bbf1a6bd 100644 --- a/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantVector.java @@ -14,13 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.extension; +package org.apache.arrow.variant.extension; import java.nio.ByteBuffer; import java.util.List; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; @@ -29,16 +32,19 @@ import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableVariantHolder; -import org.apache.arrow.vector.holders.VariantHolder; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.variant.Variant; +/** + * Arrow vector for storing {@link VariantType} values. + * + *

Stores semi-structured data (like JSON) as metadata + value binary pairs, allowing + * type-flexible columnar storage within Arrow's type system. + */ public class VariantVector extends ExtensionTypeVector { public static final String METADATA_VECTOR_NAME = "metadata"; @@ -291,7 +297,7 @@ public void setSafe(int index, Variant variant) { @Override protected FieldReader getReaderImpl() { - return new org.apache.arrow.vector.complex.impl.VariantReaderImpl(this); + return new org.apache.arrow.variant.impl.VariantReaderImpl(this); } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/NullableVariantHolder.java similarity index 91% rename from vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/holders/NullableVariantHolder.java index 7076f587e..b78d4a201 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/NullableVariantHolder.java @@ -14,10 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.holders; +package org.apache.arrow.variant.holders; import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.variant.extension.VariantType; +import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.types.pojo.ArrowType; @SuppressWarnings("checkstyle:VisibilityModifier") diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/VariantHolder.java similarity index 91% rename from vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/holders/VariantHolder.java index e82b49e08..e3947ac43 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/holders/VariantHolder.java @@ -14,10 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.holders; +package org.apache.arrow.variant.holders; import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.variant.extension.VariantType; +import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.types.pojo.ArrowType; @SuppressWarnings("checkstyle:VisibilityModifier") diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/NullableVariantHolderReaderImpl.java similarity index 92% rename from vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/impl/NullableVariantHolderReaderImpl.java index 0aa6246fb..1645529c0 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/NullableVariantHolderReaderImpl.java @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.complex.impl; +package org.apache.arrow.variant.impl; -import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.complex.impl.AbstractFieldReader; import org.apache.arrow.vector.types.Types; public class NullableVariantHolderReaderImpl extends AbstractFieldReader { diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantReaderImpl.java similarity index 88% rename from vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantReaderImpl.java index cf5509792..670104b7d 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantReaderImpl.java @@ -14,12 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.complex.impl; +package org.apache.arrow.variant.impl; -import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.variant.extension.VariantVector; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.vector.complex.impl.AbstractFieldReader; import org.apache.arrow.vector.holders.ExtensionHolder; -import org.apache.arrow.vector.holders.NullableVariantHolder; -import org.apache.arrow.vector.holders.VariantHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.Field; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantWriterImpl.java similarity index 93% rename from vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java rename to arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantWriterImpl.java index 89f393ad8..266ddb75d 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/impl/VariantWriterImpl.java @@ -14,15 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.complex.impl; +package org.apache.arrow.variant.impl; import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.extension.VariantVector; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.vector.complex.impl.AbstractExtensionTypeWriter; import org.apache.arrow.vector.holders.ExtensionHolder; -import org.apache.arrow.vector.holders.NullableVariantHolder; -import org.apache.arrow.vector.holders.VariantHolder; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.variant.Variant; /** * Writer implementation for VARIANT extension type vectors. diff --git a/arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java b/arrow-variant/src/test/java/org/apache/arrow/variant/TestVariant.java similarity index 99% rename from arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java rename to arrow-variant/src/test/java/org/apache/arrow/variant/TestVariant.java index 9c66242cc..bc46a6861 100644 --- a/arrow-variant/src/test/java/org/apache/arrow/vector/variant/TestVariant.java +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/TestVariant.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector.variant; +package org.apache.arrow.variant; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java new file mode 100644 index 000000000..f3213d523 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantExtensionType.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Collections; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.TestVariant; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compare.Range; +import org.apache.arrow.vector.compare.RangeEqualsVisitor; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.writer.BaseWriter; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.VectorBatchAppender; +import org.apache.arrow.vector.validate.ValidateVectorVisitor; +import org.junit.jupiter.api.Test; + +public class TestVariantExtensionType { + + private static void ensureRegistered(ArrowType.ExtensionType type) { + if (ExtensionTypeRegistry.lookup(type.extensionName()) == null) { + ExtensionTypeRegistry.register(type); + } + } + + @Test + public void roundtripVariant() throws IOException { + ensureRegistered(VariantType.INSTANCE); + final Schema schema = + new Schema(Collections.singletonList(Field.nullable("a", VariantType.INSTANCE))); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + VariantVector vector = (VariantVector) root.getVector("a"); + vector.allocateNew(); + + vector.setSafe(0, TestVariant.variantString("hello")); + vector.setSafe(1, TestVariant.variantString("world")); + vector.setValueCount(2); + root.setRowCount(2); + + final File file = File.createTempFile("varianttest", ".arrow"); + try (final WritableByteChannel channel = + FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); + final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + + try (final SeekableByteChannel channel = + Files.newByteChannel(Paths.get(file.getAbsolutePath())); + final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { + reader.loadNextBatch(); + final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); + assertEquals(root.getSchema(), readerRoot.getSchema()); + + final Field field = readerRoot.getSchema().getFields().get(0); + final VariantType expectedType = VariantType.INSTANCE; + assertEquals( + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), + expectedType.extensionName()); + assertEquals( + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), + expectedType.serialize()); + + final ExtensionTypeVector deserialized = + (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(vector.isNull(i), deserialized.isNull(i)); + if (!vector.isNull(i)) { + assertEquals(vector.getObject(i), deserialized.getObject(i)); + } + } + } + } + } + + @Test + public void readVariantAsUnderlyingType() throws IOException { + ensureRegistered(VariantType.INSTANCE); + final Schema schema = + new Schema(Collections.singletonList(VariantVector.createVariantField("a"))); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + VariantVector vector = (VariantVector) root.getVector("a"); + vector.allocateNew(); + + vector.setSafe(0, TestVariant.variantString("hello")); + vector.setValueCount(1); + root.setRowCount(1); + + final File file = File.createTempFile("varianttest", ".arrow"); + try (final WritableByteChannel channel = + FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); + final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + + ExtensionTypeRegistry.unregister(VariantType.INSTANCE); + + try (final SeekableByteChannel channel = + Files.newByteChannel(Paths.get(file.getAbsolutePath())); + final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { + reader.loadNextBatch(); + VectorSchemaRoot readRoot = reader.getVectorSchemaRoot(); + + // Verify schema properties + assertEquals(1, readRoot.getSchema().getFields().size()); + assertEquals("a", readRoot.getSchema().getFields().get(0).getName()); + assertTrue(readRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.Struct); + + // Verify extension metadata is preserved + final Field field = readRoot.getSchema().getFields().get(0); + assertEquals( + VariantType.EXTENSION_NAME, + field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME)); + assertEquals("", field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA)); + + // Verify vector type and row count + assertEquals(1, readRoot.getRowCount()); + FieldVector readVector = readRoot.getVector("a"); + assertEquals(StructVector.class, readVector.getClass()); + + // Verify value count matches + StructVector structVector = (StructVector) readVector; + assertEquals(vector.getValueCount(), structVector.getValueCount()); + + // Verify the underlying data can be accessed from child vectors + VarBinaryVector metadataVector = + structVector.getChild(VariantVector.METADATA_VECTOR_NAME, VarBinaryVector.class); + VarBinaryVector valueVector = + structVector.getChild(VariantVector.VALUE_VECTOR_NAME, VarBinaryVector.class); + assertNotNull(metadataVector); + assertNotNull(valueVector); + assertEquals(1, metadataVector.getValueCount()); + assertEquals(1, valueVector.getValueCount()); + } + } + } + + @Test + public void testVariantVectorCompare() { + VariantType variantType = VariantType.INSTANCE; + ExtensionTypeRegistry.register(variantType); + Variant hello = TestVariant.variantString("hello"); + Variant world = TestVariant.variantString("world"); + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + VariantVector a1 = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator); + VariantVector a2 = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator); + VariantVector bb = + (VariantVector) + variantType.getNewVector("a", FieldType.nullable(variantType), allocator)) { + + ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor(); + validateVisitor.visit(a1, null); + + a1.allocateNew(); + a2.allocateNew(); + bb.allocateNew(); + + a1.setSafe(0, hello); + a1.setSafe(1, world); + a1.setValueCount(2); + + a2.setSafe(0, hello); + a2.setSafe(1, world); + a2.setValueCount(2); + + bb.setSafe(0, world); + bb.setSafe(1, hello); + bb.setValueCount(2); + + Range range = new Range(0, 0, a1.getValueCount()); + RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2); + assertTrue(visitor.rangeEquals(range)); + + visitor = new RangeEqualsVisitor(a1, bb); + assertFalse(visitor.rangeEquals(range)); + + VectorBatchAppender.batchAppend(a1, a2, bb); + assertEquals(6, a1.getValueCount()); + validateVisitor.visit(a1, null); + } + } + + @Test + public void testVariantCopyAsValueThrowsException() { + ensureRegistered(VariantType.INSTANCE); + try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + VariantVector vector = new VariantVector("variant", allocator)) { + vector.allocateNew(); + vector.setSafe(0, TestVariant.variantString("hello")); + vector.setValueCount(1); + + var reader = vector.getReader(); + reader.setPosition(0); + + assertThrows( + IllegalArgumentException.class, () -> reader.copyAsValue((BaseWriter.StructWriter) null)); + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java new file mode 100644 index 000000000..8b6000bc4 --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInListVector.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.TestVariant; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestVariantInListVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testListVectorWithVariantExtensionType() { + final FieldType type = FieldType.nullable(VariantType.INSTANCE); + try (ListVector inVector = new ListVector("input", allocator, type, null)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + extensionWriter.writeExtension(variant2); + writer.endList(); + inVector.setValueCount(1); + + ArrayList resultSet = (ArrayList) inVector.getObject(0); + assertEquals(2, resultSet.size()); + assertEquals(variant1, resultSet.get(0)); + assertEquals(variant2, resultSet.get(1)); + } + } + + @Test + public void testListVectorReaderForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + UnionListReader reader = inVector.getReader(); + reader.setPosition(0); + assertTrue(reader.next()); + FieldReader variantReader = reader.reader(); + NullableVariantHolder resultHolder = new NullableVariantHolder(); + variantReader.read(resultHolder); + assertEquals(variant1, new Variant(resultHolder)); + + reader.setPosition(1); + assertTrue(reader.next()); + variantReader = reader.reader(); + variantReader.read(resultHolder); + assertEquals(variant2, new Variant(resultHolder)); + + assertTrue(reader.next()); + variantReader = reader.reader(); + variantReader.read(resultHolder); + assertEquals(variant2, new Variant(resultHolder)); + } + } + + @Test + public void testCopyFromForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator); + ListVector outVector = ListVector.empty("output", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector); + outVector.copyFrom(1, 1, inVector); + outVector.setValueCount(2); + + ArrayList resultSet0 = (ArrayList) outVector.getObject(0); + assertEquals(1, resultSet0.size()); + assertEquals(variant1, resultSet0.get(0)); + + ArrayList resultSet1 = (ArrayList) outVector.getObject(1); + assertEquals(2, resultSet1.size()); + assertEquals(variant2, resultSet1.get(0)); + assertEquals(variant2, resultSet1.get(1)); + } + } + + @Test + public void testCopyValueSafeForVariantExtensionType() { + try (ListVector inVector = ListVector.empty("input", allocator)) { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("bye"); + + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); + extensionWriter.writeExtension(variant1); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + extensionWriter.writeExtension(variant2); + extensionWriter.writeExtension(variant2); + writer.endList(); + + inVector.setValueCount(2); + + try (ListVector outVector = (ListVector) inVector.getTransferPair(allocator).getTo()) { + TransferPair tp = inVector.makeTransferPair(outVector); + tp.copyValueSafe(0, 0); + tp.copyValueSafe(1, 1); + outVector.setValueCount(2); + + ArrayList resultSet0 = (ArrayList) outVector.getObject(0); + assertEquals(1, resultSet0.size()); + assertEquals(variant1, resultSet0.get(0)); + + ArrayList resultSet1 = (ArrayList) outVector.getObject(1); + assertEquals(2, resultSet1.size()); + assertEquals(variant2, resultSet1.get(0)); + assertEquals(variant2, resultSet1.get(1)); + } + } + } +} diff --git a/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java new file mode 100644 index 000000000..dd925810d --- /dev/null +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantInMapVector.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.variant.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.TestVariant; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapReader; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestVariantInMapVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + @AfterEach + public void terminate() { + allocator.close(); + } + + @Test + public void testMapVectorWithVariantExtensionType() { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("world"); + try (final MapVector inVector = MapVector.empty("map", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + + UnionMapReader mapReader = inVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader variantReader = mapReader.value(); + NullableVariantHolder holder = new NullableVariantHolder(); + variantReader.read(holder); + assertEquals(variant1, new Variant(holder)); + + mapReader.next(); + variantReader = mapReader.value(); + variantReader.read(holder); + assertEquals(variant2, new Variant(holder)); + } + } + + @Test + public void testCopyFromForVariantExtensionType() { + Variant variant1 = TestVariant.variantString("hello"); + Variant variant2 = TestVariant.variantString("world"); + try (final MapVector inVector = MapVector.empty("in", allocator, false); + final MapVector outVector = MapVector.empty("out", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector); + outVector.setValueCount(1); + + UnionMapReader mapReader = outVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader variantReader = mapReader.value(); + NullableVariantHolder holder = new NullableVariantHolder(); + variantReader.read(holder); + assertEquals(variant1, new Variant(holder)); + + mapReader.next(); + variantReader = mapReader.value(); + variantReader.read(holder); + assertEquals(variant2, new Variant(holder)); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantType.java similarity index 98% rename from vector/src/test/java/org/apache/arrow/vector/TestVariantType.java rename to arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantType.java index 115d9f46d..017e71224 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantType.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector; +package org.apache.arrow.variant.extension; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -36,10 +36,10 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.extension.VariantType; -import org.apache.arrow.vector.extension.VariantVector; -import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.ipc.ArrowStreamReader; import org.apache.arrow.vector.ipc.ArrowStreamWriter; import org.apache.arrow.vector.types.pojo.ArrowType; diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantVector.java similarity index 98% rename from vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java rename to arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantVector.java index 38e33d6e1..1c172e304 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java +++ b/arrow-variant/src/test/java/org/apache/arrow/variant/extension/TestVariantVector.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.arrow.vector; +package org.apache.arrow.variant.extension; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -27,15 +27,13 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.impl.VariantReaderImpl; -import org.apache.arrow.vector.complex.impl.VariantWriterImpl; -import org.apache.arrow.vector.extension.VariantType; -import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.variant.Variant; +import org.apache.arrow.variant.holders.NullableVariantHolder; +import org.apache.arrow.variant.holders.VariantHolder; +import org.apache.arrow.variant.impl.VariantReaderImpl; +import org.apache.arrow.variant.impl.VariantWriterImpl; import org.apache.arrow.vector.holders.ExtensionHolder; -import org.apache.arrow.vector.holders.NullableVariantHolder; -import org.apache.arrow.vector.holders.VariantHolder; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/flight/flight-sql-jdbc-driver/pom.xml b/flight/flight-sql-jdbc-driver/pom.xml index 6036bde2d..559c42597 100644 --- a/flight/flight-sql-jdbc-driver/pom.xml +++ b/flight/flight-sql-jdbc-driver/pom.xml @@ -93,11 +93,6 @@ under the License. com.google.errorprone:error_prone_annotations com.google.code.findbugs:jsr305 com.google.j2objc:j2objc-annotations - - org.apache.parquet:parquet-common - org.apache.parquet:parquet-column - org.apache.parquet:parquet-encoding - org.apache.parquet:parquet-format-structures diff --git a/pom.xml b/pom.xml index 38186e6e7..9ffaf6b60 100644 --- a/pom.xml +++ b/pom.xml @@ -243,11 +243,6 @@ under the License. logback-core ${logback.version} - - org.apache.parquet - parquet-variant - ${dep.parquet.version} - diff --git a/vector/pom.xml b/vector/pom.xml index 8172802d1..89e977900 100644 --- a/vector/pom.xml +++ b/vector/pom.xml @@ -37,17 +37,6 @@ under the License. org.apache.arrow arrow-memory-core - - org.apache.arrow - arrow-variant - - - org.apache.arrow - arrow-variant - ${project.version} - test-jar - test - org.immutables value-annotations diff --git a/vector/src/main/codegen/templates/AbstractFieldReader.java b/vector/src/main/codegen/templates/AbstractFieldReader.java index 556fb576c..789295e95 100644 --- a/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -29,9 +29,9 @@ * Source code generated using FreeMarker template ${.template_name} */ @SuppressWarnings("unused") -abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{ +public abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{ - AbstractFieldReader(){ + protected AbstractFieldReader(){ super(); } diff --git a/vector/src/main/java/module-info.java b/vector/src/main/java/module-info.java index 46e95ff61..8ba1b3579 100644 --- a/vector/src/main/java/module-info.java +++ b/vector/src/main/java/module-info.java @@ -46,7 +46,6 @@ requires jdk.unsupported; requires org.apache.arrow.format; requires org.apache.arrow.memory.core; - requires org.apache.arrow.variant; requires org.apache.commons.codec; requires org.slf4j; diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 96100e042..0c90b32ab 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -38,11 +38,9 @@ import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.extension.VariantType; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; -import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; @@ -51,8 +49,6 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.util.UuidUtility; -import org.apache.arrow.vector.variant.TestVariant; -import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -61,16 +57,6 @@ public class TestListVector { private BufferAllocator allocator; - private static Variant toVariant(NullableVariantHolder holder) { - return new Variant( - holder.metadataBuffer, - holder.metadataStart, - holder.metadataEnd, - holder.valueBuffer, - holder.valueStart, - holder.valueEnd); - } - @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); @@ -1393,156 +1379,6 @@ public void testCopyValueSafeForExtensionType() throws Exception { } } - @Test - public void testListVectorWithVariantExtensionType() { - final FieldType type = FieldType.nullable(VariantType.INSTANCE); - try (ListVector inVector = new ListVector("input", allocator, type, null)) { - Variant variant1 = TestVariant.variantString("hello"); - Variant variant2 = TestVariant.variantString("bye"); - - UnionListWriter writer = inVector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startList(); - ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); - extensionWriter.writeExtension(variant1); - extensionWriter.writeExtension(variant2); - writer.endList(); - inVector.setValueCount(1); - - ArrayList resultSet = (ArrayList) inVector.getObject(0); - assertEquals(2, resultSet.size()); - assertEquals(variant1, resultSet.get(0)); - assertEquals(variant2, resultSet.get(1)); - } - } - - @Test - public void testListVectorReaderForVariantExtensionType() { - try (ListVector inVector = ListVector.empty("input", allocator)) { - Variant variant1 = TestVariant.variantString("hello"); - Variant variant2 = TestVariant.variantString("bye"); - - UnionListWriter writer = inVector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startList(); - ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); - extensionWriter.writeExtension(variant1); - writer.endList(); - - writer.setPosition(1); - writer.startList(); - extensionWriter.writeExtension(variant2); - extensionWriter.writeExtension(variant2); - writer.endList(); - - inVector.setValueCount(2); - - UnionListReader reader = inVector.getReader(); - reader.setPosition(0); - assertTrue(reader.next()); - FieldReader variantReader = reader.reader(); - NullableVariantHolder resultHolder = new NullableVariantHolder(); - variantReader.read(resultHolder); - assertEquals(variant1, toVariant(resultHolder)); - - reader.setPosition(1); - assertTrue(reader.next()); - variantReader = reader.reader(); - variantReader.read(resultHolder); - assertEquals(variant2, toVariant(resultHolder)); - - assertTrue(reader.next()); - variantReader = reader.reader(); - variantReader.read(resultHolder); - assertEquals(variant2, toVariant(resultHolder)); - } - } - - @Test - public void testCopyFromForVariantExtensionType() { - try (ListVector inVector = ListVector.empty("input", allocator); - ListVector outVector = ListVector.empty("output", allocator)) { - Variant variant1 = TestVariant.variantString("hello"); - Variant variant2 = TestVariant.variantString("bye"); - - UnionListWriter writer = inVector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startList(); - ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); - extensionWriter.writeExtension(variant1); - writer.endList(); - - writer.setPosition(1); - writer.startList(); - extensionWriter.writeExtension(variant2); - extensionWriter.writeExtension(variant2); - writer.endList(); - - inVector.setValueCount(2); - - outVector.allocateNew(); - outVector.copyFrom(0, 0, inVector); - outVector.copyFrom(1, 1, inVector); - outVector.setValueCount(2); - - ArrayList resultSet0 = (ArrayList) outVector.getObject(0); - assertEquals(1, resultSet0.size()); - assertEquals(variant1, resultSet0.get(0)); - - ArrayList resultSet1 = (ArrayList) outVector.getObject(1); - assertEquals(2, resultSet1.size()); - assertEquals(variant2, resultSet1.get(0)); - assertEquals(variant2, resultSet1.get(1)); - } - } - - @Test - public void testCopyValueSafeForVariantExtensionType() { - try (ListVector inVector = ListVector.empty("input", allocator)) { - Variant variant1 = TestVariant.variantString("hello"); - Variant variant2 = TestVariant.variantString("bye"); - - UnionListWriter writer = inVector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startList(); - ExtensionWriter extensionWriter = writer.extension(VariantType.INSTANCE); - extensionWriter.writeExtension(variant1); - writer.endList(); - - writer.setPosition(1); - writer.startList(); - extensionWriter.writeExtension(variant2); - extensionWriter.writeExtension(variant2); - writer.endList(); - - inVector.setValueCount(2); - - try (ListVector outVector = (ListVector) inVector.getTransferPair(allocator).getTo()) { - TransferPair tp = inVector.makeTransferPair(outVector); - tp.copyValueSafe(0, 0); - tp.copyValueSafe(1, 1); - outVector.setValueCount(2); - - ArrayList resultSet0 = (ArrayList) outVector.getObject(0); - assertEquals(1, resultSet0.size()); - assertEquals(variant1, resultSet0.get(0)); - - ArrayList resultSet1 = (ArrayList) outVector.getObject(1); - assertEquals(2, resultSet1.size()); - assertEquals(variant2, resultSet1.get(0)); - assertEquals(variant2, resultSet1.get(1)); - } - } - } - @Test public void testEmptyListOffsetBuffer() { // Test that ListVector has correct readableBytes after allocation. diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index b5ea0ffd3..274d2973b 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -41,10 +41,8 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.extension.VariantType; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableUuidHolder; -import org.apache.arrow.vector.holders.NullableVariantHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -52,8 +50,6 @@ import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.util.UuidUtility; -import org.apache.arrow.vector.variant.TestVariant; -import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1673,92 +1669,4 @@ public void testMapWithUuidKeyAndListUuidValue() throws Exception { assertEquals(value2c, actualValue2c); } } - - @Test - public void testMapVectorWithVariantExtensionType() throws Exception { - Variant variant1 = TestVariant.variantString("hello"); - Variant variant2 = TestVariant.variantString("world"); - try (final MapVector inVector = MapVector.empty("map", allocator, false)) { - inVector.allocateNew(); - UnionMapWriter writer = inVector.getWriter(); - writer.setPosition(0); - - writer.startMap(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(0); - writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); - writer.endEntry(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(1); - writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); - writer.endEntry(); - writer.endMap(); - - writer.setValueCount(1); - - UnionMapReader mapReader = inVector.getReader(); - mapReader.setPosition(0); - mapReader.next(); - FieldReader variantReader = mapReader.value(); - NullableVariantHolder holder = new NullableVariantHolder(); - variantReader.read(holder); - assertEquals(variant1, toVariant(holder)); - - mapReader.next(); - variantReader = mapReader.value(); - variantReader.read(holder); - assertEquals(variant2, toVariant(holder)); - } - } - - @Test - public void testCopyFromForVariantExtensionType() throws Exception { - Variant variant1 = TestVariant.variantString("hello"); - Variant variant2 = TestVariant.variantString("world"); - try (final MapVector inVector = MapVector.empty("in", allocator, false); - final MapVector outVector = MapVector.empty("out", allocator, false)) { - inVector.allocateNew(); - UnionMapWriter writer = inVector.getWriter(); - writer.setPosition(0); - - writer.startMap(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(0); - writer.value().extension(VariantType.INSTANCE).writeExtension(variant1, VariantType.INSTANCE); - writer.endEntry(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(1); - writer.value().extension(VariantType.INSTANCE).writeExtension(variant2, VariantType.INSTANCE); - writer.endEntry(); - writer.endMap(); - - writer.setValueCount(1); - outVector.allocateNew(); - outVector.copyFrom(0, 0, inVector); - outVector.setValueCount(1); - - UnionMapReader mapReader = outVector.getReader(); - mapReader.setPosition(0); - mapReader.next(); - FieldReader variantReader = mapReader.value(); - NullableVariantHolder holder = new NullableVariantHolder(); - variantReader.read(holder); - assertEquals(variant1, toVariant(holder)); - - mapReader.next(); - variantReader = mapReader.value(); - variantReader.read(holder); - assertEquals(variant2, toVariant(holder)); - } - } - - private static Variant toVariant(NullableVariantHolder holder) { - return new Variant( - holder.metadataBuffer, - holder.metadataStart, - holder.metadataEnd, - holder.valueBuffer, - holder.valueStart, - holder.valueEnd); - } } diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index e7c1d2ecb..ae5ac0726 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -45,24 +45,18 @@ import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.writer.BaseWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.extension.UuidType; -import org.apache.arrow.vector.extension.VariantType; -import org.apache.arrow.vector.extension.VariantVector; import org.apache.arrow.vector.ipc.ArrowFileReader; import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.util.VectorBatchAppender; import org.apache.arrow.vector.validate.ValidateVectorVisitor; -import org.apache.arrow.vector.variant.TestVariant; -import org.apache.arrow.vector.variant.Variant; import org.junit.jupiter.api.Test; public class TestExtensionType { @@ -306,194 +300,6 @@ public void testVectorCompare() { } } - @Test - public void roundtripVariant() throws IOException { - ensureRegistered(VariantType.INSTANCE); - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", VariantType.INSTANCE))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - VariantVector vector = (VariantVector) root.getVector("a"); - vector.allocateNew(); - - setVariant(vector, 0, TestVariant.variantString("hello")); - setVariant(vector, 1, TestVariant.variantString("world")); - vector.setValueCount(2); - root.setRowCount(2); - - final File file = File.createTempFile("varianttest", ".arrow"); - try (final WritableByteChannel channel = - FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); - final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - - try (final SeekableByteChannel channel = - Files.newByteChannel(Paths.get(file.getAbsolutePath())); - final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.loadNextBatch(); - final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - assertEquals(root.getSchema(), readerRoot.getSchema()); - - final Field field = readerRoot.getSchema().getFields().get(0); - final VariantType expectedType = VariantType.INSTANCE; - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); - - final ExtensionTypeVector deserialized = - (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - assertEquals(vector.getValueCount(), deserialized.getValueCount()); - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(vector.isNull(i), deserialized.isNull(i)); - if (!vector.isNull(i)) { - assertEquals(vector.getObject(i), deserialized.getObject(i)); - } - } - } - } - } - - @Test - public void readVariantAsUnderlyingType() throws IOException { - ensureRegistered(VariantType.INSTANCE); - final Schema schema = - new Schema(Collections.singletonList(VariantVector.createVariantField("a"))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - VariantVector vector = (VariantVector) root.getVector("a"); - vector.allocateNew(); - - setVariant(vector, 0, TestVariant.variantString("hello")); - vector.setValueCount(1); - root.setRowCount(1); - - final File file = File.createTempFile("varianttest", ".arrow"); - try (final WritableByteChannel channel = - FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); - final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - - ExtensionTypeRegistry.unregister(VariantType.INSTANCE); - - try (final SeekableByteChannel channel = - Files.newByteChannel(Paths.get(file.getAbsolutePath())); - final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.loadNextBatch(); - VectorSchemaRoot readRoot = reader.getVectorSchemaRoot(); - - // Verify schema properties - assertEquals(1, readRoot.getSchema().getFields().size()); - assertEquals("a", readRoot.getSchema().getFields().get(0).getName()); - assertTrue(readRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.Struct); - - // Verify extension metadata is preserved - final Field field = readRoot.getSchema().getFields().get(0); - assertEquals( - VariantType.EXTENSION_NAME, - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME)); - assertEquals("", field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA)); - - // Verify vector type and row count - assertEquals(1, readRoot.getRowCount()); - FieldVector readVector = readRoot.getVector("a"); - assertEquals(StructVector.class, readVector.getClass()); - - // Verify value count matches - StructVector structVector = (StructVector) readVector; - assertEquals(vector.getValueCount(), structVector.getValueCount()); - - // Verify the underlying data can be accessed from child vectors - VarBinaryVector metadataVector = - structVector.getChild(VariantVector.METADATA_VECTOR_NAME, VarBinaryVector.class); - VarBinaryVector valueVector = - structVector.getChild(VariantVector.VALUE_VECTOR_NAME, VarBinaryVector.class); - assertNotNull(metadataVector); - assertNotNull(valueVector); - assertEquals(1, metadataVector.getValueCount()); - assertEquals(1, valueVector.getValueCount()); - } - } - } - - @Test - public void testVariantVectorCompare() { - VariantType variantType = VariantType.INSTANCE; - ExtensionTypeRegistry.register(variantType); - Variant hello = TestVariant.variantString("hello"); - Variant world = TestVariant.variantString("world"); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - VariantVector a1 = - (VariantVector) - variantType.getNewVector("a", FieldType.nullable(variantType), allocator); - VariantVector a2 = - (VariantVector) - variantType.getNewVector("a", FieldType.nullable(variantType), allocator); - VariantVector bb = - (VariantVector) - variantType.getNewVector("a", FieldType.nullable(variantType), allocator)) { - - ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor(); - validateVisitor.visit(a1, null); - - a1.allocateNew(); - a2.allocateNew(); - bb.allocateNew(); - - setVariant(a1, 0, hello); - setVariant(a1, 1, world); - a1.setValueCount(2); - - setVariant(a2, 0, hello); - setVariant(a2, 1, world); - a2.setValueCount(2); - - setVariant(bb, 0, world); - setVariant(bb, 1, hello); - bb.setValueCount(2); - - Range range = new Range(0, 0, a1.getValueCount()); - RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2); - assertTrue(visitor.rangeEquals(range)); - - visitor = new RangeEqualsVisitor(a1, bb); - assertFalse(visitor.rangeEquals(range)); - - VectorBatchAppender.batchAppend(a1, a2, bb); - assertEquals(6, a1.getValueCount()); - validateVisitor.visit(a1, null); - } - } - - @Test - public void testVariantCopyAsValueThrowsException() { - ensureRegistered(VariantType.INSTANCE); - try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - VariantVector vector = new VariantVector("variant", allocator)) { - vector.allocateNew(); - setVariant(vector, 0, TestVariant.variantString("hello")); - vector.setValueCount(1); - - var reader = vector.getReader(); - reader.setPosition(0); - - assertThrows( - IllegalArgumentException.class, () -> reader.copyAsValue((BaseWriter.StructWriter) null)); - } - } - - private static void setVariant(VariantVector vector, int index, Variant variant) { - vector.setSafe(index, variant); - } - static class LocationType extends ExtensionType { @Override From 23549e6fa1f02c585788b96d04acd800f9bb332d Mon Sep 17 00:00:00 2001 From: Tamas Mate Date: Wed, 28 Jan 2026 14:14:30 +0100 Subject: [PATCH 5/5] Spotless + Checkstyle --- .../src/main/java/org/apache/arrow/variant/Variant.java | 1 + .../java/org/apache/arrow/variant/extension/VariantType.java | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java b/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java index 99f4c5e6d..fa05cdd93 100644 --- a/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/Variant.java @@ -59,6 +59,7 @@ private Variant(org.apache.parquet.variant.Variant delegate) { this.delegate = delegate; } + /** Constructs a Variant from a NullableVariantHolder. */ public Variant(NullableVariantHolder holder) { this( holder.metadataBuffer, diff --git a/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java index dc9f7a7a5..3deb70cdc 100644 --- a/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java +++ b/arrow-variant/src/main/java/org/apache/arrow/variant/extension/VariantType.java @@ -27,8 +27,9 @@ import org.apache.arrow.vector.types.pojo.FieldType; /** - * Arrow extension type for - * Parquet Variant binary encoding. The type itself does not support shredded variant data. + * Arrow extension type for Parquet + * Variant binary encoding. The type itself does not support shredded variant data. */ public final class VariantType extends ExtensionType {