From 3645fe2587d80aefed2da25f3b54e2ab3b835422 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 12:39:16 +0000 Subject: [PATCH 1/8] Initial boilerplate Signed-off-by: Adam Gutglick --- vortex-array/src/arrays/arbitrary.rs | 5 +- .../src/arrays/constant/vtable/canonical.rs | 1 + vortex-array/src/builders/mod.rs | 3 + vortex-array/src/canonical.rs | 1 + vortex-array/src/dtype/dtype_impl.rs | 5 +- vortex-array/src/dtype/mod.rs | 3 + vortex-array/src/dtype/serde/flatbuffers.rs | 3 + vortex-array/src/dtype/serde/proto.rs | 4 + vortex-array/src/dtype/serde/serde.rs | 1 + vortex-array/src/scalar/arbitrary.rs | 1 + vortex-array/src/scalar/arrow.rs | 1 + vortex-array/src/scalar/cast.rs | 2 + vortex-array/src/scalar/display.rs | 1 + vortex-array/src/scalar/scalar_impl.rs | 2 + vortex-array/src/scalar/scalar_value.rs | 2 + vortex-array/src/scalar/validate.rs | 1 + .../flatbuffers/vortex-dtype/dtype.fbs | 3 + vortex-flatbuffers/src/generated/dtype.rs | 112 +++++++++++++++++- vortex-flatbuffers/src/generated/message.rs | 2 +- vortex-proto/proto/dtype.proto | 3 + vortex-proto/src/generated/vortex.dtype.rs | 6 +- 21 files changed, 155 insertions(+), 7 deletions(-) diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs index 47b2d316ea5..e559a53e536 100644 --- a/vortex-array/src/arrays/arbitrary.rs +++ b/vortex-array/src/arrays/arbitrary.rs @@ -162,7 +162,10 @@ fn random_array_chunk( random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len) } DType::Extension(..) 
=> { - todo!("Extension arrays are not implemented") + unimplemented!("Extension arrays are not implemented") + } + DType::Variant => { + unimplemented!("Variant arrays are not implemented") } } } diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index 3540257fb85..9ac3772715e 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -164,6 +164,7 @@ pub(crate) fn constant_canonicalize(array: &ConstantArray) -> VortexResult todo!(), }) } diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs index 4c6bbf0dbf6..427599041b3 100644 --- a/vortex-array/src/builders/mod.rs +++ b/vortex-array/src/builders/mod.rs @@ -280,5 +280,8 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box { Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity)) } + DType::Variant => { + unimplemented!() + } } } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 4aaee7d88f8..fd769a31e85 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -220,6 +220,7 @@ impl Canonical { ext_dtype.clone(), Canonical::empty(ext_dtype.storage_dtype()).into_array(), )), + DType::Variant => todo!(), } } diff --git a/vortex-array/src/dtype/dtype_impl.rs b/vortex-array/src/dtype/dtype_impl.rs index 36476d6f620..23da3400442 100644 --- a/vortex-array/src/dtype/dtype_impl.rs +++ b/vortex-array/src/dtype/dtype_impl.rs @@ -54,7 +54,7 @@ impl DType { #[inline] pub fn is_nullable(&self) -> bool { match self { - Null => true, + Null | Variant => true, Extension(ext_dtype) => ext_dtype.storage_dtype().is_nullable(), Bool(null) | Primitive(_, null) @@ -90,6 +90,7 @@ impl DType { List(edt, _) => List(edt.clone(), nullability), FixedSizeList(edt, size, _) => FixedSizeList(edt.clone(), *size, nullability), Extension(ext) => Extension(ext.with_nullability(nullability)), + 
Variant => Variant, } } @@ -284,6 +285,7 @@ impl DType { Some(sum) } Extension(ext) => ext.storage_dtype().element_size(), + Variant => None, } } @@ -459,6 +461,7 @@ impl Display for DType { List(edt, null) => write!(f, "list({edt}){null}"), FixedSizeList(edt, size, null) => write!(f, "fixed_size_list({edt})[{size}]{null}"), Extension(ext) => write!(f, "{}", ext), + Variant => write!(f, "variant"), } } } diff --git a/vortex-array/src/dtype/mod.rs b/vortex-array/src/dtype/mod.rs index 6c5f58b966e..3f3aab0c7c2 100644 --- a/vortex-array/src/dtype/mod.rs +++ b/vortex-array/src/dtype/mod.rs @@ -100,6 +100,9 @@ pub enum DType { /// /// See [`ExtDTypeRef`] for more information. Extension(ExtDTypeRef), + + /// Variant type + Variant, } pub use bigint::*; diff --git a/vortex-array/src/dtype/serde/flatbuffers.rs b/vortex-array/src/dtype/serde/flatbuffers.rs index 2cd12f5dc77..37dc62b8f8f 100644 --- a/vortex-array/src/dtype/serde/flatbuffers.rs +++ b/vortex-array/src/dtype/serde/flatbuffers.rs @@ -16,6 +16,7 @@ use vortex_flatbuffers::FlatBuffer; use vortex_flatbuffers::FlatBufferRoot; use vortex_flatbuffers::WriteFlatBuffer; use vortex_flatbuffers::dtype as fbd; +use vortex_flatbuffers::dtype::VariantArgs; use vortex_session::VortexSession; use crate::dtype::DType; @@ -349,6 +350,7 @@ impl WriteFlatBuffer for DType { ) .as_union_value() } + Self::Variant => fb::Variant::create(fbb, &fb::VariantArgs {}).as_union_value(), }; let dtype_type = match self { @@ -362,6 +364,7 @@ impl WriteFlatBuffer for DType { Self::List(..) => fb::Type::List, Self::FixedSizeList(..) => fb::Type::FixedSizeList, Self::Extension { .. 
} => fb::Type::Extension, + Self::Variant => fb::Type::Variant, }; Ok(fb::DType::create( diff --git a/vortex-array/src/dtype/serde/proto.rs b/vortex-array/src/dtype/serde/proto.rs index 98c1453da02..9d73dd439a9 100644 --- a/vortex-array/src/dtype/serde/proto.rs +++ b/vortex-array/src/dtype/serde/proto.rs @@ -99,6 +99,9 @@ impl DType { let ext_dtype = vtable.deserialize(e.metadata(), storage_dtype)?; Ok(Self::Extension(ext_dtype)) } + DtypeType::Variant(..) => { + unimplemented!() + } } } } @@ -152,6 +155,7 @@ impl TryFrom<&DType> for pb::DType { storage_dtype: Some(Box::new(e.storage_dtype().try_into()?)), metadata: Some(e.serialize_metadata()?), })), + DType::Variant => todo!(), }), }) } diff --git a/vortex-array/src/dtype/serde/serde.rs b/vortex-array/src/dtype/serde/serde.rs index a14f9c47321..f6c1d4cd9b2 100644 --- a/vortex-array/src/dtype/serde/serde.rs +++ b/vortex-array/src/dtype/serde/serde.rs @@ -117,6 +117,7 @@ impl Serialize for DType { DType::Extension(ext) => { serializer.serialize_newtype_variant("DType", 9, "Extension", ext) } + DType::Variant => todo!(), } } } diff --git a/vortex-array/src/scalar/arbitrary.rs b/vortex-array/src/scalar/arbitrary.rs index 342d4fa59b0..49486803af3 100644 --- a/vortex-array/src/scalar/arbitrary.rs +++ b/vortex-array/src/scalar/arbitrary.rs @@ -98,6 +98,7 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { DType::Extension(..) => { unreachable!("Can't yet generate arbitrary scalars for ext dtype") } + DType::Variant => todo!(), }) } diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index bce11e27e0f..87358d61d72 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -65,6 +65,7 @@ impl TryFrom<&Scalar> for Arc { DType::List(..) => unimplemented!("list scalar conversion"), DType::FixedSizeList(..) => unimplemented!("fixed-size list scalar conversion"), DType::Extension(..) 
=> extension_to_arrow(value.as_extension()), + DType::Variant => todo!(), } } } diff --git a/vortex-array/src/scalar/cast.rs b/vortex-array/src/scalar/cast.rs index 82986cd278d..50e1d6a3b99 100644 --- a/vortex-array/src/scalar/cast.rs +++ b/vortex-array/src/scalar/cast.rs @@ -6,6 +6,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; +use vortex_error::vortex_err; use crate::dtype::DType; use crate::scalar::Scalar; @@ -58,6 +59,7 @@ impl Scalar { DType::Struct(..) => self.as_struct().cast(target_dtype), DType::List(..) | DType::FixedSizeList(..) => self.as_list().cast(target_dtype), DType::Extension(..) => self.as_extension().cast(target_dtype), + DType::Variant => vortex_err!("Variant scalars can't be cast to {target_dtype}"), } } diff --git a/vortex-array/src/scalar/display.rs b/vortex-array/src/scalar/display.rs index f43d8626c9e..9e89b86fe14 100644 --- a/vortex-array/src/scalar/display.rs +++ b/vortex-array/src/scalar/display.rs @@ -21,6 +21,7 @@ impl Display for Scalar { DType::Struct(..) => write!(f, "{}", self.as_struct()), DType::List(..) | DType::FixedSizeList(..) 
=> write!(f, "{}", self.as_list()), DType::Extension(_) => write!(f, "{}", self.as_extension()), + DType::Variant => write!(f, "variant"), } } } diff --git a/vortex-array/src/scalar/scalar_impl.rs b/vortex-array/src/scalar/scalar_impl.rs index c7a0e9b7999..27a1e4c3db0 100644 --- a/vortex-array/src/scalar/scalar_impl.rs +++ b/vortex-array/src/scalar/scalar_impl.rs @@ -190,6 +190,7 @@ impl Scalar { DType::FixedSizeList(_, list_size, _) => value.as_list().len() == *list_size as usize, DType::Struct(struct_fields, _) => value.as_list().len() == struct_fields.nfields(), DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?, + DType::Variant => todo!(), }; Some(is_zero) @@ -257,6 +258,7 @@ impl Scalar { .map(|fields| fields.into_iter().map(|f| f.approx_nbytes()).sum::()) .unwrap_or_default(), DType::Extension(_) => self.as_extension().to_storage_scalar().approx_nbytes(), + DType::Variant => todo!(), } } } diff --git a/vortex-array/src/scalar/scalar_value.rs b/vortex-array/src/scalar/scalar_value.rs index 71bf4fcfc77..9ed493abff4 100644 --- a/vortex-array/src/scalar/scalar_value.rs +++ b/vortex-array/src/scalar/scalar_value.rs @@ -64,6 +64,7 @@ impl ScalarValue { // zero storage value and try to make an extension scalar from that. Self::zero_value(ext_dtype.storage_dtype()) } + DType::Variant => todo!(), } } @@ -100,6 +101,7 @@ impl ScalarValue { // default storage value and try to make an extension scalar from that. Self::default_value(ext_dtype.storage_dtype())? 
} + DType::Variant => todo!(), }) } } diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 3fed201ed93..604b07bef92 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -119,6 +119,7 @@ impl Scalar { } } DType::Extension(ext_dtype) => ext_dtype.validate_storage_value(value)?, + DType::Variant => unimplemented!(), } Ok(()) diff --git a/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs b/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs index ae3b813c80e..1f1f450744d 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs @@ -63,6 +63,8 @@ table Extension { metadata: [ubyte]; } +table Variant {} + union Type { Null = 1, Bool = 2, @@ -74,6 +76,7 @@ union Type { List = 8, Extension = 9, FixedSizeList = 10, // This is after `Extension` for backwards compatibility. + Variant = 11, } table DType { diff --git a/vortex-flatbuffers/src/generated/dtype.rs b/vortex-flatbuffers/src/generated/dtype.rs index 1ba03d8a3c3..34439cbf386 100644 --- a/vortex-flatbuffers/src/generated/dtype.rs +++ b/vortex-flatbuffers/src/generated/dtype.rs @@ -133,10 +133,10 @@ impl flatbuffers::SimpleToVerifyInSlice for PType {} #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] pub const ENUM_MIN_TYPE: u8 = 0; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] -pub const ENUM_MAX_TYPE: u8 = 10; +pub const ENUM_MAX_TYPE: u8 = 11; #[deprecated(since = "2.0.0", note = "Use associated constants instead. 
This will no longer be generated in 2021.")] #[allow(non_camel_case_types)] -pub const ENUM_VALUES_TYPE: [Type; 11] = [ +pub const ENUM_VALUES_TYPE: [Type; 12] = [ Type::NONE, Type::Null, Type::Bool, @@ -148,6 +148,7 @@ pub const ENUM_VALUES_TYPE: [Type; 11] = [ Type::List, Type::Extension, Type::FixedSizeList, + Type::Variant, ]; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] @@ -166,9 +167,10 @@ impl Type { pub const List: Self = Self(8); pub const Extension: Self = Self(9); pub const FixedSizeList: Self = Self(10); + pub const Variant: Self = Self(11); pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 10; + pub const ENUM_MAX: u8 = 11; pub const ENUM_VALUES: &'static [Self] = &[ Self::NONE, Self::Null, @@ -181,6 +183,7 @@ impl Type { Self::List, Self::Extension, Self::FixedSizeList, + Self::Variant, ]; /// Returns the variant's name or "" if unknown. pub fn variant_name(self) -> Option<&'static str> { @@ -196,6 +199,7 @@ impl Type { Self::List => Some("List"), Self::Extension => Some("Extension"), Self::FixedSizeList => Some("FixedSizeList"), + Self::Variant => Some("Variant"), _ => None, } } @@ -1375,6 +1379,85 @@ impl core::fmt::Debug for Extension<'_> { ds.finish() } } +pub enum VariantOffset {} +#[derive(Copy, Clone, PartialEq)] + +pub struct Variant<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for Variant<'a> { + type Inner = Variant<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: unsafe { flatbuffers::Table::new(buf, loc) } } + } +} + +impl<'a> Variant<'a> { + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Variant { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + _args: &'args VariantArgs + ) -> flatbuffers::WIPOffset> { + let mut builder = 
VariantBuilder::new(_fbb); + builder.finish() + } + +} + +impl flatbuffers::Verifiable for Variant<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, pos: usize + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .finish(); + Ok(()) + } +} +pub struct VariantArgs { +} +impl<'a> Default for VariantArgs { + #[inline] + fn default() -> Self { + VariantArgs { + } + } +} + +pub struct VariantBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> VariantBuilder<'a, 'b, A> { + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> VariantBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + VariantBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for Variant<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Variant"); + ds.finish() + } +} pub enum DTypeOffset {} #[derive(Copy, Clone, PartialEq)] @@ -1574,6 +1657,21 @@ impl<'a> DType<'a> { } } + #[inline] + #[allow(non_snake_case)] + pub fn type__as_variant(&self) -> Option> { + if self.type_type() == Type::Variant { + self.type_().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { Variant::init_from_table(t) } + }) + } else { + None + } + } + } impl flatbuffers::Verifiable for DType<'_> { @@ -1595,6 +1693,7 @@ impl flatbuffers::Verifiable for DType<'_> { Type::List => v.verify_union_variant::>("Type::List", pos), Type::Extension => v.verify_union_variant::>("Type::Extension", pos), Type::FixedSizeList => v.verify_union_variant::>("Type::FixedSizeList", pos), + 
Type::Variant => v.verify_union_variant::>("Type::Variant", pos), _ => Ok(()), } })? @@ -1719,6 +1818,13 @@ impl core::fmt::Debug for DType<'_> { ds.field("type_", &"InvalidFlatbuffer: Union discriminant does not match value.") } }, + Type::Variant => { + if let Some(x) = self.type__as_variant() { + ds.field("type_", &x) + } else { + ds.field("type_", &"InvalidFlatbuffer: Union discriminant does not match value.") + } + }, _ => { let x: Option<()> = None; ds.field("type_", &x) diff --git a/vortex-flatbuffers/src/generated/message.rs b/vortex-flatbuffers/src/generated/message.rs index 618bdedb808..0748adec10a 100644 --- a/vortex-flatbuffers/src/generated/message.rs +++ b/vortex-flatbuffers/src/generated/message.rs @@ -3,8 +3,8 @@ // @generated -use crate::dtype::*; use crate::array::*; +use crate::dtype::*; use core::mem; use core::cmp::Ordering; diff --git a/vortex-proto/proto/dtype.proto b/vortex-proto/proto/dtype.proto index 12af29d1c1e..945101f5f5f 100644 --- a/vortex-proto/proto/dtype.proto +++ b/vortex-proto/proto/dtype.proto @@ -70,6 +70,8 @@ message Extension { optional bytes metadata = 3; } +message Variant {} + message DType { oneof dtype_type { Null null = 1; @@ -82,6 +84,7 @@ message DType { List list = 8; Extension extension = 9; FixedSizeList fixed_size_list = 10; // This is after `Extension` for backwards compatibility. 
+ Variant variant = 11; } } diff --git a/vortex-proto/src/generated/vortex.dtype.rs b/vortex-proto/src/generated/vortex.dtype.rs index 49a3defb90f..ac9ba872746 100644 --- a/vortex-proto/src/generated/vortex.dtype.rs +++ b/vortex-proto/src/generated/vortex.dtype.rs @@ -66,9 +66,11 @@ pub struct Extension { #[prost(bytes = "vec", optional, tag = "3")] pub metadata: ::core::option::Option<::prost::alloc::vec::Vec>, } +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct Variant {} #[derive(Clone, PartialEq, ::prost::Message)] pub struct DType { - #[prost(oneof = "d_type::DtypeType", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10")] + #[prost(oneof = "d_type::DtypeType", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")] pub dtype_type: ::core::option::Option, } /// Nested message and enum types in `DType`. @@ -96,6 +98,8 @@ pub mod d_type { /// This is after `Extension` for backwards compatibility. #[prost(message, tag = "10")] FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag = "11")] + Variant(super::Variant), } } #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] From 3b0525400b0196ed2092303d990b67f580ac03ee Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 18:47:22 +0000 Subject: [PATCH 2/8] Initial work Signed-off-by: Adam Gutglick --- Cargo.lock | 13 + Cargo.toml | 1 + encodings/parquet-variant/Cargo.toml | 32 ++ encodings/parquet-variant/src/lib.rs | 442 ++++++++++++++++++ vortex-array/src/arrays/mod.rs | 4 + vortex-array/src/arrays/variant/mod.rs | 33 ++ vortex-array/src/arrays/variant/vtable/mod.rs | 163 +++++++ .../src/arrays/variant/vtable/operations.rs | 17 + .../src/arrays/variant/vtable/rules.rs | 39 ++ .../src/arrays/variant/vtable/validity.rs | 15 + vortex-array/src/builders/tests.rs | 1 + vortex-array/src/builtins.rs | 31 ++ .../src/compute/conformance/consistency.rs | 1 + vortex-array/src/dtype/arrow.rs | 21 +- vortex-array/src/dtype/dtype_impl.rs | 7 +- vortex-array/src/dtype/serde/proto.rs | 12 +- 
vortex-array/src/expr/exprs.rs | 15 + vortex-array/src/scalar/cast.rs | 3 +- vortex-array/src/scalar_fn/fns/mod.rs | 1 + vortex-array/src/scalar_fn/fns/variant_get.rs | 125 +++++ vortex-array/src/scalar_fn/session.rs | 2 + vortex-proto/proto/expr.proto | 6 + vortex-proto/src/generated/vortex.expr.rs | 8 + 23 files changed, 981 insertions(+), 11 deletions(-) create mode 100644 encodings/parquet-variant/Cargo.toml create mode 100644 encodings/parquet-variant/src/lib.rs create mode 100644 vortex-array/src/arrays/variant/mod.rs create mode 100644 vortex-array/src/arrays/variant/vtable/mod.rs create mode 100644 vortex-array/src/arrays/variant/vtable/operations.rs create mode 100644 vortex-array/src/arrays/variant/vtable/rules.rs create mode 100644 vortex-array/src/arrays/variant/vtable/validity.rs create mode 100644 vortex-array/src/scalar_fn/fns/variant_get.rs diff --git a/Cargo.lock b/Cargo.lock index 733d82da3c2..d2a133eb101 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10295,6 +10295,19 @@ dependencies = [ "vortex-cuda-macros", ] +[[package]] +name = "vortex-parquet-variant" +version = "0.1.0" +dependencies = [ + "prost 0.14.3", + "rstest", + "vortex-array", + "vortex-buffer", + "vortex-error", + "vortex-mask", + "vortex-session", +] + [[package]] name = "vortex-pco" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 0da5ee805ba..1de3e3569b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ members = [ "benchmarks/duckdb-bench", "benchmarks/random-access-bench", "vortex-sqllogictest", + "encodings/parquet-variant", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" diff --git a/encodings/parquet-variant/Cargo.toml b/encodings/parquet-variant/Cargo.toml new file mode 100644 index 00000000000..aeb8615f6ca --- /dev/null +++ b/encodings/parquet-variant/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "vortex-parquet-variant" +authors = { workspace = true } +categories = { workspace = true } +description = "Vortex Parquet Variant array" +edition = { workspace
= true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +readme = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +prost = { workspace = true } +vortex-array = { workspace = true } +vortex-buffer = { workspace = true } +vortex-error = { workspace = true } +vortex-mask = { workspace = true } +vortex-session = { workspace = true } + +[dev-dependencies] +rstest = { workspace = true } +vortex-array = { workspace = true, features = ["_test-harness"] } + +[package.metadata.cargo-machete] +ignored = ["getrandom_v03"] diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs new file mode 100644 index 00000000000..ff4a1f9b342 --- /dev/null +++ b/encodings/parquet-variant/src/lib.rs @@ -0,0 +1,442 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! This crate exposes a Vortex encoding that supports variant arrays, encoded as parquet's +//! [Variant encoding], in order to allow for zero-copy export to Arrow's new +//! [canonical extension type]. +//! +//! The encoding follows the Arrow Parquet Variant canonical extension type structure: +//! - `metadata` (binary, required): type information for arrays/objects, field names and offsets +//! - `value` (binary, optional): un-shredded serialized variant values +//! - `typed_value` (any type, optional): shredded column data with a known type +//! +//! At least one of `value` or `typed_value` must be present. The `typed_value` child supports +//! full recursive shredding — it can be a primitive type, a list (whose elements are variant +//! nodes with value/typed_value), or a struct (whose fields are variant nodes). +//! +//! [Variant encoding]: https://parquet.apache.org/docs/file-format/types/variantencoding/ +//! 
[canonical extension type]: https://arrow.apache.org/docs/format/CanonicalExtensions.html#parquet-variant + +use std::hash::Hasher; + +use prost::Message; +use vortex_array::ArrayEq; +use vortex_array::ArrayHash; +use vortex_array::ArrayRef; +use vortex_array::DeserializeMetadata; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::Precision; +use vortex_array::ProstMetadata; +use vortex_array::arrays::scalar_fn::ExactScalarFn; +use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::buffer::BufferHandle; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::optimizer::rules::ArrayParentReduceRule; +use vortex_array::optimizer::rules::ParentRuleSet; +use vortex_array::scalar_fn::fns::variant_get::VariantGet; +use vortex_array::serde::ArrayChildren; +use vortex_array::stats::ArrayStats; +use vortex_array::stats::StatsSetRef; +use vortex_array::validity::Validity; +use vortex_array::vtable; +use vortex_array::vtable::ArrayId; +use vortex_array::vtable::NotSupported; +use vortex_array::vtable::VTable; +use vortex_array::vtable::ValidityVTable; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +vtable!(ParquetVariant); + +#[derive(Debug)] +pub struct ParquetVariantVTable; + +impl ParquetVariantVTable { + pub const ID: ArrayId = ArrayId::new_ref("vortex.parquet.variant"); +} + +/// Serialized metadata for a [`ParquetVariantArray`]. +/// +/// Tracks which optional children are present so the array can be correctly +/// reconstructed during deserialization. +#[derive(Clone, prost::Message)] +pub struct ParquetVariantMetadata { + /// Whether the un-shredded `value` child is present. + #[prost(bool, tag = "1")] + pub has_value: bool, + /// Whether the shredded `typed_value` child is present. 
+ #[prost(bool, tag = "2")] + pub has_typed_value: bool, +} + +/// An array encoding that stores variant data in the Parquet Variant binary format. +/// +/// Contains up to three children following the Arrow Parquet Variant canonical extension type: +/// - `metadata` (always present): binary array with variant type information +/// - `value` (optional): binary array with un-shredded serialized variant values +/// - `typed_value` (optional): array of any type with shredded column data +/// +/// At least one of `value` or `typed_value` must be present. +/// The `typed_value` supports full recursive shredding — it can be a primitive, list, or struct +/// where nested struct/list elements themselves contain value/typed_value children. +#[derive(Clone, Debug)] +pub struct ParquetVariantArray { + metadata: ArrayRef, + value: Option, + typed_value: Option, + stats_set: ArrayStats, +} + +const VARIANT_DTYPE: DType = DType::Variant; + +impl ParquetVariantArray { + /// Creates a new ParquetVariantArray. + /// + /// # Panics + /// Panics if neither `value` nor `typed_value` is provided, or if children have + /// mismatched lengths. + pub fn new(metadata: ArrayRef, value: Option, typed_value: Option) -> Self { + assert!( + value.is_some() || typed_value.is_some(), + "at least one of value or typed_value must be present" + ); + let len = metadata.len(); + if let Some(ref v) = value { + assert_eq!(v.len(), len, "value length must match metadata length"); + } + if let Some(ref tv) = typed_value { + assert_eq!( + tv.len(), + len, + "typed_value length must match metadata length" + ); + } + Self { + metadata, + value, + typed_value, + stats_set: ArrayStats::default(), + } + } + + /// Returns a reference to the metadata child array. + pub fn metadata_array(&self) -> &ArrayRef { + &self.metadata + } + + /// Returns a reference to the un-shredded value child array, if present. 
+ pub fn value_array(&self) -> Option<&ArrayRef> { + self.value.as_ref() + } + + /// Returns a reference to the shredded typed_value child array, if present. + pub fn typed_value_array(&self) -> Option<&ArrayRef> { + self.typed_value.as_ref() + } + + fn nchildren(&self) -> usize { + 1 + self.value.is_some() as usize + self.typed_value.is_some() as usize + } +} + +impl VTable for ParquetVariantVTable { + type Array = ParquetVariantArray; + type Metadata = ProstMetadata; + type OperationsVTable = NotSupported; + type ValidityVTable = Self; + + fn id(_array: &Self::Array) -> ArrayId { + Self::ID + } + + fn len(array: &ParquetVariantArray) -> usize { + array.metadata.len() + } + + fn dtype(_array: &ParquetVariantArray) -> &DType { + &VARIANT_DTYPE + } + + fn stats(array: &ParquetVariantArray) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &ParquetVariantArray, state: &mut H, precision: Precision) { + array.metadata.array_hash(state, precision); + if let Some(ref value) = array.value { + value.array_hash(state, precision); + } + if let Some(ref typed_value) = array.typed_value { + typed_value.array_hash(state, precision); + } + } + + fn array_eq( + array: &ParquetVariantArray, + other: &ParquetVariantArray, + precision: Precision, + ) -> bool { + if !array.metadata.array_eq(&other.metadata, precision) { + return false; + } + match (&array.value, &other.value) { + (Some(a), Some(b)) => { + if !a.array_eq(b, precision) { + return false; + } + } + (None, None) => {} + _ => return false, + } + match (&array.typed_value, &other.typed_value) { + (Some(a), Some(b)) => a.array_eq(b, precision), + (None, None) => true, + _ => false, + } + } + + fn nbuffers(_array: &ParquetVariantArray) -> usize { + 0 + } + + fn buffer(_array: &ParquetVariantArray, idx: usize) -> BufferHandle { + vortex_panic!("ParquetVariantArray buffer index {idx} out of bounds") + } + + fn buffer_name(_array: &ParquetVariantArray, _idx: usize) -> Option { + None + } 
+ + fn nchildren(array: &ParquetVariantArray) -> usize { + array.nchildren() + } + + fn child(array: &ParquetVariantArray, idx: usize) -> ArrayRef { + match idx { + 0 => array.metadata.clone(), + 1 if array.value.is_some() => array.value.clone().unwrap(), + 1 => array.typed_value.clone().unwrap(), + 2 => array.typed_value.clone().unwrap(), + _ => vortex_panic!("ParquetVariantArray child index {idx} out of bounds"), + } + } + + fn child_name(array: &ParquetVariantArray, idx: usize) -> String { + match idx { + 0 => "metadata".to_string(), + 1 if array.value.is_some() => "value".to_string(), + 1 => "typed_value".to_string(), + 2 => "typed_value".to_string(), + _ => vortex_panic!("ParquetVariantArray child_name index {idx} out of bounds"), + } + } + + fn metadata(array: &ParquetVariantArray) -> VortexResult { + Ok(ProstMetadata(ParquetVariantMetadata { + has_value: array.value.is_some(), + has_typed_value: array.typed_value.is_some(), + })) + } + + fn serialize(metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(metadata.encode_to_vec())) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let inner = + as DeserializeMetadata>::deserialize(bytes)?; + Ok(ProstMetadata(inner)) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + _buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + vortex_ensure!(matches!(dtype, DType::Variant), "Expected Variant DType"); + vortex_ensure!( + metadata.has_value || metadata.has_typed_value, + "At least one of value or typed_value must be present" + ); + + let expected_children = 1 + metadata.has_value as usize + metadata.has_typed_value as usize; + vortex_ensure!( + children.len() == expected_children, + "Expected {} children, got {}", + expected_children, + children.len() + ); + + let mut child_idx = 0; + let variant_metadata = + children.get(child_idx, 
&DType::Binary(Nullability::NonNullable), len)?; + child_idx += 1; + + let value = if metadata.has_value { + let v = children.get(child_idx, &DType::Binary(Nullability::NonNullable), len)?; + child_idx += 1; + Some(v) + } else { + None + }; + + let typed_value = if metadata.has_typed_value { + // typed_value can be any type — primitive, list, struct, etc. + // NOTE(review): intended to be retrieved without constraining its DType, but the call below passes `DType::Variant` — confirm. + let tv = children.get(child_idx, &DType::Variant, len)?; + Some(tv) + } else { + None + }; + + Ok(ParquetVariantArray::new( + variant_metadata, + value, + typed_value, + )) + } + + fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == array.nchildren(), + "ParquetVariantArray expects {} children, got {}", + array.nchildren(), + children.len() + ); + let mut iter = children.into_iter(); + array.metadata = iter.next().unwrap(); + if array.value.is_some() { + array.value = Some(iter.next().unwrap()); + } + if array.typed_value.is_some() { + array.typed_value = Some(iter.next().unwrap()); + } + Ok(()) + } + + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + Ok(array.clone().into_array()) + } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +const PARENT_RULES: ParentRuleSet = + ParentRuleSet::new(&[ParentRuleSet::lift(&ParquetVariantGetRule)]); + +/// Rule to handle VariantGet on a ParquetVariantArray by returning the typed_value child.
+#[derive(Debug)] +struct ParquetVariantGetRule; + +impl ArrayParentReduceRule for ParquetVariantGetRule { + type Parent = ExactScalarFn; + + fn reduce_parent( + &self, + array: &ParquetVariantArray, + parent: ScalarFnArrayView<'_, VariantGet>, + _child_idx: usize, + ) -> VortexResult> { + let options = parent.options; + match array.typed_value_array() { + Some(typed_value) => { + // The shredded typed_value is available; cast it to the requested dtype. + Ok(Some(typed_value.cast(options.dtype.clone())?)) + } + None => { + // No shredded data available; cannot push down. + Ok(None) + } + } + } +} + +impl ValidityVTable for ParquetVariantVTable { + fn validity(_array: &ParquetVariantArray) -> VortexResult { + // Variant is always nullable. Null-ness of individual values is encoded + // within the Parquet Variant binary format itself, not via a separate validity bitmap. + Ok(Validity::AllValid) + } +} + +#[cfg(test)] +mod tests { + use vortex_array::IntoArray; + use vortex_array::arrays::VariantArray; + use vortex_array::builtins::ArrayBuiltins; + use vortex_array::dtype::DType; + use vortex_array::dtype::Nullability; + use vortex_array::dtype::PType; + use vortex_buffer::buffer; + + use super::*; + + #[test] + fn test_variant_get_pushdown_with_typed_value() -> VortexResult<()> { + // Create a ParquetVariantArray with shredded typed_value (i32 data) + let metadata = buffer![0u8, 1, 2].into_array(); + let typed_value = buffer![10i32, 20, 30].into_array(); + let pv_array = ParquetVariantArray::new(metadata, None, Some(typed_value)); + + // Wrap it in a VariantArray + let variant_array = VariantArray::new(pv_array.into_array()); + + // Apply variant_get + let target_dtype = DType::Primitive(PType::I32, Nullability::Nullable); + let result = variant_array + .into_array() + .variant_get("col", target_dtype)?; + + // The result should be the typed_value data, cast to nullable i32 + assert_eq!( + result.dtype(), + &DType::Primitive(PType::I32, Nullability::Nullable) + ); + 
assert_eq!(result.len(), 3); + + Ok(()) + } + + #[test] + fn test_variant_get_no_typed_value() -> VortexResult<()> { + // Create a ParquetVariantArray without typed_value (only value) + let metadata = buffer![0u8, 1, 2].into_array(); + let value = buffer![0u8, 1, 2].into_array(); + let pv_array = ParquetVariantArray::new(metadata, Some(value), None); + + // Wrap it in a VariantArray + let variant_array = VariantArray::new(pv_array.into_array()); + + // Apply variant_get - the rule returns None since there's no typed_value, + // so the optimizer creates a lazy ScalarFnArray that will error on execute. + let target_dtype = DType::Primitive(PType::I32, Nullability::Nullable); + let result = variant_array + .into_array() + .variant_get("col", target_dtype)?; + // The result is a lazy expression wrapping the variant array + assert_eq!( + result.dtype(), + &DType::Primitive(PType::I32, Nullability::Nullable) + ); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index f29a458b441..e0761464f1d 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -94,5 +94,9 @@ pub mod varbinview; pub use varbinview::VarBinViewArray; pub use varbinview::VarBinViewVTable; +pub mod variant; +pub use variant::VariantArray; +pub use variant::VariantVTable; + #[cfg(feature = "arbitrary")] pub mod arbitrary; diff --git a/vortex-array/src/arrays/variant/mod.rs b/vortex-array/src/arrays/variant/mod.rs new file mode 100644 index 00000000000..e56d305ebb4 --- /dev/null +++ b/vortex-array/src/arrays/variant/mod.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod vtable; + +pub use self::vtable::VariantVTable; +use crate::ArrayRef; +use crate::stats::ArrayStats; + +/// The canonical in-memory representation of variant (semi-structured) data. +/// +/// Wraps a single child array that contains the actual variant-encoded data +/// (e.g. 
a [`ParquetVariantArray`] or any other variant encoding). +#[derive(Clone, Debug)] +pub struct VariantArray { + child: ArrayRef, + pub(super) stats_set: ArrayStats, +} + +impl VariantArray { + /// Creates a new VariantArray wrapping the given child. + pub fn new(child: ArrayRef) -> Self { + Self { + child, + stats_set: ArrayStats::default(), + } + } + + /// Returns a reference to the underlying child array. + pub fn child(&self) -> &ArrayRef { + &self.child + } +} diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs new file mode 100644 index 00000000000..2c5378cd0d2 --- /dev/null +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod operations; +mod rules; +mod validity; + +use std::hash::Hasher; + +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_error::vortex_panic; + +use self::rules::PARENT_RULES; +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::EmptyMetadata; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::Precision; +use crate::arrays::VariantArray; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::serde::ArrayChildren; +use crate::stats::StatsSetRef; +use crate::vtable; +use crate::vtable::ArrayId; +use crate::vtable::VTable; + +vtable!(Variant); + +#[derive(Debug)] +pub struct VariantVTable; + +const VARIANT_DTYPE: DType = DType::Variant; + +impl VariantVTable { + pub const ID: ArrayId = ArrayId::new_ref("vortex.variant"); +} + +impl VTable for VariantVTable { + type Array = VariantArray; + + type Metadata = EmptyMetadata; + + type OperationsVTable = Self; + + type ValidityVTable = Self; + + fn id(_array: &Self::Array) -> ArrayId { + Self::ID + } + + fn len(array: &Self::Array) -> usize { + array.child.len() + } + + fn dtype(_array: &Self::Array) -> &DType { + &VARIANT_DTYPE + } + + fn 
stats(array: &Self::Array) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { + array.child.array_hash(state, precision); + } + + fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { + array.child.array_eq(&other.child, precision) + } + + fn nbuffers(_array: &Self::Array) -> usize { + 0 + } + + fn buffer(_array: &Self::Array, idx: usize) -> BufferHandle { + vortex_panic!("VariantArray buffer index {idx} out of bounds") + } + + fn buffer_name(_array: &Self::Array, _idx: usize) -> Option { + None + } + + fn nchildren(_array: &Self::Array) -> usize { + 1 + } + + fn child(array: &Self::Array, idx: usize) -> ArrayRef { + match idx { + 0 => array.child.clone(), + _ => vortex_panic!("VariantArray child index {idx} out of bounds"), + } + } + + fn child_name(_array: &Self::Array, idx: usize) -> String { + match idx { + 0 => "child".to_string(), + _ => vortex_panic!("VariantArray child_name index {idx} out of bounds"), + } + } + + fn metadata(_array: &Self::Array) -> VortexResult { + Ok(EmptyMetadata) + } + + fn serialize(_metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(vec![])) + } + + fn deserialize( + _bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &vortex_session::VortexSession, + ) -> VortexResult { + Ok(EmptyMetadata) + } + + fn build( + dtype: &DType, + len: usize, + _metadata: &Self::Metadata, + _buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + vortex_ensure!(matches!(dtype, DType::Variant), "Expected Variant DType"); + vortex_ensure!( + children.len() == 1, + "Expected 1 child, got {}", + children.len() + ); + // The child can be any variant encoding, so we use DType::Variant. 
+ let child = children.get(0, &DType::Variant, len)?; + Ok(VariantArray::new(child)) + } + + fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == 1, + "VariantArray expects exactly 1 child, got {}", + children.len() + ); + array.child = children.into_iter().next().unwrap(); + Ok(()) + } + + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + // VariantArray is the canonical variant representation. + Ok(array.clone().into_array()) + } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} diff --git a/vortex-array/src/arrays/variant/vtable/operations.rs b/vortex-array/src/arrays/variant/vtable/operations.rs new file mode 100644 index 00000000000..8f936f67680 --- /dev/null +++ b/vortex-array/src/arrays/variant/vtable/operations.rs @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::arrays::VariantVTable; +use crate::scalar::Scalar; +use crate::vtable::OperationsVTable; + +impl OperationsVTable for VariantVTable { + fn scalar_at( + _array: &::Array, + _index: usize, + ) -> VortexResult { + todo!("Variant scalar representation is not yet defined") + } +} diff --git a/vortex-array/src/arrays/variant/vtable/rules.rs b/vortex-array/src/arrays/variant/vtable/rules.rs new file mode 100644 index 00000000000..7903316b19a --- /dev/null +++ b/vortex-array/src/arrays/variant/vtable/rules.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::arrays::VariantArray; +use crate::arrays::scalar_fn::ExactScalarFn; +use crate::arrays::scalar_fn::ScalarFnArrayView; +use crate::arrays::variant::vtable::VariantVTable; +use 
crate::builtins::ArrayBuiltins; +use crate::optimizer::rules::ArrayParentReduceRule; +use crate::optimizer::rules::ParentRuleSet; +use crate::scalar_fn::fns::variant_get::VariantGet; + +pub(crate) const PARENT_RULES: ParentRuleSet = + ParentRuleSet::new(&[ParentRuleSet::lift(&VariantGetPushDownRule)]); + +/// Rule to push VariantGet through VariantArray to its child encoding. +#[derive(Debug)] +struct VariantGetPushDownRule; + +impl ArrayParentReduceRule for VariantGetPushDownRule { + type Parent = ExactScalarFn; + + fn reduce_parent( + &self, + array: &VariantArray, + parent: ScalarFnArrayView<'_, VariantGet>, + _child_idx: usize, + ) -> VortexResult> { + let options = parent.options; + Ok(Some( + array + .child() + .variant_get(&options.path, options.dtype.clone())?, + )) + } +} diff --git a/vortex-array/src/arrays/variant/vtable/validity.rs b/vortex-array/src/arrays/variant/vtable/validity.rs new file mode 100644 index 00000000000..3f92677c97c --- /dev/null +++ b/vortex-array/src/arrays/variant/vtable/validity.rs @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::arrays::VariantVTable; +use crate::validity::Validity; +use crate::vtable::VTable; +use crate::vtable::ValidityVTable; + +impl ValidityVTable for VariantVTable { + fn validity(array: &::Array) -> VortexResult { + array.child().validity() + } +} diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index ec46ca8879d..8959315face 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -630,6 +630,7 @@ fn create_test_scalars_for_dtype(dtype: &DType, count: usize) -> Vec { }; Scalar::extension_ref(ext_dtype.clone(), storage_scalar) } + DType::Variant => continue, }; scalars.push(scalar); } diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index ba3b8542eb7..207187d186d 100644 --- 
a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -33,6 +33,8 @@ use crate::scalar_fn::fns::list_contains::ListContains; use crate::scalar_fn::fns::mask::Mask; use crate::scalar_fn::fns::not::Not; use crate::scalar_fn::fns::operators::Operator; +use crate::scalar_fn::fns::variant_get::VariantGet; +use crate::scalar_fn::fns::variant_get::VariantGetOptions; use crate::scalar_fn::fns::zip::Zip; /// A collection of built-in scalar functions that can be applied to expressions or arrays. @@ -63,6 +65,9 @@ pub trait ExprBuiltins: Sized { /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`. fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult; + /// Extract data by path and dtype from a variant expression. + fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult; + /// Apply a binary operator to this expression and another. fn binary(&self, rhs: Expression, op: Operator) -> VortexResult; } @@ -100,6 +105,16 @@ impl ExprBuiltins for Expression { Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()]) } + fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult { + VariantGet.try_new_expr( + VariantGetOptions { + path: path.into(), + dtype, + }, + [self.clone()], + ) + } + fn binary(&self, rhs: Expression, op: Operator) -> VortexResult { Binary.try_new_expr(op, [self.clone(), rhs]) } @@ -132,6 +147,9 @@ pub trait ArrayBuiltins: Sized { /// Check if a list contains a value. fn list_contains(&self, value: ArrayRef) -> VortexResult; + /// Extract data by path and dtype from a variant array. + fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult; + /// Apply a binary operator to this array and another. 
fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult; @@ -202,6 +220,19 @@ impl ArrayBuiltins for ArrayRef { .optimize() } + fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult { + VariantGet + .try_new_array( + self.len(), + VariantGetOptions { + path: path.into(), + dtype, + }, + [self.clone()], + )? + .optimize() + } + fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult { Binary .try_new_array(self.len(), op, [self.clone(), rhs])? diff --git a/vortex-array/src/compute/conformance/consistency.rs b/vortex-array/src/compute/conformance/consistency.rs index e6452e4b4de..f55ce1cbfbf 100644 --- a/vortex-array/src/compute/conformance/consistency.rs +++ b/vortex-array/src/compute/conformance/consistency.rs @@ -1235,6 +1235,7 @@ fn test_cast_slice_consistency(array: &ArrayRef) { )] } DType::Extension(_) => vec![], // Extension types typically only cast to themselves + DType::Variant => unimplemented!(), }; // Test each target dtype diff --git a/vortex-array/src/dtype/arrow.rs b/vortex-array/src/dtype/arrow.rs index 93d39b05212..50cf53c04c2 100644 --- a/vortex-array/src/dtype/arrow.rs +++ b/vortex-array/src/dtype/arrow.rs @@ -227,11 +227,21 @@ impl DType { let mut builder = SchemaBuilder::with_capacity(struct_dtype.names().len()); for (field_name, field_dtype) in struct_dtype.names().iter().zip(struct_dtype.fields()) { - builder.push(FieldRef::from(Field::new( - field_name.as_ref(), - field_dtype.to_arrow_dtype()?, - field_dtype.is_nullable(), - ))); + let field = if field_dtype.is_variant() { + Field::new( + field_name.as_ref(), + field_dtype.to_arrow_dtype()?, + field_dtype.is_nullable(), + ) + .with_metadata([("".to_owned(), "arrow.parquet.variant".to_owned())].into()) + } else { + Field::new( + field_name.as_ref(), + field_dtype.to_arrow_dtype()?, + field_dtype.is_nullable(), + ) + }; + builder.push(field); } Ok(builder.finish()) @@ -300,6 +310,7 @@ impl DType { DataType::Struct(Fields::from(fields)) } + DType::Variant => 
unimplemented!("should this be struct? fail and ask to use schema?"), DType::Extension(ext_dtype) => { // Try and match against the known extension DTypes. if let Some(temporal) = ext_dtype.metadata_opt::() { diff --git a/vortex-array/src/dtype/dtype_impl.rs b/vortex-array/src/dtype/dtype_impl.rs index 23da3400442..82d5aad281c 100644 --- a/vortex-array/src/dtype/dtype_impl.rs +++ b/vortex-array/src/dtype/dtype_impl.rs @@ -247,11 +247,16 @@ impl DType { matches!(self, Extension(_)) } + /// Check if `self` is a [`DType::Variant`] type + pub fn is_variant(&self) -> bool { + matches!(self, Variant) + } + /// Check if `self` is a nested type, i.e. list, fixed size list, struct, or extension of a /// recursive type. pub fn is_nested(&self) -> bool { match self { - List(..) | FixedSizeList(..) | Struct(..) => true, + List(..) | FixedSizeList(..) | Struct(..) | Variant => true, Extension(ext) => ext.storage_dtype().is_nested(), _ => false, } diff --git a/vortex-array/src/dtype/serde/proto.rs b/vortex-array/src/dtype/serde/proto.rs index 9d73dd439a9..88097c4235b 100644 --- a/vortex-array/src/dtype/serde/proto.rs +++ b/vortex-array/src/dtype/serde/proto.rs @@ -99,9 +99,7 @@ impl DType { let ext_dtype = vtable.deserialize(e.metadata(), storage_dtype)?; Ok(Self::Extension(ext_dtype)) } - DtypeType::Variant(..) => { - unimplemented!() - } + DtypeType::Variant(..) 
=> Ok(Self::Variant), } } } @@ -155,7 +153,7 @@ impl TryFrom<&DType> for pb::DType { storage_dtype: Some(Box::new(e.storage_dtype().try_into()?)), metadata: Some(e.serialize_metadata()?), })), - DType::Variant => todo!(), + DType::Variant => DtypeType::Variant(pb::Variant {}), }), }) } @@ -365,6 +363,12 @@ mod tests { assert_eq!(ext_dtype, converted); } + #[test] + fn test_variant_round_trip() { + let converted = round_trip_dtype(&DType::Variant); + assert_eq!(DType::Variant, converted); + } + #[test] fn test_field_path_round_trip() { let test_paths = vec![ diff --git a/vortex-array/src/expr/exprs.rs b/vortex-array/src/expr/exprs.rs index bc30ba86ec4..3b1611b796c 100644 --- a/vortex-array/src/expr/exprs.rs +++ b/vortex-array/src/expr/exprs.rs @@ -45,6 +45,8 @@ use crate::scalar_fn::fns::pack::PackOptions; use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::FieldSelection; use crate::scalar_fn::fns::select::Select; +use crate::scalar_fn::fns::variant_get::VariantGet; +use crate::scalar_fn::fns::variant_get::VariantGetOptions; use crate::scalar_fn::fns::zip::Zip; // ---- Root ---- @@ -663,6 +665,19 @@ pub fn dynamic( ) } +// ---- VariantGet ---- + +/// Creates an expression that extracts data by path and dtype from a variant expression. +pub fn variant_get(path: impl Into, dtype: DType, child: Expression) -> Expression { + VariantGet.new_expr( + VariantGetOptions { + path: path.into(), + dtype, + }, + vec![child], + ) +} + // ---- ListContains ---- /// Creates an expression that checks if a value is contained in a list. diff --git a/vortex-array/src/scalar/cast.rs b/vortex-array/src/scalar/cast.rs index 50e1d6a3b99..3043ea30a60 100644 --- a/vortex-array/src/scalar/cast.rs +++ b/vortex-array/src/scalar/cast.rs @@ -5,6 +5,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; @@ -59,7 +60,7 @@ impl Scalar { DType::Struct(..) 
=> self.as_struct().cast(target_dtype), DType::List(..) | DType::FixedSizeList(..) => self.as_list().cast(target_dtype), DType::Extension(..) => self.as_extension().cast(target_dtype), - DType::Variant => vortex_err!("Variant scalars can't be cast to {target_dtype}"), + DType::Variant => vortex_bail!("Variant scalars can't be cast to {target_dtype}"), } } diff --git a/vortex-array/src/scalar_fn/fns/mod.rs b/vortex-array/src/scalar_fn/fns/mod.rs index 94fc8fb0384..b0c7e7547e5 100644 --- a/vortex-array/src/scalar_fn/fns/mod.rs +++ b/vortex-array/src/scalar_fn/fns/mod.rs @@ -19,4 +19,5 @@ pub mod operators; pub mod pack; pub mod root; pub mod select; +pub mod variant_get; pub mod zip; diff --git a/vortex-array/src/scalar_fn/fns/variant_get.rs b/vortex-array/src/scalar_fn/fns/variant_get.rs new file mode 100644 index 00000000000..149c91ebd55 --- /dev/null +++ b/vortex-array/src/scalar_fn/fns/variant_get.rs @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; +use std::fmt::Formatter; + +use prost::Message; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; +use vortex_proto::expr as pb; +use vortex_session::VortexSession; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::dtype::DType; +use crate::dtype::Nullability; +use crate::scalar_fn::Arity; +use crate::scalar_fn::ChildName; +use crate::scalar_fn::ExecutionArgs; +use crate::scalar_fn::ScalarFnId; +use crate::scalar_fn::ScalarFnVTable; + +/// Options for the `VariantGet` scalar function. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct VariantGetOptions { + /// The variant field path to extract. + pub path: String, + /// The expected return type. 
+ pub dtype: DType, +} + +impl fmt::Display for VariantGetOptions { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "variant_get({}, {:?})", self.path, self.dtype) + } +} + +/// Scalar function that extracts data by path and dtype from variant arrays. +#[derive(Clone)] +pub struct VariantGet; + +impl ScalarFnVTable for VariantGet { + type Options = VariantGetOptions; + + fn id(&self) -> ScalarFnId { + ScalarFnId::from("vortex.variant_get") + } + + fn serialize(&self, instance: &Self::Options) -> VortexResult>> { + Ok(Some( + pb::VariantGetOpts { + path: instance.path.clone(), + dtype: Some((&instance.dtype).try_into()?), + } + .encode_to_vec(), + )) + } + + fn deserialize(&self, metadata: &[u8], session: &VortexSession) -> VortexResult { + let opts = pb::VariantGetOpts::decode(metadata)?; + let dtype = DType::from_proto( + opts.dtype + .as_ref() + .ok_or_else(|| vortex_err!("VariantGetOpts missing dtype"))?, + session, + )?; + Ok(VariantGetOptions { + path: opts.path, + dtype, + }) + } + + fn arity(&self, _options: &VariantGetOptions) -> Arity { + Arity::Exact(1) + } + + fn child_name(&self, _options: &Self::Options, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("input"), + _ => unreachable!( + "Invalid child index {} for VariantGet expression", + child_idx + ), + } + } + + fn fmt_sql( + &self, + options: &VariantGetOptions, + expr: &crate::expr::Expression, + f: &mut Formatter<'_>, + ) -> fmt::Result { + expr.children()[0].fmt_sql(f)?; + write!(f, ".{}", options.path) + } + + fn return_dtype( + &self, + options: &VariantGetOptions, + _arg_dtypes: &[DType], + ) -> VortexResult { + // Always return nullable since Variant data is always nullable + Ok(options.dtype.with_nullability(Nullability::Nullable)) + } + + fn execute( + &self, + _options: &VariantGetOptions, + _args: &dyn ExecutionArgs, + _ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_bail!( + "VariantGet should be pushed down via parent reduction 
rules, not executed directly" + ) + } + + fn is_null_sensitive(&self, _options: &VariantGetOptions) -> bool { + true + } + + fn is_fallible(&self, _options: &VariantGetOptions) -> bool { + false + } +} diff --git a/vortex-array/src/scalar_fn/session.rs b/vortex-array/src/scalar_fn/session.rs index eef759bf8e3..6553cbd29e3 100644 --- a/vortex-array/src/scalar_fn/session.rs +++ b/vortex-array/src/scalar_fn/session.rs @@ -23,6 +23,7 @@ use crate::scalar_fn::fns::not::Not; use crate::scalar_fn::fns::pack::Pack; use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::Select; +use crate::scalar_fn::fns::variant_get::VariantGet; /// Registry of scalar function vtables. /// Registry of scalar function vtables. @@ -67,6 +68,7 @@ impl Default for ScalarFnSession { this.register(Pack); this.register(Root); this.register(Select); + this.register(VariantGet); this } diff --git a/vortex-proto/proto/expr.proto b/vortex-proto/proto/expr.proto index 3b47db2a756..4339021bb31 100644 --- a/vortex-proto/proto/expr.proto +++ b/vortex-proto/proto/expr.proto @@ -89,3 +89,9 @@ message SelectOpts { message CaseWhenOpts { uint32 num_children = 1; } + +// Options for `vortex.variant_get` +message VariantGetOpts { + string path = 1; + vortex.dtype.DType dtype = 2; +} diff --git a/vortex-proto/src/generated/vortex.expr.rs b/vortex-proto/src/generated/vortex.expr.rs index 9bc61475e59..a4fb3377f59 100644 --- a/vortex-proto/src/generated/vortex.expr.rs +++ b/vortex-proto/src/generated/vortex.expr.rs @@ -155,3 +155,11 @@ pub struct CaseWhenOpts { #[prost(uint32, tag = "1")] pub num_children: u32, } +/// Options for `vortex.variant_get` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantGetOpts { + #[prost(string, tag = "1")] + pub path: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub dtype: ::core::option::Option, +} From 94f23e0e1b5e6978f54e8cc5bb353bf72d2d3b15 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Mar 2026 
14:54:37 +0000 Subject: [PATCH 3/8] Filling out the missing pieces Signed-off-by: Adam Gutglick --- Cargo.lock | 55 ++- Cargo.toml | 2 + encodings/parquet-variant/Cargo.toml | 6 + encodings/parquet-variant/src/lib.rs | 369 +++++++++++++++--- vortex-array/src/arrays/variant/vtable/mod.rs | 3 +- .../src/arrays/variant/vtable/rules.rs | 9 +- vortex-array/src/arrow/executor/mod.rs | 18 + vortex-array/src/arrow/executor/variant.rs | 109 ++++++ vortex-array/src/builtins.rs | 22 +- vortex-array/src/canonical.rs | 4 +- vortex-array/src/dtype/arrow.rs | 52 ++- vortex-array/src/dtype/serde/flatbuffers.rs | 1 - vortex-array/src/expr/exprs.rs | 11 +- vortex-array/src/scalar/cast.rs | 1 - vortex-array/src/scalar/validate.rs | 4 +- vortex-array/src/scalar_fn/fns/variant_get.rs | 42 +- vortex-proto/proto/expr.proto | 3 +- vortex-proto/src/generated/vortex.expr.rs | 4 +- 18 files changed, 601 insertions(+), 114 deletions(-) create mode 100644 vortex-array/src/arrow/executor/variant.rs diff --git a/Cargo.lock b/Cargo.lock index d2a133eb101..2612aea8c8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -718,7 +718,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -6489,6 +6489,50 @@ dependencies = [ "zstd", ] +[[package]] +name = "parquet-variant" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" +dependencies = [ + "arrow-schema", + "chrono", + "half", + "indexmap", + "simdutf8", + "uuid", +] + +[[package]] +name = "parquet-variant-compute" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" +dependencies = [ + "arrow", + "arrow-schema", + "chrono", + "half", + "indexmap", + "parquet-variant", + "parquet-variant-json", + "uuid", +] + +[[package]] +name = 
"parquet-variant-json" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" +dependencies = [ + "arrow-schema", + "base64", + "chrono", + "parquet-variant", + "serde_json", + "uuid", +] + [[package]] name = "paste" version = "1.0.15" @@ -6849,7 +6893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.11.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -6881,7 +6925,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -10299,12 +10343,17 @@ dependencies = [ name = "vortex-parquet-variant" version = "0.1.0" dependencies = [ + "arrow-array", + "arrow-schema", + "parquet-variant", + "parquet-variant-compute", "prost 0.14.3", "rstest", "vortex-array", "vortex-buffer", "vortex-error", "vortex-mask", + "vortex-proto", "vortex-session", ] diff --git a/Cargo.toml b/Cargo.toml index 1de3e3569b1..aee6131d045 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -180,6 +180,8 @@ opentelemetry-otlp = "0.31.0" opentelemetry_sdk = "0.31.0" parking_lot = { version = "0.12.3", features = ["nightly"] } parquet = "57.1" +parquet-variant = "57" +parquet-variant-compute = "57" paste = "1.0.15" pco = "1.0.1" pin-project-lite = "0.2.15" diff --git a/encodings/parquet-variant/Cargo.toml b/encodings/parquet-variant/Cargo.toml index aeb8615f6ca..1669cbd4908 100644 --- a/encodings/parquet-variant/Cargo.toml +++ b/encodings/parquet-variant/Cargo.toml @@ -17,14 +17,20 @@ version = { workspace = true } workspace = true [dependencies] +arrow-array = { workspace = true } +parquet-variant-compute = { workspace = true } prost = { workspace = true } 
vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-error = { workspace = true } vortex-mask = { workspace = true } +vortex-proto = { workspace = true } vortex-session = { workspace = true } [dev-dependencies] +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +parquet-variant = { workspace = true } rstest = { workspace = true } vortex-array = { workspace = true, features = ["_test-harness"] } diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs index ff4a1f9b342..695320ca361 100644 --- a/encodings/parquet-variant/src/lib.rs +++ b/encodings/parquet-variant/src/lib.rs @@ -19,17 +19,18 @@ use std::hash::Hasher; +use arrow_array::Array as ArrowArray; use prost::Message; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; -use vortex_array::DeserializeMetadata; use vortex_array::ExecutionCtx; use vortex_array::IntoArray; use vortex_array::Precision; -use vortex_array::ProstMetadata; +use vortex_array::arrays::VariantArray; use vortex_array::arrays::scalar_fn::ExactScalarFn; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::arrow::FromArrowArray; use vortex_array::buffer::BufferHandle; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; @@ -46,9 +47,13 @@ use vortex_array::vtable::ArrayId; use vortex_array::vtable::NotSupported; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityVTable; +use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; +use vortex_error::vortex_err; use vortex_error::vortex_panic; +use vortex_proto::dtype as pb; use vortex_session::VortexSession; vtable!(ParquetVariant); @@ -64,14 +69,24 @@ impl ParquetVariantVTable { /// /// Tracks which optional children are present so the array can be correctly /// reconstructed during deserialization. 
-#[derive(Clone, prost::Message)] +#[derive(Clone, Debug)] pub struct ParquetVariantMetadata { + /// Whether the un-shredded `value` child is present. + pub has_value: bool, + /// DType of the shredded `typed_value`, if present. + /// + /// This is required to deserialize non-variant shredded children. + pub typed_value_dtype: Option, +} + +#[derive(Clone, prost::Message)] +struct ParquetVariantMetadataProto { /// Whether the un-shredded `value` child is present. #[prost(bool, tag = "1")] pub has_value: bool, - /// Whether the shredded `typed_value` child is present. - #[prost(bool, tag = "2")] - pub has_typed_value: bool, + /// DType of the shredded `typed_value`, if present. + #[prost(message, optional, tag = "2")] + pub typed_value_dtype: Option, } /// An array encoding that stores variant data in the Parquet Variant binary format. @@ -96,32 +111,31 @@ const VARIANT_DTYPE: DType = DType::Variant; impl ParquetVariantArray { /// Creates a new ParquetVariantArray. - /// - /// # Panics - /// Panics if neither `value` nor `typed_value` is provided, or if children have - /// mismatched lengths. - pub fn new(metadata: ArrayRef, value: Option, typed_value: Option) -> Self { - assert!( + pub fn try_new( + metadata: ArrayRef, + value: Option, + typed_value: Option, + ) -> VortexResult { + vortex_ensure!( value.is_some() || typed_value.is_some(), "at least one of value or typed_value must be present" ); let len = metadata.len(); if let Some(ref v) = value { - assert_eq!(v.len(), len, "value length must match metadata length"); + vortex_ensure!(v.len() == len, "value length must match metadata length"); } if let Some(ref tv) = typed_value { - assert_eq!( - tv.len(), - len, + vortex_ensure!( + tv.len() == len, "typed_value length must match metadata length" ); } - Self { + Ok(Self { metadata, value, typed_value, stats_set: ArrayStats::default(), - } + }) } /// Returns a reference to the metadata child array. 
@@ -139,6 +153,28 @@ impl ParquetVariantArray { self.typed_value.as_ref() } + /// Converts an Arrow `parquet_variant_compute::VariantArray` into a Vortex `ArrayRef` + /// wrapping `VariantArray(ParquetVariantArray(...))`. + pub fn from_arrow_variant( + arrow_variant: &parquet_variant_compute::VariantArray, + ) -> VortexResult { + let metadata = + ArrayRef::from_arrow(arrow_variant.metadata_field() as &dyn ArrowArray, false)?; + + let value = arrow_variant + .value_field() + .map(|v| ArrayRef::from_arrow(v as &dyn ArrowArray, false)) + .transpose()?; + + let typed_value = arrow_variant + .typed_value_field() + .map(|tv| ArrayRef::from_arrow(tv.as_ref(), tv.is_nullable())) + .transpose()?; + + let pv = ParquetVariantArray::try_new(metadata, value, typed_value)?; + Ok(VariantArray::new(pv.into_array()).into_array()) + } + fn nchildren(&self) -> usize { 1 + self.value.is_some() as usize + self.typed_value.is_some() as usize } @@ -146,7 +182,7 @@ impl ParquetVariantArray { impl VTable for ParquetVariantVTable { type Array = ParquetVariantArray; - type Metadata = ProstMetadata; + type Metadata = ParquetVariantMetadata; type OperationsVTable = NotSupported; type ValidityVTable = Self; @@ -219,9 +255,18 @@ impl VTable for ParquetVariantVTable { fn child(array: &ParquetVariantArray, idx: usize) -> ArrayRef { match idx { 0 => array.metadata.clone(), - 1 if array.value.is_some() => array.value.clone().unwrap(), - 1 => array.typed_value.clone().unwrap(), - 2 => array.typed_value.clone().unwrap(), + 1 if array.value.is_some() => array + .value + .clone() + .vortex_expect("ParquetVariantArray missing value child"), + 1 => array + .typed_value + .clone() + .vortex_expect("ParquetVariantArray missing typed_value child"), + 2 => array + .typed_value + .clone() + .vortex_expect("ParquetVariantArray missing typed_value child"), _ => vortex_panic!("ParquetVariantArray child index {idx} out of bounds"), } } @@ -237,14 +282,25 @@ impl VTable for ParquetVariantVTable { } fn 
metadata(array: &ParquetVariantArray) -> VortexResult { - Ok(ProstMetadata(ParquetVariantMetadata { + Ok(ParquetVariantMetadata { has_value: array.value.is_some(), - has_typed_value: array.typed_value.is_some(), - })) + typed_value_dtype: array.typed_value.as_ref().map(|tv| tv.dtype().clone()), + }) } fn serialize(metadata: Self::Metadata) -> VortexResult>> { - Ok(Some(metadata.encode_to_vec())) + let typed_value_dtype = metadata + .typed_value_dtype + .as_ref() + .map(|dtype| dtype.try_into()) + .transpose()?; + Ok(Some( + ParquetVariantMetadataProto { + has_value: metadata.has_value, + typed_value_dtype, + } + .encode_to_vec(), + )) } fn deserialize( @@ -254,9 +310,15 @@ impl VTable for ParquetVariantVTable { _buffers: &[BufferHandle], _session: &VortexSession, ) -> VortexResult { - let inner = - as DeserializeMetadata>::deserialize(bytes)?; - Ok(ProstMetadata(inner)) + let proto = ParquetVariantMetadataProto::decode(bytes)?; + let typed_value_dtype = match proto.typed_value_dtype.as_ref() { + Some(dtype) => Some(DType::from_proto(dtype, _session)?), + None => None, + }; + Ok(ParquetVariantMetadata { + has_value: proto.has_value, + typed_value_dtype, + }) } fn build( @@ -267,12 +329,13 @@ impl VTable for ParquetVariantVTable { children: &dyn ArrayChildren, ) -> VortexResult { vortex_ensure!(matches!(dtype, DType::Variant), "Expected Variant DType"); + let has_typed_value = metadata.typed_value_dtype.is_some(); vortex_ensure!( - metadata.has_value || metadata.has_typed_value, + metadata.has_value || has_typed_value, "At least one of value or typed_value must be present" ); - let expected_children = 1 + metadata.has_value as usize + metadata.has_typed_value as usize; + let expected_children = 1 + metadata.has_value as usize + has_typed_value as usize; vortex_ensure!( children.len() == expected_children, "Expected {} children, got {}", @@ -293,20 +356,19 @@ impl VTable for ParquetVariantVTable { None }; - let typed_value = if metadata.has_typed_value { + let 
typed_value = if has_typed_value { // typed_value can be any type — primitive, list, struct, etc. - // We retrieve it without constraining its DType. - let tv = children.get(child_idx, &DType::Variant, len)?; + let dtype = metadata + .typed_value_dtype + .clone() + .ok_or_else(|| vortex_err!("typed_value_dtype missing for typed_value child"))?; + let tv = children.get(child_idx, &dtype, len)?; Some(tv) } else { None }; - Ok(ParquetVariantArray::new( - variant_metadata, - value, - typed_value, - )) + ParquetVariantArray::try_new(variant_metadata, value, typed_value) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { @@ -317,12 +379,20 @@ impl VTable for ParquetVariantVTable { children.len() ); let mut iter = children.into_iter(); - array.metadata = iter.next().unwrap(); + array.metadata = iter + .next() + .vortex_expect("ParquetVariantArray missing metadata child"); if array.value.is_some() { - array.value = Some(iter.next().unwrap()); + array.value = Some( + iter.next() + .vortex_expect("ParquetVariantArray missing value child in with_children"), + ); } if array.typed_value.is_some() { - array.typed_value = Some(iter.next().unwrap()); + array.typed_value = + Some(iter.next().vortex_expect( + "ParquetVariantArray missing typed_value child in with_children", + )); } Ok(()) } @@ -357,13 +427,20 @@ impl ArrayParentReduceRule for ParquetVariantGetRule { _child_idx: usize, ) -> VortexResult> { let options = parent.options; + if options.path().is_some_and(|p| !p.is_empty()) { + vortex_bail!("ParquetVariant VariantGet only supports empty path"); + } + let target_dtype = options.dtype().with_nullability(Nullability::Nullable); match array.typed_value_array() { - Some(typed_value) => { - // The shredded typed_value is available; cast it to the requested dtype. 
- Ok(Some(typed_value.cast(options.dtype.clone())?)) + Some(typed_value) + if typed_value.dtype().with_nullability(Nullability::Nullable) == target_dtype => + { + // The shredded typed_value matches the requested type. + // Cast to ensure nullability matches (VariantGet always returns nullable). + Ok(Some(typed_value.cast(target_dtype)?)) } - None => { - // No shredded data available; cannot push down. + _ => { + // No shredded data or type mismatch; cannot push down. Ok(None) } } @@ -380,31 +457,109 @@ impl ValidityVTable for ParquetVariantVTable { #[cfg(test)] mod tests { + use std::sync::Arc; + + use arrow_array::ArrayRef as ArrowArrayRef; + use arrow_array::Int32Array; + use arrow_array::StructArray; + use arrow_array::builder::BinaryViewBuilder; + use arrow_array::cast::AsArray; + use arrow_schema::DataType; + use arrow_schema::Field; + use arrow_schema::Fields; + use parquet_variant::Variant; + use parquet_variant_compute::VariantArray as ArrowVariantArray; + use parquet_variant_compute::VariantArrayBuilder; + use vortex_array::ArrayContext; use vortex_array::IntoArray; - use vortex_array::arrays::VariantArray; + use vortex_array::LEGACY_SESSION; + use vortex_array::Precision; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::VarBinViewArray; + use vortex_array::arrays::VariantVTable; + use vortex_array::arrow::ArrowArrayExecutor; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; + use vortex_array::serde::ArrayParts; + use vortex_array::serde::SerializeOptions; + use vortex_array::session::ArraySessionExt; + use vortex_buffer::ByteBufferMut; use vortex_buffer::buffer; + use vortex_session::VortexSession; + use vortex_session::registry::ReadContext; use super::*; + #[test] + fn test_from_arrow_variant_basic() -> VortexResult<()> { + let mut builder = VariantArrayBuilder::new(3); + builder.append_variant(Variant::from(42i32)); + 
builder.append_variant(Variant::from("hello")); + builder.append_variant(Variant::from(true)); + let arrow_variant = builder.build(); + + let vortex_arr = ParquetVariantArray::from_arrow_variant(&arrow_variant)?; + + assert_eq!(vortex_arr.len(), 3); + assert_eq!(vortex_arr.dtype(), &DType::Variant); + + Ok(()) + } + + #[test] + fn test_from_arrow_variant_with_shredded_typed_value() -> VortexResult<()> { + // Build the underlying StructArray with metadata + typed_value fields + let mut metadata_builder = BinaryViewBuilder::new(); + // Minimal variant metadata: version 1, no dictionary + let min_metadata = [1u8, 0]; + for _ in 0..3 { + metadata_builder.append_value(min_metadata); + } + let metadata = metadata_builder.finish(); + + let typed_value: ArrowArrayRef = Arc::new(Int32Array::from(vec![10, 20, 30])); + + let struct_fields: Fields = vec![ + Arc::new(Field::new("metadata", DataType::BinaryView, false)), + Arc::new(Field::new("typed_value", DataType::Int32, false)), + ] + .into(); + let struct_array = + StructArray::try_new(struct_fields, vec![Arc::new(metadata), typed_value], None) + .unwrap(); + + let arrow_variant = ArrowVariantArray::try_new(&struct_array).unwrap(); + + let vortex_arr = ParquetVariantArray::from_arrow_variant(&arrow_variant)?; + assert_eq!(vortex_arr.len(), 3); + assert_eq!(vortex_arr.dtype(), &DType::Variant); + + // Verify typed_value is present by downcasting through the layers + let variant_arr = vortex_arr.as_opt::().unwrap(); + let inner = variant_arr + .child() + .as_opt::() + .unwrap(); + assert!(inner.typed_value_array().is_some()); + + Ok(()) + } + #[test] fn test_variant_get_pushdown_with_typed_value() -> VortexResult<()> { // Create a ParquetVariantArray with shredded typed_value (i32 data) let metadata = buffer![0u8, 1, 2].into_array(); let typed_value = buffer![10i32, 20, 30].into_array(); - let pv_array = ParquetVariantArray::new(metadata, None, Some(typed_value)); + let pv_array = ParquetVariantArray::try_new(metadata, None, 
Some(typed_value))?; // Wrap it in a VariantArray let variant_array = VariantArray::new(pv_array.into_array()); // Apply variant_get let target_dtype = DType::Primitive(PType::I32, Nullability::Nullable); - let result = variant_array - .into_array() - .variant_get("col", target_dtype)?; + let result = variant_array.into_array().variant_get(None, target_dtype)?; // The result should be the typed_value data, cast to nullable i32 assert_eq!( @@ -421,7 +576,7 @@ mod tests { // Create a ParquetVariantArray without typed_value (only value) let metadata = buffer![0u8, 1, 2].into_array(); let value = buffer![0u8, 1, 2].into_array(); - let pv_array = ParquetVariantArray::new(metadata, Some(value), None); + let pv_array = ParquetVariantArray::try_new(metadata, Some(value), None)?; // Wrap it in a VariantArray let variant_array = VariantArray::new(pv_array.into_array()); @@ -429,9 +584,7 @@ mod tests { // Apply variant_get - the rule returns None since there's no typed_value, // so the optimizer creates a lazy ScalarFnArray that will error on execute. 
let target_dtype = DType::Primitive(PType::I32, Nullability::Nullable); - let result = variant_array - .into_array() - .variant_get("col", target_dtype)?; + let result = variant_array.into_array().variant_get(None, target_dtype)?; // The result is a lazy expression wrapping the variant array assert_eq!( result.dtype(), @@ -439,4 +592,108 @@ mod tests { ); Ok(()) } + + fn roundtrip(array: ArrayRef) -> ArrayRef { + let dtype = array.dtype().clone(); + let len = array.len(); + + let ctx = ArrayContext::empty(); + let serialized = array.serialize(&ctx, &SerializeOptions::default()).unwrap(); + + let mut concat = ByteBufferMut::empty(); + for buf in serialized { + concat.extend_from_slice(buf.as_ref()); + } + let concat = concat.freeze(); + + let session = VortexSession::empty().with::(); + session + .arrays() + .register(ParquetVariantVTable::ID, ParquetVariantVTable); + session.arrays().register(VariantVTable::ID, VariantVTable); + + let parts = ArrayParts::try_from(concat).unwrap(); + parts + .decode(&dtype, len, &ReadContext::new(ctx.to_ids()), &session) + .unwrap() + } + + #[test] + fn test_serde_roundtrip_typed_value_variant() { + let outer_metadata = + VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00", b"\x01\x00"]).into_array(); + + let inner_metadata = + VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00", b"\x01\x00"]).into_array(); + let inner_value = VarBinViewArray::from_iter_bin([b"\x02", b"\x03", b"\x04"]).into_array(); + let inner_pv = + ParquetVariantArray::try_new(inner_metadata, Some(inner_value), None).unwrap(); + let typed_value = VariantArray::new(inner_pv.into_array()).into_array(); + + let outer_pv = + ParquetVariantArray::try_new(outer_metadata, None, Some(typed_value)).unwrap(); + let array = outer_pv.into_array(); + let decoded = roundtrip(array.clone()); + + assert!(array.array_eq(&decoded, Precision::Value)); + let decoded_pv = decoded.as_opt::().unwrap(); + let typed = decoded_pv.typed_value_array().unwrap(); + 
assert_eq!(typed.dtype(), &DType::Variant); + } + + #[test] + fn test_serde_roundtrip_typed_value_int32() { + let outer_metadata = + VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00", b"\x01\x00"]).into_array(); + let typed_value = buffer![10i32, 20, 30].into_array(); + + let outer_pv = + ParquetVariantArray::try_new(outer_metadata, None, Some(typed_value)).unwrap(); + let array = outer_pv.into_array(); + let decoded = roundtrip(array.clone()); + + assert!(array.array_eq(&decoded, Precision::Value)); + let decoded_pv = decoded.as_opt::().unwrap(); + let typed = decoded_pv.typed_value_array().unwrap(); + assert_eq!( + typed.dtype(), + &DType::Primitive(PType::I32, Nullability::NonNullable) + ); + } + + #[test] + fn test_arrow_variant_storage_basic() -> VortexResult<()> { + let metadata = VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00"]).into_array(); + let value = VarBinViewArray::from_iter_bin([b"\x10", b"\x11"]).into_array(); + let pv_array = ParquetVariantArray::try_new(metadata, Some(value), None)?; + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let arrow = pv_array.into_array().execute_arrow(None, &mut ctx)?; + let struct_arr = arrow.as_struct(); + + assert_eq!(struct_arr.num_columns(), 2); + assert_eq!(struct_arr.column_names(), &["metadata", "value"]); + + Ok(()) + } + + #[test] + fn test_arrow_variant_storage_with_typed_value() -> VortexResult<()> { + let metadata = VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00"]).into_array(); + let value = VarBinViewArray::from_iter_bin([b"\x10", b"\x11"]).into_array(); + let typed_value = buffer![1i32, 2].into_array(); + let pv_array = ParquetVariantArray::try_new(metadata, Some(value), Some(typed_value))?; + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let arrow = pv_array.into_array().execute_arrow(None, &mut ctx)?; + let struct_arr = arrow.as_struct(); + + assert_eq!(struct_arr.num_columns(), 3); + assert_eq!( + struct_arr.column_names(), + &["metadata", "value", 
"typed_value"] + ); + + Ok(()) + } } diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs index 2c5378cd0d2..10f4db4aafd 100644 --- a/vortex-array/src/arrays/variant/vtable/mod.rs +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -7,6 +7,7 @@ mod validity; use std::hash::Hasher; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_panic; @@ -144,7 +145,7 @@ impl VTable for VariantVTable { "VariantArray expects exactly 1 child, got {}", children.len() ); - array.child = children.into_iter().next().unwrap(); + array.child = children.into_iter().next().vortex_expect("must exist"); Ok(()) } diff --git a/vortex-array/src/arrays/variant/vtable/rules.rs b/vortex-array/src/arrays/variant/vtable/rules.rs index 7903316b19a..db54c92b1bc 100644 --- a/vortex-array/src/arrays/variant/vtable/rules.rs +++ b/vortex-array/src/arrays/variant/vtable/rules.rs @@ -30,10 +30,9 @@ impl ArrayParentReduceRule for VariantGetPushDownRule { _child_idx: usize, ) -> VortexResult> { let options = parent.options; - Ok(Some( - array - .child() - .variant_get(&options.path, options.dtype.clone())?, - )) + Ok(Some(array.child().variant_get( + options.path().cloned(), + options.dtype().clone(), + )?)) } } diff --git a/vortex-array/src/arrow/executor/mod.rs b/vortex-array/src/arrow/executor/mod.rs index abb55efb97f..53032c7ac4d 100644 --- a/vortex-array/src/arrow/executor/mod.rs +++ b/vortex-array/src/arrow/executor/mod.rs @@ -15,6 +15,7 @@ mod run_end; mod struct_; mod temporal; mod validity; +mod variant; use arrow_array::ArrayRef as ArrowArrayRef; use arrow_array::RecordBatch; @@ -46,6 +47,7 @@ use crate::arrow::executor::primitive::to_arrow_primitive; use crate::arrow::executor::run_end::to_arrow_run_end; use crate::arrow::executor::struct_::to_arrow_struct; use crate::arrow::executor::temporal::to_arrow_temporal; +use crate::arrow::executor::variant::to_arrow_variant; 
use crate::dtype::DType; use crate::dtype::PType; use crate::executor::ExecutionCtx; @@ -88,6 +90,22 @@ impl ArrowArrayExecutor for ArrayRef { ) -> VortexResult { let len = self.len(); + if self.dtype().is_variant() { + let target_fields = match data_type { + Some(DataType::Struct(fields)) => Some(fields), + Some(_) => { + vortex_bail!("Variant can only be converted to Arrow Struct storage type"); + } + None => None, + }; + let arrow = to_arrow_variant(self, target_fields, ctx)?; + vortex_ensure!( + arrow.len() == len, + "Arrow array length does not match Vortex array length after conversion to Variant" + ); + return Ok(arrow); + } + // Resolve the DataType if it is a leaf type // we should likely make this extensible. let resolved_type: DataType = match data_type { diff --git a/vortex-array/src/arrow/executor/variant.rs b/vortex-array/src/arrow/executor/variant.rs new file mode 100644 index 00000000000..cd274bc6583 --- /dev/null +++ b/vortex-array/src/arrow/executor/variant.rs @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::StructArray as ArrowStructArray; +use arrow_schema::Field; +use arrow_schema::Fields; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::array::ArrayVisitor; +use crate::arrays::VariantVTable; +use crate::arrow::ArrowArrayExecutor; + +pub(super) fn to_arrow_variant( + array: ArrayRef, + target_fields: Option<&Fields>, + ctx: &mut ExecutionCtx, +) -> VortexResult { + let inner = match array.try_into::() { + Ok(variant) => variant.child().clone(), + Err(array) => array, + }; + + let named_children = inner.named_children(); + if named_children.is_empty() { + vortex_bail!("Variant array has no children"); + } + + let mut metadata: Option = None; + let mut value: Option = None; + let 
mut typed_value: Option = None; + + for (name, child) in named_children { + match name.as_str() { + "metadata" => metadata = Some(child), + "value" => value = Some(child), + "typed_value" => typed_value = Some(child), + _ => { + vortex_bail!("Unsupported variant child {name}"); + } + } + } + + let metadata = match metadata { + Some(metadata) => metadata, + None => vortex_bail!("Variant array missing metadata child"), + }; + + let mut ordered: Vec<(String, ArrayRef)> = vec![("metadata".to_string(), metadata.clone())]; + if let Some(value) = value.clone() { + ordered.push(("value".to_string(), value)); + } + if let Some(typed_value) = typed_value.clone() { + ordered.push(("typed_value".to_string(), typed_value)); + } + + let (fields, arrays) = if let Some(fields) = target_fields { + let mut arrays = Vec::with_capacity(fields.len()); + for field in fields.iter() { + let child = match field.name().as_str() { + "metadata" => Some(&metadata), + "value" => value.as_ref(), + "typed_value" => typed_value.as_ref(), + other => { + vortex_bail!("Unsupported variant field {other}"); + } + }; + + let Some(child) = child else { + vortex_bail!("Variant array missing child for field {}", field.name()); + }; + + arrays.push(child.clone().execute_arrow(Some(field.data_type()), ctx)?); + } + + // Ensure we didn't silently drop any children + vortex_ensure!( + fields.len() == ordered.len(), + "Variant array has {} children but target Arrow type has {} fields", + ordered.len(), + fields.len() + ); + + (fields.clone(), arrays) + } else { + let mut fields = Vec::with_capacity(ordered.len()); + let mut arrays = Vec::with_capacity(ordered.len()); + + for (name, child) in ordered { + let arrow = child.clone().execute_arrow(None, ctx)?; + fields.push(Field::new( + name, + arrow.data_type().clone(), + child.dtype().is_nullable(), + )); + arrays.push(arrow); + } + + (Fields::from(fields), arrays) + }; + + Ok(Arc::new(ArrowStructArray::try_new(fields, arrays, None)?)) +} diff --git 
a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index 207187d186d..9a9315cc805 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -35,6 +35,7 @@ use crate::scalar_fn::fns::not::Not; use crate::scalar_fn::fns::operators::Operator; use crate::scalar_fn::fns::variant_get::VariantGet; use crate::scalar_fn::fns::variant_get::VariantGetOptions; +use crate::scalar_fn::fns::variant_get::VariantPath; use crate::scalar_fn::fns::zip::Zip; /// A collection of built-in scalar functions that can be applied to expressions or arrays. @@ -66,7 +67,7 @@ pub trait ExprBuiltins: Sized { fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult; /// Extract data by path and dtype from a variant expression. - fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult; + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult; /// Apply a binary operator to this expression and another. fn binary(&self, rhs: Expression, op: Operator) -> VortexResult; @@ -105,14 +106,8 @@ impl ExprBuiltins for Expression { Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()]) } - fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult { - VariantGet.try_new_expr( - VariantGetOptions { - path: path.into(), - dtype, - }, - [self.clone()], - ) + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult { + VariantGet.try_new_expr(VariantGetOptions::new(path, dtype), [self.clone()]) } fn binary(&self, rhs: Expression, op: Operator) -> VortexResult { @@ -148,7 +143,7 @@ pub trait ArrayBuiltins: Sized { fn list_contains(&self, value: ArrayRef) -> VortexResult; /// Extract data by path and dtype from a variant array. - fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult; + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult; /// Apply a binary operator to this array and another. 
fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult; @@ -220,14 +215,11 @@ impl ArrayBuiltins for ArrayRef { .optimize() } - fn variant_get(&self, path: impl Into, dtype: DType) -> VortexResult { + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult { VariantGet .try_new_array( self.len(), - VariantGetOptions { - path: path.into(), - dtype, - }, + VariantGetOptions::new(path, dtype), [self.clone()], )? .optimize() diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index fd769a31e85..27c42954245 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -220,7 +220,9 @@ impl Canonical { ext_dtype.clone(), Canonical::empty(ext_dtype.storage_dtype()).into_array(), )), - DType::Variant => todo!(), + DType::Variant => { + vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant") + } } } diff --git a/vortex-array/src/dtype/arrow.rs b/vortex-array/src/dtype/arrow.rs index 50cf53c04c2..7ecf57d3feb 100644 --- a/vortex-array/src/dtype/arrow.rs +++ b/vortex-array/src/dtype/arrow.rs @@ -210,6 +210,14 @@ impl FromArrowType<(&DataType, Nullability)> for DType { impl FromArrowType<&Field> for DType { fn from_arrow(field: &Field) -> Self { + if field + .metadata() + .get("ARROW:extension:name") + .map(|s| s.as_str()) + == Some("arrow.parquet.variant") + { + return DType::Variant; + } Self::from_arrow((field.data_type(), field.is_nullable().into())) } } @@ -228,12 +236,14 @@ impl DType { let mut builder = SchemaBuilder::with_capacity(struct_dtype.names().len()); for (field_name, field_dtype) in struct_dtype.names().iter().zip(struct_dtype.fields()) { let field = if field_dtype.is_variant() { - Field::new( - field_name.as_ref(), - field_dtype.to_arrow_dtype()?, - field_dtype.is_nullable(), + let storage = DataType::Struct(variant_storage_fields_minimal()); + Field::new(field_name.as_ref(), storage, field_dtype.is_nullable()).with_metadata( + [( + "ARROW:extension:name".to_owned(), + 
"arrow.parquet.variant".to_owned(), + )] + .into(), ) - .with_metadata([("".to_owned(), "arrow.parquet.variant".to_owned())].into()) } else { Field::new( field_name.as_ref(), @@ -310,7 +320,9 @@ impl DType { DataType::Struct(Fields::from(fields)) } - DType::Variant => unimplemented!("should this be struct? fail and ask to use schema?"), + DType::Variant => vortex_bail!( + "DType::Variant requires Arrow Field metadata; use to_arrow_schema or a Field helper" + ), DType::Extension(ext_dtype) => { // Try and match against the known extension DTypes. if let Some(temporal) = ext_dtype.metadata_opt::() { @@ -343,6 +355,13 @@ impl DType { } } +fn variant_storage_fields_minimal() -> Fields { + Fields::from(vec![ + Field::new("metadata", DataType::Binary, false), + Field::new("value", DataType::Binary, true), + ]) +} + #[cfg(test)] mod test { use arrow_schema::DataType; @@ -410,6 +429,12 @@ mod test { ); } + #[test] + fn test_variant_dtype_to_arrow_dtype_errors() { + let err = DType::Variant.to_arrow_dtype().unwrap_err().to_string(); + assert!(err.contains("Variant")); + } + #[test] fn infer_nullable_list_element() { let list_non_nullable = DType::List( @@ -466,6 +491,21 @@ mod test { ); } + #[test] + fn test_schema_variant_field_metadata() { + let dtype = DType::struct_([("v", DType::Variant)], Nullability::NonNullable); + let schema = dtype.to_arrow_schema().unwrap(); + let field = schema.field(0); + assert_eq!( + field + .metadata() + .get("ARROW:extension:name") + .map(|s| s.as_str()), + Some("arrow.parquet.variant") + ); + assert!(matches!(field.data_type(), DataType::Struct(_))); + } + #[rstest] #[should_panic] fn test_schema_conversion_panics(the_struct: StructFields) { diff --git a/vortex-array/src/dtype/serde/flatbuffers.rs b/vortex-array/src/dtype/serde/flatbuffers.rs index 37dc62b8f8f..e91b5dec112 100644 --- a/vortex-array/src/dtype/serde/flatbuffers.rs +++ b/vortex-array/src/dtype/serde/flatbuffers.rs @@ -16,7 +16,6 @@ use vortex_flatbuffers::FlatBuffer; use 
vortex_flatbuffers::FlatBufferRoot; use vortex_flatbuffers::WriteFlatBuffer; use vortex_flatbuffers::dtype as fbd; -use vortex_flatbuffers::dtype::VariantArgs; use vortex_session::VortexSession; use crate::dtype::DType; diff --git a/vortex-array/src/expr/exprs.rs b/vortex-array/src/expr/exprs.rs index 3b1611b796c..7a5c91f2100 100644 --- a/vortex-array/src/expr/exprs.rs +++ b/vortex-array/src/expr/exprs.rs @@ -47,6 +47,7 @@ use crate::scalar_fn::fns::select::FieldSelection; use crate::scalar_fn::fns::select::Select; use crate::scalar_fn::fns::variant_get::VariantGet; use crate::scalar_fn::fns::variant_get::VariantGetOptions; +use crate::scalar_fn::fns::variant_get::VariantPath; use crate::scalar_fn::fns::zip::Zip; // ---- Root ---- @@ -668,14 +669,8 @@ pub fn dynamic( // ---- VariantGet ---- /// Creates an expression that extracts data by path and dtype from a variant expression. -pub fn variant_get(path: impl Into, dtype: DType, child: Expression) -> Expression { - VariantGet.new_expr( - VariantGetOptions { - path: path.into(), - dtype, - }, - vec![child], - ) +pub fn variant_get(path: Option, dtype: DType, child: Expression) -> Expression { + VariantGet.new_expr(VariantGetOptions::new(path, dtype), vec![child]) } // ---- ListContains ---- diff --git a/vortex-array/src/scalar/cast.rs b/vortex-array/src/scalar/cast.rs index 3043ea30a60..e61bb4f49bd 100644 --- a/vortex-array/src/scalar/cast.rs +++ b/vortex-array/src/scalar/cast.rs @@ -7,7 +7,6 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; -use vortex_error::vortex_err; use crate::dtype::DType; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 604b07bef92..192963b90a2 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -119,7 +119,9 @@ impl Scalar { } } DType::Extension(ext_dtype) => ext_dtype.validate_storage_value(value)?, 
- DType::Variant => unimplemented!(), + DType::Variant => { + vortex_bail!("variant scalars are not supported for validation yet"); + } } Ok(()) diff --git a/vortex-array/src/scalar_fn/fns/variant_get.rs b/vortex-array/src/scalar_fn/fns/variant_get.rs index 149c91ebd55..e89ceeddb35 100644 --- a/vortex-array/src/scalar_fn/fns/variant_get.rs +++ b/vortex-array/src/scalar_fn/fns/variant_get.rs @@ -21,18 +21,41 @@ use crate::scalar_fn::ExecutionArgs; use crate::scalar_fn::ScalarFnId; use crate::scalar_fn::ScalarFnVTable; +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct VariantPath {} + +impl VariantPath { + pub fn is_empty(&self) -> bool { + true + } +} + /// Options for the `VariantGet` scalar function. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct VariantGetOptions { /// The variant field path to extract. - pub path: String, + path: Option, /// The expected return type. - pub dtype: DType, + dtype: DType, +} + +impl VariantGetOptions { + pub fn new(path: Option, dtype: DType) -> Self { + Self { path, dtype } + } + + pub fn path(&self) -> Option<&VariantPath> { + self.path.as_ref() + } + + pub fn dtype(&self) -> &DType { + &self.dtype + } } impl fmt::Display for VariantGetOptions { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "variant_get({}, {:?})", self.path, self.dtype) + write!(f, "variant_get({})", self.dtype) } } @@ -50,8 +73,7 @@ impl ScalarFnVTable for VariantGet { fn serialize(&self, instance: &Self::Options) -> VortexResult>> { Ok(Some( pb::VariantGetOpts { - path: instance.path.clone(), - dtype: Some((&instance.dtype).try_into()?), + dtype: Some(instance.dtype().try_into()?), } .encode_to_vec(), )) @@ -65,10 +87,7 @@ impl ScalarFnVTable for VariantGet { .ok_or_else(|| vortex_err!("VariantGetOpts missing dtype"))?, session, )?; - Ok(VariantGetOptions { - path: opts.path, - dtype, - }) + Ok(VariantGetOptions::new(None, dtype)) } fn arity(&self, _options: &VariantGetOptions) -> Arity { @@ -92,7 +111,8 @@ impl 
ScalarFnVTable for VariantGet { f: &mut Formatter<'_>, ) -> fmt::Result { expr.children()[0].fmt_sql(f)?; - write!(f, ".{}", options.path) + let _ = options; + Ok(()) } fn return_dtype( @@ -101,7 +121,7 @@ impl ScalarFnVTable for VariantGet { _arg_dtypes: &[DType], ) -> VortexResult { // Always return nullable since Variant data is always nullable - Ok(options.dtype.with_nullability(Nullability::Nullable)) + Ok(options.dtype().with_nullability(Nullability::Nullable)) } fn execute( diff --git a/vortex-proto/proto/expr.proto b/vortex-proto/proto/expr.proto index 4339021bb31..cae11d23e9a 100644 --- a/vortex-proto/proto/expr.proto +++ b/vortex-proto/proto/expr.proto @@ -92,6 +92,5 @@ message CaseWhenOpts { // Options for `vortex.variant_get` message VariantGetOpts { - string path = 1; - vortex.dtype.DType dtype = 2; + vortex.dtype.DType dtype = 1; } diff --git a/vortex-proto/src/generated/vortex.expr.rs b/vortex-proto/src/generated/vortex.expr.rs index a4fb3377f59..6bdf43eb3d8 100644 --- a/vortex-proto/src/generated/vortex.expr.rs +++ b/vortex-proto/src/generated/vortex.expr.rs @@ -158,8 +158,6 @@ pub struct CaseWhenOpts { /// Options for `vortex.variant_get` #[derive(Clone, PartialEq, ::prost::Message)] pub struct VariantGetOpts { - #[prost(string, tag = "1")] - pub path: ::prost::alloc::string::String, - #[prost(message, optional, tag = "2")] + #[prost(message, optional, tag = "1")] pub dtype: ::core::option::Option, } From 5306dd979de54b4897521254dca2518a40c68097 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Mar 2026 15:00:11 +0000 Subject: [PATCH 4/8] fix Signed-off-by: Adam Gutglick --- encodings/parquet-variant/src/lib.rs | 5 +++-- vortex-array/src/arrays/variant/vtable/mod.rs | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs index 695320ca361..184a906cc42 100644 --- a/encodings/parquet-variant/src/lib.rs +++ b/encodings/parquet-variant/src/lib.rs @@ 
-25,6 +25,7 @@ use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; +use vortex_array::ExecutionStep; use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::arrays::VariantArray; @@ -397,8 +398,8 @@ impl VTable for ParquetVariantVTable { Ok(()) } - fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { - Ok(array.clone().into_array()) + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + Ok(ExecutionStep::done(array.clone().into_array())) } fn reduce_parent( diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs index 10f4db4aafd..08c2c477813 100644 --- a/vortex-array/src/arrays/variant/vtable/mod.rs +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -18,6 +18,7 @@ use crate::ArrayHash; use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; +use crate::ExecutionStep; use crate::IntoArray; use crate::Precision; use crate::arrays::VariantArray; @@ -149,9 +150,9 @@ impl VTable for VariantVTable { Ok(()) } - fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { // VariantArray is the canonical variant representation. 
- Ok(array.clone().into_array()) + Ok(ExecutionStep::done(array.clone().into_array())) } fn reduce_parent( From fad903f8cd2a4d2c3e48fa94f55ccf27bddb20ab Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Mar 2026 15:42:43 +0000 Subject: [PATCH 5/8] Fix missing match branches Signed-off-by: Adam Gutglick --- encodings/sparse/src/canonical.rs | 1 + fuzz/src/array/compare.rs | 2 +- fuzz/src/array/filter.rs | 2 +- fuzz/src/array/mod.rs | 2 ++ fuzz/src/array/search_sorted.rs | 2 +- fuzz/src/array/slice.rs | 2 +- fuzz/src/array/sort.rs | 2 +- fuzz/src/array/take.rs | 2 +- vortex-datafusion/src/convert/scalars.rs | 2 +- vortex-duckdb/src/convert/dtype.rs | 3 +++ vortex-duckdb/src/convert/scalar.rs | 1 + vortex-ffi/src/dtype.rs | 1 + vortex-jni/src/dtype.rs | 1 + vortex-python/src/dtype/mod.rs | 4 ++++ vortex-python/src/python_repr.rs | 1 + vortex-python/src/scalar/into_py.rs | 4 ++++ vortex-python/src/scalar/mod.rs | 4 ++++ 17 files changed, 29 insertions(+), 7 deletions(-) diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index 441ee671898..a099e9d0e70 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -117,6 +117,7 @@ pub(super) fn execute_sparse( execute_sparse_fixed_size_list(array, *nullability, ctx)? 
} DType::Extension(_ext_dtype) => todo!(), + DType::Variant => todo!(), }) } diff --git a/fuzz/src/array/compare.rs b/fuzz/src/array/compare.rs index 698885afa9c..99b19a802c8 100644 --- a/fuzz/src/array/compare.rs +++ b/fuzz/src/array/compare.rs @@ -141,7 +141,7 @@ pub fn compare_canonical_array( ) .into_array() } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/filter.rs b/fuzz/src/array/filter.rs index fb4f96cd484..baef9ea630b 100644 --- a/fuzz/src/array/filter.rs +++ b/fuzz/src/array/filter.rs @@ -115,7 +115,7 @@ pub fn filter_canonical_array(array: &ArrayRef, filter: &[bool]) -> VortexResult } take_canonical_array_non_nullable_indices(array, indices.as_slice()) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/mod.rs b/fuzz/src/array/mod.rs index acd461c3a32..1c97423498a 100644 --- a/fuzz/src/array/mod.rs +++ b/fuzz/src/array/mod.rs @@ -487,6 +487,8 @@ fn actions_for_dtype(dtype: &DType) -> HashSet { // Extension types delegate to storage dtype, support most operations ActionType::iter().collect() } + // Currently, no support at all + DType::Variant => Default::default(), } } diff --git a/fuzz/src/array/search_sorted.rs b/fuzz/src/array/search_sorted.rs index dac57c8d297..2697b7081f7 100644 --- a/fuzz/src/array/search_sorted.rs +++ b/fuzz/src/array/search_sorted.rs @@ -131,7 +131,7 @@ pub fn search_sorted_canonical_array( .collect::>>()?; scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/slice.rs b/fuzz/src/array/slice.rs index ca0524949cf..04492a3786f 100644 --- 
a/fuzz/src/array/slice.rs +++ b/fuzz/src/array/slice.rs @@ -114,7 +114,7 @@ pub fn slice_canonical_array( .into_array(), ) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/sort.rs b/fuzz/src/array/sort.rs index f6bce78f621..96072bef56e 100644 --- a/fuzz/src/array/sort.rs +++ b/fuzz/src/array/sort.rs @@ -81,7 +81,7 @@ pub fn sort_canonical_array(array: &ArrayRef) -> VortexResult { }); take_canonical_array_non_nullable_indices(array, &sort_indices) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/take.rs b/fuzz/src/array/take.rs index 8c963c2c066..d5921ae952d 100644 --- a/fuzz/src/array/take.rs +++ b/fuzz/src/array/take.rs @@ -138,7 +138,7 @@ pub fn take_canonical_array(array: &ArrayRef, indices: &[Option]) -> Vort } Ok(builder.finish()) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/vortex-datafusion/src/convert/scalars.rs b/vortex-datafusion/src/convert/scalars.rs index c2a5d9d16c8..25cb13c5ad1 100644 --- a/vortex-datafusion/src/convert/scalars.rs +++ b/vortex-datafusion/src/convert/scalars.rs @@ -101,7 +101,6 @@ impl TryToDataFusion for Scalar { } } } - // SAFETY: By construction Utf8 scalar values are utf8 DType::Utf8(_) => ScalarValue::Utf8(self.as_utf8().value().cloned().map(|s| unsafe { String::from_utf8_unchecked(Vec::::from(s.into_inner().into_inner())) })), @@ -160,6 +159,7 @@ impl TryToDataFusion for Scalar { }, } } + DType::Variant => vortex_bail!("Variant scalars aren't supported with DF"), }) } } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index aeffa40140b..8a2411cc085 100644 --- 
a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -236,6 +236,9 @@ impl TryFrom<&DType> for LogicalType { let element_logical_type = LogicalType::try_from(element_dtype.as_ref())?; return LogicalType::array_type(element_logical_type, *list_size); } + DType::Variant => { + vortex_bail!("Vortex Variant array aren't supported in DuckDB") + } DType::Extension(ext_dtype) => { let Some(temporal) = ext_dtype.metadata_opt::() else { vortex_bail!("Unsupported extension type \"{}\"", ext_dtype.id()); diff --git a/vortex-duckdb/src/convert/scalar.rs b/vortex-duckdb/src/convert/scalar.rs index 56f2b6baf5b..dfd2bfa6b8b 100644 --- a/vortex-duckdb/src/convert/scalar.rs +++ b/vortex-duckdb/src/convert/scalar.rs @@ -79,6 +79,7 @@ impl ToDuckDBScalar for Scalar { DType::Utf8(_) => self.as_utf8().try_to_duckdb_scalar(), DType::Binary(_) => self.as_binary().try_to_duckdb_scalar(), DType::Struct(..) | DType::List(..) | DType::FixedSizeList(..) => todo!(), + DType::Variant => todo!(), } } } diff --git a/vortex-ffi/src/dtype.rs b/vortex-ffi/src/dtype.rs index a05a53459ca..a6c22fc1e81 100644 --- a/vortex-ffi/src/dtype.rs +++ b/vortex-ffi/src/dtype.rs @@ -68,6 +68,7 @@ impl From<&DType> for vx_dtype_variant { DType::List(..) => vx_dtype_variant::DTYPE_LIST, DType::FixedSizeList(..) 
=> vx_dtype_variant::DTYPE_FIXED_SIZE_LIST, DType::Extension(_) => vx_dtype_variant::DTYPE_EXTENSION, + DType::Variant => vortex_panic!("Variant DType is not supported in FFI yet"), } } } diff --git a/vortex-jni/src/dtype.rs b/vortex-jni/src/dtype.rs index 8830064b8cd..38037c9ad9f 100644 --- a/vortex-jni/src/dtype.rs +++ b/vortex-jni/src/dtype.rs @@ -98,6 +98,7 @@ pub extern "system" fn Java_dev_vortex_jni_NativeDTypeMethods_getVariant( unimplemented!("TODO(connor)[FixedSizeList]") } DType::Extension(_) => DTYPE_EXTENSION, + DType::Variant => unimplemented!("Variant DType is not supported in JNI yet"), } } diff --git a/vortex-python/src/dtype/mod.rs b/vortex-python/src/dtype/mod.rs index 876e41cbbb5..0d8f3dae18c 100644 --- a/vortex-python/src/dtype/mod.rs +++ b/vortex-python/src/dtype/mod.rs @@ -26,6 +26,7 @@ use pyo3::PyClass; use pyo3::PyClassInitializer; use pyo3::PyResult; use pyo3::Python; +use pyo3::exceptions::PyValueError; use pyo3::prelude::PyModule; use pyo3::prelude::PyModuleMethods; use pyo3::pyclass; @@ -124,6 +125,9 @@ impl PyDType { DType::List(..) => Self::with_subclass(py, dtype, PyListDType), DType::FixedSizeList(..) => Self::with_subclass(py, dtype, PyFixedSizeListDType), DType::Extension(..) 
=> Self::with_subclass(py, dtype, PyExtensionDType), + DType::Variant => Err(PyValueError::new_err( + "Variant DType is not supported in Python yet", + )), } } diff --git a/vortex-python/src/python_repr.rs b/vortex-python/src/python_repr.rs index 3c4fe045e94..8786238b2d1 100644 --- a/vortex-python/src/python_repr.rs +++ b/vortex-python/src/python_repr.rs @@ -103,6 +103,7 @@ impl Display for DTypePythonRepr<'_> { } write!(f, ")") } + DType::Variant => write!(f, "variant()"), } } } diff --git a/vortex-python/src/scalar/into_py.rs b/vortex-python/src/scalar/into_py.rs index 70a890821ff..899619c8c5c 100644 --- a/vortex-python/src/scalar/into_py.rs +++ b/vortex-python/src/scalar/into_py.rs @@ -9,6 +9,7 @@ use pyo3::PyAny; use pyo3::PyErr; use pyo3::PyResult; use pyo3::Python; +use pyo3::exceptions::PyValueError; use pyo3::prelude::PyAnyMethods; use pyo3::prelude::PyDictMethods; use pyo3::types::PyBytes; @@ -85,6 +86,9 @@ impl<'py> IntoPyObject<'py> for PyVortex<&'_ Scalar> { DType::Extension(_) => { PyVortex(&self.0.as_extension().to_storage_scalar()).into_pyobject(py) } + DType::Variant => Err(PyValueError::new_err( + "Variant scalars are not supported in Python yet", + )), } } } diff --git a/vortex-python/src/scalar/mod.rs b/vortex-python/src/scalar/mod.rs index d6c18d64ea5..d97fd15d3b1 100644 --- a/vortex-python/src/scalar/mod.rs +++ b/vortex-python/src/scalar/mod.rs @@ -20,6 +20,7 @@ mod struct_; mod utf8; use pyo3::PyClass; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::dtype::DType; use vortex::error::VortexError; @@ -109,6 +110,9 @@ impl PyScalar { Self::with_subclass(py, scalar, PyListScalar) } DType::Extension(..) 
=> Self::with_subclass(py, scalar, PyExtensionScalar), + DType::Variant => Err(PyValueError::new_err( + "Variant scalars are not supported in Python yet", + )), } } From 30c803678ba032e322db726648c39c9afc673519 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Mar 2026 15:44:56 +0000 Subject: [PATCH 6/8] fixes Signed-off-by: Adam Gutglick --- encodings/sparse/src/canonical.rs | 3 ++- vortex-array/src/arrays/variant/mod.rs | 2 +- vortex-datafusion/src/convert/scalars.rs | 1 + vortex-duckdb/src/convert/scalar.rs | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index a099e9d0e70..e3b5a2deb0e 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -50,6 +50,7 @@ use vortex_buffer::buffer_mut; use vortex_error::VortexError; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_panic; use crate::ConstantArray; @@ -117,7 +118,7 @@ pub(super) fn execute_sparse( execute_sparse_fixed_size_list(array, *nullability, ctx)? } DType::Extension(_ext_dtype) => todo!(), - DType::Variant => todo!(), + DType::Variant => vortex_bail!("Sparse canonicalization does not support Variant"), }) } diff --git a/vortex-array/src/arrays/variant/mod.rs b/vortex-array/src/arrays/variant/mod.rs index e56d305ebb4..0aaa66db28e 100644 --- a/vortex-array/src/arrays/variant/mod.rs +++ b/vortex-array/src/arrays/variant/mod.rs @@ -10,7 +10,7 @@ use crate::stats::ArrayStats; /// The canonical in-memory representation of variant (semi-structured) data. /// /// Wraps a single child array that contains the actual variant-encoded data -/// (e.g. a [`ParquetVariantArray`] or any other variant encoding). +/// (e.g. a `ParquetVariantArray` or any other variant encoding). 
#[derive(Clone, Debug)] pub struct VariantArray { child: ArrayRef, diff --git a/vortex-datafusion/src/convert/scalars.rs b/vortex-datafusion/src/convert/scalars.rs index 25cb13c5ad1..28ed6f67353 100644 --- a/vortex-datafusion/src/convert/scalars.rs +++ b/vortex-datafusion/src/convert/scalars.rs @@ -101,6 +101,7 @@ impl TryToDataFusion for Scalar { } } } + // SAFETY: By construction Utf8 scalar values are utf8. DType::Utf8(_) => ScalarValue::Utf8(self.as_utf8().value().cloned().map(|s| unsafe { String::from_utf8_unchecked(Vec::::from(s.into_inner().into_inner())) })), diff --git a/vortex-duckdb/src/convert/scalar.rs b/vortex-duckdb/src/convert/scalar.rs index dfd2bfa6b8b..ac34611fcc8 100644 --- a/vortex-duckdb/src/convert/scalar.rs +++ b/vortex-duckdb/src/convert/scalar.rs @@ -79,7 +79,9 @@ impl ToDuckDBScalar for Scalar { DType::Utf8(_) => self.as_utf8().try_to_duckdb_scalar(), DType::Binary(_) => self.as_binary().try_to_duckdb_scalar(), DType::Struct(..) | DType::List(..) | DType::FixedSizeList(..) 
=> todo!(), - DType::Variant => todo!(), + DType::Variant => { + vortex_bail!("Vortex Variant scalars aren't supported in DuckDB") + } } } } From f1a1ff84a4572bc2dbdbb480c4a29abf55225d08 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Mar 2026 17:00:01 +0000 Subject: [PATCH 7/8] lets make CI green Signed-off-by: Adam Gutglick --- .github/workflows/ci.yml | 2 +- encodings/parquet-variant/public-api.lock | 121 ++++++ vortex-array/public-api.lock | 448 ++++++++++++++++++++++ vortex-flatbuffers/public-api.lock | 58 ++- vortex-proto/public-api.lock | 64 ++++ 5 files changed, 691 insertions(+), 2 deletions(-) create mode 100644 encodings/parquet-variant/public-api.lock diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1d132b36119..cb2381dca1d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -217,7 +217,7 @@ jobs: target: wasm32-unknown-unknown env: rustflags: "RUSTFLAGS='-A warnings --cfg getrandom_backend=\"wasm_js\"'" - args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda --exclude vortex-sqllogictest" + args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda --exclude vortex-sqllogictest --exclude vortex-parquet-variant" steps: - uses: runs-on/action@v2 if: github.repository == 'vortex-data/vortex' diff --git a/encodings/parquet-variant/public-api.lock b/encodings/parquet-variant/public-api.lock new file mode 100644 index 00000000000..7d8ac6378c8 --- /dev/null +++ b/encodings/parquet-variant/public-api.lock @@ -0,0 +1,121 @@ +pub mod vortex_parquet_variant + +pub struct vortex_parquet_variant::ParquetVariantArray + +impl 
vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::from_arrow_variant(arrow_variant: &parquet_variant_compute::variant_array::VariantArray) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantArray::metadata_array(&self) -> &vortex_array::array::ArrayRef + +pub fn vortex_parquet_variant::ParquetVariantArray::try_new(metadata: vortex_array::array::ArrayRef, value: core::option::Option, typed_value: core::option::Option) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantArray::typed_value_array(&self) -> core::option::Option<&vortex_array::array::ArrayRef> + +pub fn vortex_parquet_variant::ParquetVariantArray::value_array(&self) -> core::option::Option<&vortex_array::array::ArrayRef> + +impl vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::to_array(&self) -> vortex_array::array::ArrayRef + +impl core::clone::Clone for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::clone(&self) -> vortex_parquet_variant::ParquetVariantArray + +impl core::convert::AsRef for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::as_ref(&self) -> &dyn vortex_array::array::DynArray + +impl core::convert::From for vortex_array::array::ArrayRef + +pub fn vortex_array::array::ArrayRef::from(value: vortex_parquet_variant::ParquetVariantArray) -> vortex_array::array::ArrayRef + +impl core::fmt::Debug for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_parquet_variant::ParquetVariantArray + +pub type vortex_parquet_variant::ParquetVariantArray::Target = dyn vortex_array::array::DynArray + +pub fn vortex_parquet_variant::ParquetVariantArray::deref(&self) -> &Self::Target + +impl 
vortex_array::array::IntoArray for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::into_array(self) -> vortex_array::array::ArrayRef + +pub struct vortex_parquet_variant::ParquetVariantMetadata + +pub vortex_parquet_variant::ParquetVariantMetadata::has_value: bool + +pub vortex_parquet_variant::ParquetVariantMetadata::typed_value_dtype: core::option::Option + +impl core::clone::Clone for vortex_parquet_variant::ParquetVariantMetadata + +pub fn vortex_parquet_variant::ParquetVariantMetadata::clone(&self) -> vortex_parquet_variant::ParquetVariantMetadata + +impl core::fmt::Debug for vortex_parquet_variant::ParquetVariantMetadata + +pub fn vortex_parquet_variant::ParquetVariantMetadata::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_parquet_variant::ParquetVariantVTable + +impl vortex_parquet_variant::ParquetVariantVTable + +pub const vortex_parquet_variant::ParquetVariantVTable::ID: vortex_array::vtable::dyn_::ArrayId + +impl core::fmt::Debug for vortex_parquet_variant::ParquetVariantVTable + +pub fn vortex_parquet_variant::ParquetVariantVTable::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::vtable::VTable for vortex_parquet_variant::ParquetVariantVTable + +pub type vortex_parquet_variant::ParquetVariantVTable::Array = vortex_parquet_variant::ParquetVariantArray + +pub type vortex_parquet_variant::ParquetVariantVTable::Metadata = vortex_parquet_variant::ParquetVariantMetadata + +pub type vortex_parquet_variant::ParquetVariantVTable::OperationsVTable = vortex_array::vtable::NotSupported + +pub type vortex_parquet_variant::ParquetVariantVTable::ValidityVTable = vortex_parquet_variant::ParquetVariantVTable + +pub fn vortex_parquet_variant::ParquetVariantVTable::array_eq(array: &vortex_parquet_variant::ParquetVariantArray, other: &vortex_parquet_variant::ParquetVariantArray, precision: vortex_array::hash::Precision) -> bool + +pub fn 
vortex_parquet_variant::ParquetVariantVTable::array_hash(array: &vortex_parquet_variant::ParquetVariantArray, state: &mut H, precision: vortex_array::hash::Precision) + +pub fn vortex_parquet_variant::ParquetVariantVTable::buffer(_array: &vortex_parquet_variant::ParquetVariantArray, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_parquet_variant::ParquetVariantVTable::buffer_name(_array: &vortex_parquet_variant::ParquetVariantArray, _idx: usize) -> core::option::Option + +pub fn vortex_parquet_variant::ParquetVariantVTable::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantVTable::child(array: &vortex_parquet_variant::ParquetVariantArray, idx: usize) -> vortex_array::array::ArrayRef + +pub fn vortex_parquet_variant::ParquetVariantVTable::child_name(array: &vortex_parquet_variant::ParquetVariantArray, idx: usize) -> alloc::string::String + +pub fn vortex_parquet_variant::ParquetVariantVTable::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantVTable::dtype(_array: &vortex_parquet_variant::ParquetVariantArray) -> &vortex_array::dtype::DType + +pub fn vortex_parquet_variant::ParquetVariantVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantVTable::id(_array: &Self::Array) -> vortex_array::vtable::dyn_::ArrayId + +pub fn vortex_parquet_variant::ParquetVariantVTable::len(array: &vortex_parquet_variant::ParquetVariantArray) -> usize + +pub fn vortex_parquet_variant::ParquetVariantVTable::metadata(array: 
&vortex_parquet_variant::ParquetVariantArray) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantVTable::nbuffers(_array: &vortex_parquet_variant::ParquetVariantArray) -> usize + +pub fn vortex_parquet_variant::ParquetVariantVTable::nchildren(array: &vortex_parquet_variant::ParquetVariantArray) -> usize + +pub fn vortex_parquet_variant::ParquetVariantVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_parquet_variant::ParquetVariantVTable::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_parquet_variant::ParquetVariantVTable::stats(array: &vortex_parquet_variant::ParquetVariantArray) -> vortex_array::stats::array::StatsSetRef<'_> + +pub fn vortex_parquet_variant::ParquetVariantVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::validity::ValidityVTable for vortex_parquet_variant::ParquetVariantVTable + +pub fn vortex_parquet_variant::ParquetVariantVTable::validity(_array: &vortex_parquet_variant::ParquetVariantArray) -> vortex_error::VortexResult diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index e2e6584f6bd..61395b61a64 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -4594,6 +4594,118 @@ pub fn vortex_array::arrays::VarBinViewVTable::stats(array: &vortex_array::array pub fn vortex_array::arrays::VarBinViewVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +pub mod vortex_array::arrays::variant + +pub struct vortex_array::arrays::variant::VariantArray + +impl vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::child(&self) -> &vortex_array::ArrayRef + +pub fn vortex_array::arrays::variant::VariantArray::new(child: vortex_array::ArrayRef) -> Self + +impl 
vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::clone(&self) -> vortex_array::arrays::variant::VariantArray + +impl core::convert::AsRef for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::variant::VariantArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::variant::VariantArray + +pub type vortex_array::arrays::variant::VariantArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::variant::VariantArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::into_array(self) -> vortex_array::ArrayRef + +pub struct vortex_array::arrays::variant::VariantVTable + +impl vortex_array::arrays::VariantVTable + +pub const vortex_array::arrays::VariantVTable::ID: vortex_array::vtable::ArrayId + +impl core::fmt::Debug for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::scalar_at(_array: &::Array, _index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::VariantVTable + +pub type 
vortex_array::arrays::VariantVTable::Array = vortex_array::arrays::variant::VariantArray + +pub type vortex_array::arrays::VariantVTable::Metadata = vortex_array::EmptyMetadata + +pub type vortex_array::arrays::VariantVTable::OperationsVTable = vortex_array::arrays::VariantVTable + +pub type vortex_array::arrays::VariantVTable::ValidityVTable = vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::VariantVTable::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::VariantVTable::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::VariantVTable::buffer(_array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::VariantVTable::buffer_name(_array: &Self::Array, _idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::VariantVTable::build(dtype: &vortex_array::dtype::DType, len: usize, _metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::VariantVTable::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::VariantVTable::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::dtype(_array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn 
vortex_array::arrays::VariantVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::id(_array: &Self::Array) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::VariantVTable::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::metadata(_array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::VariantVTable::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::VariantVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityVTable for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::validity(array: &::Array) -> vortex_error::VortexResult + pub struct vortex_array::arrays::BoolArray impl vortex_array::arrays::BoolArray @@ -7820,6 +7932,116 @@ pub fn vortex_array::arrays::VarBinViewVTable::stats(array: &vortex_array::array pub fn vortex_array::arrays::VarBinViewVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +pub struct 
vortex_array::arrays::VariantArray + +impl vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::child(&self) -> &vortex_array::ArrayRef + +pub fn vortex_array::arrays::variant::VariantArray::new(child: vortex_array::ArrayRef) -> Self + +impl vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::clone(&self) -> vortex_array::arrays::variant::VariantArray + +impl core::convert::AsRef for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::variant::VariantArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::variant::VariantArray + +pub type vortex_array::arrays::variant::VariantArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::variant::VariantArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::into_array(self) -> vortex_array::ArrayRef + +pub struct vortex_array::arrays::VariantVTable + +impl vortex_array::arrays::VariantVTable + +pub const vortex_array::arrays::VariantVTable::ID: vortex_array::vtable::ArrayId + +impl core::fmt::Debug for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl 
vortex_array::vtable::OperationsVTable for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::scalar_at(_array: &::Array, _index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::VariantVTable + +pub type vortex_array::arrays::VariantVTable::Array = vortex_array::arrays::variant::VariantArray + +pub type vortex_array::arrays::VariantVTable::Metadata = vortex_array::EmptyMetadata + +pub type vortex_array::arrays::VariantVTable::OperationsVTable = vortex_array::arrays::VariantVTable + +pub type vortex_array::arrays::VariantVTable::ValidityVTable = vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::VariantVTable::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::VariantVTable::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::VariantVTable::buffer(_array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::VariantVTable::buffer_name(_array: &Self::Array, _idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::VariantVTable::build(dtype: &vortex_array::dtype::DType, len: usize, _metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::VariantVTable::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::VariantVTable::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, 
_len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::dtype(_array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::VariantVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::id(_array: &Self::Array) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::VariantVTable::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::metadata(_array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::VariantVTable::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::VariantVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityVTable for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::validity(array: &::Array) -> vortex_error::VortexResult + pub mod vortex_array::arrow pub mod vortex_array::arrow::bool @@ -9328,6 +9550,8 @@ pub fn 
vortex_array::builtins::ArrayBuiltins::mask(self, mask: vortex_array::Arr pub fn vortex_array::builtins::ArrayBuiltins::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::builtins::ArrayBuiltins::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::builtins::ArrayBuiltins::zip(&self, if_true: vortex_array::ArrayRef, if_false: vortex_array::ArrayRef) -> vortex_error::VortexResult impl vortex_array::builtins::ArrayBuiltins for vortex_array::ArrayRef @@ -9350,6 +9574,8 @@ pub fn vortex_array::ArrayRef::mask(self, mask: vortex_array::ArrayRef) -> vorte pub fn vortex_array::ArrayRef::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::ArrayRef::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::ArrayRef::zip(&self, if_true: vortex_array::ArrayRef, if_false: vortex_array::ArrayRef) -> vortex_error::VortexResult pub trait vortex_array::builtins::ExprBuiltins: core::marker::Sized @@ -9370,6 +9596,8 @@ pub fn vortex_array::builtins::ExprBuiltins::mask(&self, mask: vortex_array::exp pub fn vortex_array::builtins::ExprBuiltins::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::builtins::ExprBuiltins::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::builtins::ExprBuiltins::zip(&self, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_error::VortexResult impl vortex_array::builtins::ExprBuiltins for vortex_array::expr::Expression @@ -9390,6 +9618,8 @@ pub fn vortex_array::expr::Expression::mask(&self, mask: vortex_array::expr::Exp pub fn vortex_array::expr::Expression::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::expr::Expression::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> 
vortex_error::VortexResult + pub fn vortex_array::expr::Expression::zip(&self, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_error::VortexResult pub mod vortex_array::compute @@ -10398,6 +10628,8 @@ pub vortex_array::dtype::DType::Struct(vortex_array::dtype::StructFields, vortex pub vortex_array::dtype::DType::Utf8(vortex_array::dtype::Nullability) +pub vortex_array::dtype::DType::Variant + impl vortex_array::dtype::DType pub const vortex_array::dtype::DType::BYTES: Self @@ -10474,6 +10706,8 @@ pub fn vortex_array::dtype::DType::is_unsigned_int(&self) -> bool pub fn vortex_array::dtype::DType::is_utf8(&self) -> bool +pub fn vortex_array::dtype::DType::is_variant(&self) -> bool + pub fn vortex_array::dtype::DType::list(dtype: impl core::convert::Into, nullability: vortex_array::dtype::Nullability) -> Self pub fn vortex_array::dtype::DType::nullability(&self) -> vortex_array::dtype::Nullability @@ -13780,6 +14014,8 @@ pub fn vortex_array::expr::Expression::mask(&self, mask: vortex_array::expr::Exp pub fn vortex_array::expr::Expression::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::expr::Expression::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::expr::Expression::zip(&self, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_error::VortexResult impl vortex_array::expr::VortexExprExt for vortex_array::expr::Expression @@ -13918,6 +14154,8 @@ pub fn vortex_array::expr::select_exclude(fields: impl core::convert::Into alloc::vec::Vec +pub fn vortex_array::expr::variant_get(path: core::option::Option, dtype: vortex_array::dtype::DType, child: vortex_array::expr::Expression) -> vortex_array::expr::Expression + pub fn vortex_array::expr::zip_expr(mask: vortex_array::expr::Expression, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> 
vortex_array::expr::Expression pub type vortex_array::expr::Annotations<'a, A> = vortex_utils::aliases::hash_map::HashMap<&'a vortex_array::expr::Expression, vortex_utils::aliases::hash_set::HashSet> @@ -18446,6 +18684,114 @@ pub fn vortex_array::scalar_fn::fns::select::Select::stat_falsification(&self, o pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +pub mod vortex_array::scalar_fn::fns::variant_get + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantGet + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantGet + +impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> vortex_array::scalar_fn::Arity + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _options: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: 
&vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, options: &Self::Options, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification(&self, options: 
&Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::dtype(&self) -> &vortex_array::dtype::DType + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::new(path: core::option::Option, dtype: vortex_array::dtype::DType) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::path(&self) -> core::option::Option<&vortex_array::scalar_fn::fns::variant_get::VariantPath> + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for 
vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::is_empty(&self) -> bool + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantPath) -> bool + +impl core::default::Default for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::default() -> vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPath + pub mod vortex_array::scalar_fn::fns::zip pub struct vortex_array::scalar_fn::fns::zip::Zip @@ -19474,6 +19820,42 @@ pub fn vortex_array::scalar_fn::fns::select::Select::stat_falsification(&self, 
o pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> vortex_array::scalar_fn::Arity + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _options: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, options: 
&Self::Options, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::zip::Zip pub type vortex_array::scalar_fn::fns::zip::Zip::Options = vortex_array::scalar_fn::EmptyOptions @@ -20386,6 +20768,10 @@ impl vortex_array::vtable::OperationsVTable vortex_error::VortexResult +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::VariantVTable + +pub fn 
vortex_array::arrays::VariantVTable::scalar_at(_array: &::Array, _index: usize) -> vortex_error::VortexResult + impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::dict::DictVTable pub fn vortex_array::arrays::dict::DictVTable::scalar_at(array: &vortex_array::arrays::dict::DictArray, index: usize) -> vortex_error::VortexResult @@ -21270,6 +21656,60 @@ pub fn vortex_array::arrays::VarBinViewVTable::stats(array: &vortex_array::array pub fn vortex_array::arrays::VarBinViewVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::VariantVTable + +pub type vortex_array::arrays::VariantVTable::Array = vortex_array::arrays::variant::VariantArray + +pub type vortex_array::arrays::VariantVTable::Metadata = vortex_array::EmptyMetadata + +pub type vortex_array::arrays::VariantVTable::OperationsVTable = vortex_array::arrays::VariantVTable + +pub type vortex_array::arrays::VariantVTable::ValidityVTable = vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::VariantVTable::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::VariantVTable::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::VariantVTable::buffer(_array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::VariantVTable::buffer_name(_array: &Self::Array, _idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::VariantVTable::build(dtype: &vortex_array::dtype::DType, len: usize, _metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn 
vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::VariantVTable::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::VariantVTable::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::dtype(_array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::VariantVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::id(_array: &Self::Array) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::VariantVTable::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::metadata(_array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::VariantVTable::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::VariantVTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::VariantVTable::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::VariantVTable::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn 
vortex_array::arrays::VariantVTable::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::dict::DictVTable pub type vortex_array::arrays::dict::DictVTable::Array = vortex_array::arrays::dict::DictArray @@ -21570,6 +22010,10 @@ impl vortex_array::vtable::ValidityVTable fo pub fn vortex_array::arrays::SharedVTable::validity(array: &vortex_array::arrays::SharedArray) -> vortex_error::VortexResult +impl vortex_array::vtable::ValidityVTable for vortex_array::arrays::VariantVTable + +pub fn vortex_array::arrays::VariantVTable::validity(array: &::Array) -> vortex_error::VortexResult + impl vortex_array::vtable::ValidityVTable for vortex_array::arrays::dict::DictVTable pub fn vortex_array::arrays::dict::DictVTable::validity(array: &vortex_array::arrays::dict::DictArray) -> vortex_error::VortexResult @@ -22638,6 +23082,10 @@ impl vortex_array::IntoArray for vortex_array::arrays::slice::SliceArray pub fn vortex_array::arrays::slice::SliceArray::into_array(self) -> vortex_array::ArrayRef +impl vortex_array::IntoArray for vortex_array::arrays::variant::VariantArray + +pub fn vortex_array::arrays::variant::VariantArray::into_array(self) -> vortex_array::ArrayRef + impl vortex_array::IntoArray for vortex_buffer::bit::buf::BitBuffer pub fn vortex_buffer::bit::buf::BitBuffer::into_array(self) -> vortex_array::ArrayRef diff --git a/vortex-flatbuffers/public-api.lock b/vortex-flatbuffers/public-api.lock index 8f1a4291000..952046b78db 100644 --- a/vortex-flatbuffers/public-api.lock +++ b/vortex-flatbuffers/public-api.lock @@ -574,6 +574,8 @@ pub enum vortex_flatbuffers::dtype::Struct_Offset pub enum vortex_flatbuffers::dtype::Utf8Offset +pub enum vortex_flatbuffers::dtype::VariantOffset + pub struct vortex_flatbuffers::dtype::Binary<'a> pub vortex_flatbuffers::dtype::Binary::_tab: flatbuffers::table::Table<'a> @@ -726,6 +728,8 @@ pub fn 
vortex_flatbuffers::dtype::DType<'a>::type__as_struct_(&self) -> core::op pub fn vortex_flatbuffers::dtype::DType<'a>::type__as_utf_8(&self) -> core::option::Option> +pub fn vortex_flatbuffers::dtype::DType<'a>::type__as_variant(&self) -> core::option::Option> + pub fn vortex_flatbuffers::dtype::DType<'a>::type_type(&self) -> vortex_flatbuffers::dtype::Type impl core::fmt::Debug for vortex_flatbuffers::dtype::DType<'_> @@ -1380,6 +1384,8 @@ pub const vortex_flatbuffers::dtype::Type::Struct_: Self pub const vortex_flatbuffers::dtype::Type::Utf8: Self +pub const vortex_flatbuffers::dtype::Type::Variant: Self + pub fn vortex_flatbuffers::dtype::Type::variant_name(self) -> core::option::Option<&'static str> impl core::clone::Clone for vortex_flatbuffers::dtype::Type @@ -1502,6 +1508,56 @@ pub fn vortex_flatbuffers::dtype::Utf8Builder<'a, 'b, A>::finish(self) -> flatbu pub fn vortex_flatbuffers::dtype::Utf8Builder<'a, 'b, A>::new(_fbb: &'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>) -> vortex_flatbuffers::dtype::Utf8Builder<'a, 'b, A> +pub struct vortex_flatbuffers::dtype::Variant<'a> + +pub vortex_flatbuffers::dtype::Variant::_tab: flatbuffers::table::Table<'a> + +impl<'a> vortex_flatbuffers::dtype::Variant<'a> + +pub fn vortex_flatbuffers::dtype::Variant<'a>::create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::builder::Allocator + 'bldr>(_fbb: &'mut_bldr mut flatbuffers::builder::FlatBufferBuilder<'bldr, A>, _args: &'args vortex_flatbuffers::dtype::VariantArgs) -> flatbuffers::primitives::WIPOffset> + +pub unsafe fn vortex_flatbuffers::dtype::Variant<'a>::init_from_table(table: flatbuffers::table::Table<'a>) -> Self + +impl core::fmt::Debug for vortex_flatbuffers::dtype::Variant<'_> + +pub fn vortex_flatbuffers::dtype::Variant<'_>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl flatbuffers::verifier::Verifiable for vortex_flatbuffers::dtype::Variant<'_> + +pub fn vortex_flatbuffers::dtype::Variant<'_>::run_verifier(v: 
&mut flatbuffers::verifier::Verifier<'_, '_>, pos: usize) -> core::result::Result<(), flatbuffers::verifier::InvalidFlatbuffer> + +impl<'a> core::clone::Clone for vortex_flatbuffers::dtype::Variant<'a> + +pub fn vortex_flatbuffers::dtype::Variant<'a>::clone(&self) -> vortex_flatbuffers::dtype::Variant<'a> + +impl<'a> core::cmp::PartialEq for vortex_flatbuffers::dtype::Variant<'a> + +pub fn vortex_flatbuffers::dtype::Variant<'a>::eq(&self, other: &vortex_flatbuffers::dtype::Variant<'a>) -> bool + +impl<'a> core::marker::Copy for vortex_flatbuffers::dtype::Variant<'a> + +impl<'a> core::marker::StructuralPartialEq for vortex_flatbuffers::dtype::Variant<'a> + +impl<'a> flatbuffers::follow::Follow<'a> for vortex_flatbuffers::dtype::Variant<'a> + +pub type vortex_flatbuffers::dtype::Variant<'a>::Inner = vortex_flatbuffers::dtype::Variant<'a> + +pub unsafe fn vortex_flatbuffers::dtype::Variant<'a>::follow(buf: &'a [u8], loc: usize) -> Self::Inner + +pub struct vortex_flatbuffers::dtype::VariantArgs + +impl<'a> core::default::Default for vortex_flatbuffers::dtype::VariantArgs + +pub fn vortex_flatbuffers::dtype::VariantArgs::default() -> Self + +pub struct vortex_flatbuffers::dtype::VariantBuilder<'a: 'b, 'b, A: flatbuffers::builder::Allocator + 'a> + +impl<'a: 'b, 'b, A: flatbuffers::builder::Allocator + 'a> vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A> + +pub fn vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A>::finish(self) -> flatbuffers::primitives::WIPOffset> + +pub fn vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A>::new(_fbb: &'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>) -> vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A> + pub const vortex_flatbuffers::dtype::ENUM_MAX_PTYPE: u8 pub const vortex_flatbuffers::dtype::ENUM_MAX_TYPE: u8 @@ -1512,7 +1568,7 @@ pub const vortex_flatbuffers::dtype::ENUM_MIN_TYPE: u8 pub const vortex_flatbuffers::dtype::ENUM_VALUES_PTYPE: [vortex_flatbuffers::dtype::PType; 11] -pub const 
vortex_flatbuffers::dtype::ENUM_VALUES_TYPE: [vortex_flatbuffers::dtype::Type; 11] +pub const vortex_flatbuffers::dtype::ENUM_VALUES_TYPE: [vortex_flatbuffers::dtype::Type; 12] pub fn vortex_flatbuffers::dtype::finish_dtype_buffer<'a, 'b, A: flatbuffers::builder::Allocator + 'a>(fbb: &'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>, root: flatbuffers::primitives::WIPOffset>) diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index 1f6a2f409e7..fc528e532e0 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -26,6 +26,8 @@ pub vortex_proto::dtype::d_type::DtypeType::Struct(vortex_proto::dtype::Struct) pub vortex_proto::dtype::d_type::DtypeType::Utf8(vortex_proto::dtype::Utf8) +pub vortex_proto::dtype::d_type::DtypeType::Variant(vortex_proto::dtype::Variant) + impl vortex_proto::dtype::d_type::DtypeType pub fn vortex_proto::dtype::d_type::DtypeType::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) @@ -608,6 +610,40 @@ pub fn vortex_proto::dtype::Utf8::clear(&mut self) pub fn vortex_proto::dtype::Utf8::encoded_len(&self) -> usize +pub struct vortex_proto::dtype::Variant + +impl core::clone::Clone for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::clone(&self) -> vortex_proto::dtype::Variant + +impl core::cmp::Eq for vortex_proto::dtype::Variant + +impl core::cmp::PartialEq for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::eq(&self, other: &vortex_proto::dtype::Variant) -> bool + +impl core::default::Default for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::default() -> Self + +impl core::fmt::Debug for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::Copy for 
vortex_proto::dtype::Variant + +impl core::marker::StructuralPartialEq for vortex_proto::dtype::Variant + +impl prost::message::Message for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::clear(&mut self) + +pub fn vortex_proto::dtype::Variant::encoded_len(&self) -> usize + pub mod vortex_proto::expr pub mod vortex_proto::expr::binary_opts @@ -1114,6 +1150,34 @@ pub fn vortex_proto::expr::SelectOpts::clear(&mut self) pub fn vortex_proto::expr::SelectOpts::encoded_len(&self) -> usize +pub struct vortex_proto::expr::VariantGetOpts + +pub vortex_proto::expr::VariantGetOpts::dtype: core::option::Option + +impl core::clone::Clone for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::clone(&self) -> vortex_proto::expr::VariantGetOpts + +impl core::cmp::PartialEq for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::eq(&self, other: &vortex_proto::expr::VariantGetOpts) -> bool + +impl core::default::Default for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::default() -> Self + +impl core::fmt::Debug for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::expr::VariantGetOpts + +impl prost::message::Message for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::clear(&mut self) + +pub fn vortex_proto::expr::VariantGetOpts::encoded_len(&self) -> usize + pub mod vortex_proto::scalar pub mod vortex_proto::scalar::scalar_value From 83828b25713cc4e203a6c697eceaa757605a0f13 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Mar 2026 17:04:56 +0000 Subject: [PATCH 8/8] minimal arrow version Signed-off-by: Adam Gutglick --- Cargo.toml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 
aee6131d045..70e97f48eb6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,16 +87,16 @@ arbitrary = "1.3.2" arc-swap = "1.8" arcref = "0.2.0" arrayref = "0.3.7" -arrow-arith = "57.1" -arrow-array = "57.1" -arrow-buffer = "57.1" -arrow-cast = "57.1" -arrow-data = "57.1" -arrow-ipc = "57.1" -arrow-ord = "57.1" -arrow-schema = "57.1" -arrow-select = "57.1" -arrow-string = "57.1" +arrow-arith = "57.2" +arrow-array = "57.2" +arrow-buffer = "57.2" +arrow-cast = "57.2" +arrow-data = "57.2" +arrow-ipc = "57.2" +arrow-ord = "57.2" +arrow-schema = "57.2" +arrow-select = "57.2" +arrow-string = "57.2" async-fs = "2.2.0" async-lock = "3.4" async-stream = "0.3.6" @@ -179,9 +179,9 @@ opentelemetry = "0.31.0" opentelemetry-otlp = "0.31.0" opentelemetry_sdk = "0.31.0" parking_lot = { version = "0.12.3", features = ["nightly"] } -parquet = "57.1" -parquet-variant = "57" -parquet-variant-compute = "57" +parquet = "57.2" +parquet-variant = "57.2" +parquet-variant-compute = "57.2" paste = "1.0.15" pco = "1.0.1" pin-project-lite = "0.2.15"