diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 22125f4a89a..6cdfb0241bc 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -4,6 +4,7 @@ use std::fmt::Debug; use std::hash::Hash; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -20,7 +21,6 @@ use vortex_array::dtype::PType; use vortex_array::patches::Patches; use vortex_array::patches::PatchesMetadata; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; @@ -58,26 +58,26 @@ impl VTable for ALPVTable { } fn len(array: &ALPArray) -> usize { - array.encoded.len() + array.common.len() } fn dtype(array: &ALPArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ALPArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ALPArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.encoded.array_hash(state, precision); array.exponents.hash(state); array.patches.array_hash(state, precision); } fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.encoded.array_eq(&other.encoded, precision) && array.exponents == other.exponents && array.patches.array_eq(&other.patches, precision) @@ -261,9 +261,8 @@ impl VTable for ALPVTable { pub struct ALPArray { encoded: ArrayRef, patches: Option, - dtype: DType, + common: ArrayCommon, exponents: Exponents, - stats_set: ArrayStats, } #[derive(Debug)] @@ -428,12 +427,12 @@ impl ALPArray { _ => unreachable!(), }; + let len = encoded.len(); Ok(Self { - dtype, + common: ArrayCommon::new(len, dtype), encoded, exponents, patches, - stats_set: Default::default(), }) } @@ -447,17 +446,17 @@ impl ALPArray { patches: Option, dtype: DType, ) -> Self { + let len = encoded.len(); Self { - dtype, + common: ArrayCommon::new(len, dtype), encoded, exponents, patches, - stats_set: Default::default(), } } pub fn ptype(&self) -> PType { - self.dtype.as_ptype() + self.common.dtype().as_ptype() } pub fn encoded(&self) -> &ArrayRef { @@ -476,7 +475,12 @@ impl ALPArray { /// Consumes the array and returns its parts. #[inline] pub fn into_parts(self) -> (ArrayRef, Exponents, Option, DType) { - (self.encoded, self.exponents, self.patches, self.dtype) + ( + self.encoded, + self.exponents, + self.patches, + self.common.into_dtype(), + ) } } diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index e015937ae78..7242edec0f7 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -5,6 +5,7 @@ use std::fmt::Debug; use std::hash::Hash; use itertools::Itertools; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -23,7 +24,6 @@ use vortex_array::dtype::PType; use vortex_array::patches::Patches; use vortex_array::patches::PatchesMetadata; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -75,19 +75,19 @@ impl VTable for ALPRDVTable { } fn len(array: &ALPRDArray) -> usize { - array.left_parts.len() + array.common.len() } fn dtype(array: &ALPRDArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ALPRDArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ALPRDArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.left_parts.array_hash(state, precision); array.left_parts_dictionary.array_hash(state, precision); array.right_parts.array_hash(state, precision); @@ -96,7 +96,7 @@ impl VTable for ALPRDVTable { } fn array_eq(array: &ALPRDArray, other: &ALPRDArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.left_parts.array_eq(&other.left_parts, precision) && array .left_parts_dictionary @@ -357,13 +357,12 @@ impl VTable for ALPRDVTable { #[derive(Clone, Debug)] pub struct ALPRDArray { - dtype: DType, + common: ArrayCommon, left_parts: ArrayRef, left_parts_patches: Option, left_parts_dictionary: Buffer, right_parts: ArrayRef, right_bit_width: u8, - stats_set: ArrayStats, } #[derive(Debug)] @@ -428,14 +427,14 @@ impl ALPRDArray { }) .transpose()?; + let len = left_parts.len(); Ok(Self { - dtype, + common: ArrayCommon::new(len, dtype), left_parts, left_parts_dictionary, right_parts, right_bit_width, left_parts_patches, - stats_set: Default::default(), }) } @@ -449,14 +448,14 @@ impl ALPRDArray { right_bit_width: u8, left_parts_patches: Option, ) -> Self { + let len = left_parts.len(); Self { - dtype, + common: ArrayCommon::new(len, dtype), left_parts, left_parts_patches, left_parts_dictionary, right_parts, right_bit_width, - stats_set: Default::default(), } } @@ -465,7 +464,7 @@ impl ALPRDArray { /// Returns false if the logical type of the array values is f64. #[inline] pub fn is_f32(&self) -> bool { - matches!(&self.dtype, DType::Primitive(PType::F32, _)) + matches!(self.common.dtype(), DType::Primitive(PType::F32, _)) } /// The leftmost (most significant) bits of the floating point values stored in the array. diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index c7e21795a12..097dbd5e66c 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -4,6 +4,7 @@ use std::fmt::Debug; use std::hash::Hash; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -16,7 +17,6 @@ use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::scalar::Scalar; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -52,15 +52,15 @@ impl VTable for ByteBoolVTable { } fn len(array: &ByteBoolArray) -> usize { - array.buffer.len() + array.common.len() } fn dtype(array: &ByteBoolArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ByteBoolArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -68,13 +68,13 @@ impl VTable for ByteBoolVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.buffer.array_hash(state, precision); array.validity.array_hash(state, precision); } fn array_eq(array: &ByteBoolArray, other: &ByteBoolArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.buffer.array_eq(&other.buffer, precision) && array.validity.array_eq(&other.validity, precision) } @@ -166,7 +166,7 @@ impl VTable for ByteBoolVTable { ); array.validity = if children.is_empty() { - Validity::from(array.dtype.nullability()) + Validity::from(array.common.dtype().nullability()) } else { Validity::Array(children.into_iter().next().vortex_expect("checked")) }; @@ -200,10 +200,9 @@ impl VTable for ByteBoolVTable { #[derive(Clone, Debug)] pub struct ByteBoolArray { - dtype: DType, + common: ArrayCommon, buffer: BufferHandle, validity: Validity, - stats_set: ArrayStats, } #[derive(Debug)] @@ -226,10 +225,9 @@ impl ByteBoolArray { ); } Self { - dtype: DType::Bool(validity.nullability()), + common: ArrayCommon::new(length, DType::Bool(validity.nullability())), buffer, validity, - stats_set: Default::default(), } } diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index f8b89d1d066..c1ab6444d26 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -4,6 +4,7 @@ use std::fmt::Debug; use std::hash::Hash; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -19,7 +20,6 @@ use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; @@ -82,15 +82,15 @@ impl VTable for DateTimePartsVTable { } fn len(array: &DateTimePartsArray) -> usize { - array.days.len() + array.common.len() } fn dtype(array: &DateTimePartsArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &DateTimePartsArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -98,7 +98,7 @@ impl VTable for DateTimePartsVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.days.array_hash(state, precision); array.seconds.array_hash(state, precision); array.subseconds.array_hash(state, precision); @@ -109,7 +109,7 @@ impl VTable for DateTimePartsVTable { other: &DateTimePartsArray, precision: Precision, ) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.days.array_eq(&other.days, precision) && array.seconds.array_eq(&other.seconds, precision) && array.subseconds.array_eq(&other.subseconds, precision) @@ -245,11 +245,10 @@ impl VTable for DateTimePartsVTable { #[derive(Clone, Debug)] pub struct DateTimePartsArray { - dtype: DType, + common: ArrayCommon, days: ArrayRef, seconds: ArrayRef, subseconds: ArrayRef, - stats_set: ArrayStats, } #[derive(Clone, Debug)] @@ -299,11 +298,10 @@ impl DateTimePartsArray { } Ok(Self { - dtype, + common: ArrayCommon::new(length, dtype), days, seconds, subseconds, - stats_set: Default::default(), }) } @@ -313,18 +311,18 @@ impl DateTimePartsArray { seconds: ArrayRef, subseconds: ArrayRef, ) -> Self { + let len = days.len(); Self { - dtype, + common: ArrayCommon::new(len, dtype), days, seconds, subseconds, - stats_set: Default::default(), } } pub fn into_parts(self) -> DateTimePartsArrayParts { DateTimePartsArrayParts { - dtype: self.dtype, + dtype: self.common.into_dtype(), days: self.days, seconds: self.seconds, subseconds: self.subseconds, diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs index da61d1689c9..2babae469d0 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs @@ -38,7 +38,7 @@ impl CompareKernel for DecimalBytePartsVTable { return Ok(None); }; - let nullability = lhs.dtype.nullability() | rhs.dtype().nullability(); + let nullability = lhs.dtype().nullability() | rhs.dtype().nullability(); let scalar_type = lhs.msp.dtype().with_nullability(nullability); let rhs_decimal = rhs_const diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index eec4fa1aae4..2d28829608b 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -8,6 +8,7 @@ mod slice; use std::hash::Hash; use prost::Message as _; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -28,7 +29,6 @@ use vortex_array::scalar::DecimalValue; use vortex_array::scalar::Scalar; use vortex_array::scalar::ScalarValue; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; @@ -69,15 +69,15 @@ impl VTable for DecimalBytePartsVTable { } fn len(array: &DecimalBytePartsArray) -> usize { - array.msp.len() + array.common.len() } fn dtype(array: &DecimalBytePartsArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &DecimalBytePartsArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -85,7 +85,7 @@ impl VTable for DecimalBytePartsVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.msp.array_hash(state, precision); } @@ -94,7 +94,7 @@ impl VTable for DecimalBytePartsVTable { other: &DecimalBytePartsArray, precision: Precision, ) -> bool { - array.dtype == other.dtype && array.msp.array_eq(&other.msp, precision) + array.common.dtype() == other.common.dtype() && array.msp.array_eq(&other.msp, precision) } fn nbuffers(_array: &DecimalBytePartsArray) -> usize { @@ -216,8 +216,7 @@ pub struct DecimalBytePartsArray { // other than the empty Vec. // Must update `DecimalBytePartsArrayParts` too. _lower_parts: Vec, - dtype: DType, - stats_set: ArrayStats, + common: ArrayCommon, } pub struct DecimalBytePartsArrayParts { @@ -232,21 +231,21 @@ impl DecimalBytePartsArray { } let nullable = msp.dtype().nullability(); + let len = msp.len(); Ok(Self { msp, _lower_parts: Vec::new(), - dtype: DType::Decimal(decimal_dtype, nullable), - stats_set: Default::default(), + common: ArrayCommon::new(len, DType::Decimal(decimal_dtype, nullable)), }) } pub(crate) unsafe fn new_unchecked(msp: ArrayRef, decimal_dtype: DecimalDType) -> Self { let nullable = msp.dtype().nullability(); + let len = msp.len(); Self { msp, _lower_parts: Vec::new(), - dtype: DType::Decimal(decimal_dtype, nullable), - stats_set: Default::default(), + common: ArrayCommon::new(len, DType::Decimal(decimal_dtype, nullable)), } } @@ -254,12 +253,13 @@ impl DecimalBytePartsArray { pub fn into_parts(self) -> DecimalBytePartsArrayParts { DecimalBytePartsArrayParts { msp: self.msp, - dtype: self.dtype, + dtype: self.common.into_dtype(), } } pub fn decimal_dtype(&self) -> &DecimalDType { - self.dtype + self.common + .dtype() .as_decimal_opt() .vortex_expect("must be a decimal dtype") } @@ -310,7 +310,7 @@ impl OperationsVTable for DecimalBytePartsVTable { // TODO(joe): extend this to support multiple parts. let value = primitive_scalar.as_::().vortex_expect("non-null"); Scalar::try_new( - array.dtype.clone(), + array.common.dtype().clone(), Some(ScalarValue::Decimal(DecimalValue::I64(value))), ) } diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs index 5f6ad1dbbf0..c0f3e90f4fb 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs @@ -84,7 +84,8 @@ pub fn bitpack_encode( ) }; bitpacked - .stats_set + .common + .stats() .to_ref(bitpacked.as_ref()) .inherit_from(array.statistics()); Ok(bitpacked) @@ -118,7 +119,8 @@ pub unsafe fn bitpack_encode_unchecked( ) }; bitpacked - .stats_set + .common + .stats() .to_ref(bitpacked.as_ref()) .inherit_from(array.statistics()); Ok(bitpacked) diff --git a/encodings/fastlanes/src/bitpacking/array/mod.rs b/encodings/fastlanes/src/bitpacking/array/mod.rs index cffeac17090..0f9c9f1fdf4 100644 --- a/encodings/fastlanes/src/bitpacking/array/mod.rs +++ b/encodings/fastlanes/src/bitpacking/array/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use fastlanes::BitPacking; +use vortex_array::ArrayCommon; use vortex_array::ArrayRef; use vortex_array::arrays::PrimitiveVTable; use vortex_array::buffer::BufferHandle; @@ -9,7 +10,6 @@ use vortex_array::dtype::DType; use vortex_array::dtype::NativePType; use vortex_array::dtype::PType; use vortex_array::patches::Patches; -use vortex_array::stats::ArrayStats; use vortex_array::validity::Validity; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -37,13 +37,11 @@ pub struct BitPackedArray { /// The offset within the first block (created with a slice). /// 0 <= offset < 1024 pub(super) offset: u16, - pub(super) len: usize, - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) bit_width: u8, pub(super) packed: BufferHandle, pub(super) patches: Option, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } impl BitPackedArray { @@ -78,13 +76,11 @@ impl BitPackedArray { ) -> Self { Self { offset, - len, - dtype, + common: ArrayCommon::new(len, dtype), bit_width, packed, patches, validity, - stats_set: Default::default(), } } @@ -200,7 +196,7 @@ impl BitPackedArray { } pub fn ptype(&self) -> PType { - self.dtype.as_ptype() + self.common.dtype().as_ptype() } /// Underlying bit packed values as byte array @@ -289,7 +285,7 @@ impl BitPackedArray { BitPackedArrayParts { offset: self.offset, bit_width: self.bit_width, - len: self.len, + len: self.common.len(), packed: self.packed, patches: self.patches, validity: self.validity, diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index d213fb9f1ed..77cf102e440 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -74,15 +74,15 @@ impl VTable for BitPackedVTable { } fn len(array: &BitPackedArray) -> usize { - array.len + array.common.len() } fn dtype(array: &BitPackedArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &BitPackedArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -91,8 +91,8 @@ impl VTable for BitPackedVTable { precision: Precision, ) { array.offset.hash(state); - array.len.hash(state); - array.dtype.hash(state); + array.common.len().hash(state); + array.common.dtype().hash(state); array.bit_width.hash(state); array.packed.array_hash(state, precision); array.patches.array_hash(state, precision); @@ -101,8 +101,8 @@ impl VTable for BitPackedVTable { fn array_eq(array: &BitPackedArray, other: &BitPackedArray, precision: Precision) -> bool { array.offset == other.offset - && array.len == other.len - && array.dtype == other.dtype + && array.common.len() == other.common.len() + && array.common.dtype() == other.common.dtype() && array.bit_width == other.bit_width && array.packed.array_eq(&other.packed, precision) && array.patches.array_eq(&other.patches, precision) @@ -141,7 +141,7 @@ impl VTable for BitPackedVTable { idx, ) } else if idx < pc + validity_nchildren(&array.validity) { - validity_to_child(&array.validity, array.len) + validity_to_child(&array.validity, array.common.len()) .vortex_expect("BitPackedArray child index out of bounds") } else { vortex_panic!("BitPackedArray child index {idx} out of bounds") diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 66e3208c926..e83f8307cc4 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use fastlanes::FastLanes; +use vortex_array::ArrayCommon; use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; @@ -56,11 +57,9 @@ pub mod delta_decompress; #[derive(Clone, Debug)] pub struct DeltaArray { pub(super) offset: usize, - pub(super) len: usize, - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) bases: ArrayRef, pub(super) deltas: ArrayRef, - pub(super) stats_set: ArrayStats, } impl DeltaArray { @@ -141,13 +140,12 @@ impl DeltaArray { offset: usize, logical_len: usize, ) -> Self { + let dtype = bases.dtype().with_nullability(deltas.dtype().nullability()); Self { offset, - len: logical_len, - dtype: bases.dtype().with_nullability(deltas.dtype().nullability()), + common: ArrayCommon::new(logical_len, dtype), bases, deltas, - stats_set: Default::default(), } } @@ -170,17 +168,17 @@ impl DeltaArray { #[inline] pub fn len(&self) -> usize { - self.len + self.common.len() } #[inline] pub fn is_empty(&self) -> bool { - self.len == 0 + self.common.len() == 0 } #[inline] pub fn dtype(&self) -> &DType { - &self.dtype + self.common.dtype() } #[inline] @@ -201,7 +199,7 @@ impl DeltaArray { #[inline] pub(crate) fn stats_set(&self) -> &ArrayStats { - &self.stats_set + self.common.stats() } } diff --git a/encodings/fastlanes/src/for/array/mod.rs b/encodings/fastlanes/src/for/array/mod.rs index 18f4e6ee415..5542c336657 100644 --- a/encodings/fastlanes/src/for/array/mod.rs +++ b/encodings/fastlanes/src/for/array/mod.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_array::ArrayCommon; use vortex_array::ArrayRef; use vortex_array::dtype::PType; use vortex_array::scalar::Scalar; @@ -19,7 +20,7 @@ pub mod for_decompress; pub struct FoRArray { pub(super) encoded: ArrayRef, pub(super) reference: Scalar, - pub(super) stats_set: ArrayStats, + pub(super) common: ArrayCommon, } impl FoRArray { @@ -33,18 +34,22 @@ impl FoRArray { .with_nullability(encoded.dtype().nullability()), )?; + let len = encoded.len(); + let dtype = reference.dtype().clone(); Ok(Self { encoded, reference, - stats_set: Default::default(), + common: ArrayCommon::new(len, dtype), }) } pub(crate) unsafe fn new_unchecked(encoded: ArrayRef, reference: Scalar) -> Self { + let len = encoded.len(); + let dtype = reference.dtype().clone(); Self { encoded, reference, - stats_set: Default::default(), + common: ArrayCommon::new(len, dtype), } } @@ -65,6 +70,6 @@ impl FoRArray { #[inline] pub(crate) fn stats_set(&self) -> &ArrayStats { - &self.stats_set + self.common.stats() } } diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index cef31f8250c..df173e39203 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -52,15 +52,15 @@ impl VTable for FoRVTable { } fn len(array: &FoRArray) -> usize { - array.encoded().len() + array.common.len() } fn dtype(array: &FoRArray) -> &DType { - array.reference_scalar().dtype() + array.common.dtype() } fn stats(array: &FoRArray) -> StatsSetRef<'_> { - array.stats_set().to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &FoRArray, state: &mut H, precision: Precision) { diff --git a/encodings/fastlanes/src/rle/array/mod.rs b/encodings/fastlanes/src/rle/array/mod.rs index 83f75ad45ae..95772416889 100644 --- a/encodings/fastlanes/src/rle/array/mod.rs +++ b/encodings/fastlanes/src/rle/array/mod.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_array::ArrayCommon; use vortex_array::ArrayRef; use vortex_array::DynArray; use vortex_array::dtype::DType; @@ -16,7 +17,7 @@ pub mod rle_decompress; #[derive(Clone, Debug)] pub struct RLEArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, /// Run value in the dictionary. pub(super) values: ArrayRef, /// Chunk-local indices from all chunks. The start of each chunk is looked up in `values_idx_offsets`. @@ -32,10 +33,8 @@ pub struct RLEArray { /// ``` pub(super) values_idx_offsets: ArrayRef, - pub(super) stats_set: ArrayStats, // Offset relative to the start of the chunk. pub(super) offset: usize, - pub(super) length: usize, } impl RLEArray { @@ -108,13 +107,11 @@ impl RLEArray { let dtype = DType::Primitive(values.dtype().as_ptype(), indices.dtype().nullability()); Ok(Self { - dtype, + common: ArrayCommon::new(length, dtype), values, indices, values_idx_offsets, - stats_set: ArrayStats::default(), offset, - length, }) } @@ -136,29 +133,27 @@ impl RLEArray { length: usize, ) -> Self { Self { - dtype, + common: ArrayCommon::new(length, dtype), values, indices, values_idx_offsets, - stats_set: ArrayStats::default(), offset, - length, } } #[inline] pub fn len(&self) -> usize { - self.length + self.common.len() } #[inline] pub fn is_empty(&self) -> bool { - self.length == 0 + self.common.len() == 0 } #[inline] pub fn dtype(&self) -> &DType { - &self.dtype + self.common.dtype() } #[inline] @@ -209,7 +204,7 @@ impl RLEArray { #[inline] pub(crate) fn stats_set(&self) -> &ArrayStats { - &self.stats_set + self.common.stats() } } diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 175ca6fdb59..984738aa072 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -10,6 +10,7 @@ use std::sync::LazyLock; use fsst::Compressor; use fsst::Decompressor; use fsst::Symbol; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -30,7 +31,6 @@ use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -85,19 +85,19 @@ impl VTable for FSSTVTable { } fn len(array: &FSSTArray) -> usize { - array.codes().len() + array.common.len() } fn dtype(array: &FSSTArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &FSSTArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &FSSTArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.symbols.array_hash(state, precision); array.symbol_lengths.array_hash(state, precision); array.codes.as_ref().array_hash(state, precision); @@ -105,7 +105,7 @@ impl VTable for FSSTVTable { } fn array_eq(array: &FSSTArray, other: &FSSTArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.symbols.array_eq(&other.symbols, precision) && array .symbol_lengths @@ -362,7 +362,7 @@ impl VTable for FSSTVTable { #[derive(Clone)] pub struct FSSTArray { - dtype: DType, + common: ArrayCommon, symbols: Buffer, symbol_lengths: Buffer, codes: VarBinArray, @@ -370,7 +370,6 @@ pub struct FSSTArray { codes_array: ArrayRef, /// Lengths of the original values before compression, can be compressed. uncompressed_lengths: ArrayRef, - stats_set: ArrayStats, /// Memoized compressor used for push-down of compute by compressing the RHS. compressor: Arc Compressor + Send>>>, @@ -379,7 +378,7 @@ pub struct FSSTArray { impl Debug for FSSTArray { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("FSSTArray") - .field("dtype", &self.dtype) + .field("common", &self.common) .field("symbols", &self.symbols) .field("symbol_lengths", &self.symbol_lengths) .field("codes", &self.codes) @@ -457,16 +456,16 @@ impl FSSTArray { Compressor::rebuild_from(symbols2.as_slice(), symbol_lengths2.as_slice()) }) as Box Compressor + Send>)); + let len = codes.len(); let codes_array = codes.clone().into_array(); Self { - dtype, + common: ArrayCommon::new(len, dtype), symbols, symbol_lengths, codes, codes_array, uncompressed_lengths, - stats_set: Default::default(), compressor, } } diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index e1c99d37d45..bd9627cbad3 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -15,6 +15,7 @@ use pco::wrapped::ChunkDecompressor; use pco::wrapped::FileCompressor; use pco::wrapped::FileDecompressor; use prost::Message; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -32,7 +33,6 @@ use vortex_array::dtype::PType; use vortex_array::dtype::half; use vortex_array::scalar::Scalar; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -94,19 +94,19 @@ impl VTable for PcoVTable { } fn len(array: &PcoArray) -> usize { - array.slice_stop - array.slice_start + array.common.len() } fn dtype(array: &PcoArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &PcoArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &PcoArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.unsliced_validity.array_hash(state, precision); array.unsliced_n_rows.hash(state); array.slice_start.hash(state); @@ -121,7 +121,7 @@ impl VTable for PcoVTable { } fn array_eq(array: &PcoArray, other: &PcoArray, precision: Precision) -> bool { - if array.dtype != other.dtype + if array.common.dtype() != other.common.dtype() || !array .unsliced_validity .array_eq(&other.unsliced_validity, precision) @@ -253,7 +253,7 @@ impl VTable for PcoVTable { ); if children.is_empty() { - array.unsliced_validity = Validity::from(array.dtype.nullability()); + array.unsliced_validity = Validity::from(array.common.dtype().nullability()); } else { array.unsliced_validity = Validity::Array(children.into_iter().next().vortex_expect("validity child")); @@ -317,10 +317,9 @@ pub struct PcoArray { pub(crate) chunk_metas: Vec, pub(crate) pages: Vec, pub(crate) metadata: PcoMetadata, - dtype: DType, + pub(crate) common: ArrayCommon, pub(crate) unsliced_validity: Validity, unsliced_n_rows: usize, - stats_set: ArrayStats, slice_start: usize, slice_stop: usize, } @@ -338,10 +337,9 @@ impl PcoArray { chunk_metas, pages, metadata, - dtype, + common: ArrayCommon::new(len, dtype), unsliced_validity: validity, unsliced_n_rows: len, - stats_set: Default::default(), slice_start: 0, slice_stop: len, } @@ -439,7 +437,7 @@ impl PcoArray { pub fn decompress(&self) -> VortexResult { // To start, we figure out which chunks and pages we need to decompress, and with // what value offset into the first such page. - let number_type = number_type_from_dtype(&self.dtype); + let number_type = number_type_from_dtype(self.common.dtype()); let values_byte_buffer = match_number_enum!( number_type, NumberType => { @@ -449,10 +447,10 @@ impl PcoArray { Ok(PrimitiveArray::from_values_byte_buffer( values_byte_buffer, - self.dtype.as_ptype(), + self.common.dtype().as_ptype(), self.unsliced_validity .slice(self.slice_start..self.slice_stop)?, - self.slice_stop - self.slice_start, + self.common.len(), )) } @@ -530,16 +528,18 @@ impl PcoArray { } pub(crate) fn _slice(&self, start: usize, stop: usize) -> Self { + let new_start = self.slice_start + start; + let new_stop = self.slice_start + stop; PcoArray { - slice_start: self.slice_start + start, - slice_stop: self.slice_start + stop, - stats_set: Default::default(), + common: ArrayCommon::new(new_stop - new_start, self.common.dtype().clone()), + slice_start: new_start, + slice_stop: new_stop, ..self.clone() } } pub(crate) fn dtype(&self) -> &DType { - &self.dtype + self.common.dtype() } pub(crate) fn slice_start(&self) -> usize { diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index d36b77ffbb4..0bfb439c010 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -4,6 +4,7 @@ use std::fmt::Debug; use std::hash::Hash; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -24,7 +25,6 @@ use vortex_array::scalar::PValue; use vortex_array::search_sorted::SearchSorted; use vortex_array::search_sorted::SearchSortedSide; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -69,29 +69,29 @@ impl VTable for RunEndVTable { } fn len(array: &RunEndArray) -> usize { - array.length + array.common.len() } fn dtype(array: &RunEndArray) -> &DType { - array.values.dtype() + array.common.dtype() } fn stats(array: &RunEndArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &RunEndArray, state: &mut H, precision: Precision) { array.ends.array_hash(state, precision); array.values.array_hash(state, precision); array.offset.hash(state); - array.length.hash(state); + array.common.len().hash(state); } fn array_eq(array: &RunEndArray, other: &RunEndArray, precision: Precision) -> bool { array.ends.array_eq(&other.ends, precision) && array.values.array_eq(&other.values, precision) && array.offset == other.offset - && array.length == other.length + && array.common.len() == other.common.len() } fn nbuffers(_array: &RunEndArray) -> usize { @@ -209,11 +209,10 @@ impl VTable for RunEndVTable { #[derive(Clone, Debug)] pub struct RunEndArray { + common: ArrayCommon, ends: ArrayRef, values: ArrayRef, offset: usize, - length: usize, - stats_set: ArrayStats, } pub struct RunEndArrayParts { @@ -359,13 +358,13 @@ impl RunEndArray { length: usize, ) -> VortexResult { Self::validate(&ends, &values, offset, length)?; + let dtype = values.dtype().clone(); Ok(Self { + common: ArrayCommon::new(length, dtype), ends, values, offset, - length, - stats_set: Default::default(), }) } @@ -383,12 +382,12 @@ impl RunEndArray { offset: usize, length: usize, ) -> Self { + let dtype = values.dtype().clone(); Self { + common: ArrayCommon::new(length, dtype), ends, values, offset, - length, - stats_set: Default::default(), } } diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index 33612e94b67..ca4d9e44d2c 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -4,6 +4,7 @@ use std::hash::Hash; use num_traits::cast::FromPrimitive; +use vortex_array::ArrayCommon; use vortex_array::ArrayRef; use vortex_array::DeserializeMetadata; use vortex_array::ExecutionCtx; @@ -74,11 +75,9 @@ pub struct SequenceArrayParts { #[derive(Clone, Debug)] /// An array representing the equation `A[i] = base + i * multiplier`. pub struct SequenceArray { + common: ArrayCommon, base: PValue, multiplier: PValue, - dtype: DType, - pub(crate) len: usize, - stats_set: ArrayStats, } impl SequenceArray { @@ -162,16 +161,14 @@ impl SequenceArray { }; Self { + common: ArrayCommon::new_with_stats(length, dtype, ArrayStats::from(stats_set)), base, multiplier, - dtype, - len: length, - stats_set: ArrayStats::from(stats_set), } } pub fn ptype(&self) -> PType { - self.dtype.as_ptype() + self.common.dtype().as_ptype() } pub fn base(&self) -> PValue { @@ -203,7 +200,10 @@ impl SequenceArray { } pub(crate) fn index_value(&self, idx: usize) -> PValue { - assert!(idx < self.len, "index_value({idx}): index out of bounds"); + assert!( + idx < self.common.len(), + "index_value({idx}): index out of bounds" + ); match_each_native_ptype!(self.ptype(), |P| { let base = self.base.cast::

().vortex_expect("must be able to cast"); @@ -219,7 +219,7 @@ impl SequenceArray { /// Returns the validated final value of a sequence array pub fn last(&self) -> PValue { - Self::try_last(self.base, self.multiplier, self.ptype(), self.len) + Self::try_last(self.base, self.multiplier, self.ptype(), self.common.len()) .vortex_expect("validated array") } @@ -227,9 +227,9 @@ impl SequenceArray { SequenceArrayParts { base: self.base, multiplier: self.multiplier, - len: self.len, - ptype: self.dtype.as_ptype(), - nullability: self.dtype.nullability(), + len: self.common.len(), + ptype: self.common.dtype().as_ptype(), + nullability: self.common.dtype().nullability(), } } } @@ -246,15 +246,15 @@ impl VTable for SequenceVTable { } fn len(array: &SequenceArray) -> usize { - array.len + array.common.len() } fn dtype(array: &SequenceArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &SequenceArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -264,15 +264,15 @@ impl VTable for SequenceVTable { ) { array.base.hash(state); array.multiplier.hash(state); - array.dtype.hash(state); - array.len.hash(state); + array.common.dtype().hash(state); + array.common.len().hash(state); } fn array_eq(array: &SequenceArray, other: &SequenceArray, _precision: Precision) -> bool { array.base == other.base && array.multiplier == other.multiplier - && array.dtype == other.dtype - && array.len == other.len + && array.common.dtype() == other.common.dtype() + && array.common.len() == other.common.len() } fn nbuffers(_array: &SequenceArray) -> usize { diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 69cf9ca715d..f78ee9dad49 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -6,6 +6,7 @@ use std::hash::Hash; use kernel::PARENT_KERNELS; use prost::Message as _; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -25,7 +26,6 @@ use vortex_array::scalar::Scalar; use vortex_array::scalar::ScalarValue; use vortex_array::scalar_fn::fns::operators::Operator; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -84,15 +84,15 @@ impl VTable for SparseVTable { } fn len(array: &SparseArray) -> usize { - array.patches.array_len() + array.common.len() } fn dtype(array: &SparseArray) -> &DType { - array.fill_scalar().dtype() + array.common.dtype() } fn stats(array: &SparseArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &SparseArray, state: &mut H, precision: Precision) { @@ -262,9 +262,9 @@ impl VTable for SparseVTable { #[derive(Clone, Debug)] pub struct SparseArray { + common: ArrayCommon, patches: Patches, fill_value: Scalar, - stats_set: ArrayStats, } #[derive(Debug)] @@ -306,11 +306,12 @@ impl SparseArray { } } + let dtype = fill_value.dtype().clone(); Ok(Self { + common: ArrayCommon::new(len, dtype), // TODO(0ax1): handle chunk offsets patches: Patches::new(len, 0, indices, values, None)?, fill_value, - stats_set: Default::default(), }) } @@ -323,19 +324,23 @@ impl SparseArray { patches.values().dtype(), fill_value.dtype(), ); + let len = patches.array_len(); + let dtype = fill_value.dtype().clone(); Ok(Self { + common: ArrayCommon::new(len, dtype), patches, fill_value, - stats_set: Default::default(), }) } pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> Self { + let len = patches.array_len(); + let dtype = fill_value.dtype().clone(); Self { + common: ArrayCommon::new(len, dtype), patches, fill_value, - stats_set: Default::default(), } } diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 319c7e2616d..5386cc5c454 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -3,6 +3,7 @@ use std::hash::Hash; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -17,7 +18,6 @@ use vortex_array::dtype::PType; use vortex_array::match_each_unsigned_integer_ptype; use vortex_array::scalar::Scalar; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; @@ -52,24 +52,25 @@ impl VTable for ZigZagVTable { } fn len(array: &ZigZagArray) -> usize { - array.encoded.len() + array.common.len() } fn dtype(array: &ZigZagArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ZigZagArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ZigZagArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.encoded.array_hash(state, precision); } fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool { - array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision) + array.common.dtype() == other.common.dtype() + && array.encoded.array_eq(&other.encoded, precision) } fn nbuffers(_array: &ZigZagArray) -> usize { @@ -172,9 +173,8 @@ impl VTable for ZigZagVTable { #[derive(Clone, Debug)] pub struct ZigZagArray { - dtype: DType, + common: ArrayCommon, encoded: ArrayRef, - stats_set: ArrayStats, } #[derive(Debug)] @@ -197,11 +197,11 @@ impl ZigZagArray { let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed()) .with_nullability(encoded_dtype.nullability()); + let len = encoded.len(); Ok(Self { - dtype, + common: ArrayCommon::new(len, dtype), encoded, - stats_set: Default::default(), }) } diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index bd35689f418..db16ad9a56a 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use itertools::Itertools as _; use prost::Message as _; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -26,7 +27,6 @@ use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::scalar::Scalar; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; @@ -92,15 +92,15 @@ impl VTable for ZstdVTable { } fn len(array: &ZstdArray) -> usize { - array.slice_stop - array.slice_start + array.common.len() } fn dtype(array: &ZstdArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ZstdArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ZstdArray, state: &mut H, precision: Precision) { @@ -116,7 +116,7 @@ impl VTable for ZstdVTable { for frame in &array.frames { frame.array_hash(state, precision); } - array.dtype.hash(state); + array.common.dtype().hash(state); array.unsliced_validity.array_hash(state, precision); array.unsliced_n_rows.hash(state); array.slice_start.hash(state); @@ -139,7 +139,7 @@ impl VTable for ZstdVTable { return false; } } - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array .unsliced_validity .array_eq(&other.unsliced_validity, precision) @@ -263,7 +263,7 @@ impl VTable for ZstdVTable { ); array.unsliced_validity = if children.is_empty() { - Validity::from(array.dtype.nullability()) + Validity::from(array.common.dtype().nullability()) } else { Validity::Array(children.into_iter().next().vortex_expect("checked")) }; @@ -296,10 +296,9 @@ pub struct ZstdArray { pub(crate) dictionary: Option, pub(crate) frames: Vec, pub(crate) metadata: ZstdMetadata, - dtype: DType, + pub(crate) common: ArrayCommon, pub(crate) unsliced_validity: Validity, unsliced_n_rows: usize, - stats_set: ArrayStats, slice_start: usize, slice_stop: usize, } @@ -411,10 +410,9 @@ impl ZstdArray { dictionary, frames, metadata, - dtype, + common: ArrayCommon::new(n_rows, dtype), unsliced_validity: validity, unsliced_n_rows: n_rows, - stats_set: Default::default(), slice_start: 0, slice_stop: n_rows, } @@ -691,8 +689,8 @@ impl ZstdArray { } fn byte_width(&self) -> usize { - if self.dtype.is_primitive() { - self.dtype.as_ptype().byte_width() + if self.common.dtype().is_primitive() { + self.common.dtype().as_ptype().byte_width() } else { 1 } @@ -790,14 +788,15 @@ impl ZstdArray { ); slice_validity = Validity::NonNullable; - } else if self.dtype.is_nullable() && slice_validity == Validity::NonNullable { + } else if self.common.dtype().is_nullable() && slice_validity == Validity::NonNullable { slice_validity = Validity::AllValid; } // // END OF IMPORTANT BLOCK // - match &self.dtype { + let dtype = self.common.dtype(); + match dtype { DType::Primitive(..) => { let slice_values_buffer = decompressed.slice( (slice_value_idx_start - n_skipped_values) * byte_width @@ -805,7 +804,7 @@ impl ZstdArray { ); let primitive = PrimitiveArray::from_values_byte_buffer( slice_values_buffer, - self.dtype.as_ptype(), + dtype.as_ptype(), slice_validity, slice_n_rows, ); @@ -828,14 +827,14 @@ impl ZstdArray { VarBinViewArray::new_unchecked( valid_views, Arc::from([decompressed]), - self.dtype.clone(), + dtype.clone(), slice_validity, ) } .into_array()) } AllOr::None => Ok(ConstantArray::new( - Scalar::null(self.dtype.clone()), + Scalar::null(dtype.clone()), slice_n_rows, ) .into_array()), @@ -858,7 +857,7 @@ impl ZstdArray { VarBinViewArray::new_unchecked( views.freeze(), Arc::from([decompressed]), - self.dtype.clone(), + dtype.clone(), slice_validity, ) } @@ -866,7 +865,7 @@ impl ZstdArray { } } } - _ => vortex_panic!("Unsupported dtype for Zstd array: {}", self.dtype), + _ => vortex_panic!("Unsupported dtype for Zstd array: {}", dtype), } } @@ -886,10 +885,12 @@ impl ZstdArray { self.slice_stop ); + let new_start = self.slice_start + start; + let new_stop = self.slice_start + stop; ZstdArray { - slice_start: self.slice_start + start, - slice_stop: self.slice_start + stop, - stats_set: Default::default(), + common: ArrayCommon::new(new_stop - new_start, self.common.dtype().clone()), + slice_start: new_start, + slice_stop: new_stop, ..self.clone() } } @@ -900,7 +901,7 @@ impl ZstdArray { dictionary: self.dictionary, frames: self.frames, metadata: self.metadata, - dtype: self.dtype, + dtype: self.common.into_dtype(), validity: self.unsliced_validity, n_rows: self.unsliced_n_rows, slice_start: self.slice_start, @@ -909,7 +910,7 @@ impl ZstdArray { } pub(crate) fn dtype(&self) -> &DType { - &self.dtype + self.common.dtype() } pub(crate) fn slice_start(&self) -> usize { diff --git a/encodings/zstd/src/zstd_buffers.rs b/encodings/zstd/src/zstd_buffers.rs index 30c26bbf30a..f9baaf4da48 100644 --- a/encodings/zstd/src/zstd_buffers.rs +++ b/encodings/zstd/src/zstd_buffers.rs @@ -6,6 +6,7 @@ use std::hash::Hash; use std::sync::Arc; use prost::Message as _; +use vortex_array::ArrayCommon; use vortex_array::ArrayEq; use vortex_array::ArrayHash; use vortex_array::ArrayRef; @@ -17,7 +18,6 @@ use vortex_array::dtype::DType; use vortex_array::scalar::Scalar; use vortex_array::serde::ArrayChildren; use vortex_array::session::ArraySessionExt; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; @@ -56,9 +56,7 @@ pub struct ZstdBuffersArray { uncompressed_sizes: Vec, buffer_alignments: Vec, children: Vec, - dtype: DType, - len: usize, - stats_set: ArrayStats, + pub(crate) common: ArrayCommon, } #[derive(Clone, Debug)] @@ -172,12 +170,11 @@ impl ZstdBuffersArray { uncompressed_sizes, buffer_alignments, children, - dtype: array.dtype().clone(), - len: array.len(), - stats_set: Default::default(), + common: ArrayCommon::new(array.len(), array.dtype().clone()), }; compressed - .stats_set + .common + .stats() .to_ref(compressed.as_ref()) .inherit_from(array.statistics()); Ok(compressed) @@ -250,8 +247,8 @@ impl ZstdBuffersArray { let children = self.children.as_slice(); inner_vtable.build( self.inner_encoding_id.clone(), - &self.dtype, - self.len, + self.common.dtype(), + self.common.len(), &self.inner_metadata, buffer_handles, &children, @@ -334,15 +331,15 @@ impl VTable for ZstdBuffersVTable { } fn len(array: &ZstdBuffersArray) -> usize { - array.len + array.common.len() } fn dtype(array: &ZstdBuffersArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ZstdBuffersArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -357,8 +354,8 @@ impl VTable for ZstdBuffersVTable { } array.uncompressed_sizes.hash(state); array.buffer_alignments.hash(state); - array.dtype.hash(state); - array.len.hash(state); + array.common.dtype().hash(state); + array.common.len().hash(state); for child in &array.children { child.array_hash(state, precision); } @@ -375,8 +372,8 @@ impl VTable for ZstdBuffersVTable { .all(|(a, b)| a.array_eq(b, precision)) && array.uncompressed_sizes == other.uncompressed_sizes && array.buffer_alignments == other.buffer_alignments - && array.dtype == other.dtype - && array.len == other.len + && array.common.dtype() == other.common.dtype() + && array.common.len() == other.common.len() && array.children.len() == other.children.len() && array .children @@ -452,9 +449,7 @@ impl VTable for ZstdBuffersVTable { uncompressed_sizes: metadata.0.uncompressed_sizes.clone(), buffer_alignments: metadata.0.buffer_alignments.clone(), children: child_arrays, - dtype: dtype.clone(), - len, - stats_set: Default::default(), + common: ArrayCommon::new(len, dtype.clone()), }; array.validate()?; @@ -485,7 +480,7 @@ impl OperationsVTable for ZstdBuffersVTable { impl ValidityVTable for ZstdBuffersVTable { fn validity(array: &ZstdBuffersArray) -> VortexResult { - if !array.dtype.is_nullable() { + if !array.common.dtype().is_nullable() { return Ok(vortex_array::validity::Validity::NonNullable); } @@ -553,8 +548,8 @@ mod tests { fn test_roundtrip(#[case] input: ArrayRef) -> VortexResult<()> { let compressed = ZstdBuffersArray::compress(&input, 3)?; - assert_eq!(compressed.len, input.len()); - assert_eq!(&compressed.dtype, input.dtype()); + assert_eq!(compressed.common.len(), input.len()); + assert_eq!(compressed.common.dtype(), input.dtype()); let mut ctx = LEGACY_SESSION.create_execution_ctx(); let decompressed = compressed.into_array().execute::(&mut ctx)?; diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 2b72aab9d90..35d20a0ada1 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2844,7 +2844,7 @@ pub fn vortex_array::arrays::null::NullVTable::child_name(_array: &vortex_array: pub fn vortex_array::arrays::null::NullVTable::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::arrays::null::NullVTable::dtype(_array: &vortex_array::arrays::null::NullArray) -> &vortex_array::dtype::DType +pub fn vortex_array::arrays::null::NullVTable::dtype(array: &vortex_array::arrays::null::NullArray) -> &vortex_array::dtype::DType pub fn vortex_array::arrays::null::NullVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult @@ -6522,7 +6522,7 @@ pub fn vortex_array::arrays::null::NullVTable::child_name(_array: &vortex_array: pub fn vortex_array::arrays::null::NullVTable::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::arrays::null::NullVTable::dtype(_array: &vortex_array::arrays::null::NullArray) -> &vortex_array::dtype::DType +pub fn vortex_array::arrays::null::NullVTable::dtype(array: &vortex_array::arrays::null::NullArray) -> &vortex_array::dtype::DType pub fn vortex_array::arrays::null::NullVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult @@ -21270,7 +21270,7 @@ pub fn vortex_array::arrays::null::NullVTable::child_name(_array: &vortex_array: pub fn vortex_array::arrays::null::NullVTable::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::arrays::null::NullVTable::dtype(_array: &vortex_array::arrays::null::NullArray) -> &vortex_array::dtype::DType +pub fn vortex_array::arrays::null::NullVTable::dtype(array: &vortex_array::arrays::null::NullArray) -> &vortex_array::dtype::DType pub fn vortex_array::arrays::null::NullVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult @@ -21872,6 +21872,36 @@ pub fn vortex_array::ArrayAdapter::node_dtype(&self) -> vortex_error::VortexR pub fn vortex_array::ArrayAdapter::scalar_fn(&self) -> core::option::Option<&vortex_array::scalar_fn::ScalarFnRef> +pub struct vortex_array::ArrayCommon + +impl vortex_array::ArrayCommon + +pub fn vortex_array::ArrayCommon::dtype(&self) -> &vortex_array::dtype::DType + +pub fn vortex_array::ArrayCommon::dtype_mut(&mut self) -> &mut vortex_array::dtype::DType + +pub fn vortex_array::ArrayCommon::into_dtype(self) -> vortex_array::dtype::DType + +pub fn vortex_array::ArrayCommon::is_empty(&self) -> bool + +pub fn vortex_array::ArrayCommon::len(&self) -> usize + +pub fn vortex_array::ArrayCommon::new(len: usize, dtype: vortex_array::dtype::DType) -> Self + +pub fn vortex_array::ArrayCommon::new_with_stats(len: usize, dtype: vortex_array::dtype::DType, stats: vortex_array::stats::ArrayStats) -> Self + +pub fn vortex_array::ArrayCommon::set_len(&mut self, len: usize) + +pub fn vortex_array::ArrayCommon::stats(&self) -> &vortex_array::stats::ArrayStats + +impl core::clone::Clone for vortex_array::ArrayCommon + +pub fn vortex_array::ArrayCommon::clone(&self) -> vortex_array::ArrayCommon + +impl core::fmt::Debug for vortex_array::ArrayCommon + +pub fn vortex_array::ArrayCommon::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub struct vortex_array::CanonicalValidity(pub vortex_array::Canonical) impl vortex_array::Executable for vortex_array::CanonicalValidity diff --git a/vortex-array/src/array/common.rs b/vortex-array/src/array/common.rs new file mode 100644 index 00000000000..296978b6e5d --- /dev/null +++ b/vortex-array/src/array/common.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::dtype::DType; +use crate::stats::ArrayStats; + +/// Common fields shared by all array types. +/// +/// This type will be used during the migration from dynamic array trait to the vtable structs. +/// In the first phase, all arrays will be converted to have a common struct for shared fields. +/// In the second phase, we invert the relationship to have an `Array` where the common fields +/// are hoisted into the generic array struct. +#[derive(Clone, Debug)] +pub struct ArrayCommon { + len: usize, + dtype: DType, + stats: ArrayStats, +} + +impl ArrayCommon { + /// Creates a new `ArrayCommon` with default stats. + pub fn new(len: usize, dtype: DType) -> Self { + Self { + len, + dtype, + stats: ArrayStats::default(), + } + } + + /// Creates a new `ArrayCommon` with pre-existing stats. + pub fn new_with_stats(len: usize, dtype: DType, stats: ArrayStats) -> Self { + Self { len, dtype, stats } + } + + /// Returns the number of elements in the array. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Returns whether the array is empty (has zero elements). + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the logical [`DType`] of the array. + #[inline] + pub fn dtype(&self) -> &DType { + &self.dtype + } + + /// Returns a mutable reference to the [`DType`]. + #[inline] + pub fn dtype_mut(&mut self) -> &mut DType { + &mut self.dtype + } + + /// Sets the number of elements in the array. + #[inline] + pub fn set_len(&mut self, len: usize) { + self.len = len; + } + + /// Consumes this `ArrayCommon` and returns the owned [`DType`]. + #[inline] + pub fn into_dtype(self) -> DType { + self.dtype + } + + /// Returns the [`ArrayStats`] for this array. + #[inline] + pub fn stats(&self) -> &ArrayStats { + &self.stats + } +} diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index fc47631f35a..ef328f9e950 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +mod common; +pub use common::ArrayCommon; mod visitor; use std::any::Any; diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index 124aa3f2427..53947838296 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -9,12 +9,12 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_mask::Mask; +use crate::ArrayCommon; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::bool; use crate::buffer::BufferHandle; use crate::dtype::DType; -use crate::stats::ArrayStats; use crate::validity::Validity; /// A boolean array that stores true/false values in a compact bit-packed format. @@ -51,12 +51,10 @@ use crate::validity::Validity; /// ``` #[derive(Clone, Debug)] pub struct BoolArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) bits: BufferHandle, pub(super) offset: usize, - pub(super) len: usize, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } pub struct BoolArrayParts { @@ -101,12 +99,10 @@ impl BoolArray { let (offset, len, buffer) = bits.into_inner(); Ok(Self { - dtype: DType::Bool(validity.nullability()), + common: ArrayCommon::new(len, DType::Bool(validity.nullability())), bits: BufferHandle::new_host(buffer), offset, - len, validity, - stats_set: ArrayStats::default(), }) } @@ -138,12 +134,10 @@ impl BoolArray { ); Ok(Self { - dtype: DType::Bool(validity.nullability()), + common: ArrayCommon::new(len, DType::Bool(validity.nullability())), bits, offset, - len, validity, - stats_set: ArrayStats::default(), }) } @@ -159,12 +153,10 @@ impl BoolArray { let (offset, len, buffer) = bits.into_inner(); Self { - dtype: DType::Bool(validity.nullability()), + common: ArrayCommon::new(len, DType::Bool(validity.nullability())), bits: BufferHandle::new_host(buffer), offset, - len, validity, - stats_set: ArrayStats::default(), } } } @@ -197,7 +189,7 @@ impl BoolArray { BoolArrayParts { bits: self.bits, offset: self.offset, - len: self.len, + len: self.common.len(), validity: self.validity, } } @@ -219,7 +211,7 @@ impl BoolArray { pub fn to_bit_buffer(&self) -> BitBuffer { let buffer = self.bits.as_host().clone(); - BitBuffer::new_with_offset(buffer, self.len, self.offset) + BitBuffer::new_with_offset(buffer, self.common.len(), self.offset) } /// Returns the underlying [`BitBuffer`] of the array diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index f935f89236c..19fe62c24d2 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -60,25 +60,25 @@ impl VTable for BoolVTable { } fn len(array: &BoolArray) -> usize { - array.len + array.common.len() } fn dtype(array: &BoolArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &BoolArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &BoolArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.to_bit_buffer().array_hash(state, precision); array.validity.array_hash(state, precision); } fn array_eq(array: &BoolArray, other: &BoolArray, precision: Precision) -> bool { - if array.dtype != other.dtype { + if array.common.dtype() != other.common.dtype() { return false; } array diff --git a/vortex-array/src/arrays/chunked/array.rs b/vortex-array/src/arrays/chunked/array.rs index 1f37712b991..df08162753b 100644 --- a/vortex-array/src/arrays/chunked/array.rs +++ b/vortex-array/src/arrays/chunked/array.rs @@ -14,6 +14,7 @@ use vortex_error::VortexExpect as _; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; use crate::IntoArray; @@ -23,18 +24,15 @@ use crate::iter::ArrayIterator; use crate::iter::ArrayIteratorAdapter; use crate::search_sorted::SearchSorted; use crate::search_sorted::SearchSortedSide; -use crate::stats::ArrayStats; use crate::stream::ArrayStream; use crate::stream::ArrayStreamAdapter; use crate::validity::Validity; #[derive(Clone, Debug)] pub struct ChunkedArray { - pub(super) dtype: DType, - pub(super) len: usize, + pub(super) common: ArrayCommon, pub(super) chunk_offsets: PrimitiveArray, pub(super) chunks: Vec, - pub(super) stats_set: ArrayStats, } impl ChunkedArray { @@ -80,14 +78,14 @@ impl ChunkedArray { let chunk_offsets = PrimitiveArray::new(chunk_offsets_buf.freeze(), Validity::NonNullable); + let len: usize = curr_offset + .try_into() + .vortex_expect("chunk offset must fit in usize"); + Self { - dtype, - len: curr_offset - .try_into() - .vortex_expect("chunk offset must fit in usize"), + common: ArrayCommon::new(len, dtype), chunk_offsets, chunks, - stats_set: Default::default(), } } diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index ab05f262563..21bd4a4a44b 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -59,20 +59,20 @@ impl VTable for ChunkedVTable { } fn len(array: &ChunkedArray) -> usize { - array.len + array.common.len() } fn dtype(array: &ChunkedArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ChunkedArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ChunkedArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); - array.len.hash(state); + array.common.dtype().hash(state); + array.common.len().hash(state); array.chunk_offsets.as_ref().array_hash(state, precision); for chunk in &array.chunks { chunk.array_hash(state, precision); @@ -80,8 +80,8 @@ impl VTable for ChunkedVTable { } fn array_eq(array: &ChunkedArray, other: &ChunkedArray, precision: Precision) -> bool { - array.dtype == other.dtype - && array.len == other.len + array.common.dtype() == other.common.dtype() + && array.common.len() == other.common.len() && array .chunk_offsets .as_ref() @@ -189,11 +189,9 @@ impl VTable for ChunkedVTable { // Construct directly using the struct fields to avoid recomputing chunk_offsets Ok(ChunkedArray { - dtype: dtype.clone(), - len, + common: crate::ArrayCommon::new(len, dtype.clone()), chunk_offsets, chunks, - stats_set: Default::default(), }) } @@ -222,8 +220,9 @@ impl VTable for ChunkedVTable { let total_len = chunk_offsets_buf .last() .ok_or_else(|| vortex_err!("chunk_offsets must not be empty"))?; - array.len = usize::try_from(*total_len) + let len = usize::try_from(*total_len) .map_err(|_| vortex_err!("total length {} exceeds usize range", total_len))?; + array.common.set_len(len); Ok(()) } diff --git a/vortex-array/src/arrays/constant/array.rs b/vortex-array/src/arrays/constant/array.rs index e8bcb7d88c6..456873adc40 100644 --- a/vortex-array/src/arrays/constant/array.rs +++ b/vortex-array/src/arrays/constant/array.rs @@ -1,14 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use crate::ArrayCommon; use crate::scalar::Scalar; -use crate::stats::ArrayStats; #[derive(Clone, Debug)] pub struct ConstantArray { pub(super) scalar: Scalar, - pub(super) len: usize, - pub(super) stats_set: ArrayStats, + pub(super) common: ArrayCommon, } impl ConstantArray { @@ -17,11 +16,8 @@ impl ConstantArray { S: Into, { let scalar = scalar.into(); - Self { - scalar, - len, - stats_set: Default::default(), - } + let common = ArrayCommon::new(len, scalar.dtype().clone()); + Self { scalar, common } } /// Returns the [`Scalar`] value of this constant array. diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index 14017fd4044..ffe6a2378a9 100644 --- a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -62,15 +62,15 @@ impl VTable for ConstantVTable { } fn len(array: &ConstantArray) -> usize { - array.len + array.common.len() } fn dtype(array: &ConstantArray) -> &DType { - array.scalar.dtype() + array.common.dtype() } fn stats(array: &ConstantArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -79,11 +79,11 @@ impl VTable for ConstantVTable { _precision: Precision, ) { array.scalar.hash(state); - array.len.hash(state); + array.common.len().hash(state); } fn array_eq(array: &ConstantArray, other: &ConstantArray, _precision: Precision) -> bool { - array.scalar == other.scalar && array.len == other.len + array.scalar == other.scalar && array.common.len() == other.common.len() } fn nbuffers(_array: &ConstantArray) -> usize { diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index 9f1be7e487a..9d6507968fc 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -11,6 +11,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_panic; +use crate::ArrayCommon; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::PrimitiveArray; @@ -24,7 +25,6 @@ use crate::dtype::NativeDecimalType; use crate::match_each_decimal_value_type; use crate::match_each_integer_ptype; use crate::patches::Patches; -use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable::ValidityHelper; @@ -87,11 +87,10 @@ use crate::vtable::ValidityHelper; /// ``` #[derive(Clone, Debug)] pub struct DecimalArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) values: BufferHandle, pub(super) values_type: DecimalType, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } pub struct DecimalArrayParts { @@ -222,12 +221,13 @@ impl DecimalArray { .vortex_expect("[Debug Assertion]: Invalid `DecimalArray` parameters"); } + let dtype = DType::Decimal(decimal_dtype, validity.nullability()); + let len = values.len() / values_type.byte_width(); Self { + common: ArrayCommon::new(len, dtype), values, values_type, - dtype: DType::Decimal(decimal_dtype, validity.nullability()), validity, - stats_set: Default::default(), } } @@ -279,7 +279,11 @@ impl DecimalArray { } pub fn into_parts(self) -> DecimalArrayParts { - let decimal_dtype = self.dtype.into_decimal_opt().vortex_expect("cannot fail"); + let decimal_dtype = *self + .common + .dtype() + .as_decimal_opt() + .vortex_expect("cannot fail"); DecimalArrayParts { decimal_dtype, @@ -307,10 +311,10 @@ impl DecimalArray { /// Returns the decimal type information pub fn decimal_dtype(&self) -> DecimalDType { - if let DType::Decimal(decimal_dtype, _) = self.dtype { - decimal_dtype + if let DType::Decimal(decimal_dtype, _) = self.common.dtype() { + *decimal_dtype } else { - vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype) + vortex_panic!("Expected Decimal dtype, got {:?}", self.common.dtype()) } } diff --git a/vortex-array/src/arrays/decimal/compute/between.rs b/vortex-array/src/arrays/decimal/compute/between.rs index 01127a99acf..a95e1050d11 100644 --- a/vortex-array/src/arrays/decimal/compute/between.rs +++ b/vortex-array/src/arrays/decimal/compute/between.rs @@ -37,7 +37,7 @@ impl BetweenKernel for DecimalVTable { // NOTE: we know that have checked before that the lower and upper bounds are not all null. let nullability = - arr.dtype.nullability() | lower.dtype().nullability() | upper.dtype().nullability(); + arr.dtype().nullability() | lower.dtype().nullability() | upper.dtype().nullability(); match_each_decimal_value_type!(arr.values_type(), |D| { between_unpack::(arr, lower, upper, nullability, options) diff --git a/vortex-array/src/arrays/decimal/vtable/mod.rs b/vortex-array/src/arrays/decimal/vtable/mod.rs index 2f49a8c545b..4ad745aee4e 100644 --- a/vortex-array/src/arrays/decimal/vtable/mod.rs +++ b/vortex-array/src/arrays/decimal/vtable/mod.rs @@ -62,34 +62,26 @@ impl VTable for DecimalVTable { } fn len(array: &DecimalArray) -> usize { - let divisor = match array.values_type { - DecimalType::I8 => 1, - DecimalType::I16 => 2, - DecimalType::I32 => 4, - DecimalType::I64 => 8, - DecimalType::I128 => 16, - DecimalType::I256 => 32, - }; - array.values.len() / divisor + array.common.len() } fn dtype(array: &DecimalArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &DecimalArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &DecimalArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.values.array_hash(state, precision); std::mem::discriminant(&array.values_type).hash(state); array.validity.array_hash(state, precision); } fn array_eq(array: &DecimalArray, other: &DecimalArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.values.array_eq(&other.values, precision) && array.values_type == other.values_type && array.validity.array_eq(&other.validity, precision) @@ -194,7 +186,7 @@ impl VTable for DecimalVTable { ); if children.is_empty() { - array.validity = Validity::from(array.dtype.nullability()); + array.validity = Validity::from(array.common.dtype().nullability()); } else { array.validity = Validity::Array( children diff --git a/vortex-array/src/arrays/dict/array.rs b/vortex-array/src/arrays/dict/array.rs index 59662528240..d58ac415c11 100644 --- a/vortex-array/src/arrays/dict/array.rs +++ b/vortex-array/src/arrays/dict/array.rs @@ -8,12 +8,12 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_mask::AllOr; +use crate::ArrayCommon; use crate::ArrayRef; use crate::ToCanonical; use crate::dtype::DType; use crate::dtype::PType; use crate::match_each_integer_ptype; -use crate::stats::ArrayStats; #[derive(Clone, prost::Message)] pub struct DictMetadata { @@ -33,10 +33,9 @@ pub struct DictMetadata { #[derive(Debug, Clone)] pub struct DictArray { + pub(super) common: ArrayCommon, pub(super) codes: ArrayRef, pub(super) values: ArrayRef, - pub(super) stats_set: ArrayStats, - pub(super) dtype: DType, /// Indicates whether all dictionary values are definitely referenced by at least one code. /// `true` = all values are referenced (computed during encoding). /// `false` = unknown/might have unreferenced values. @@ -62,11 +61,11 @@ impl DictArray { let dtype = values .dtype() .union_nullability(codes.dtype().nullability()); + let len = codes.len(); Self { + common: ArrayCommon::new(len, dtype), codes, values, - stats_set: Default::default(), - dtype, all_values_referenced: false, } } @@ -124,7 +123,7 @@ impl DictArray { DictArrayParts { codes: self.codes, values: self.values, - dtype: self.dtype, + dtype: self.common.dtype().clone(), } } diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index 6c6555b596a..9298c398877 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -62,25 +62,25 @@ impl VTable for DictVTable { } fn len(array: &DictArray) -> usize { - array.codes.len() + array.common.len() } fn dtype(array: &DictArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &DictArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &DictArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.codes.array_hash(state, precision); array.values.array_hash(state, precision); } fn array_eq(array: &DictArray, other: &DictArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.codes.array_eq(&other.codes, precision) && array.values.array_eq(&other.values, precision) } diff --git a/vortex-array/src/arrays/extension/array.rs b/vortex-array/src/arrays/extension/array.rs index 8df5963a5e9..04eb1ca5ef2 100644 --- a/vortex-array/src/arrays/extension/array.rs +++ b/vortex-array/src/arrays/extension/array.rs @@ -1,11 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use crate::ArrayCommon; use crate::ArrayRef; use crate::dtype::DType; use crate::dtype::extension::ExtDTypeRef; use crate::dtype::extension::ExtId; -use crate::stats::ArrayStats; /// An extension array that wraps another array with additional type information. /// @@ -47,9 +47,8 @@ use crate::stats::ArrayStats; /// - Scalar access wraps storage scalars with extension metadata #[derive(Clone, Debug)] pub struct ExtensionArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) storage: ArrayRef, - pub(super) stats_set: ArrayStats, } impl ExtensionArray { @@ -59,15 +58,15 @@ impl ExtensionArray { storage.dtype(), "ExtensionArray: storage_dtype must match storage array DType", ); + let len = storage.len(); Self { - dtype: DType::Extension(ext_dtype), + common: ArrayCommon::new(len, DType::Extension(ext_dtype)), storage, - stats_set: ArrayStats::default(), } } pub fn ext_dtype(&self) -> &ExtDTypeRef { - let DType::Extension(ext) = &self.dtype else { + let DType::Extension(ext) = self.common.dtype() else { unreachable!("ExtensionArray: dtype must be an ExtDType") }; ext diff --git a/vortex-array/src/arrays/extension/compute/cast.rs b/vortex-array/src/arrays/extension/compute/cast.rs index 86ee5a5a573..cbf557e41a3 100644 --- a/vortex-array/src/arrays/extension/compute/cast.rs +++ b/vortex-array/src/arrays/extension/compute/cast.rs @@ -72,7 +72,7 @@ mod tests { let storage = Buffer::::empty().into_array(); let arr = ExtensionArray::new(ext_dtype.clone(), storage); - assert!(!arr.dtype.is_nullable()); + assert!(!arr.common.dtype().is_nullable()); let new_dtype = DType::Extension(ext_dtype).with_nullability(Nullability::Nullable); diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index 60bc27cffcd..63b48180e94 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -47,15 +47,15 @@ impl VTable for ExtensionVTable { } fn len(array: &ExtensionArray) -> usize { - array.storage.len() + array.common.len() } fn dtype(array: &ExtensionArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ExtensionArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -63,12 +63,13 @@ impl VTable for ExtensionVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.storage.array_hash(state, precision); } fn array_eq(array: &ExtensionArray, other: &ExtensionArray, precision: Precision) -> bool { - array.dtype == other.dtype && array.storage.array_eq(&other.storage, precision) + array.common.dtype() == other.common.dtype() + && array.storage.array_eq(&other.storage, precision) } fn nbuffers(_array: &ExtensionArray) -> usize { diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index 40857487a65..39d1fe05b90 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -6,8 +6,8 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure_eq; use vortex_mask::Mask; +use crate::ArrayCommon; use crate::ArrayRef; -use crate::stats::ArrayStats; /// Decomposed parts of the filter array. pub struct FilterArrayParts { @@ -30,8 +30,8 @@ pub struct FilterArray { /// The boolean mask selecting which elements to keep. pub(super) mask: Mask, - /// The stats for this array. - pub(super) stats: ArrayStats, + /// Common array fields (len, dtype, stats). + pub(super) common: ArrayCommon, } impl FilterArray { @@ -48,10 +48,12 @@ impl FilterArray { mask.len() ); + let len = mask.true_count(); + let dtype = array.dtype().clone(); Ok(Self { child: array, mask, - stats: ArrayStats::default(), + common: ArrayCommon::new(len, dtype), }) } diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index 23b22612873..3467e244b0c 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -56,15 +56,15 @@ impl VTable for FilterVTable { } fn len(array: &FilterArray) -> usize { - array.mask.true_count() + array.common.len() } fn dtype(array: &FilterArray) -> &DType { - array.child.dtype() + array.common.dtype() } fn stats(array: &FilterArray) -> StatsSetRef<'_> { - array.stats.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &FilterArray, state: &mut H, precision: Precision) { @@ -134,11 +134,7 @@ impl VTable for FilterVTable { ) -> VortexResult { assert_eq!(len, metadata.0.true_count()); let child = children.get(0, dtype, metadata.0.len())?; - Ok(FilterArray { - child, - mask: metadata.0.clone(), - stats: Default::default(), - }) + FilterArray::try_new(child, metadata.0.clone()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/fixed_size_list/array.rs b/vortex-array/src/arrays/fixed_size_list/array.rs index 12a9e044c11..b66a94eb936 100644 --- a/vortex-array/src/arrays/fixed_size_list/array.rs +++ b/vortex-array/src/arrays/fixed_size_list/array.rs @@ -7,10 +7,10 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; use crate::dtype::DType; -use crate::stats::ArrayStats; use crate::validity::Validity; /// The canonical encoding for fixed-size list arrays. @@ -62,10 +62,8 @@ use crate::validity::Validity; /// ``` #[derive(Clone, Debug)] pub struct FixedSizeListArray { - /// The [`DType`] of the fixed-size list. - /// - /// This type **must** be the variant [`DType::FixedSizeList`]. - pub(super) dtype: DType, + /// Common fields (len, dtype, stats) shared by all array types. + pub(super) common: ArrayCommon, /// The `elements` data array, where each fixed-size list scalar is a _slice_ of the `elements` /// array, and each inner list element is a _scalar_ of the `elements` array. @@ -85,18 +83,6 @@ pub struct FixedSizeListArray { /// sub-elements of fixed-size list scalars are null. The `elements` array will track individual /// value nullability. pub(super) validity: Validity, - - /// The length of the array. - /// - /// Note that this is different from the size of each fixed-size list scalar (`list_size`). - /// - /// The main reason we need to store this (rather than calculate it on the fly via `list_size` - /// and `elements.len()`) is because in the degenerate case where `list_size == 0`, we cannot - /// use `0 / 0` to determine the length. - pub(super) len: usize, - - /// The stats for this array. - pub(super) stats_set: ArrayStats, } impl FixedSizeListArray { @@ -158,19 +144,20 @@ impl FixedSizeListArray { .vortex_expect("[Debug Assertion]: Invalid `FixedSizeListArray` parameters"); let nullability = validity.nullability(); + let dtype = + DType::FixedSizeList(Arc::new(elements.dtype().clone()), list_size, nullability); Self { - dtype: DType::FixedSizeList(Arc::new(elements.dtype().clone()), list_size, nullability), + common: ArrayCommon::new(len, dtype), elements, list_size, validity, - len, - stats_set: Default::default(), } } pub fn into_parts(self) -> (ArrayRef, Validity, DType) { - (self.elements, self.validity, self.dtype) + let dtype = self.common.dtype().clone(); + (self.elements, self.validity, dtype) } /// Validates the components that would be used to create a [`FixedSizeListArray`]. @@ -226,10 +213,10 @@ impl FixedSizeListArray { /// Returns an error if the index is out of bounds or the slice operation fails. pub fn fixed_size_list_elements_at(&self, index: usize) -> VortexResult { debug_assert!( - index < self.len, + index < self.common.len(), "index {} out of bounds: the len is {}", index, - self.len, + self.common.len(), ); debug_assert!(self.validity.is_valid(index).unwrap_or(false)); diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index 4544dd976a4..68c8d82a84f 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -53,15 +53,15 @@ impl VTable for FixedSizeListVTable { } fn len(array: &FixedSizeListArray) -> usize { - array.len + array.common.len() } fn dtype(array: &FixedSizeListArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &FixedSizeListArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -69,11 +69,11 @@ impl VTable for FixedSizeListVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.elements().array_hash(state, precision); array.list_size().hash(state); array.validity.array_hash(state, precision); - array.len.hash(state); + array.common.len().hash(state); } fn array_eq( @@ -81,11 +81,11 @@ impl VTable for FixedSizeListVTable { other: &FixedSizeListArray, precision: Precision, ) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.elements().array_eq(other.elements(), precision) && array.list_size() == other.list_size() && array.validity.array_eq(&other.validity, precision) - && array.len == other.len + && array.common.len() == other.common.len() } fn nbuffers(_array: &FixedSizeListArray) -> usize { @@ -107,7 +107,7 @@ impl VTable for FixedSizeListVTable { fn child(array: &FixedSizeListArray, idx: usize) -> ArrayRef { match idx { 0 => array.elements().clone(), - 1 => validity_to_child(&array.validity, array.len()) + 1 => validity_to_child(&array.validity, array.common.len()) .vortex_expect("FixedSizeListArray validity child out of bounds"), _ => vortex_panic!("FixedSizeListArray child index {idx} out of bounds"), } @@ -209,11 +209,11 @@ impl VTable for FixedSizeListVTable { let validity = if let Some(validity_array) = iter.next() { Validity::Array(validity_array) } else { - Validity::from(array.dtype.nullability()) + Validity::from(array.common.dtype().nullability()) }; let new_array = - FixedSizeListArray::try_new(elements, array.list_size(), validity, array.len())?; + FixedSizeListArray::try_new(elements, array.list_size(), validity, array.common.len())?; *array = new_array; Ok(()) } diff --git a/vortex-array/src/arrays/list/array.rs b/vortex-array/src/arrays/list/array.rs index cdca1ae04f2..fda6c12d3ad 100644 --- a/vortex-array/src/arrays/list/array.rs +++ b/vortex-array/src/arrays/list/array.rs @@ -10,6 +10,7 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_panic; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; use crate::IntoArray; @@ -23,7 +24,6 @@ use crate::dtype::NativePType; use crate::match_each_integer_ptype; use crate::match_each_native_ptype; use crate::scalar_fn::fns::operators::Operator; -use crate::stats::ArrayStats; use crate::validity::Validity; /// A list array that stores variable-length lists of elements, similar to `Vec>`. @@ -80,11 +80,10 @@ use crate::validity::Validity; /// ``` #[derive(Clone, Debug)] pub struct ListArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) elements: ArrayRef, pub(super) offsets: ArrayRef, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } pub struct ListArrayParts { @@ -145,12 +144,13 @@ impl ListArray { Self::validate(&elements, &offsets, &validity) .vortex_expect("[Debug Assertion]: Invalid `ListViewArray` parameters"); + let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability()); + let len = offsets.len().saturating_sub(1); Self { - dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()), + common: ArrayCommon::new(len, dtype), elements, offsets, validity, - stats_set: Default::default(), } } @@ -241,7 +241,7 @@ impl ListArray { /// Splits an array into its parts pub fn into_parts(self) -> ListArrayParts { ListArrayParts { - dtype: self.dtype, + dtype: self.common.dtype().clone(), elements: self.elements, offsets: self.offsets, validity: self.validity, @@ -295,9 +295,9 @@ impl ListArray { /// Returns the element dtype of the list array. pub fn element_dtype(&self) -> &Arc { - match &self.dtype { + match self.common.dtype() { DType::List(element_dtype, _) => element_dtype, - _ => vortex_panic!("ListArray has invalid dtype {}", self.dtype), + _ => vortex_panic!("ListArray has invalid dtype {}", self.common.dtype()), } } diff --git a/vortex-array/src/arrays/list/vtable/mod.rs b/vortex-array/src/arrays/list/vtable/mod.rs index e1cce8ab080..5d23b2162cb 100644 --- a/vortex-array/src/arrays/list/vtable/mod.rs +++ b/vortex-array/src/arrays/list/vtable/mod.rs @@ -60,26 +60,26 @@ impl VTable for ListVTable { } fn len(array: &ListArray) -> usize { - array.offsets.len().saturating_sub(1) + array.common.len() } fn dtype(array: &ListArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ListArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ListArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.elements.array_hash(state, precision); array.offsets.array_hash(state, precision); array.validity.array_hash(state, precision); } fn array_eq(array: &ListArray, other: &ListArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.elements.array_eq(&other.elements, precision) && array.offsets.array_eq(&other.offsets, precision) && array.validity.array_eq(&other.validity, precision) @@ -202,7 +202,7 @@ impl VTable for ListVTable { let validity = if let Some(validity_array) = iter.next() { Validity::Array(validity_array) } else { - Validity::from(array.dtype.nullability()) + Validity::from(array.common.dtype().nullability()) }; let new_array = ListArray::try_new(elements, offsets, validity)?; diff --git a/vortex-array/src/arrays/listview/array.rs b/vortex-array/src/arrays/listview/array.rs index 2d43320bd95..33a8892b8ed 100644 --- a/vortex-array/src/arrays/listview/array.rs +++ b/vortex-array/src/arrays/listview/array.rs @@ -10,6 +10,7 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; use crate::ToCanonical; @@ -19,7 +20,6 @@ use crate::arrays::bool; use crate::dtype::DType; use crate::dtype::IntegerPType; use crate::match_each_integer_ptype; -use crate::stats::ArrayStats; use crate::validity::Validity; /// The canonical encoding for variable-length list arrays. @@ -86,10 +86,8 @@ use crate::validity::Validity; /// [`ListArray`]: crate::arrays::ListArray #[derive(Clone, Debug)] pub struct ListViewArray { - /// The [`DType`] of the list array. - /// - /// This type **must** be the variant [`DType::List`]. - pub(super) dtype: DType, + /// Common fields (len, dtype, stats) shared by all array types. + pub(super) common: ArrayCommon, /// The `elements` data array, where each list scalar is a _slice_ of the `elements` array, and /// each inner list element is a _scalar_ of the `elements` array. @@ -122,9 +120,6 @@ pub struct ListViewArray { /// Note that this null map refers to which list scalars are null, **not** which sub-elements of /// list scalars are null. The `elements` array will track individual value nullability. pub(super) validity: Validity, - - /// The stats for this array. - pub(super) stats_set: ArrayStats, } pub struct ListViewArrayParts { @@ -169,14 +164,15 @@ impl ListViewArray { ) -> VortexResult { Self::validate(&elements, &offsets, &sizes, &validity)?; + let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability()); + let len = offsets.len(); Ok(Self { - dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()), + common: ArrayCommon::new(len, dtype), elements, offsets, sizes, validity, is_zero_copy_to_list: false, - stats_set: Default::default(), }) } @@ -210,14 +206,15 @@ impl ListViewArray { .vortex_expect("Failed to crate `ListViewArray`"); } + let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability()); + let len = offsets.len(); Self { - dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()), + common: ArrayCommon::new(len, dtype), elements, offsets, sizes, validity, is_zero_copy_to_list: false, - stats_set: Default::default(), } } @@ -342,7 +339,12 @@ impl ListViewArray { } pub fn into_parts(self) -> ListViewArrayParts { - let dtype = self.dtype.into_list_element_opt().vortex_expect("is list"); + let dtype = self + .common + .dtype() + .clone() + .into_list_element_opt() + .vortex_expect("is list"); ListViewArrayParts { elements_dtype: dtype, elements: self.elements, diff --git a/vortex-array/src/arrays/listview/vtable/mod.rs b/vortex-array/src/arrays/listview/vtable/mod.rs index cbef285662e..847a233cd45 100644 --- a/vortex-array/src/arrays/listview/vtable/mod.rs +++ b/vortex-array/src/arrays/listview/vtable/mod.rs @@ -66,16 +66,15 @@ impl VTable for ListViewVTable { } fn len(array: &ListViewArray) -> usize { - debug_assert_eq!(array.offsets().len(), array.sizes().len()); - array.offsets().len() + array.common.len() } fn dtype(array: &ListViewArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ListViewArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -83,7 +82,7 @@ impl VTable for ListViewVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.elements().array_hash(state, precision); array.offsets().array_hash(state, precision); array.sizes().array_hash(state, precision); @@ -91,7 +90,7 @@ impl VTable for ListViewVTable { } fn array_eq(array: &ListViewArray, other: &ListViewArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.elements().array_eq(other.elements(), precision) && array.offsets().array_eq(other.offsets(), precision) && array.sizes().array_eq(other.sizes(), precision) @@ -230,7 +229,7 @@ impl VTable for ListViewVTable { let validity = if let Some(validity_array) = iter.next() { Validity::Array(validity_array) } else { - Validity::from(array.dtype.nullability()) + Validity::from(array.common.dtype().nullability()) }; let new_array = ListViewArray::try_new(elements, offsets, sizes, validity)?; diff --git a/vortex-array/src/arrays/listview/vtable/operations.rs b/vortex-array/src/arrays/listview/vtable/operations.rs index 0d05dc24591..529937ee36c 100644 --- a/vortex-array/src/arrays/listview/vtable/operations.rs +++ b/vortex-array/src/arrays/listview/vtable/operations.rs @@ -21,7 +21,7 @@ impl OperationsVTable for ListViewVTable { Ok(Scalar::list( Arc::new(list.dtype().clone()), children, - array.dtype.nullability(), + array.common.dtype().nullability(), )) } } diff --git a/vortex-array/src/arrays/masked/array.rs b/vortex-array/src/arrays/masked/array.rs index 38317caacd7..debfef24330 100644 --- a/vortex-array/src/arrays/masked/array.rs +++ b/vortex-array/src/arrays/masked/array.rs @@ -4,17 +4,15 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; +use crate::ArrayCommon; use crate::ArrayRef; -use crate::dtype::DType; -use crate::stats::ArrayStats; use crate::validity::Validity; #[derive(Clone, Debug)] pub struct MaskedArray { pub(super) child: ArrayRef, pub(super) validity: Validity, - pub(super) dtype: DType, - pub(super) stats: ArrayStats, + pub(super) common: ArrayCommon, } impl MaskedArray { @@ -36,12 +34,12 @@ impl MaskedArray { // MaskedArray's nullability is determined solely by its validity, not the child's dtype. // The child can have nullable dtype but must not have any actual null values. let dtype = child.dtype().as_nullable(); + let len = child.len(); Ok(Self { child, validity, - dtype, - stats: ArrayStats::default(), + common: ArrayCommon::new(len, dtype), }) } diff --git a/vortex-array/src/arrays/masked/compute/slice.rs b/vortex-array/src/arrays/masked/compute/slice.rs index 90a552dd907..a15568c250b 100644 --- a/vortex-array/src/arrays/masked/compute/slice.rs +++ b/vortex-array/src/arrays/masked/compute/slice.rs @@ -10,21 +10,12 @@ use crate::IntoArray; use crate::arrays::MaskedArray; use crate::arrays::MaskedVTable; use crate::arrays::slice::SliceReduce; -use crate::stats::ArrayStats; impl SliceReduce for MaskedVTable { fn slice(array: &Self::Array, range: Range) -> VortexResult> { let child = array.child.slice(range.clone())?; let validity = array.validity.slice(range)?; - Ok(Some( - MaskedArray { - child, - validity, - dtype: array.dtype.clone(), - stats: ArrayStats::default(), - } - .into_array(), - )) + Ok(Some(MaskedArray::try_new(child, validity)?.into_array())) } } diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index 30b97088b29..3d8c7333edc 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -58,27 +58,27 @@ impl VTable for MaskedVTable { } fn len(array: &MaskedArray) -> usize { - array.child.len() + array.common.len() } fn dtype(array: &MaskedArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &MaskedArray) -> StatsSetRef<'_> { - array.stats.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &MaskedArray, state: &mut H, precision: Precision) { array.child.array_hash(state, precision); array.validity.array_hash(state, precision); - array.dtype.hash(state); + array.common.dtype().hash(state); } fn array_eq(array: &MaskedArray, other: &MaskedArray, precision: Precision) -> bool { array.child.array_eq(&other.child, precision) && array.validity.array_eq(&other.validity, precision) - && array.dtype == other.dtype + && array.common.dtype() == other.common.dtype() } fn nbuffers(_array: &Self::Array) -> usize { @@ -203,7 +203,7 @@ impl VTable for MaskedVTable { let validity = if let Some(validity_array) = iter.next() { Validity::Array(validity_array) } else { - Validity::from(array.dtype.nullability()) + Validity::from(array.common.dtype().nullability()) }; let new_array = MaskedArray::try_new(child, validity)?; diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index c3549fa9255..2eeee56db6d 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -8,6 +8,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_session::VortexSession; +use crate::ArrayCommon; use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; @@ -18,7 +19,6 @@ use crate::buffer::BufferHandle; use crate::dtype::DType; use crate::scalar::Scalar; use crate::serde::ArrayChildren; -use crate::stats::ArrayStats; use crate::stats::StatsSetRef; use crate::validity::Validity; use crate::vtable; @@ -43,23 +43,23 @@ impl VTable for NullVTable { } fn len(array: &NullArray) -> usize { - array.len + array.common.len() } - fn dtype(_array: &NullArray) -> &DType { - &DType::Null + fn dtype(array: &NullArray) -> &DType { + array.common.dtype() } fn stats(array: &NullArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &NullArray, state: &mut H, _precision: Precision) { - array.len.hash(state); + array.common.len().hash(state); } fn array_eq(array: &NullArray, other: &NullArray, _precision: Precision) -> bool { - array.len == other.len + array.common.len() == other.common.len() } fn nbuffers(_array: &NullArray) -> usize { @@ -165,8 +165,7 @@ impl VTable for NullVTable { /// ``` #[derive(Clone, Debug)] pub struct NullArray { - len: usize, - stats_set: ArrayStats, + common: ArrayCommon, } #[derive(Debug)] @@ -179,8 +178,7 @@ impl NullVTable { impl NullArray { pub fn new(len: usize) -> Self { Self { - len, - stats_set: Default::default(), + common: ArrayCommon::new(len, DType::Null), } } } diff --git a/vortex-array/src/arrays/primitive/array/mod.rs b/vortex-array/src/arrays/primitive/array/mod.rs index 37c9b475800..ab4c445b4e0 100644 --- a/vortex-array/src/arrays/primitive/array/mod.rs +++ b/vortex-array/src/arrays/primitive/array/mod.rs @@ -12,13 +12,13 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_err; +use crate::ArrayCommon; use crate::ToCanonical; use crate::dtype::DType; use crate::dtype::NativePType; use crate::dtype::Nullability; use crate::dtype::PType; use crate::match_each_native_ptype; -use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable::ValidityHelper; @@ -70,10 +70,9 @@ use crate::buffer::BufferHandle; /// ``` #[derive(Clone, Debug)] pub struct PrimitiveArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) buffer: BufferHandle, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } pub struct PrimitiveArrayParts { @@ -95,11 +94,11 @@ impl PrimitiveArray { ptype: PType, validity: Validity, ) -> Self { + let len = handle.len() / ptype.byte_width(); Self { + common: ArrayCommon::new(len, DType::Primitive(ptype, validity.nullability())), buffer: handle, - dtype: DType::Primitive(ptype, validity.nullability()), validity, - stats_set: ArrayStats::default(), } } @@ -148,11 +147,12 @@ impl PrimitiveArray { Self::validate(&buffer, &validity) .vortex_expect("[Debug Assertion]: Invalid `PrimitiveArray` parameters"); + let byte_buffer = buffer.into_byte_buffer(); + let len = byte_buffer.len() / T::PTYPE.byte_width(); Self { - dtype: DType::Primitive(T::PTYPE, validity.nullability()), - buffer: BufferHandle::new_host(buffer.into_byte_buffer()), + common: ArrayCommon::new(len, DType::Primitive(T::PTYPE, validity.nullability())), + buffer: BufferHandle::new_host(byte_buffer), validity, - stats_set: Default::default(), } } @@ -202,12 +202,12 @@ impl PrimitiveArray { } pub fn from_buffer_handle(handle: BufferHandle, ptype: PType, validity: Validity) -> Self { + let len = handle.len() / ptype.byte_width(); let dtype = DType::Primitive(ptype, validity.nullability()); Self { + common: ArrayCommon::new(len, dtype), buffer: handle, - dtype, validity, - stats_set: ArrayStats::default(), } } diff --git a/vortex-array/src/arrays/primitive/compute/between.rs b/vortex-array/src/arrays/primitive/compute/between.rs index 8c1868eb1eb..6eb5333b637 100644 --- a/vortex-array/src/arrays/primitive/compute/between.rs +++ b/vortex-array/src/arrays/primitive/compute/between.rs @@ -34,7 +34,7 @@ impl BetweenKernel for PrimitiveVTable { // null values let nullability = - arr.dtype.nullability() | lower.dtype().nullability() | upper.dtype().nullability(); + arr.dtype().nullability() | lower.dtype().nullability() | upper.dtype().nullability(); Ok(Some(match_each_native_ptype!(arr.ptype(), |P| { between_impl::

( diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index 479d05bb2d2..f06134a5f12 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -54,25 +54,25 @@ impl VTable for PrimitiveVTable { } fn len(array: &PrimitiveArray) -> usize { - array.buffer_handle().len() / array.ptype().byte_width() + array.common.len() } fn dtype(array: &PrimitiveArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &PrimitiveArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &PrimitiveArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.buffer.array_hash(state, precision); array.validity.array_hash(state, precision); } fn array_eq(array: &PrimitiveArray, other: &PrimitiveArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.buffer.array_eq(&other.buffer, precision) && array.validity.array_eq(&other.validity, precision) } diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 1e3ef1d14ba..d588f6f275a 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -4,19 +4,16 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; -use crate::dtype::DType; use crate::scalar_fn::ScalarFnRef; -use crate::stats::ArrayStats; #[derive(Clone, Debug)] pub struct ScalarFnArray { pub(super) scalar_fn: ScalarFnRef, - pub(super) dtype: DType, - pub(super) len: usize, + pub(super) common: ArrayCommon, pub(super) children: Vec, - pub(super) stats: ArrayStats, } impl ScalarFnArray { @@ -32,10 +29,8 @@ impl ScalarFnArray { Ok(Self { scalar_fn: bound, - dtype, - len, + common: ArrayCommon::new(len, dtype), children, - stats: Default::default(), }) } diff --git a/vortex-array/src/arrays/scalar_fn/rules.rs b/vortex-array/src/arrays/scalar_fn/rules.rs index 55af03a7f39..dddb54b695b 100644 --- a/vortex-array/src/arrays/scalar_fn/rules.rs +++ b/vortex-array/src/arrays/scalar_fn/rules.rs @@ -61,7 +61,7 @@ impl ArrayReduceRule for ScalarFnPackToStructRule { StructArray::try_new( pack_options.names.clone(), array.children.clone(), - array.len, + array.common.len(), validity, )? .into_array(), @@ -80,7 +80,9 @@ impl ArrayReduceRule for ScalarFnConstantRule { Ok(Some(Canonical::empty(array.dtype()).into_array())) } else { let result = array.scalar_at(0)?; - Ok(Some(ConstantArray::new(result, array.len).into_array())) + Ok(Some( + ConstantArray::new(result, array.common.len()).into_array(), + )) } } } @@ -89,10 +91,12 @@ impl ArrayReduceRule for ScalarFnConstantRule { struct ScalarFnAbstractReduceRule; impl ArrayReduceRule for ScalarFnAbstractReduceRule { fn reduce(&self, array: &ScalarFnArray) -> VortexResult> { - if let Some(reduced) = array - .scalar_fn - .reduce(array, &ArrayReduceCtx { len: array.len })? - { + if let Some(reduced) = array.scalar_fn.reduce( + array, + &ArrayReduceCtx { + len: array.common.len(), + }, + )? { return Ok(Some( reduced .as_any() diff --git a/vortex-array/src/arrays/scalar_fn/slice.rs b/vortex-array/src/arrays/scalar_fn/slice.rs index b2389475b86..5050993fbf4 100644 --- a/vortex-array/src/arrays/scalar_fn/slice.rs +++ b/vortex-array/src/arrays/scalar_fn/slice.rs @@ -5,6 +5,7 @@ use std::ops::Range; use vortex_error::VortexResult; +use crate::ArrayCommon; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::ScalarFnArray; @@ -22,10 +23,8 @@ impl SliceReduce for ScalarFnVTable { Ok(Some( ScalarFnArray { scalar_fn: array.scalar_fn.clone(), - dtype: array.dtype.clone(), - len: range.len(), + common: ArrayCommon::new(range.len(), array.common.dtype().clone()), children, - stats: Default::default(), } .into_array(), )) diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 8e1debf9554..93c0c99d7ec 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -17,6 +17,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_session::VortexSession; +use crate::ArrayCommon; use crate::ArrayEq; use crate::ArrayHash; use crate::ArrayRef; @@ -60,20 +61,20 @@ impl VTable for ScalarFnVTable { } fn len(array: &ScalarFnArray) -> usize { - array.len + array.common.len() } fn dtype(array: &ScalarFnArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &ScalarFnArray) -> StatsSetRef<'_> { - array.stats.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &ScalarFnArray, state: &mut H, precision: Precision) { - array.len.hash(state); - array.dtype.hash(state); + array.common.len().hash(state); + array.common.dtype().hash(state); array.scalar_fn.hash(state); for child in &array.children { child.array_hash(state, precision); @@ -81,10 +82,10 @@ impl VTable for ScalarFnVTable { } fn array_eq(array: &ScalarFnArray, other: &ScalarFnArray, precision: Precision) -> bool { - if array.len != other.len { + if array.common.len() != other.common.len() { return false; } - if array.dtype != other.dtype { + if array.common.dtype() != other.common.dtype() { return false; } if array.scalar_fn != other.scalar_fn { @@ -176,10 +177,8 @@ impl VTable for ScalarFnVTable { Ok(ScalarFnArray { // This requires a new Arc, but we plan to remove this later anyway. scalar_fn: metadata.scalar_fn.clone(), - dtype: dtype.clone(), - len, + common: ArrayCommon::new(len, dtype.clone()), children, - stats: Default::default(), }) } @@ -196,7 +195,7 @@ impl VTable for ScalarFnVTable { fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { ctx.log(format_args!("scalar_fn({}): executing", array.scalar_fn)); - let args = VecExecutionArgs::new(array.children.clone(), array.len); + let args = VecExecutionArgs::new(array.children.clone(), array.common.len()); array.scalar_fn.execute(&args, ctx) } @@ -234,10 +233,8 @@ pub trait ScalarFnArrayExt: scalar_fn::ScalarFnVTable { Ok(ScalarFnArray { scalar_fn, - dtype, - len, + common: ArrayCommon::new(len, dtype), children, - stats: Default::default(), } .into_array()) } diff --git a/vortex-array/src/arrays/scalar_fn/vtable/operations.rs b/vortex-array/src/arrays/scalar_fn/vtable/operations.rs index 628c829a4c3..dad28fd4904 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/operations.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/operations.rs @@ -40,11 +40,11 @@ impl OperationsVTable for ScalarFnVTable { debug_assert_eq!( scalar.dtype(), - &array.dtype, + array.common.dtype(), "Scalar function {} returned dtype {:?} but expected {:?}", array.scalar_fn, scalar.dtype(), - array.dtype + array.common.dtype() ); Ok(scalar) diff --git a/vortex-array/src/arrays/shared/array.rs b/vortex-array/src/arrays/shared/array.rs index c69ae39395d..e64327b459c 100644 --- a/vortex-array/src/arrays/shared/array.rs +++ b/vortex-array/src/arrays/shared/array.rs @@ -9,11 +9,10 @@ use async_lock::Mutex as AsyncMutex; use vortex_error::SharedVortexResult; use vortex_error::VortexResult; +use crate::ArrayCommon; use crate::ArrayRef; use crate::Canonical; use crate::IntoArray; -use crate::dtype::DType; -use crate::stats::ArrayStats; /// A lazily-executing array wrapper with a one-way transition from source to cached form. /// @@ -24,18 +23,17 @@ pub struct SharedArray { source: ArrayRef, cached: Arc>>, async_compute_lock: Arc>, - pub(super) dtype: DType, - pub(super) stats: ArrayStats, + pub(super) common: ArrayCommon, } impl SharedArray { pub fn new(source: ArrayRef) -> Self { + let common = ArrayCommon::new(source.len(), source.dtype().clone()); Self { - dtype: source.dtype().clone(), source, cached: Arc::new(OnceLock::new()), async_compute_lock: Arc::new(AsyncMutex::new(())), - stats: ArrayStats::default(), + common, } } @@ -92,7 +90,7 @@ impl SharedArray { } pub(super) fn set_source(&mut self, source: ArrayRef) { - self.dtype = source.dtype().clone(); + self.common = ArrayCommon::new(source.len(), source.dtype().clone()); self.source = source; self.cached = Arc::new(OnceLock::new()); self.async_compute_lock = Arc::new(AsyncMutex::new(())); diff --git a/vortex-array/src/arrays/shared/vtable.rs b/vortex-array/src/arrays/shared/vtable.rs index 2484cee1968..55464ff7b35 100644 --- a/vortex-array/src/arrays/shared/vtable.rs +++ b/vortex-array/src/arrays/shared/vtable.rs @@ -48,27 +48,27 @@ impl VTable for SharedVTable { } fn len(array: &SharedArray) -> usize { - array.current_array_ref().len() + array.common.len() } fn dtype(array: &SharedArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &SharedArray) -> StatsSetRef<'_> { - array.stats.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &SharedArray, state: &mut H, precision: Precision) { let current = array.current_array_ref(); current.array_hash(state, precision); - array.dtype.hash(state); + array.common.dtype().hash(state); } fn array_eq(array: &SharedArray, other: &SharedArray, precision: Precision) -> bool { let current = array.current_array_ref(); let other_current = other.current_array_ref(); - current.array_eq(other_current, precision) && array.dtype == other.dtype + current.array_eq(other_current, precision) && array.common.dtype() == other.common.dtype() } fn nbuffers(_array: &Self::Array) -> usize { diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs index 8a0dbbb642c..f8b7c0b77b7 100644 --- a/vortex-array/src/arrays/slice/array.rs +++ b/vortex-array/src/arrays/slice/array.rs @@ -7,14 +7,14 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_panic; +use crate::ArrayCommon; use crate::ArrayRef; -use crate::stats::ArrayStats; #[derive(Clone, Debug)] pub struct SliceArray { pub(super) child: ArrayRef, pub(super) range: Range, - pub(super) stats: ArrayStats, + pub(super) common: ArrayCommon, } pub struct SliceArrayParts { @@ -31,10 +31,12 @@ impl SliceArray { child.len() ); } + let len = range.len(); + let dtype = child.dtype().clone(); Ok(Self { child, range, - stats: ArrayStats::default(), + common: ArrayCommon::new(len, dtype), }) } diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index f90392f0dbb..5a0ee265852 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -55,15 +55,15 @@ impl VTable for SliceVTable { } fn len(array: &SliceArray) -> usize { - array.range.len() + array.common.len() } fn dtype(array: &SliceArray) -> &DType { - array.child.dtype() + array.common.dtype() } fn stats(array: &SliceArray) -> StatsSetRef<'_> { - array.stats.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &SliceArray, state: &mut H, precision: Precision) { @@ -134,11 +134,7 @@ impl VTable for SliceVTable { ) -> VortexResult { assert_eq!(len, metadata.0.len()); let child = children.get(0, dtype, metadata.0.end)?; - Ok(SliceArray { - child, - range: metadata.0.clone(), - stats: Default::default(), - }) + SliceArray::try_new(child, metadata.0.clone()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index a351cc84a5d..02991a19c6b 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -10,6 +10,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; use crate::IntoArray; @@ -17,7 +18,6 @@ use crate::dtype::DType; use crate::dtype::FieldName; use crate::dtype::FieldNames; use crate::dtype::StructFields; -use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable::ValidityHelper; @@ -141,11 +141,9 @@ use crate::vtable::ValidityHelper; /// ``` #[derive(Clone, Debug)] pub struct StructArray { - pub(super) len: usize, - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) fields: Arc<[ArrayRef]>, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } pub struct StructArrayParts { @@ -182,7 +180,7 @@ impl StructArray { } pub fn struct_fields(&self) -> &StructFields { - let Some(struct_dtype) = &self.dtype.as_struct_fields_opt() else { + let Some(struct_dtype) = &self.common.dtype().as_struct_fields_opt() else { unreachable!( "struct arrays must have be a DType::Struct, this is likely an internal bug." ) @@ -279,11 +277,9 @@ impl StructArray { .vortex_expect("[Debug Assertion]: Invalid `StructArray` parameters"); Self { - len: length, - dtype: DType::Struct(dtype, validity.nullability()), + common: ArrayCommon::new(length, DType::Struct(dtype, validity.nullability())), fields, validity, - stats_set: Default::default(), } } @@ -354,7 +350,7 @@ impl StructArray { } pub fn into_parts(self) -> StructArrayParts { - let struct_fields = self.dtype.into_struct_fields(); + let struct_fields = self.common.into_dtype().into_struct_fields(); StructArrayParts { struct_fields, fields: self.fields, @@ -450,7 +446,8 @@ impl StructArray { if let Ok(new_dtype) = struct_dtype.without_field(position) { self.fields = new_fields; - self.dtype = DType::Struct(new_dtype, self.dtype.nullability()); + let nullability = self.common.dtype().nullability(); + *self.common.dtype_mut() = DType::Struct(new_dtype, nullability); return Some(field); } None @@ -467,6 +464,11 @@ impl StructArray { let children: Arc<[ArrayRef]> = self.fields.iter().cloned().chain(once(array)).collect(); - Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone()) + Self::try_new_with_dtype( + children, + new_fields, + self.common.len(), + self.validity.clone(), + ) } } diff --git a/vortex-array/src/arrays/struct_/compute/rules.rs b/vortex-array/src/arrays/struct_/compute/rules.rs index b2ca2457c8c..f97f71d97ec 100644 --- a/vortex-array/src/arrays/struct_/compute/rules.rs +++ b/vortex-array/src/arrays/struct_/compute/rules.rs @@ -81,7 +81,7 @@ impl ArrayParentReduceRule for StructCastPushDownRule { array .validity() .clone() - .into_non_nullable(array.len) + .into_non_nullable(array.len()) .ok_or_else(|| vortex_err!("Failed to cast nullable struct to non-nullable"))? }; diff --git a/vortex-array/src/arrays/struct_/compute/zip.rs b/vortex-array/src/arrays/struct_/compute/zip.rs index ddf5310d4a4..3594b1b9bbd 100644 --- a/vortex-array/src/arrays/struct_/compute/zip.rs +++ b/vortex-array/src/arrays/struct_/compute/zip.rs @@ -53,7 +53,7 @@ impl ZipKernel for StructVTable { let combined = (v1m.bitand(&mask_mask)).bitor(&v2m.bitand(&mask_mask.not())); Validity::from_mask( combined, - if_true.dtype.nullability() | if_false.dtype.nullability(), + if_true.dtype().nullability() | if_false.dtype().nullability(), ) } }; diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index 498bfdee19f..e105c03f0a4 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -51,20 +51,20 @@ impl VTable for StructVTable { } fn len(array: &StructArray) -> usize { - array.len + array.common.len() } fn dtype(array: &StructArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &StructArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &StructArray, state: &mut H, precision: Precision) { - array.len.hash(state); - array.dtype.hash(state); + array.common.len().hash(state); + array.common.dtype().hash(state); for field in array.fields.iter() { field.array_hash(state, precision); } @@ -72,8 +72,8 @@ impl VTable for StructVTable { } fn array_eq(array: &StructArray, other: &StructArray, precision: Precision) -> bool { - array.len == other.len - && array.dtype == other.dtype + array.common.len() == other.common.len() + && array.common.dtype() == other.common.dtype() && array.fields.len() == other.fields.len() && array .fields @@ -175,8 +175,8 @@ impl VTable for StructVTable { } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { - let DType::Struct(struct_dtype, _nullability) = &array.dtype else { - vortex_bail!("Expected struct dtype, found {:?}", array.dtype) + let DType::Struct(struct_dtype, _nullability) = array.common.dtype() else { + vortex_bail!("Expected struct dtype, found {:?}", array.common.dtype()) }; // First child is validity (if present), followed by fields diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index 9c72e80487d..a9d63d61908 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -8,6 +8,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_err; +use crate::ArrayCommon; use crate::ArrayRef; use crate::DynArray; use crate::ToCanonical; @@ -17,16 +18,14 @@ use crate::dtype::DType; use crate::dtype::IntegerPType; use crate::dtype::Nullability; use crate::match_each_integer_ptype; -use crate::stats::ArrayStats; use crate::validity::Validity; #[derive(Clone, Debug)] pub struct VarBinArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) bytes: BufferHandle, pub(super) offsets: ArrayRef, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } impl VarBinArray { @@ -154,12 +153,12 @@ impl VarBinArray { Self::validate(&offsets, &bytes, &dtype, &validity) .vortex_expect("[Debug Assertion]: Invalid `VarBinArray` parameters"); + let len = offsets.len().saturating_sub(1); Self { - dtype, + common: ArrayCommon::new(len, dtype), bytes, offsets, validity, - stats_set: Default::default(), } } @@ -371,7 +370,12 @@ impl VarBinArray { /// Consumes self, returning a tuple containing the `DType`, the `bytes` array, /// the `offsets` array, and the `validity`. pub fn into_parts(self) -> (DType, BufferHandle, ArrayRef, Validity) { - (self.dtype, self.bytes, self.offsets, self.validity) + ( + self.common.dtype().clone(), + self.bytes, + self.offsets, + self.validity, + ) } } diff --git a/vortex-array/src/arrays/varbin/vtable/mod.rs b/vortex-array/src/arrays/varbin/vtable/mod.rs index 91d85a1ffa1..971d459df56 100644 --- a/vortex-array/src/arrays/varbin/vtable/mod.rs +++ b/vortex-array/src/arrays/varbin/vtable/mod.rs @@ -61,26 +61,26 @@ impl VTable for VarBinVTable { } fn len(array: &VarBinArray) -> usize { - array.offsets().len().saturating_sub(1) + array.common.len() } fn dtype(array: &VarBinArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &VarBinArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash(array: &VarBinArray, state: &mut H, precision: Precision) { - array.dtype.hash(state); + array.common.dtype().hash(state); array.bytes().array_hash(state, precision); array.offsets().array_hash(state, precision); array.validity.array_hash(state, precision); } fn array_eq(array: &VarBinArray, other: &VarBinArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.bytes().array_eq(other.bytes(), precision) && array.offsets().array_eq(other.offsets(), precision) && array.validity.array_eq(&other.validity, precision) diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index 0c1276c928f..184eaca78ed 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -13,13 +13,13 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; +use crate::ArrayCommon; use crate::arrays::varbinview::BinaryView; use crate::buffer::BufferHandle; use crate::builders::ArrayBuilder; use crate::builders::VarBinViewBuilder; use crate::dtype::DType; use crate::dtype::Nullability; -use crate::stats::ArrayStats; use crate::validity::Validity; /// A variable-length binary view array that stores strings and binary data efficiently. @@ -83,11 +83,10 @@ use crate::validity::Validity; /// ``` #[derive(Clone, Debug)] pub struct VarBinViewArray { - pub(super) dtype: DType, + pub(super) common: ArrayCommon, pub(super) buffers: Arc<[BufferHandle]>, pub(super) views: BufferHandle, pub(super) validity: Validity, - pub(super) stats_set: ArrayStats, } pub struct VarBinViewArrayParts { @@ -244,12 +243,12 @@ impl VarBinViewArray { dtype: DType, validity: Validity, ) -> Self { + let len = views.len() / size_of::(); Self { + common: ArrayCommon::new(len, dtype), views, buffers, - dtype, validity, - stats_set: Default::default(), } } @@ -345,7 +344,7 @@ impl VarBinViewArray { /// Splits the array into owned parts pub fn into_parts(self) -> VarBinViewArrayParts { VarBinViewArrayParts { - dtype: self.dtype, + dtype: self.common.dtype().clone(), buffers: self.buffers, views: self.views, validity: self.validity, diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index 9f738c0cb81..e655a0d9455 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -58,15 +58,15 @@ impl VTable for VarBinViewVTable { } fn len(array: &VarBinViewArray) -> usize { - array.views_handle().len() / size_of::() + array.common.len() } fn dtype(array: &VarBinViewArray) -> &DType { - &array.dtype + array.common.dtype() } fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> { - array.stats_set.to_ref(array.as_ref()) + array.common.stats().to_ref(array.as_ref()) } fn array_hash( @@ -74,7 +74,7 @@ impl VTable for VarBinViewVTable { state: &mut H, precision: Precision, ) { - array.dtype.hash(state); + array.common.dtype().hash(state); for buffer in array.buffers.iter() { buffer.array_hash(state, precision); } @@ -83,7 +83,7 @@ impl VTable for VarBinViewVTable { } fn array_eq(array: &VarBinViewArray, other: &VarBinViewArray, precision: Precision) -> bool { - array.dtype == other.dtype + array.common.dtype() == other.common.dtype() && array.buffers.len() == other.buffers.len() && array .buffers