diff --git a/encodings/datetime-parts/src/compute/compare.rs b/encodings/datetime-parts/src/compute/compare.rs index 9ef6a1b5893..4895e06660d 100644 --- a/encodings/datetime-parts/src/compute/compare.rs +++ b/encodings/datetime-parts/src/compute/compare.rs @@ -321,7 +321,8 @@ mod test { .unwrap(); // Timestamp with a value larger than i32::MAX. - let rhs = dtp_array_from_timestamp(i64::MAX, rhs_validity); + // https://github.com/BurntSushi/jiff/blob/e5b7f0d061e4da9598aed73f6171e78baa8b007f/src/shared/tzif.rs#L23 + let rhs = dtp_array_from_timestamp(253_402_207_200i64, rhs_validity); let comparison = lhs .clone() diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index ccadf82286a..288a7468ec0 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -10238,6 +10238,8 @@ pub fn vortex_array::dtype::extension::ExtVTable::serialize_metadata(&self, meta pub fn vortex_array::dtype::extension::ExtVTable::unpack_native<'a>(&self, metadata: &'a Self::Metadata, storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::dtype::extension::ExtVTable::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::dtype::extension::ExtVTable::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::dtype::extension::ExtVTable::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10256,6 +10258,8 @@ pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Date::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Date::validate_array(&self, _metadata: &Self::Metadata, _storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10274,6 +10278,8 @@ pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Time::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Time::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10292,7 +10298,9 @@ pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, m pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult -pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Timestamp::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + +pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10310,6 +10318,8 @@ pub fn vortex_array::extension::uuid::Uuid::serialize_metadata(&self, metadata: pub fn vortex_array::extension::uuid::Uuid::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::uuid::Uuid::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::uuid::Uuid::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::uuid::Uuid::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14142,6 +14152,8 @@ pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Date::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Date::validate_array(&self, _metadata: &Self::Metadata, _storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14192,6 +14204,8 @@ pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Time::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Time::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14244,7 +14258,9 @@ pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, m pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult -pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Timestamp::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + +pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14320,6 +14336,8 @@ pub fn vortex_array::extension::uuid::Uuid::serialize_metadata(&self, metadata: pub fn vortex_array::extension::uuid::Uuid::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::uuid::Uuid::validate_array(&self, metadata: &Self::Metadata, storage_array: &dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::uuid::Uuid::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::uuid::Uuid::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> diff --git a/vortex-array/src/arrays/extension/array.rs b/vortex-array/src/arrays/extension/array.rs index 799646817a9..8184a3f19da 100644 --- a/vortex-array/src/arrays/extension/array.rs +++ b/vortex-array/src/arrays/extension/array.rs @@ -75,13 +75,7 @@ impl ExtensionArray { /// /// Returns an error if the storage array in not compatible with the extension dtype. pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult { - // TODO(connor): Replace these statements once we add `validate_storage_array`. - // ext_dtype.validate_storage_array(&storage_array)?; - assert_eq!( - ext_dtype.storage_dtype(), - storage_array.dtype(), - "ExtensionArray: storage_dtype must match storage array DType", - ); + ext_dtype.validate_storage_array(&storage_array)?; // SAFETY: we validate that the inputs are valid above. Ok(unsafe { Self::new_unchecked(ext_dtype, storage_array) }) @@ -95,16 +89,10 @@ impl ExtensionArray { /// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been /// called successfully on this storage array. pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self { - // TODO(connor): Replace these statements once we add `validate_storage_array`. - // #[cfg(debug_assertions)] - // ext_dtype - // .validate_storage_array(&storage_array) - // .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`"); - debug_assert_eq!( - ext_dtype.storage_dtype(), - storage_array.dtype(), - "ExtensionArray: storage_dtype must match storage array DType", - ); + #[cfg(debug_assertions)] + ext_dtype + .validate_storage_array(&storage_array) + .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`"); Self { dtype: DType::Extension(ext_dtype), diff --git a/vortex-array/src/arrays/extension/compute/rules.rs b/vortex-array/src/arrays/extension/compute/rules.rs index d231c2691f6..1ddbf62b101 100644 --- a/vortex-array/src/arrays/extension/compute/rules.rs +++ b/vortex-array/src/arrays/extension/compute/rules.rs @@ -111,6 +111,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } fn test_ext_dtype() -> ExtDTypeRef { @@ -213,6 +221,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let ext_dtype1 = ExtDType::::try_new( diff --git a/vortex-array/src/dtype/dtype_impl.rs b/vortex-array/src/dtype/dtype_impl.rs index 36476d6f620..6f0022c14dd 100644 --- a/vortex-array/src/dtype/dtype_impl.rs +++ b/vortex-array/src/dtype/dtype_impl.rs @@ -487,7 +487,7 @@ mod tests { Timestamp::new_with_tz(TimeUnit::Seconds, Some("UTC".into()), Nullable).erased(), ); let t2 = DType::Extension( - Timestamp::new_with_tz(TimeUnit::Seconds, Some("ET".into()), Nullable).erased(), + Timestamp::new_with_tz(TimeUnit::Seconds, Some("EST".into()), Nullable).erased(), ); assert!(!t1.eq_ignore_nullability(&t2)); } diff --git a/vortex-array/src/dtype/extension/erased.rs b/vortex-array/src/dtype/extension/erased.rs index fd2b1814c35..1015a7abeae 100644 --- a/vortex-array/src/dtype/extension/erased.rs +++ b/vortex-array/src/dtype/extension/erased.rs @@ -13,6 +13,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_err; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::extension::ExtDType; @@ -100,6 +101,11 @@ impl ExtDTypeRef { pub(crate) fn validate_storage_value(&self, storage_value: &ScalarValue) -> VortexResult<()> { self.0.value_validate(storage_value) } + + /// Validates that the given storage scalar value is valid for this dtype. + pub(crate) fn validate_storage_array(&self, storage_array: &dyn DynArray) -> VortexResult<()> { + self.0.array_validate(storage_array) + } } /// Methods for downcasting type-erased extension dtypes. diff --git a/vortex-array/src/dtype/extension/typed.rs b/vortex-array/src/dtype/extension/typed.rs index 4cb1aff624c..6c2374c90cd 100644 --- a/vortex-array/src/dtype/extension/typed.rs +++ b/vortex-array/src/dtype/extension/typed.rs @@ -15,7 +15,9 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_ensure_eq; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::extension::ExtDTypeRef; @@ -128,11 +130,13 @@ pub(super) trait DynExtDType: 'static + Send + Sync + super::sealed::Sealed { fn metadata_serialize(&self) -> VortexResult>; /// Returns a new [`ExtDTypeRef`] with the given nullability. fn with_nullability(&self, nullability: Nullability) -> ExtDTypeRef; - /// Validates that the given storage scalar value is valid for this dtype. + /// Validates that the given storage scalar value is valid for this extension type. fn value_validate(&self, storage_value: &ScalarValue) -> VortexResult<()>; /// Formats an extension scalar value using the current dtype for metadata context. fn value_display(&self, f: &mut fmt::Formatter<'_>, storage_value: &ScalarValue) -> fmt::Result; + /// Validates that the given storage array is valid for this extension type. + fn array_validate(&self, storage_array: &dyn DynArray) -> VortexResult<()>; } impl DynExtDType for ExtDTypeInner { @@ -208,4 +212,14 @@ impl DynExtDType for ExtDTypeInner { ), } } + + fn array_validate(&self, storage_array: &dyn DynArray) -> VortexResult<()> { + vortex_ensure_eq!( + &self.storage_dtype, + storage_array.dtype(), + "tried to construct an extension array with a storage array that has the wrong dtype" + ); + + self.vtable.validate_array(&self.metadata, storage_array) + } } diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index 84f530ea06f..fc598c4e42c 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -7,6 +7,7 @@ use std::hash::Hash; use vortex_error::VortexResult; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::extension::ExtId; use crate::scalar::ScalarValue; @@ -36,9 +37,11 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult; /// Validate that the given storage type is compatible with this extension type. + /// + /// Implementors should additionally validate that the metadata makes sense for the give dtype. fn validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &DType) -> VortexResult<()>; - // Methods related to the extension scalar values. + // Methods related to extension scalar values. /// Validate the given storage value is compatible with the extension type. /// @@ -72,4 +75,17 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { storage_dtype: &'a DType, storage_value: &'a ScalarValue, ) -> VortexResult>; + + // Methods related to extension arrays. + + /// Validates that the given storage array is compatible with this extension type and type + /// medatada. + /// + /// Note that [`ExtVTable::validate_dtype()`] is always called first on the dtype of the storage + /// array to validate the storage [`DType`]. + fn validate_array( + &self, + metadata: &Self::Metadata, + storage_array: &dyn DynArray, + ) -> VortexResult<()>; } diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index 802bd8cf6a2..44f5f4e216c 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -7,9 +7,10 @@ use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; -use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -95,11 +96,10 @@ impl ExtVTable for Date { let ptype = date_ptype(metadata) .ok_or_else(|| vortex_err!("Date type does not support time unit {}", metadata))?; - vortex_ensure!( - storage_dtype.as_ptype() == ptype, - "Date storage dtype for {} must be {}", - metadata, - ptype + vortex_ensure_eq!( + storage_dtype.as_ptype(), + ptype, + "Date storage dtype for {metadata} must be {ptype}", ); Ok(()) @@ -119,6 +119,15 @@ impl ExtVTable for Date { _ => vortex_bail!("Date type does not support time unit {}", metadata), } } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + // All i64 and i32 values are valid dates. + Ok(()) + } } #[cfg(test)] diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index 77982f5ee8e..62bc4a1e685 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -10,6 +10,7 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -143,12 +144,75 @@ impl ExtVTable for Time { Ok(value) } + + fn validate_array( + &self, + metadata: &Self::Metadata, + storage_array: &dyn DynArray, + ) -> VortexResult<()> { + // We check both min and max because the stored integer can be negative, which is invalid + // for a time of day. + let stats = storage_array.statistics(); + + match metadata { + TimeUnit::Seconds | TimeUnit::Milliseconds => { + let build_span = |v: i32| match metadata { + TimeUnit::Seconds => Span::new().seconds(v), + TimeUnit::Milliseconds => Span::new().milliseconds(v), + _ => unreachable!(), + }; + + if let Some(min) = stats.compute_min::() { + jiff::civil::Time::MIN + .checked_add(build_span(min)) + .map_err(|e| { + vortex_err!("Time array min value {min} is out of range: {e}") + })?; + } + if let Some(max) = stats.compute_max::() { + jiff::civil::Time::MIN + .checked_add(build_span(max)) + .map_err(|e| { + vortex_err!("Time array max value {max} is out of range: {e}") + })?; + } + } + TimeUnit::Microseconds | TimeUnit::Nanoseconds => { + let build_span = |v: i64| match metadata { + TimeUnit::Microseconds => Span::new().microseconds(v), + TimeUnit::Nanoseconds => Span::new().nanoseconds(v), + _ => unreachable!(), + }; + + if let Some(min) = stats.compute_min::() { + jiff::civil::Time::MIN + .checked_add(build_span(min)) + .map_err(|e| { + vortex_err!("Time array min value {min} is out of range: {e}") + })?; + } + if let Some(max) = stats.compute_max::() { + jiff::civil::Time::MIN + .checked_add(build_span(max)) + .map_err(|e| { + vortex_err!("Time array max value {max} is out of range: {e}") + })?; + } + } + d @ TimeUnit::Days => vortex_bail!("Time type does not support time unit {d}"), + } + + Ok(()) + } } #[cfg(test)] mod tests { use vortex_error::VortexResult; + use crate::array::IntoArray; + use crate::arrays::ExtensionArray; + use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::Nullability::Nullable; use crate::extension::datetime::Time; @@ -187,4 +251,27 @@ mod tests { let scalar = Scalar::new(dtype, Some(ScalarValue::Primitive(PValue::I32(0)))); assert_eq!(format!("{}", scalar.as_extension()), "00:00:00"); } + + #[test] + fn validate_time_array() -> VortexResult<()> { + let ext_dtype = Time::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i32), Some(3661), Some(86399)]); + ExtensionArray::try_new(ext_dtype, storage.into_array())?; + Ok(()) + } + + #[test] + fn reject_time_array_out_of_range() { + // 86400 seconds = exactly 24 hours, which exceeds the valid time-of-day range. + let ext_dtype = Time::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i32), Some(86400)]); + assert!(ExtensionArray::try_new(ext_dtype, storage.into_array()).is_err()); + } + + #[test] + fn reject_time_array_negative() { + let ext_dtype = Time::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(-1i32)]); + assert!(ExtensionArray::try_new(ext_dtype, storage.into_array()).is_err()); + } } diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index d65f6b24cce..9faebdd201a 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -14,6 +14,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -169,15 +170,18 @@ impl ExtVTable for Timestamp { }) } - fn validate_dtype( - &self, - _metadata: &Self::Metadata, - storage_dtype: &DType, - ) -> VortexResult<()> { + fn validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &DType) -> VortexResult<()> { vortex_ensure!( matches!(storage_dtype, DType::Primitive(PType::I64, _)), "Timestamp storage dtype must be i64" ); + + if let Some(tz) = &metadata.tz { + jiff::Timestamp::UNIX_EPOCH + .in_tz(tz.as_ref()) + .map_err(|e| vortex_err!("Invalid timezone for timestamp: {}", e))?; + } + Ok(()) } @@ -222,6 +226,37 @@ impl ExtVTable for Timestamp { Ok(value) } + + fn validate_array( + &self, + metadata: &Self::Metadata, + storage_array: &dyn DynArray, + ) -> VortexResult<()> { + // The stored i64 can be negative (representing times before the Unix epoch), which is + // valid. Both bounds only fail for extreme values near the edges of jiff's supported + // range. + let stats = storage_array.statistics(); + let build_span = |v: i64| match metadata.unit { + TimeUnit::Nanoseconds => Span::new().nanoseconds(v), + TimeUnit::Microseconds => Span::new().microseconds(v), + TimeUnit::Milliseconds => Span::new().milliseconds(v), + TimeUnit::Seconds => Span::new().seconds(v), + TimeUnit::Days => unreachable!(), + }; + + if let Some(min) = stats.compute_min::() { + jiff::Timestamp::UNIX_EPOCH + .checked_add(build_span(min)) + .map_err(|e| vortex_err!("Timestamp array min value {min} is out of range: {e}"))?; + } + if let Some(max) = stats.compute_max::() { + jiff::Timestamp::UNIX_EPOCH + .checked_add(build_span(max)) + .map_err(|e| vortex_err!("Timestamp array max value {max} is out of range: {e}"))?; + } + + Ok(()) + } } #[cfg(test)] @@ -230,10 +265,15 @@ mod tests { use vortex_error::VortexResult; + use crate::array::IntoArray; + use crate::arrays::ExtensionArray; + use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::Nullability::Nullable; + use crate::dtype::extension::ExtDType; use crate::extension::datetime::TimeUnit; use crate::extension::datetime::Timestamp; + use crate::extension::datetime::TimestampOptions; use crate::scalar::PValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; @@ -249,15 +289,13 @@ mod tests { #[cfg_attr(miri, ignore)] #[test] fn reject_timestamp_with_invalid_timezone() { - let dtype = DType::Extension( - Timestamp::new_with_tz( - TimeUnit::Seconds, - Some(Arc::from("Not/A/Timezone")), - Nullable, - ) - .erased(), + let result = ExtDType::::try_new( + TimestampOptions { + unit: TimeUnit::Seconds, + tz: Some(Arc::from("Not/A/Timezone")), + }, + DType::Primitive(crate::dtype::PType::I64, Nullable), ); - let result = Scalar::try_new(dtype, Some(ScalarValue::Primitive(PValue::I64(0)))); assert!(result.is_err()); } @@ -284,4 +322,26 @@ mod tests { "1969-12-31T19:00:00-05:00[America/New_York]" ); } + + #[test] + fn validate_timestamp_array() -> VortexResult<()> { + let ext_dtype = Timestamp::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i64), Some(1_000_000)]); + ExtensionArray::try_new(ext_dtype, storage.into_array())?; + Ok(()) + } + + #[cfg_attr(miri, ignore)] + #[test] + fn validate_timestamp_array_with_timezone() -> VortexResult<()> { + let ext_dtype = Timestamp::new_with_tz( + TimeUnit::Seconds, + Some(Arc::from("America/New_York")), + Nullable, + ) + .erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i64)]); + ExtensionArray::try_new(ext_dtype, storage.into_array())?; + Ok(()) + } } diff --git a/vortex-array/src/extension/tests/divisible_int.rs b/vortex-array/src/extension/tests/divisible_int.rs index c34fcea3df6..6f668ca1466 100644 --- a/vortex-array/src/extension/tests/divisible_int.rs +++ b/vortex-array/src/extension/tests/divisible_int.rs @@ -6,9 +6,11 @@ use std::fmt; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; +use crate::DynArray; +use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::PType; use crate::dtype::extension::ExtId; @@ -70,11 +72,40 @@ impl ExtVTable for DivisibleInt { storage_value: &ScalarValue, ) -> VortexResult> { let value = storage_value.as_primitive().cast::()?; - if value % metadata.0 != 0 { - vortex_bail!("{} is not divisible by {}", value, metadata.0); - } + vortex_ensure_eq!( + value % metadata.0, + 0, + "{value} is not divisible by {}", + metadata.0 + ); + Ok(value) } + + fn validate_array( + &self, + metadata: &Self::Metadata, + storage_array: &dyn DynArray, + ) -> VortexResult<()> { + let primitive = DynArray::as_any(storage_array) + .downcast_ref::() + .ok_or_else(|| { + vortex_error::vortex_err!("expected PrimitiveArray for divisible int storage") + })?; + + let slice = primitive.as_slice::(); + + for value in slice { + vortex_ensure_eq!( + value % metadata.0, + 0, + "{value} is not divisible by {}", + metadata.0 + ); + } + + Ok(()) + } } #[cfg(test)] diff --git a/vortex-array/src/extension/uuid/vtable.rs b/vortex-array/src/extension/uuid/vtable.rs index 3564d0cf39f..be31c1bf9ea 100644 --- a/vortex-array/src/extension/uuid/vtable.rs +++ b/vortex-array/src/extension/uuid/vtable.rs @@ -2,12 +2,15 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use uuid; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; +use crate::DynArray; +use crate::ToCanonical; use crate::dtype::DType; use crate::dtype::PType; use crate::dtype::extension::ExtId; @@ -122,6 +125,40 @@ impl ExtVTable for Uuid { Ok(parsed) } + + fn validate_array( + &self, + metadata: &Self::Metadata, + storage_array: &dyn DynArray, + ) -> VortexResult<()> { + let fsl = storage_array.to_fixed_size_list(); + let elements = fsl.elements().to_primitive(); + let bytes = elements.as_slice::(); + + for chunk in bytes.chunks_exact(UUID_BYTE_LEN) { + let bytes: [u8; UUID_BYTE_LEN] = chunk + .try_into() + .map_err(|_| vortex_err!("hack bc we can't have nice things")) + .vortex_expect("The chunk needs to be exactly size UUID_BYTE_LEN"); + + let uuid = uuid::Uuid::from_bytes(bytes); + let actual = uuid + .get_version() + .ok_or_else(|| vortex_err!("UUID has unrecognized version nibble"))? + as u8; + + if let Some(expected) = metadata.version { + let expected = expected as u8; + vortex_ensure_eq!( + expected, + actual, + "UUID version mismatch: expected v{expected}, got v{actual}", + ); + } + } + + Ok(()) + } } #[expect( @@ -134,17 +171,24 @@ mod tests { use rstest::rstest; use uuid::Version; + use vortex_buffer::Buffer; use vortex_error::VortexResult; + use crate::array::IntoArray; + use crate::arrays::ExtensionArray; + use crate::arrays::FixedSizeListArray; + use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; + use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtVTable; use crate::extension::uuid::Uuid; use crate::extension::uuid::UuidMetadata; use crate::extension::uuid::vtable::UUID_BYTE_LEN; use crate::scalar::Scalar; use crate::scalar::ScalarValue; + use crate::validity::Validity; #[rstest] #[case::no_version(None)] @@ -352,4 +396,76 @@ mod tests { nullability, ) } + + /// Builds a [`FixedSizeListArray`] storage array from a slice of UUIDs. + fn uuid_storage_array(uuids: &[uuid::Uuid]) -> crate::ArrayRef { + let flat_bytes: Vec = uuids + .iter() + .flat_map(|u| u.as_bytes().iter().copied()) + .collect(); + let elements = + PrimitiveArray::new(Buffer::copy_from(&flat_bytes), Validity::NonNullable).into_array(); + FixedSizeListArray::new( + elements, + UUID_BYTE_LEN as u32, + Validity::NonNullable, + uuids.len(), + ) + .into_array() + } + + #[test] + fn validate_uuid_array_with_matching_version() -> VortexResult<()> { + let v4_a = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + let v4_b = uuid::Uuid::parse_str("6ba7b810-9dad-41d4-80b4-00c04fd430c8") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + + let ext_dtype = ExtDType::::try_new( + UuidMetadata { + version: Some(Version::Random), + }, + uuid_storage_dtype(Nullability::NonNullable), + )? + .erased(); + + let storage = uuid_storage_array(&[v4_a, v4_b]); + ExtensionArray::try_new(ext_dtype, storage)?; + Ok(()) + } + + #[test] + fn validate_uuid_array_rejects_version_mismatch() -> VortexResult<()> { + // This is a v4 UUID, but the metadata says v7. + let v4_uuid = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + + let ext_dtype = ExtDType::::try_new( + UuidMetadata { + version: Some(Version::SortRand), + }, + uuid_storage_dtype(Nullability::NonNullable), + )? + .erased(); + + let storage = uuid_storage_array(&[v4_uuid]); + assert!(ExtensionArray::try_new(ext_dtype, storage).is_err()); + Ok(()) + } + + #[test] + fn validate_uuid_array_no_version_constraint() -> VortexResult<()> { + let v4_uuid = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + + let ext_dtype = ExtDType::::try_new( + UuidMetadata::default(), + uuid_storage_dtype(Nullability::NonNullable), + )? + .erased(); + + let storage = uuid_storage_array(&[v4_uuid]); + ExtensionArray::try_new(ext_dtype, storage)?; + Ok(()) + } } diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index bce11e27e0f..db0ee96f9cc 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -195,6 +195,7 @@ mod tests { use vortex_error::VortexResult; use vortex_error::vortex_bail; + use crate::DynArray; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::FieldDType; @@ -480,6 +481,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let scalar = Scalar::extension::( diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index 72b44a5ed55..051def8f68e 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -11,6 +11,7 @@ mod tests { use vortex_error::VortexResult; use vortex_error::vortex_bail; + use crate::DynArray; use crate::dtype::DType; use crate::dtype::FieldDType; use crate::dtype::Nullability; @@ -59,6 +60,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } impl Apples { @@ -280,6 +289,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let storage_dtype = DType::Primitive(PType::F16, Nullability::NonNullable); @@ -343,6 +360,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let struct_dtype = DType::Struct( diff --git a/vortex-array/src/scalar/typed_view/extension/tests.rs b/vortex-array/src/scalar/typed_view/extension/tests.rs index 52a4127e12c..9b38334adc2 100644 --- a/vortex-array/src/scalar/typed_view/extension/tests.rs +++ b/vortex-array/src/scalar/typed_view/extension/tests.rs @@ -4,6 +4,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -48,6 +49,14 @@ impl ExtVTable for TestI32Ext { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } impl TestI32Ext { @@ -137,6 +146,14 @@ fn test_ext_scalar_partial_ord_different_types() { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let scalar1 = Scalar::extension::( @@ -325,6 +342,14 @@ fn test_ext_scalar_with_metadata() { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let scalar = Scalar::extension::( diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index aeffa40140b..9cc5a06511d 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -348,6 +348,7 @@ mod tests { use vortex::extension::datetime::Time; use vortex::extension::datetime::Timestamp; use vortex::scalar::ScalarValue; + use vortex_array::DynArray; use crate::cpp; use crate::duckdb::LogicalType; @@ -607,6 +608,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let ext_dtype = ExtDType::::try_new( diff --git a/vortex-duckdb/src/convert/vector.rs b/vortex-duckdb/src/convert/vector.rs index e5c948f7ed4..7046bcceacd 100644 --- a/vortex-duckdb/src/convert/vector.rs +++ b/vortex-duckdb/src/convert/vector.rs @@ -515,12 +515,15 @@ mod tests { fn test_timestamp_extreme_values() { // Test extreme timestamp values let values = vec![ - i64::MAX, // Maximum possible timestamp - i64::MIN, // Minimum possible timestamp - 0i64, // Epoch - 9_223_372_036_854_775_000_i64, // Near max but reasonable - -9_223_372_036_854_775_000_i64, // Near min but reasonable + i32::MAX as i64, + i32::MIN as i64, + 0i64, + // These don't work because they get added to the unix epoch, so it overflows + // 631_107_417_600_000_000, // The max for microseconds in jiff. + // -631_107_417_600_000_000, // The min for microseconds in jiff. ]; + // https://docs.rs/jiff/latest/jiff/struct.Span.html#method.microseconds + let len = values.len(); let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); diff --git a/vortex-tensor/public-api.lock b/vortex-tensor/public-api.lock index 52a9d275354..8fd738d397a 100644 --- a/vortex-tensor/public-api.lock +++ b/vortex-tensor/public-api.lock @@ -42,6 +42,8 @@ pub fn vortex_tensor::fixed_shape::FixedShapeTensor::serialize_metadata(&self, m pub fn vortex_tensor::fixed_shape::FixedShapeTensor::unpack_native<'a>(&self, _metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::scalar_value::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_tensor::fixed_shape::FixedShapeTensor::validate_array(&self, _metadata: &Self::Metadata, _storage_array: &dyn vortex_array::array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_tensor::fixed_shape::FixedShapeTensor::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> pub struct vortex_tensor::fixed_shape::FixedShapeTensorMetadata diff --git a/vortex-tensor/src/fixed_shape/vtable.rs b/vortex-tensor/src/fixed_shape/vtable.rs index fd3b1533ee7..211d21500d5 100644 --- a/vortex-tensor/src/fixed_shape/vtable.rs +++ b/vortex-tensor/src/fixed_shape/vtable.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex::array::DynArray; use vortex::dtype::DType; use vortex::dtype::extension::ExtId; use vortex::dtype::extension::ExtVTable; @@ -72,6 +73,15 @@ impl ExtVTable for FixedShapeTensor { // should be valid for a given tensor. Ok(storage_value) } + + fn validate_array( + &self, + _metadata: &Self::Metadata, + _storage_array: &dyn DynArray, + ) -> VortexResult<()> { + // As long as the dtype and the metadata is correct, any value is valid under a tensor. + Ok(()) + } } #[cfg(test)]