-
Notifications
You must be signed in to change notification settings - Fork 149
Expand file tree
/
Copy patharray.rs
More file actions
134 lines (120 loc) · 4.95 KB
/
array.rs
File metadata and controls
134 lines (120 loc) · 4.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use crate::ArrayRef;
use crate::arrays::extension::view::ExtArray;
use crate::dtype::DType;
use crate::dtype::extension::ExtDTypeRef;
use crate::dtype::extension::ExtVTable;
use crate::stats::ArrayStats;
/// An extension array that wraps another array with additional type information.
///
/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
/// in future versions. The extension type system is still evolving.
///
/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
/// mechanism for adding semantic meaning to existing array types without requiring
/// changes to the core type system.
///
/// ## Design Philosophy
///
/// Extension arrays serve as a type-safe wrapper that:
/// - Preserves the underlying storage format and operations
/// - Adds semantic type information via `ExtDType`
/// - Enables custom serialization and deserialization logic
/// - Allows domain-specific interpretations of generic data
///
/// ## Storage and Type Relationship
///
/// The extension array maintains a strict contract:
/// - **Storage array**: Contains the actual data in a standard Vortex encoding
/// - **Extension type**: Defines how to interpret the storage data semantically
/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
///
/// ## Use Cases
///
/// Extension arrays are ideal for:
/// - **Custom numeric types**: Units of measurement, currencies
/// - **Temporal types**: Custom date/time formats, time zones, calendars
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
///
/// ## Validity and Operations
///
/// Extension arrays delegate validity and most operations to their storage array:
/// - Validity is inherited from the underlying storage
/// - Slicing preserves the extension type
/// - Scalar access wraps storage scalars with extension metadata
#[derive(Clone, Debug)]
pub struct ExtensionArray {
/// The extension dtype of this array (not the storage dtype).
///
/// This **must** be a [`DType::Extension`] variant: the constructors always build it as
/// `DType::Extension(ext_dtype)`, and `ext_dtype()` treats any other variant as unreachable.
pub(super) dtype: DType,
/// The backing storage array for this extension array.
///
/// Its dtype is expected to equal the storage dtype of the extension dtype held in `dtype`.
pub(super) storage_array: ArrayRef,
/// The stats for this array. The constructors initialize this to `ArrayStats::default()`.
pub(super) stats_set: ArrayStats,
}
impl ExtensionArray {
/// Constructs a new `ExtensionArray`.
///
/// # Panics
///
/// Panics if the storage array in not compatible with the extension dtype.
pub fn new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
Self::try_new(ext_dtype, storage_array).vortex_expect("Failed to create `ExtensionArray`")
}
/// Tries to construct a new `ExtensionArray`.
///
/// # Errors
///
/// Returns an error if the storage array in not compatible with the extension dtype.
pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult<Self> {
// TODO(connor): Replace these statements once we add `validate_storage_array`.
// ext_dtype.validate_storage_array(&storage_array)?;
assert_eq!(
ext_dtype.storage_dtype(),
storage_array.dtype(),
"ExtensionArray: storage_dtype must match storage array DType",
);
// SAFETY: we validate that the inputs are valid above.
Ok(unsafe { Self::new_unchecked(ext_dtype, storage_array) })
}
/// Creates a new `ExtensionArray`.
///
/// # Safety
///
/// The caller must ensure that the storage array is compatible with the extension dtype. In
/// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been
/// called successfully on this storage array.
pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
// TODO(connor): Replace these statements once we add `validate_storage_array`.
// #[cfg(debug_assertions)]
// ext_dtype
// .validate_storage_array(&storage_array)
// .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`");
debug_assert_eq!(
ext_dtype.storage_dtype(),
storage_array.dtype(),
"ExtensionArray: storage_dtype must match storage array DType",
);
Self {
dtype: DType::Extension(ext_dtype),
storage_array,
stats_set: ArrayStats::default(),
}
}
/// The extension dtype of this array.
pub fn ext_dtype(&self) -> &ExtDTypeRef {
let DType::Extension(ext) = &self.dtype else {
unreachable!("ExtensionArray: dtype must be an ExtDType")
};
ext
}
pub fn storage_array(&self) -> &ArrayRef {
&self.storage_array
}
pub fn downcast_ref<V: ExtVTable>(&self) -> Option<ExtArray<'_, V>> {
ExtArray::try_new(self)
}
}