Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6756,6 +6756,72 @@ pub fn vortex_array::arrow::byte_view::canonical_varbinview_to_arrow<T: arrow_ar

pub fn vortex_array::arrow::byte_view::execute_varbinview_to_arrow<T: arrow_array::types::ByteViewType>(array: &vortex_array::arrays::VarBinViewArray, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub mod vortex_array::arrow::export_plugin

pub trait vortex_array::arrow::export_plugin::ArrowExportPlugin: 'static + core::marker::Send + core::marker::Sync + core::fmt::Debug

pub fn vortex_array::arrow::export_plugin::ArrowExportPlugin::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::arrow::export_plugin::ArrowExportPlugin::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::arrow::export_plugin::ArrowExportPlugin::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::DateArrowExport

pub fn vortex_array::extension::datetime::DateArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::DateArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::DateArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::TimeArrowExport

pub fn vortex_array::extension::datetime::TimeArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::TimeArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::TimeArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::TimestampArrowExport

pub fn vortex_array::extension::datetime::TimestampArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::TimestampArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::TimestampArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

pub type vortex_array::arrow::export_plugin::ArrowExportPluginRef = alloc::sync::Arc<dyn vortex_array::arrow::ArrowExportPlugin>

pub mod vortex_array::arrow::export_session

pub struct vortex_array::arrow::export_session::ArrowExportSession

impl vortex_array::arrow::ArrowExportSession

pub fn vortex_array::arrow::ArrowExportSession::find(&self, id: &vortex_array::dtype::extension::ExtId) -> core::option::Option<vortex_array::arrow::ArrowExportPluginRef>

pub fn vortex_array::arrow::ArrowExportSession::register(&self, plugin: impl vortex_array::arrow::ArrowExportPlugin)

pub fn vortex_array::arrow::ArrowExportSession::registry(&self) -> &vortex_array::arrow::ArrowExportRegistry

impl core::default::Default for vortex_array::arrow::ArrowExportSession

pub fn vortex_array::arrow::ArrowExportSession::default() -> Self

impl core::fmt::Debug for vortex_array::arrow::ArrowExportSession

pub fn vortex_array::arrow::ArrowExportSession::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

pub trait vortex_array::arrow::export_session::ArrowExportSessionExt: vortex_session::SessionExt

pub fn vortex_array::arrow::export_session::ArrowExportSessionExt::arrow_exports(&self) -> vortex_session::Ref<'_, vortex_array::arrow::ArrowExportSession>

impl<S: vortex_session::SessionExt> vortex_array::arrow::ArrowExportSessionExt for S

pub fn S::arrow_exports(&self) -> vortex_session::Ref<'_, vortex_array::arrow::ArrowExportSession>

pub type vortex_array::arrow::export_session::ArrowExportRegistry = vortex_session::registry::Registry<vortex_array::arrow::ArrowExportPluginRef>

pub mod vortex_array::arrow::null

pub fn vortex_array::arrow::null::canonical_null_to_arrow(array: &vortex_array::arrays::null::NullArray) -> arrow_array::array::ArrayRef
Expand All @@ -6780,6 +6846,24 @@ impl vortex_array::iter::ArrayIterator for vortex_array::arrow::ArrowArrayStream

pub fn vortex_array::arrow::ArrowArrayStreamAdapter::dtype(&self) -> &vortex_array::dtype::DType

pub struct vortex_array::arrow::ArrowExportSession

impl vortex_array::arrow::ArrowExportSession

pub fn vortex_array::arrow::ArrowExportSession::find(&self, id: &vortex_array::dtype::extension::ExtId) -> core::option::Option<vortex_array::arrow::ArrowExportPluginRef>

pub fn vortex_array::arrow::ArrowExportSession::register(&self, plugin: impl vortex_array::arrow::ArrowExportPlugin)

pub fn vortex_array::arrow::ArrowExportSession::registry(&self) -> &vortex_array::arrow::ArrowExportRegistry

impl core::default::Default for vortex_array::arrow::ArrowExportSession

pub fn vortex_array::arrow::ArrowExportSession::default() -> Self

impl core::fmt::Debug for vortex_array::arrow::ArrowExportSession

pub fn vortex_array::arrow::ArrowExportSession::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

pub struct vortex_array::arrow::Datum

impl vortex_array::arrow::Datum
Expand Down Expand Up @@ -6816,6 +6900,46 @@ pub fn vortex_array::ArrayRef::execute_record_batch(self, schema: &arrow_schema:

pub fn vortex_array::ArrayRef::execute_record_batches(self, schema: &arrow_schema::schema::Schema, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<alloc::vec::Vec<arrow_array::record_batch::RecordBatch>>

pub trait vortex_array::arrow::ArrowExportPlugin: 'static + core::marker::Send + core::marker::Sync + core::fmt::Debug

pub fn vortex_array::arrow::ArrowExportPlugin::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::arrow::ArrowExportPlugin::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::arrow::ArrowExportPlugin::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::DateArrowExport

pub fn vortex_array::extension::datetime::DateArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::DateArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::DateArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::TimeArrowExport

pub fn vortex_array::extension::datetime::TimeArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::TimeArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::TimeArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::TimestampArrowExport

pub fn vortex_array::extension::datetime::TimestampArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::TimestampArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::TimestampArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

pub trait vortex_array::arrow::ArrowExportSessionExt: vortex_session::SessionExt

pub fn vortex_array::arrow::ArrowExportSessionExt::arrow_exports(&self) -> vortex_session::Ref<'_, vortex_array::arrow::ArrowExportSession>

impl<S: vortex_session::SessionExt> vortex_array::arrow::ArrowExportSessionExt for S

pub fn S::arrow_exports(&self) -> vortex_session::Ref<'_, vortex_array::arrow::ArrowExportSession>

pub trait vortex_array::arrow::FromArrowArray<A>

pub fn vortex_array::arrow::FromArrowArray::from_arrow(array: A, nullable: bool) -> vortex_error::VortexResult<Self> where Self: core::marker::Sized
Expand Down Expand Up @@ -6986,6 +7110,10 @@ pub fn vortex_array::arrow::to_arrow_null_buffer(validity: vortex_array::validit

pub fn vortex_array::arrow::to_null_buffer(mask: vortex_mask::Mask) -> core::option::Option<arrow_buffer::buffer::null::NullBuffer>

pub type vortex_array::arrow::ArrowExportPluginRef = alloc::sync::Arc<dyn vortex_array::arrow::ArrowExportPlugin>

pub type vortex_array::arrow::ArrowExportRegistry = vortex_session::registry::Registry<vortex_array::arrow::ArrowExportPluginRef>

pub mod vortex_array::buffer

pub struct vortex_array::buffer::BufferHandle(_)
Expand Down Expand Up @@ -12628,6 +12756,20 @@ pub fn vortex_array::extension::datetime::Date::validate_dtype(ext_dtype: &vorte

pub fn vortex_array::extension::datetime::Date::validate_scalar_value(ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>

pub struct vortex_array::extension::datetime::DateArrowExport

impl core::fmt::Debug for vortex_array::extension::datetime::DateArrowExport

pub fn vortex_array::extension::datetime::DateArrowExport::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::DateArrowExport

pub fn vortex_array::extension::datetime::DateArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::DateArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::DateArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

pub struct vortex_array::extension::datetime::Time

impl vortex_array::extension::datetime::Time
Expand Down Expand Up @@ -12684,6 +12826,20 @@ pub fn vortex_array::extension::datetime::Time::validate_dtype(ext_dtype: &vorte

pub fn vortex_array::extension::datetime::Time::validate_scalar_value(ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>

pub struct vortex_array::extension::datetime::TimeArrowExport

impl core::fmt::Debug for vortex_array::extension::datetime::TimeArrowExport

pub fn vortex_array::extension::datetime::TimeArrowExport::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::TimeArrowExport

pub fn vortex_array::extension::datetime::TimeArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::TimeArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::TimeArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

pub struct vortex_array::extension::datetime::Timestamp

impl vortex_array::extension::datetime::Timestamp
Expand Down Expand Up @@ -12742,6 +12898,20 @@ pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(ext_dtype: &

pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>

pub struct vortex_array::extension::datetime::TimestampArrowExport

impl core::fmt::Debug for vortex_array::extension::datetime::TimestampArrowExport

pub fn vortex_array::extension::datetime::TimestampArrowExport::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl vortex_array::arrow::ArrowExportPlugin for vortex_array::extension::datetime::TimestampArrowExport

pub fn vortex_array::extension::datetime::TimestampArrowExport::execute_to_arrow(&self, array: vortex_array::ArrayRef, target: &arrow_schema::datatype::DataType, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<arrow_array::array::ArrayRef>

pub fn vortex_array::extension::datetime::TimestampArrowExport::id(&self) -> vortex_array::dtype::extension::ExtId

pub fn vortex_array::extension::datetime::TimestampArrowExport::to_arrow_data_type(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<arrow_schema::datatype::DataType>

pub struct vortex_array::extension::datetime::TimestampOptions

pub vortex_array::extension::datetime::TimestampOptions::tz: core::option::Option<alloc::sync::Arc<str>>
Expand Down
52 changes: 37 additions & 15 deletions vortex-array/src/arrow/executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ pub mod null;
pub mod primitive;
mod run_end;
mod struct_;
mod temporal;
mod validity;

use arrow_array::ArrayRef as ArrowArrayRef;
Expand Down Expand Up @@ -46,7 +45,7 @@ use crate::arrow::executor::null::to_arrow_null;
use crate::arrow::executor::primitive::to_arrow_primitive;
use crate::arrow::executor::run_end::to_arrow_run_end;
use crate::arrow::executor::struct_::to_arrow_struct;
use crate::arrow::executor::temporal::to_arrow_temporal;
use crate::arrow::export_session::ArrowExportSessionExt;
use crate::dtype::DType;
use crate::dtype::PType;
use crate::executor::ExecutionCtx;
Expand Down Expand Up @@ -89,11 +88,30 @@ impl ArrowArrayExecutor for ArrayRef {
) -> VortexResult<ArrowArrayRef> {
let len = self.len();

// Resolve the DataType if it is a leaf type
// we should likely make this extensible.
if let Some(ext) = self.dtype().as_extension_opt() {
let plugin = ctx.session().arrow_exports().find(&ext.id());
let Some(plugin) = plugin else {
vortex_bail!(
"no ArrowExportPlugin registered for extension id {}",
ext.id()
);
};
let target = match data_type {
Some(dt) => dt.clone(),
None => plugin.to_arrow_data_type(ext)?,
};
let arrow = plugin.execute_to_arrow(self, &target, ctx)?;
vortex_ensure!(
arrow.len() == len,
"Arrow array length does not match Vortex array length after conversion to {:?}",
arrow
);
return Ok(arrow);
}

let resolved_type: DataType = match data_type {
Some(dt) => dt.clone(),
None => preferred_arrow_type(&self)?,
None => preferred_arrow_type(&self, ctx)?,
};

let arrow = match &resolved_type {
Expand All @@ -110,11 +128,6 @@ impl ArrowArrayExecutor for ArrayRef {
DataType::Float16 => to_arrow_primitive::<Float16Type>(self, ctx),
DataType::Float32 => to_arrow_primitive::<Float32Type>(self, ctx),
DataType::Float64 => to_arrow_primitive::<Float64Type>(self, ctx),
DataType::Timestamp(..)
| DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_) => to_arrow_temporal(self, &resolved_type, ctx),
DataType::Binary => to_arrow_byte_array::<BinaryType>(self, ctx),
DataType::LargeBinary => to_arrow_byte_array::<LargeBinaryType>(self, ctx),
DataType::Utf8 => to_arrow_byte_array::<Utf8Type>(self, ctx),
Expand Down Expand Up @@ -157,7 +170,12 @@ impl ArrowArrayExecutor for ArrayRef {
DataType::RunEndEncoded(ends_type, values_type) => {
to_arrow_run_end(self, ends_type.data_type(), values_type, ctx)
}
DataType::FixedSizeBinary(_)
DataType::Timestamp(..)
| DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_)
| DataType::FixedSizeBinary(_)
| DataType::Map(..)
| DataType::Duration(_)
| DataType::Interval(_)
Expand Down Expand Up @@ -192,7 +210,7 @@ impl ArrowArrayExecutor for ArrayRef {
/// However, some encodings have cheaper Arrow representations:
/// - `VarBinArray`: Uses `Utf8`/`Binary` (offset-based) instead of `Utf8View`/`BinaryView`
/// - `ListArray`: Uses `List` instead of `ListView`
fn preferred_arrow_type(array: &ArrayRef) -> VortexResult<DataType> {
fn preferred_arrow_type(array: &ArrayRef, ctx: &ExecutionCtx) -> VortexResult<DataType> {
// VarBinArray: use offset-based Binary/Utf8 instead of View types
if let Some(varbin) = array.as_opt::<VarBin>() {
let offsets_ptype = PType::try_from(varbin.offsets().dtype())?;
Expand All @@ -211,8 +229,7 @@ fn preferred_arrow_type(array: &ArrayRef) -> VortexResult<DataType> {
if let Some(list) = array.as_opt::<List>() {
let offsets_ptype = PType::try_from(list.offsets().dtype())?;
let use_large = matches!(offsets_ptype, PType::I64 | PType::U64);
// Recursively get the preferred type for elements
let elem_dtype = preferred_arrow_type(list.elements())?;
let elem_dtype = preferred_arrow_type(list.elements(), ctx)?;
let field = FieldRef::new(Field::new_list_field(
elem_dtype,
list.elements().dtype().is_nullable(),
Expand All @@ -225,6 +242,11 @@ fn preferred_arrow_type(array: &ArrayRef) -> VortexResult<DataType> {
});
}

// Everything else: use canonical dtype conversion
if let Some(ext) = array.dtype().as_extension_opt()
&& let Some(plugin) = ctx.session().arrow_exports().find(&ext.id())
{
return plugin.to_arrow_data_type(ext);
}

array.dtype().to_arrow_dtype()
}
Loading