Skip to content

Commit e2db7d4

Browse files
authored
[Variant]: Implement DataType::FixedSizeList support for cast_to_variant kernel (#8282)
# Which issue does this PR close?

We generally require a GitHub issue to be filed for all bug fixes and enhancements, and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.

- Closes #8281.

# Rationale for this change

# What changes are included in this PR?

Adds support for `DataType::FixedSizeList`, the last data type not yet handled by the `cast_to_variant` kernel.

# Are these changes tested?

Yes. New unit tests cover both a regular and a sliced `FixedSizeList` array.

# Are there any user-facing changes?

Yes. `cast_to_variant` now supports a new input type, `FixedSizeList`.
1 parent b540248 commit e2db7d4

2 files changed

Lines changed: 117 additions & 12 deletions

File tree

parquet-variant-compute/src/arrow_to_variant.rs

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
use crate::type_conversion::{decimal_to_variant_decimal, CastOptions};
1919
use arrow::array::{
20-
Array, AsArray, GenericBinaryArray, GenericListArray, GenericListViewArray, GenericStringArray,
21-
OffsetSizeTrait, PrimitiveArray,
20+
Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
21+
GenericStringArray, OffsetSizeTrait, PrimitiveArray,
2222
};
2323
use arrow::compute::kernels::cast;
2424
use arrow::datatypes::{
@@ -82,6 +82,7 @@ pub(crate) enum ArrowToVariantRowBuilder<'a> {
8282
LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
8383
ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
8484
LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
85+
FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
8586
Struct(StructArrowToVariantBuilder<'a>),
8687
Map(MapArrowToVariantBuilder<'a>),
8788
Union(UnionArrowToVariantBuilder<'a>),
@@ -138,6 +139,7 @@ impl<'a> ArrowToVariantRowBuilder<'a> {
138139
LargeList(b) => b.append_row(builder, index),
139140
ListView(b) => b.append_row(builder, index),
140141
LargeListView(b) => b.append_row(builder, index),
142+
FixedSizeList(b) => b.append_row(builder, index),
141143
Struct(b) => b.append_row(builder, index),
142144
Map(b) => b.append_row(builder, index),
143145
Union(b) => b.append_row(builder, index),
@@ -255,6 +257,10 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
255257
array.as_list_view(),
256258
options,
257259
)?),
260+
DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
261+
array.as_fixed_size_list(),
262+
options,
263+
)?),
258264
DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
259265
array.as_struct(),
260266
options,
@@ -281,11 +287,6 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
281287
)));
282288
}
283289
},
284-
dt => {
285-
return Err(ArrowError::CastError(format!(
286-
"Unsupported data type for casting to Variant: {dt}",
287-
)));
288-
}
289290
};
290291
Ok(builder)
291292
}
@@ -523,7 +524,8 @@ impl NullArrowToVariantBuilder {
523524
}
524525
}
525526

526-
/// Generic list builder for List, LargeList, ListView, and LargeListView types
527+
/// Generic list builder for ListLikeArray types including List, LargeList, ListView, LargeListView,
528+
/// and FixedSizeList
527529
pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
528530
list_array: &'a L,
529531
values_builder: Box<ArrowToVariantRowBuilder<'a>>,
@@ -599,6 +601,18 @@ impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
599601
}
600602
}
601603

604+
impl ListLikeArray for FixedSizeListArray {
605+
fn values(&self) -> &dyn Array {
606+
self.values()
607+
}
608+
609+
fn element_range(&self, index: usize) -> Range<usize> {
610+
let value_length = self.value_length().as_usize();
611+
let offset = index * value_length;
612+
offset..(offset + value_length)
613+
}
614+
}
615+
602616
/// Struct builder for StructArray
603617
pub(crate) struct StructArrowToVariantBuilder<'a> {
604618
struct_array: &'a arrow::array::StructArray,
@@ -645,8 +659,7 @@ impl<'a> StructArrowToVariantBuilder<'a> {
645659

646660
// Process each field
647661
for (field_name, row_builder) in &mut self.field_builders {
648-
let mut field_builder =
649-
parquet_variant::ObjectFieldBuilder::new(field_name, &mut obj_builder);
662+
let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
650663
row_builder.append_row(&mut field_builder, index)?;
651664
}
652665

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ mod tests {
9191
ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
9292
Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, DurationMicrosecondArray,
9393
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray,
94-
FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder,
95-
GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
94+
FixedSizeBinaryBuilder, FixedSizeListBuilder, Float16Array, Float32Array, Float64Array,
95+
GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
9696
IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeListArray,
9797
LargeListViewBuilder, LargeStringArray, ListArray, ListViewBuilder, MapArray, NullArray,
9898
StringArray, StringRunBuilder, StringViewArray, StructArray, Time32MillisecondArray,
@@ -1407,6 +1407,98 @@ mod tests {
14071407
);
14081408
}
14091409

1410+
#[test]
1411+
fn test_cast_to_variant_fixed_size_list() {
1412+
let mut builder = FixedSizeListBuilder::new(Int32Array::builder(0), 2);
1413+
builder.values().append_value(0);
1414+
builder.values().append_value(1);
1415+
builder.append(true); // First list: [0, 1]
1416+
1417+
builder.values().append_null();
1418+
builder.values().append_value(3);
1419+
builder.append(true); // Second list: [null, 3]
1420+
1421+
builder.values().append_value(4);
1422+
builder.values().append_null();
1423+
builder.append(false); // Third list: null
1424+
1425+
builder.values().append_nulls(2);
1426+
builder.append(true); // Last list: [null, null]
1427+
1428+
let fixed_size_list_array = builder.finish();
1429+
1430+
// Expected values
1431+
let (metadata, value) = {
1432+
let mut builder = VariantBuilder::new();
1433+
let mut list = builder.new_list();
1434+
list.append_value(0i32);
1435+
list.append_value(1i32);
1436+
list.finish();
1437+
builder.finish()
1438+
};
1439+
let variant0 = Variant::new(&metadata, &value);
1440+
1441+
let (metadata, value) = {
1442+
let mut builder = VariantBuilder::new();
1443+
let mut list = builder.new_list();
1444+
list.append_null();
1445+
list.append_value(3i32);
1446+
list.finish();
1447+
builder.finish()
1448+
};
1449+
let variant1 = Variant::new(&metadata, &value);
1450+
1451+
let (metadata, value) = {
1452+
let mut builder = VariantBuilder::new();
1453+
let mut list = builder.new_list();
1454+
list.append_null();
1455+
list.append_null();
1456+
list.finish();
1457+
builder.finish()
1458+
};
1459+
let variant3 = Variant::new(&metadata, &value);
1460+
1461+
run_test(
1462+
Arc::new(fixed_size_list_array),
1463+
vec![Some(variant0), Some(variant1), None, Some(variant3)],
1464+
);
1465+
}
1466+
1467+
#[test]
1468+
fn test_cast_to_variant_sliced_fixed_size_list() {
1469+
// Create a FixedSizeListArray with size 2
1470+
let mut builder = FixedSizeListBuilder::new(Int64Array::builder(0), 2);
1471+
builder.values().append_value(0);
1472+
builder.values().append_value(1);
1473+
builder.append(true); // First list: [0, 1]
1474+
1475+
builder.values().append_null();
1476+
builder.values().append_value(3);
1477+
builder.append(true); // Second list: [null, 3]
1478+
1479+
builder.values().append_value(4);
1480+
builder.values().append_null();
1481+
builder.append(false); // Third list: null
1482+
1483+
let fixed_size_list_array = builder.finish();
1484+
1485+
// Expected value for slice(1, 2) - should get the second and third elements
1486+
let (metadata, value) = {
1487+
let mut builder = VariantBuilder::new();
1488+
let mut list = builder.new_list();
1489+
list.append_null();
1490+
list.append_value(3i64);
1491+
list.finish();
1492+
builder.finish()
1493+
};
1494+
let variant = Variant::new(&metadata, &value);
1495+
1496+
run_test(
1497+
Arc::new(fixed_size_list_array.slice(1, 2)),
1498+
vec![Some(variant), None],
1499+
);
1500+
}
1501+
14101502
#[test]
14111503
fn test_cast_to_variant_struct() {
14121504
// Test a simple struct with two fields: id (int64) and age (int32)

0 commit comments

Comments
 (0)