Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 46 additions & 5 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,28 @@ impl<R: RunEndIndexType> RunArray<R> {
Ok(array_data.into())
}

/// Create a new [`RunArray`] from the provided parts, without validation
///
/// # Safety
///
/// Safe if [`Self::try_new`] would not error
pub unsafe fn new_unchecked(
data_type: DataType,
run_ends: RunEndBuffer<R::Native>,
values: ArrayRef,
) -> Self {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add force validate here like for other arrays

pub unsafe fn new_unchecked(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
if cfg!(feature = "force_validate") {
return Self::new(keys, values);
}

pub unsafe fn new_unchecked(
offsets: OffsetBuffer<T::Offset>,
values: Buffer,
nulls: Option<NullBuffer>,
) -> Self {
if cfg!(feature = "force_validate") {
return Self::new(offsets, values, nulls);
}
Self {
data_type: T::DATA_TYPE,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree -- we should make REE consistent with other arrays

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't add it on purpose as we can't use the try_new as it get different arguments entirely

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does feel like we should have some consistency to include this force validate path, though it might need a bit of refactoring of the validate logic in try_new to make it fit here 🤔

I think only union array is the other array type with new_unchecked that doesn't have this force validate path:

pub unsafe fn new_unchecked(
fields: UnionFields,
type_ids: ScalarBuffer<i8>,
offsets: Option<ScalarBuffer<i32>>,
children: Vec<ArrayRef>,
) -> Self {
let mode = if offsets.is_some() {
UnionMode::Dense
} else {
UnionMode::Sparse
};
let len = type_ids.len();
let builder = ArrayData::builder(DataType::Union(fields, mode))
.add_buffer(type_ids.into_inner())
.child_data(children.into_iter().map(Array::into_data).collect())
.len(len);
let data = match offsets {
Some(offsets) => unsafe { builder.add_buffer(offsets.into_inner()).build_unchecked() },
None => unsafe { builder.build_unchecked() },
};
Self::from(data)
}

All the others include it

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

Self {
data_type,
run_ends,
values,
}
}

/// Deconstruct this array into its constituent parts
pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
(self.data_type, self.run_ends, self.values)
}

/// Returns a reference to the [`RunEndBuffer`].
pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
&self.run_ends
Expand Down Expand Up @@ -258,11 +280,7 @@ impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };

let values = make_array(values_child);
Self {
data_type,
run_ends,
values,
}
unsafe { Self::new_unchecked(data_type, run_ends, values) }
}
}

Expand Down Expand Up @@ -1295,4 +1313,27 @@ mod tests {
let slice3 = array1.slice(0, 4); // a, a, b, b
assert_ne!(slice1, slice3);
}

#[test]
fn allow_to_create_invalid_array_using_new_unchecked() {
let valid = RunArray::<Int32Type>::from_iter(["32"]);
let (_, buffer, values) = valid.into_parts();

let _ = unsafe {
// mismatch data type
RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
};
}

#[test]
fn test_run_array_roundtrip() {
let run = Int32Array::from(vec![3, 6, 9, 12]);
let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
let array = RunArray::try_new(&run, &values).unwrap();

let (dt, buffer, values) = array.clone().into_parts();
let created_from_parts =
unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
assert_eq!(array, created_from_parts);
}
}
Loading