Skip to content
11 changes: 9 additions & 2 deletions arrow-buffer/src/buffer/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1088,8 +1088,15 @@ impl std::iter::FromIterator<bool> for MutableBuffer {

impl<T: ArrowNativeType> std::iter::FromIterator<T> for MutableBuffer {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let mut buffer = Self::default();
buffer.extend_from_iter(iter.into_iter());
let into_iter = iter.into_iter();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this creates a new MutableBuffer, we could also do something like

MutableBuffer::from(iter.into_iter().collect::<Vec<_>>())

That would enable any optimizations that Vec has internally for trusted length iterators.


// if we know the size of the iterator, allocate that much capacity upfront
let mut buffer = match into_iter.size_hint().1 {
Some(size) => Self::new(size),
None => Self::default(),
};

buffer.extend_from_iter(into_iter);
buffer
}
}
Expand Down
44 changes: 19 additions & 25 deletions arrow-buffer/src/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ use std::marker::PhantomData;
#[derive(Debug)]
pub struct BufferBuilder<T: ArrowNativeType> {
buffer: MutableBuffer,
len: usize,
_marker: PhantomData<T>,
}

Expand Down Expand Up @@ -88,7 +87,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> {

Self {
buffer,
len: 0,
_marker: PhantomData,
}
}
Expand All @@ -99,10 +97,8 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
///
/// - `buffer` bytes must be aligned to type `T`
pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
let buffer_len = buffer.len();
Self {
buffer,
len: buffer_len / std::mem::size_of::<T>(),
_marker: PhantomData,
}
}
Expand All @@ -118,8 +114,9 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
///
/// assert_eq!(builder.len(), 1);
/// ```
#[inline]
pub fn len(&self) -> usize {
self.len
self.buffer.len() / std::mem::size_of::<T>()
}

/// Returns whether the internal buffer is empty.
Expand All @@ -134,7 +131,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
/// assert_eq!(builder.is_empty(), false);
/// ```
pub fn is_empty(&self) -> bool {
self.len == 0
self.buffer.is_empty()
}

/// Returns the actual capacity (number of elements) of the internal buffer.
Expand Down Expand Up @@ -166,7 +163,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
#[inline]
pub fn advance(&mut self, i: usize) {
self.buffer.extend_zeros(i * std::mem::size_of::<T>());
self.len += i;
}

/// Reserves memory for _at least_ `n` more elements of type `T`.
Expand Down Expand Up @@ -199,9 +195,9 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
/// ```
#[inline]
pub fn append(&mut self, v: T) {
self.reserve(1);
self.buffer.push(v);
self.len += 1;
self.buffer.reserve(std::mem::size_of::<T>());
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we revert this? as the reserve already does that and it should be inlined

Suggested change
self.buffer.reserve(std::mem::size_of::<T>());
self.reserve(1);

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think this is going to be faster since we don't need to multiply it by n

// SAFETY: reserve ensures capacity() - len() >= size_of::<T>()
unsafe { self.buffer.push_unchecked(v) };
}

/// Appends a value of type `T` into the builder N times,
Expand All @@ -219,7 +215,10 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
#[inline]
pub fn append_n(&mut self, n: usize, v: T) {
self.reserve(n);
self.extend(std::iter::repeat_n(v, n))
for _ in 0..n {
// SAFETY: reserve ensures enough capacity for n elements
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think extend already reserves

fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
let iterator = iter.into_iter();
self.extend_from_iter(iterator)
}

https://github.com/apache/arrow-rs/blob/1a169cd638aa4b72ccb4961e37e5014a66308718/arrow-buffer/src/buffer/mutable.rs#L829-L828

unsafe { self.buffer.push_unchecked(v) };
}
}

/// Appends `n`, zero-initialized values
Expand All @@ -237,7 +236,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
#[inline]
pub fn append_n_zeroed(&mut self, n: usize) {
self.buffer.extend_zeros(n * std::mem::size_of::<T>());
self.len += n;
}

/// Appends a slice of type `T`, growing the internal buffer as needed.
Expand All @@ -254,7 +252,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
#[inline]
pub fn append_slice(&mut self, slice: &[T]) {
self.buffer.extend_from_slice(slice);
self.len += slice.len();
}

/// View the contents of this buffer as a slice
Expand All @@ -274,7 +271,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
// - MutableBuffer is aligned and initialized for len elements of T
// - MutableBuffer corresponds to a single allocation
// - MutableBuffer does not support modification whilst active immutable borrows
unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len()) }
}

/// View the contents of this buffer as a mutable slice
Expand All @@ -298,7 +295,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
// - MutableBuffer is aligned and initialized for len elements of T
// - MutableBuffer corresponds to a single allocation
// - MutableBuffer does not support modification whilst active immutable borrows
unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len()) }
}

/// Shorten this BufferBuilder to `len` items
Expand All @@ -323,7 +320,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
#[inline]
pub fn truncate(&mut self, len: usize) {
self.buffer.truncate(len * std::mem::size_of::<T>());
self.len = self.len.min(len);
}

/// # Safety
Expand All @@ -336,8 +332,8 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
.size_hint()
.1
.expect("append_trusted_len_iter expects upper bound");
self.reserve(len);
self.extend(iter);
self.buffer.reserve(len * std::mem::size_of::<T>());
self.buffer.extend(iter);
}

/// Resets this builder and returns an immutable [Buffer].
Expand All @@ -356,7 +352,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
#[inline]
pub fn finish(&mut self) -> Buffer {
let buf = std::mem::take(&mut self.buffer);
self.len = 0;
buf.into()
}

Expand Down Expand Up @@ -387,9 +382,7 @@ impl<T: ArrowNativeType> Default for BufferBuilder<T> {

impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
self.buffer.extend(iter.into_iter().inspect(|_| {
self.len += 1;
}))
self.buffer.extend(iter);
}
}

Expand All @@ -404,9 +397,10 @@ impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {

impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let mut builder = Self::default();
builder.extend(iter);
builder
Self {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems nicer too

buffer: MutableBuffer::from_iter(iter),
_marker: PhantomData,
}
}
}

Expand Down
Loading