-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Buffer builder hot path opts #9393
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d5a5d37
08e34ba
2e70130
3c110f4
25d8f66
83b9c82
6795bdd
94b7716
a10bf30
59d66b5
2099c33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -58,7 +58,6 @@ use std::marker::PhantomData; | |||||||||
| #[derive(Debug)] | ||||||||||
| pub struct BufferBuilder<T: ArrowNativeType> { | ||||||||||
| buffer: MutableBuffer, | ||||||||||
| len: usize, | ||||||||||
| _marker: PhantomData<T>, | ||||||||||
| } | ||||||||||
|
|
||||||||||
|
|
@@ -88,7 +87,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
|
|
||||||||||
| Self { | ||||||||||
| buffer, | ||||||||||
| len: 0, | ||||||||||
| _marker: PhantomData, | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
@@ -99,10 +97,8 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| /// | ||||||||||
| /// - `buffer` bytes must be aligned to type `T` | ||||||||||
| pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self { | ||||||||||
| let buffer_len = buffer.len(); | ||||||||||
| Self { | ||||||||||
| buffer, | ||||||||||
| len: buffer_len / std::mem::size_of::<T>(), | ||||||||||
| _marker: PhantomData, | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
@@ -118,8 +114,9 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| /// | ||||||||||
| /// assert_eq!(builder.len(), 1); | ||||||||||
| /// ``` | ||||||||||
| #[inline] | ||||||||||
| pub fn len(&self) -> usize { | ||||||||||
| self.len | ||||||||||
| self.buffer.len() / std::mem::size_of::<T>() | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Returns whether the internal buffer is empty. | ||||||||||
|
|
@@ -134,7 +131,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| /// assert_eq!(builder.is_empty(), false); | ||||||||||
| /// ``` | ||||||||||
| pub fn is_empty(&self) -> bool { | ||||||||||
| self.len == 0 | ||||||||||
| self.buffer.is_empty() | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Returns the actual capacity (number of elements) of the internal buffer. | ||||||||||
|
|
@@ -166,7 +163,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| #[inline] | ||||||||||
| pub fn advance(&mut self, i: usize) { | ||||||||||
| self.buffer.extend_zeros(i * std::mem::size_of::<T>()); | ||||||||||
| self.len += i; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Reserves memory for _at least_ `n` more elements of type `T`. | ||||||||||
|
|
@@ -199,9 +195,9 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| /// ``` | ||||||||||
| #[inline] | ||||||||||
| pub fn append(&mut self, v: T) { | ||||||||||
| self.reserve(1); | ||||||||||
| self.buffer.push(v); | ||||||||||
| self.len += 1; | ||||||||||
| self.buffer.reserve(std::mem::size_of::<T>()); | ||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we revert this? as the
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think this is going to be faster since we don't need to multiply it by |
||||||||||
| // SAFETY: reserve ensures capacity() - len() >= size_of::<T>() | ||||||||||
| unsafe { self.buffer.push_unchecked(v) }; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Appends a value of type `T` into the builder N times, | ||||||||||
|
|
@@ -219,7 +215,10 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| #[inline] | ||||||||||
| pub fn append_n(&mut self, n: usize, v: T) { | ||||||||||
| self.reserve(n); | ||||||||||
| self.extend(std::iter::repeat_n(v, n)) | ||||||||||
| for _ in 0..n { | ||||||||||
| // SAFETY: reserve ensures enough capacity for n elements | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think extend already reserves arrow-rs/arrow-buffer/src/buffer/mutable.rs Lines 793 to 796 in 1a169cd
|
||||||||||
| unsafe { self.buffer.push_unchecked(v) }; | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Appends `n`, zero-initialized values | ||||||||||
|
|
@@ -237,7 +236,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| #[inline] | ||||||||||
| pub fn append_n_zeroed(&mut self, n: usize) { | ||||||||||
| self.buffer.extend_zeros(n * std::mem::size_of::<T>()); | ||||||||||
| self.len += n; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Appends a slice of type `T`, growing the internal buffer as needed. | ||||||||||
|
|
@@ -254,7 +252,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| #[inline] | ||||||||||
| pub fn append_slice(&mut self, slice: &[T]) { | ||||||||||
| self.buffer.extend_from_slice(slice); | ||||||||||
| self.len += slice.len(); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// View the contents of this buffer as a slice | ||||||||||
|
|
@@ -274,7 +271,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| // - MutableBuffer is aligned and initialized for len elements of T | ||||||||||
| // - MutableBuffer corresponds to a single allocation | ||||||||||
| // - MutableBuffer does not support modification whilst active immutable borrows | ||||||||||
| unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) } | ||||||||||
| unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len()) } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// View the contents of this buffer as a mutable slice | ||||||||||
|
|
@@ -298,7 +295,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| // - MutableBuffer is aligned and initialized for len elements of T | ||||||||||
| // - MutableBuffer corresponds to a single allocation | ||||||||||
| // - MutableBuffer does not support modification whilst active immutable borrows | ||||||||||
| unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) } | ||||||||||
| unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len()) } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Shorten this BufferBuilder to `len` items | ||||||||||
|
|
@@ -323,7 +320,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| #[inline] | ||||||||||
| pub fn truncate(&mut self, len: usize) { | ||||||||||
| self.buffer.truncate(len * std::mem::size_of::<T>()); | ||||||||||
| self.len = self.len.min(len); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// # Safety | ||||||||||
|
|
@@ -336,8 +332,8 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| .size_hint() | ||||||||||
| .1 | ||||||||||
| .expect("append_trusted_len_iter expects upper bound"); | ||||||||||
| self.reserve(len); | ||||||||||
| self.extend(iter); | ||||||||||
| self.buffer.reserve(len * std::mem::size_of::<T>()); | ||||||||||
| self.buffer.extend(iter); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Resets this builder and returns an immutable [Buffer]. | ||||||||||
|
|
@@ -356,7 +352,6 @@ impl<T: ArrowNativeType> BufferBuilder<T> { | |||||||||
| #[inline] | ||||||||||
| pub fn finish(&mut self) -> Buffer { | ||||||||||
| let buf = std::mem::take(&mut self.buffer); | ||||||||||
| self.len = 0; | ||||||||||
| buf.into() | ||||||||||
| } | ||||||||||
|
|
||||||||||
|
|
@@ -387,9 +382,7 @@ impl<T: ArrowNativeType> Default for BufferBuilder<T> { | |||||||||
|
|
||||||||||
| impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> { | ||||||||||
| fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { | ||||||||||
| self.buffer.extend(iter.into_iter().inspect(|_| { | ||||||||||
| self.len += 1; | ||||||||||
| })) | ||||||||||
| self.buffer.extend(iter); | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
|
|
@@ -404,9 +397,10 @@ impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> { | |||||||||
|
|
||||||||||
| impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> { | ||||||||||
| fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { | ||||||||||
| let mut builder = Self::default(); | ||||||||||
| builder.extend(iter); | ||||||||||
| builder | ||||||||||
| Self { | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this seems nicer too |
||||||||||
| buffer: MutableBuffer::from_iter(iter), | ||||||||||
| _marker: PhantomData, | ||||||||||
| } | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this creates a new
MutableBuffer, we could also do something likeThat would enable any optimizations that
Vechas internally for trusted length iterators.