diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs index d94df49de256..4898fea3d249 100644 --- a/arrow-arith/src/boolean.rs +++ b/arrow-arith/src/boolean.rs @@ -23,7 +23,7 @@ //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. use arrow_array::*; -use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper}; +use arrow_buffer::buffer::bitwise_quaternary_op_helper; use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not}; use arrow_schema::ArrowError; @@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result Result { // Same as above - Some(bitwise_bin_op_helper( + Some(BooleanBuffer::from_bitwise_binary_op( right_null_buffer.buffer(), right_null_buffer.offset(), left_values.inner(), @@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result Result Result Result { // Same as above - Some(bitwise_bin_op_helper( + Some(BooleanBuffer::from_bitwise_binary_op( right_nulls.buffer(), right_nulls.offset(), left_values.inner(), @@ -195,7 +197,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result Result( + left: impl AsRef<[u8]>, + left_offset_in_bits: usize, + right: impl AsRef<[u8]>, + right_offset_in_bits: usize, + len_in_bits: usize, + mut op: F, + ) -> Self + where + F: FnMut(u64, u64) -> u64, + { + // Fast path for aligned inputs + if left_offset_in_bits % 8 == 0 && right_offset_in_bits % 8 == 0 { + if let Some(result) = Self::try_from_aligned_bitwise_binary_op( + &left.as_ref()[left_offset_in_bits / 8..], // aligned to byte boundary + &right.as_ref()[right_offset_in_bits / 8..], + len_in_bits, + &mut op, + ) { + return result; + } + } + + // each chunk is 64 bits + let left_chunks = BitChunks::new(left.as_ref(), left_offset_in_bits, len_in_bits); + let right_chunks = BitChunks::new(right.as_ref(), right_offset_in_bits, len_in_bits); + + let mut result = MutableBuffer::with_capacity(left_chunks.num_u64s() * 8); + + 
for (left, right) in left_chunks.iter().zip(right_chunks.iter()) { + // SAFETY: we have reserved enough capacity above, and we are + // pushing exactly num_u64s() items and `BitChunks` correctly + // reports its upper bound + unsafe { + result.push_unchecked(op(left, right)); + } + } + if left_chunks.remainder_len() > 0 { + debug_assert!(result.capacity() >= result.len() + 8); // should not reallocate + result.push(op( + left_chunks.remainder_bits(), + right_chunks.remainder_bits(), + )); + // Just pushed one u64, which may have trailing zeros, + // so truncate back to the correct length + result.truncate(left_chunks.num_bytes()); + } + + BooleanBuffer { + buffer: Buffer::from(result), + offset: 0, + len: len_in_bits, + } + } + + /// Like [`Self::from_bitwise_binary_op`] but optimized for the case where the + /// inputs are aligned to byte boundaries + /// + /// Returns `None` if the inputs are not fully u64 aligned + fn try_from_aligned_bitwise_binary_op( + left: &[u8], + right: &[u8], + len_in_bits: usize, + op: &mut F, + ) -> Option + where + F: FnMut(u64, u64) -> u64, + { + unsafe { + // safety: all bytes are valid u64s + let (left_prefix, left_u64s, left_suffix) = left.align_to::(); + let (right_prefix, right_u64s, right_suffix) = right.align_to::(); + // if there is no prefix or suffix, both buffers are aligned and we can do the operation directly + // on u64s + // TODO also handle non empty suffixes by processing them separately + if left_prefix.is_empty() + && right_prefix.is_empty() + && left_suffix.is_empty() + && right_suffix.is_empty() + { + let result_u64s = left_u64s + .iter() + .zip(right_u64s.iter()) + .map(|(l, r)| op(*l, *r)) + .collect::>(); + Some(BooleanBuffer::new( + Buffer::from(result_u64s), + 0, + len_in_bits, + )) + } else { + None + } + } + } + + /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to an input buffer. 
+ /// + /// This function is much faster than applying the operation bit by bit as + /// it processes input buffers in chunks of 64 bits (8 bytes) at a time + /// + /// # Notes: + /// * `op` takes a single `u64` input and produces one `u64` output + /// operating on 64 bits at a time. + /// * `op` must only apply bitwise operations + /// on the relevant bits, as the input `u64` may contain irrelevant bits + /// and may be processed differently on different endian architectures. + /// * The inputs are treated as bitmaps, meaning that offsets and length + /// are specified in number of bits. + /// * The output always has zero offset + /// + /// # See Also + /// - [`BooleanBuffer::from_bitwise_binary_op`] for binary operations on two input buffers. + /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations + /// + /// # Example: Create a new [`BooleanBuffer`] from the bitwise `NOT` of an input slice + /// ``` + /// # use arrow_buffer::BooleanBuffer; + /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits + /// // NOT of the first 12 bits + /// let result = BooleanBuffer::from_bitwise_unary_op( + /// &input, 0, 12, |a| !a + /// ); + /// assert_eq!(result.inner().as_slice(), &[0b00110011u8, 0b11110101u8]); + /// ``` + /// + /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from an input slice + /// ``` + /// # use arrow_buffer::BooleanBuffer; + /// let input = [0b11001100u8, 0b10111010u8]; + /// // Copy bits 4..16 from input + /// let result = BooleanBuffer::from_bitwise_unary_op( + /// &input, 4, 12, |a| a + /// ); + /// assert_eq!(result.inner().as_slice(), &[0b10101100u8, 0b00001011u8]); + pub fn from_bitwise_unary_op( + left: impl AsRef<[u8]>, + offset_in_bits: usize, + len_in_bits: usize, + mut op: F, + ) -> Self + where + F: FnMut(u64) -> u64, + { + // try fast path for aligned input + if offset_in_bits % 8 == 0 { + if let Some(result) = Self::try_from_aligned_bitwise_unary_op( 
+ &left.as_ref()[offset_in_bits / 8..], // align to byte boundary + len_in_bits, + &mut op, + ) { + return result; + } + } + + // each chunk is 64 bits + let left_chunks = BitChunks::new(left.as_ref(), offset_in_bits, len_in_bits); + let mut result = MutableBuffer::with_capacity(left_chunks.num_u64s() * 8); + for left in left_chunks.iter() { + // SAFETY: we have reserved enough capacity above, and we are + // pushing exactly num_u64s() items and `BitChunks` correctly + // reports its upper bound + unsafe { + result.push_unchecked(op(left)); + } + } + if left_chunks.remainder_len() > 0 { + debug_assert!(result.capacity() >= result.len() + 8); // should not reallocate + result.push(op(left_chunks.remainder_bits())); + // Just pushed one u64, which may have trailing zeros, so truncate back to the correct length + result.truncate(left_chunks.num_bytes()); + } + + BooleanBuffer { + buffer: Buffer::from(result), + offset: 0, + len: len_in_bits, + } + } + + /// Like [`Self::from_bitwise_unary_op`] but optimized for the case where the + /// input is aligned to byte boundaries + fn try_from_aligned_bitwise_unary_op( + left: &[u8], + len_in_bits: usize, + op: &mut F, + ) -> Option + where + F: FnMut(u64) -> u64, + { + unsafe { + // safety: all valid bytes are valid u64s + let (left_prefix, left_u64s, left_suffix) = left.align_to::(); + // if there is no prefix or suffix, the buffer is aligned and we can do the operation directly + // on u64s + // TODO also handle non empty suffixes by processing them separately + if left_prefix.is_empty() && left_suffix.is_empty() { + let result_u64s = left_u64s.iter().map(|l| op(*l)).collect::>(); + Some(BooleanBuffer::new( + Buffer::from(result_u64s), + 0, + len_in_bits, + )) + } else { + None + } + } + } + /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer` pub fn collect_bool bool>(len: usize, f: F) -> Self { let buffer = MutableBuffer::collect_bool(len, f); @@ -188,6 +443,8 @@ impl BooleanBuffer { } /// Returns the inner 
[`Buffer`] + /// + /// Note: this does not account for offset and length of this [`BooleanBuffer`] #[inline] pub fn inner(&self) -> &Buffer { &self.buffer @@ -437,4 +694,80 @@ mod tests { assert_eq!(buf.values().len(), 1); assert!(buf.value(0)); } + + #[test] + fn test_from_bitwise_unary_op() { + // Use 1024 boolean values so that at least some of the tests cover multiple u64 chunks and + // perfect alignment + let input_bools = (0..1024) + .map(|_| rand::random::()) + .collect::>(); + let input_buffer = BooleanBuffer::from(&input_bools[..]); + + // Note ensure we test offsets over 100 to cover multiple u64 chunks + for offset in 0..1024 { + let result = BooleanBuffer::from_bitwise_unary_op( + input_buffer.values(), + offset, + input_buffer.len() - offset, + |a| !a, + ); + let expected = input_bools[offset..] + .iter() + .map(|b| !*b) + .collect::(); + assert_eq!(result, expected); + } + + // Also test when the input doesn't cover the entire buffer + for offset in 0..512 { + let len = 512 - offset; // fixed length less than total + let result = + BooleanBuffer::from_bitwise_unary_op(input_buffer.values(), offset, len, |a| !a); + let expected = input_bools[offset..] 
+ .iter() + .take(len) + .map(|b| !*b) + .collect::(); + assert_eq!(result, expected); + } + } + + #[test] + fn test_from_bitwise_binary_op() { + // pick random boolean inputs + let input_bools_left = (0..1024) + .map(|_| rand::random::()) + .collect::>(); + let input_bools_right = (0..1024) + .map(|_| rand::random::()) + .collect::>(); + let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]); + let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]); + + for left_offset in 0..200 { + for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] { + for len_offset in [0, 1, 44, 100, 256, 300, 512] { + let len = 1024 - len_offset - left_offset.max(right_offset); // ensure we don't go out of bounds + // compute with AND + let result = BooleanBuffer::from_bitwise_binary_op( + input_buffer_left.values(), + left_offset, + input_buffer_right.values(), + right_offset, + len, + |a, b| a & b, + ); + // compute directly from bools + let expected = input_bools_left[left_offset..] 
+ .iter() + .zip(&input_bools_right[right_offset..]) + .take(len) + .map(|(a, b)| *a & *b) + .collect::(); + assert_eq!(result, expected); + } + } + } + } } diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index eebd6434986c..971c590ada6b 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -20,15 +20,13 @@ use std::fmt::Debug; use std::ptr::NonNull; use std::sync::Arc; -use crate::BufferBuilder; use crate::alloc::{Allocation, Deallocation}; -use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; -use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; - #[cfg(feature = "pool")] use crate::pool::MemoryPool; +use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; +use crate::{BooleanBuffer, BufferBuilder}; +use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; -use super::ops::bitwise_unary_op_helper; use super::{MutableBuffer, ScalarBuffer}; /// A contiguous memory region that can be shared with other buffers and across @@ -344,10 +342,10 @@ impl Buffer { return self.slice_with_length(offset / 8, bit_util::ceil(len, 8)); } - bitwise_unary_op_helper(self, offset, len, |a| a) + BooleanBuffer::from_bitwise_unary_op(self, offset, len, |a| a).into_inner() } - /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits + /// Returns a `BitChunks` instance which can be used to iterate over this buffer's bits /// in larger chunks and starting at arbitrary bit offsets. /// Note that both `offset` and `length` are measured in bits. pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> { diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs index c69e5c6deb10..b10c1ed52e4c 100644 --- a/arrow-buffer/src/buffer/ops.rs +++ b/arrow-buffer/src/buffer/ops.rs @@ -16,6 +16,7 @@ // under the License. 
use super::{Buffer, MutableBuffer}; +use crate::BooleanBuffer; use crate::util::bit_util::ceil; /// Apply a bitwise operation `op` to four inputs and return the result as a Buffer. @@ -60,69 +61,48 @@ where /// Apply a bitwise operation `op` to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. +#[deprecated( + since = "57.1.0", + note = "use BooleanBuffer::from_bitwise_binary_op instead" +)] pub fn bitwise_bin_op_helper( left: &Buffer, left_offset_in_bits: usize, right: &Buffer, right_offset_in_bits: usize, len_in_bits: usize, - mut op: F, + op: F, ) -> Buffer where F: FnMut(u64, u64) -> u64, { - let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits); - let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits); - - let chunks = left_chunks - .iter() - .zip(right_chunks.iter()) - .map(|(left, right)| op(left, right)); - // Soundness: `BitChunks` is a `BitChunks` iterator which - // correctly reports its upper bound - let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) }; - - let remainder_bytes = ceil(left_chunks.remainder_len(), 8); - let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits()); - // we are counting its starting from the least significant bit, to to_le_bytes should be correct - let rem = &rem.to_le_bytes()[0..remainder_bytes]; - buffer.extend_from_slice(rem); - - buffer.into() + BooleanBuffer::from_bitwise_binary_op( + left, + left_offset_in_bits, + right, + right_offset_in_bits, + len_in_bits, + op, + ) + .into_inner() } /// Apply a bitwise operation `op` to one input and return the result as a Buffer. /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits. 
+#[deprecated( + since = "57.1.0", + note = "use BooleanBuffer::from_bitwise_unary_op instead" +)] pub fn bitwise_unary_op_helper( left: &Buffer, offset_in_bits: usize, len_in_bits: usize, - mut op: F, + op: F, ) -> Buffer where F: FnMut(u64) -> u64, { - // reserve capacity and set length so we can get a typed view of u64 chunks - let mut result = - MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false); - - let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits); - - let result_chunks = result.typed_data_mut::().iter_mut(); - - result_chunks - .zip(left_chunks.iter()) - .for_each(|(res, left)| { - *res = op(left); - }); - - let remainder_bytes = ceil(left_chunks.remainder_len(), 8); - let rem = op(left_chunks.remainder_bits()); - // we are counting its starting from the least significant bit, to to_le_bytes should be correct - let rem = &rem.to_le_bytes()[0..remainder_bytes]; - result.extend_from_slice(rem); - - result.into() + BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, op).into_inner() } /// Apply a bitwise and to two inputs and return the result as a Buffer. @@ -134,7 +114,7 @@ pub fn buffer_bin_and( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - bitwise_bin_op_helper( + BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, @@ -142,6 +122,7 @@ pub fn buffer_bin_and( len_in_bits, |a, b| a & b, ) + .into_inner() } /// Apply a bitwise or to two inputs and return the result as a Buffer. @@ -153,7 +134,7 @@ pub fn buffer_bin_or( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - bitwise_bin_op_helper( + BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, @@ -161,6 +142,7 @@ pub fn buffer_bin_or( len_in_bits, |a, b| a | b, ) + .into_inner() } /// Apply a bitwise xor to two inputs and return the result as a Buffer. 
@@ -172,7 +154,7 @@ pub fn buffer_bin_xor( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - bitwise_bin_op_helper( + BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, @@ -180,6 +162,7 @@ pub fn buffer_bin_xor( len_in_bits, |a, b| a ^ b, ) + .into_inner() } /// Apply a bitwise and_not to two inputs and return the result as a Buffer. @@ -191,7 +174,7 @@ pub fn buffer_bin_and_not( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - bitwise_bin_op_helper( + BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, @@ -199,10 +182,11 @@ pub fn buffer_bin_and_not( len_in_bits, |a, b| a & !b, ) + .into_inner() } /// Apply a bitwise not to one input and return the result as a Buffer. /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits. pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer { - bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a) + BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| !a).into_inner() } diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs index e11383f6f3db..d2028cd20048 100644 --- a/arrow-buffer/src/util/bit_chunk_iterator.rs +++ b/arrow-buffer/src/util/bit_chunk_iterator.rs @@ -259,12 +259,32 @@ impl<'a> BitChunks<'a> { self.remainder_len } - /// Returns the number of chunks + /// Returns the number of `u64` chunks #[inline] pub const fn chunk_len(&self) -> usize { self.chunk_len } + /// Return the number of `u64` that are needed to represent all bits + /// (including remainder) + /// + /// This is the size of a + #[inline] + pub fn num_u64s(&self) -> usize { + if self.remainder_len == 0 { + self.chunk_len + } else { + self.chunk_len + 1 + } + } + + /// Return the number of bytes that are needed to represent all bits + /// (including remainder) + #[inline] + pub fn num_bytes(&self) -> usize { + 
ceil(self.chunk_len * 64 + self.remainder_len, 8) + } + /// Returns the bitmask of remaining bits #[inline] pub fn remainder_bits(&self) -> u64 { diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index 8e3cc7d56c71..f05ca3804cce 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -18,7 +18,6 @@ //! Implements the `nullif` function for Arrow arrays. use arrow_array::{Array, ArrayRef, BooleanArray, make_array}; -use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper}; use arrow_buffer::{BooleanBuffer, NullBuffer}; use arrow_schema::{ArrowError, DataType}; @@ -75,7 +74,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result { let mut valid_count = 0; - let b = bitwise_bin_op_helper( + let b = BooleanBuffer::from_bitwise_binary_op( left.buffer(), left.offset(), right.inner(), @@ -91,16 +90,16 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result { let mut null_count = 0; - let buffer = bitwise_unary_op_helper(right.inner(), right.offset(), len, |b| { - let t = !b; - null_count += t.count_zeros() as usize; - t - }); + let buffer = + BooleanBuffer::from_bitwise_unary_op(right.inner(), right.offset(), len, |b| { + let t = !b; + null_count += t.count_zeros() as usize; + t + }); (buffer, null_count) } }; - let combined = BooleanBuffer::new(combined, 0, len); // Safety: // Counted nulls whilst computing let nulls = unsafe { NullBuffer::new_unchecked(combined, null_count) };