diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b771dc94..077dcd32 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,6 +6,7 @@ on: pull_request: branches: - main + merge_group: name: Continuous integration @@ -37,9 +38,10 @@ jobs: # so we don't need to run `cargo check` or `cargo build` # use different features to check if everything is fine # the incremental compilation will make this faster + # We disallow todo!s in the code too. run: | cargo clippy -p roaring --all-targets --no-default-features -- -D warnings - cargo clippy -p roaring --all-targets --features serde -- -D warnings + cargo clippy -p roaring --all-targets --features serde -- -Dclippy::todo -D warnings - name: Check SIMD if: matrix.rust == 'nightly' diff --git a/Cargo.toml b/Cargo.toml index 3256f873..6e43a8ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,10 @@ [workspace] -members = ["roaring", "benchmarks"] +members = [ "benchmarks", "roaring" ] resolver = "2" [workspace.dependencies] roaring = { path = "roaring" } -bincode = "1.3.3" bytemuck = "1.21.0" byteorder = "1.5.0" criterion = "0.5" @@ -13,9 +12,10 @@ git2 = { version = "0.20", default-features = false } indicatif = "0.17" itertools = "0.14" once_cell = "1.20" +postcard = { version = "1.1", features = [ "alloc" ] } proptest = "1.6.0" serde = "1.0.217" -serde_json = "1.0.135" +serde_json = "1.0.138" zip = { version = "0.6", default-features = false } [profile.test] diff --git a/roaring/Cargo.toml b/roaring/Cargo.toml index 746384a7..4c16b8b6 100644 --- a/roaring/Cargo.toml +++ b/roaring/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "roaring" -version = "0.10.10" +version = "0.10.12" rust-version = "1.65.0" authors = ["Wim Looman ", "Kerollmops "] description = "A better compressed bitset - pure Rust implementation" @@ -29,4 +29,4 @@ std = ["dep:bytemuck", "dep:byteorder"] [dev-dependencies] proptest = { workspace = true } serde_json = { workspace = true } -bincode = { workspace = true } +postcard = { workspace = true } diff --git a/roaring/src/bitmap/arbitrary.rs b/roaring/src/bitmap/arbitrary.rs index e71740f8..11de91ff 100644 --- a/roaring/src/bitmap/arbitrary.rs +++ b/roaring/src/bitmap/arbitrary.rs @@ -4,12 +4,10 @@ mod test { use crate::bitmap::store::{ArrayStore, BitmapStore, Store}; use crate::RoaringBitmap; use core::fmt::{Debug, Formatter}; - use proptest::bits::{BitSetLike, BitSetStrategy, SampledBitSetStrategy}; + use proptest::bits::{BitSetLike, SampledBitSetStrategy}; use proptest::collection::{vec, SizeRange}; use proptest::prelude::*; - #[cfg(not(feature = "std"))] - use alloc::boxed::Box; #[cfg(not(feature = "std"))] use alloc::vec::Vec; @@ -62,28 +60,12 @@ mod test { impl BitmapStore { const MAX: usize = u16::MAX as usize; - pub fn universe() -> Self { - BitmapStore::try_from(1 + u16::MAX as u64, Box::new([u64::MAX; 1024])).unwrap() - } - - pub fn between(min: u16, max: u16) -> BitSetStrategy { - BitSetStrategy::new(min as usize, max as usize) - } - - pub fn masked(mask: Self) -> BitSetStrategy { - BitSetStrategy::masked(mask) - } - pub fn sampled( size: impl Into, bits: impl Into, ) -> SampledBitSetStrategy { SampledBitSetStrategy::new(size.into(), bits.into()) } - - pub fn arbitrary() -> SampledBitSetStrategy { - Self::sampled(..=u16::MAX as usize, ..=u16::MAX as usize) - } } impl Debug for ArrayStore { @@ -135,24 +117,12 @@ mod test { impl ArrayStore { const MAX: usize = u16::MAX as usize; - pub fn between(min: u16, max: u16) -> BitSetStrategy { - BitSetStrategy::new(min as usize, max as usize) - } - - pub fn masked(mask: ArrayStore) -> BitSetStrategy { - BitSetStrategy::masked(mask) - } - pub fn sampled( size: impl Into, bits: impl Into, ) -> SampledBitSetStrategy { SampledBitSetStrategy::new(size.into(), bits.into()) } - - pub fn arbitrary() -> SampledBitSetStrategy { - Self::sampled(..=4096_usize, ..=u16::MAX as usize) - } } impl Debug for Store { diff --git a/roaring/src/bitmap/cmp.rs b/roaring/src/bitmap/cmp.rs index 366ff090..370bfb08 100644 --- a/roaring/src/bitmap/cmp.rs +++ b/roaring/src/bitmap/cmp.rs @@ -101,7 +101,7 @@ impl RoaringBitmap { /// Returns the smallest container according to its key /// or both if the key is the same. It is useful when you need /// to iterate over two containers to do operations on them. -pub struct Pairs +pub(crate) struct Pairs where I: Iterator, J: Iterator, diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 2f6e934e..a02a8555 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -12,13 +12,13 @@ pub const ARRAY_LIMIT: u64 = 4096; use alloc::vec::Vec; #[derive(PartialEq, Clone)] -pub struct Container { +pub(crate) struct Container { pub key: u16, pub store: Store, } #[derive(Clone)] -pub struct Iter<'a> { +pub(crate) struct Iter<'a> { pub key: u16, inner: store::Iter<'a>, } diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index 8e1fb803..470bf7f9 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -1,6 +1,8 @@ use core::cmp::Ordering; +use core::mem::size_of; use core::ops::RangeBounds; +use crate::bitmap::store::BITMAP_LENGTH; use crate::RoaringBitmap; use super::container::Container; @@ -34,6 +36,139 @@ impl RoaringBitmap { RoaringBitmap { containers: (0..=u16::MAX).map(Container::full).collect() } } + /// Creates a `RoaringBitmap` from a byte slice, interpreting the bytes as a bitmap with a specified offset. + /// + /// # Arguments + /// + /// - `offset: u32` - The starting position in the bitmap where the byte slice will be applied, specified in bits. + /// This means that if `offset` is `n`, the first byte in the slice will correspond to the `n`th bit(0-indexed) in the bitmap. + /// - `bytes: &[u8]` - The byte slice containing the bitmap data. The bytes are interpreted in "Least-Significant-First" bit order. + /// + /// # Interpretation of `bytes` + /// + /// The `bytes` slice is interpreted in "Least-Significant-First" bit order. Each byte is read from least significant bit (LSB) to most significant bit (MSB). + /// For example, the byte `0b00000101` represents the bits `1, 0, 1, 0, 0, 0, 0, 0` in that order (see Examples section). + /// + /// + /// # Panics + /// + /// This function will panic if `bytes.len() + offset` is greater than 2^32. + /// + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let bytes = [0b00000101, 0b00000010, 0b00000000, 0b10000000]; + /// // ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ + /// // 76543210 98 + /// let rb = RoaringBitmap::from_lsb0_bytes(0, &bytes); + /// assert!(rb.contains(0)); + /// assert!(!rb.contains(1)); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(9)); + /// assert!(rb.contains(31)); + /// + /// let rb = RoaringBitmap::from_lsb0_bytes(8, &bytes); + /// assert!(rb.contains(8)); + /// assert!(!rb.contains(9)); + /// assert!(rb.contains(10)); + /// assert!(rb.contains(17)); + /// assert!(rb.contains(39)); + /// + /// let rb = RoaringBitmap::from_lsb0_bytes(3, &bytes); + /// assert!(rb.contains(3)); + /// assert!(!rb.contains(4)); + /// assert!(rb.contains(5)); + /// assert!(rb.contains(12)); + /// assert!(rb.contains(34)); + /// ``` + pub fn from_lsb0_bytes(offset: u32, mut bytes: &[u8]) -> RoaringBitmap { + fn shift_bytes(bytes: &[u8], amount: usize) -> Vec { + let mut result = Vec::with_capacity(bytes.len() + 1); + let mut carry = 0u8; + + for &byte in bytes { + let shifted = (byte << amount) | carry; + carry = byte >> (8 - amount); + result.push(shifted); + } + + if carry != 0 { + result.push(carry); + } + + result + } + if offset % 8 != 0 { + let shift = offset as usize % 8; + let shifted_bytes = shift_bytes(bytes, shift); + return RoaringBitmap::from_lsb0_bytes(offset - shift as u32, &shifted_bytes); + } + + if bytes.is_empty() { + return RoaringBitmap::new(); + } + + // Using inclusive range avoids overflow: the max exclusive value is 2^32 (u32::MAX + 1). + let end_bit_inc = u32::try_from(bytes.len()) + .ok() + .and_then(|len_bytes| len_bytes.checked_mul(8)) + // `bytes` is non-empty, so len_bits is > 0 + .and_then(|len_bits| offset.checked_add(len_bits - 1)) + .expect("offset + bytes.len() must be <= 2^32"); + + // offsets are in bytes + let (mut start_container, start_offset) = + (offset as usize >> 16, (offset as usize % 0x1_0000) / 8); + let (end_container_inc, end_offset) = + (end_bit_inc as usize >> 16, (end_bit_inc as usize % 0x1_0000 + 1) / 8); + + let n_containers_needed = end_container_inc + 1 - start_container; + let mut containers = Vec::with_capacity(n_containers_needed); + + // Handle a partial first container + if start_offset != 0 { + let end_byte = if end_container_inc == start_container { + end_offset + } else { + BITMAP_LENGTH * size_of::() + }; + + let (src, rest) = bytes.split_at(end_byte - start_offset); + bytes = rest; + + if let Some(container) = + Container::from_lsb0_bytes(start_container as u16, src, start_offset) + { + containers.push(container); + } + + start_container += 1; + } + + // Handle all full containers + for full_container_key in start_container..end_container_inc { + let (src, rest) = bytes.split_at(BITMAP_LENGTH * size_of::()); + bytes = rest; + + if let Some(container) = Container::from_lsb0_bytes(full_container_key as u16, src, 0) { + containers.push(container); + } + } + + // Handle a last container + if !bytes.is_empty() { + if let Some(container) = Container::from_lsb0_bytes(end_container_inc as u16, bytes, 0) + { + containers.push(container); + } + } + + RoaringBitmap { containers } + } + /// Adds a value to the set. /// /// Returns whether the value was absent from the set. diff --git a/roaring/src/bitmap/mod.rs b/roaring/src/bitmap/mod.rs index b5e7e972..a63ebf24 100644 --- a/roaring/src/bitmap/mod.rs +++ b/roaring/src/bitmap/mod.rs @@ -18,7 +18,7 @@ mod ops_with_serialized; #[cfg(feature = "serde")] mod serde; #[cfg(feature = "std")] -pub(crate) mod serialization; +mod serialization; use self::cmp::Pairs; pub use self::iter::IntoIter; diff --git a/roaring/src/bitmap/multiops.rs b/roaring/src/bitmap/multiops.rs index 66a4e085..f6bdec92 100644 --- a/roaring/src/bitmap/multiops.rs +++ b/roaring/src/bitmap/multiops.rs @@ -332,7 +332,7 @@ fn try_multi_or_ref<'a, E: 'a>( // Phase 3: Clean up let containers: Vec<_> = containers .into_iter() - .filter(|container| container.len() > 0) + .filter(|container| !container.is_empty()) .map(|c| { // Any borrowed bitmaps or arrays left over get cloned here let mut container = c.into_owned(); @@ -373,7 +373,7 @@ fn try_multi_xor_ref<'a, E: 'a>( // Phase 3: Clean up let containers: Vec<_> = containers .into_iter() - .filter(|container| container.len() > 0) + .filter(|container| !container.is_empty()) .map(|c| { // Any borrowed bitmaps or arrays left over get cloned here let mut container = c.into_owned(); diff --git a/roaring/src/bitmap/serde.rs b/roaring/src/bitmap/serde.rs index 26c5d6ab..08fa4627 100644 --- a/roaring/src/bitmap/serde.rs +++ b/roaring/src/bitmap/serde.rs @@ -72,11 +72,11 @@ mod test { } #[test] - fn test_bincode( + fn test_postcard( bitmap in RoaringBitmap::arbitrary(), ) { - let buffer = bincode::serialize(&bitmap).unwrap(); - prop_assert_eq!(bitmap, bincode::deserialize(&buffer).unwrap()); + let buffer = postcard::to_allocvec(&bitmap).unwrap(); + prop_assert_eq!(bitmap, postcard::from_bytes(&buffer).unwrap()); } } } diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index de7c6325..fa90cd13 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -4,18 +4,17 @@ use crate::RoaringBitmap; use bytemuck::cast_slice_mut; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use core::convert::Infallible; -use core::mem::size_of; use core::ops::RangeInclusive; use std::error::Error; use std::io; -pub const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; -pub const SERIAL_COOKIE: u16 = 12347; -pub const NO_OFFSET_THRESHOLD: usize = 4; +pub(crate) const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; +pub(crate) const SERIAL_COOKIE: u16 = 12347; +pub(crate) const NO_OFFSET_THRESHOLD: usize = 4; // Sizes of header structures -pub const DESCRIPTION_BYTES: usize = 4; -pub const OFFSET_BYTES: usize = 4; +pub(crate) const DESCRIPTION_BYTES: usize = 4; +pub(crate) const OFFSET_BYTES: usize = 4; impl RoaringBitmap { /// Return the size in bytes of the serialized output. @@ -47,139 +46,6 @@ impl RoaringBitmap { 8 + container_sizes } - /// Creates a `RoaringBitmap` from a byte slice, interpreting the bytes as a bitmap with a specified offset. - /// - /// # Arguments - /// - /// - `offset: u32` - The starting position in the bitmap where the byte slice will be applied, specified in bits. - /// This means that if `offset` is `n`, the first byte in the slice will correspond to the `n`th bit(0-indexed) in the bitmap. - /// - `bytes: &[u8]` - The byte slice containing the bitmap data. The bytes are interpreted in "Least-Significant-First" bit order. - /// - /// # Interpretation of `bytes` - /// - /// The `bytes` slice is interpreted in "Least-Significant-First" bit order. Each byte is read from least significant bit (LSB) to most significant bit (MSB). - /// For example, the byte `0b00000101` represents the bits `1, 0, 1, 0, 0, 0, 0, 0` in that order (see Examples section). - /// - /// - /// # Panics - /// - /// This function will panic if `bytes.len() + offset` is greater than 2^32. - /// - /// - /// # Examples - /// - /// ```rust - /// use roaring::RoaringBitmap; - /// - /// let bytes = [0b00000101, 0b00000010, 0b00000000, 0b10000000]; - /// // ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ - /// // 76543210 98 - /// let rb = RoaringBitmap::from_lsb0_bytes(0, &bytes); - /// assert!(rb.contains(0)); - /// assert!(!rb.contains(1)); - /// assert!(rb.contains(2)); - /// assert!(rb.contains(9)); - /// assert!(rb.contains(31)); - /// - /// let rb = RoaringBitmap::from_lsb0_bytes(8, &bytes); - /// assert!(rb.contains(8)); - /// assert!(!rb.contains(9)); - /// assert!(rb.contains(10)); - /// assert!(rb.contains(17)); - /// assert!(rb.contains(39)); - /// - /// let rb = RoaringBitmap::from_lsb0_bytes(3, &bytes); - /// assert!(rb.contains(3)); - /// assert!(!rb.contains(4)); - /// assert!(rb.contains(5)); - /// assert!(rb.contains(12)); - /// assert!(rb.contains(34)); - /// ``` - pub fn from_lsb0_bytes(offset: u32, mut bytes: &[u8]) -> RoaringBitmap { - fn shift_bytes(bytes: &[u8], amount: usize) -> Vec { - let mut result = Vec::with_capacity(bytes.len() + 1); - let mut carry = 0u8; - - for &byte in bytes { - let shifted = (byte << amount) | carry; - carry = byte >> (8 - amount); - result.push(shifted); - } - - if carry != 0 { - result.push(carry); - } - - result - } - if offset % 8 != 0 { - let shift = offset as usize % 8; - let shifted_bytes = shift_bytes(bytes, shift); - return RoaringBitmap::from_lsb0_bytes(offset - shift as u32, &shifted_bytes); - } - - if bytes.is_empty() { - return RoaringBitmap::new(); - } - - // Using inclusive range avoids overflow: the max exclusive value is 2^32 (u32::MAX + 1). - let end_bit_inc = u32::try_from(bytes.len()) - .ok() - .and_then(|len_bytes| len_bytes.checked_mul(8)) - // `bytes` is non-empty, so len_bits is > 0 - .and_then(|len_bits| offset.checked_add(len_bits - 1)) - .expect("offset + bytes.len() must be <= 2^32"); - - // offsets are in bytes - let (mut start_container, start_offset) = - (offset as usize >> 16, (offset as usize % 0x1_0000) / 8); - let (end_container_inc, end_offset) = - (end_bit_inc as usize >> 16, (end_bit_inc as usize % 0x1_0000 + 1) / 8); - - let n_containers_needed = end_container_inc + 1 - start_container; - let mut containers = Vec::with_capacity(n_containers_needed); - - // Handle a partial first container - if start_offset != 0 { - let end_byte = if end_container_inc == start_container { - end_offset - } else { - BITMAP_LENGTH * size_of::() - }; - - let (src, rest) = bytes.split_at(end_byte - start_offset); - bytes = rest; - - if let Some(container) = - Container::from_lsb0_bytes(start_container as u16, src, start_offset) - { - containers.push(container); - } - - start_container += 1; - } - - // Handle all full containers - for full_container_key in start_container..end_container_inc { - let (src, rest) = bytes.split_at(BITMAP_LENGTH * size_of::()); - bytes = rest; - - if let Some(container) = Container::from_lsb0_bytes(full_container_key as u16, src, 0) { - containers.push(container); - } - } - - // Handle a last container - if !bytes.is_empty() { - if let Some(container) = Container::from_lsb0_bytes(end_container_inc as u16, bytes, 0) - { - containers.push(container); - } - } - - RoaringBitmap { containers } - } - /// Serialize this bitmap into [the standard Roaring on-disk format][format]. /// This is compatible with the official C/C++, Java and Go implementations. /// diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index c8a4b8c9..26c3a7e5 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -18,7 +18,7 @@ use alloc::boxed::Box; use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; #[derive(Clone, Eq, PartialEq)] -pub struct ArrayStore { +pub(crate) struct ArrayStore { vec: Vec, } @@ -27,6 +27,7 @@ impl ArrayStore { ArrayStore { vec: vec![] } } + #[cfg(feature = "std")] pub fn with_capacity(capacity: usize) -> ArrayStore { ArrayStore { vec: Vec::with_capacity(capacity) } } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index aec2404a..a7a954b4 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -273,7 +273,7 @@ impl BitmapStore { self.bits.iter().zip(other.bits.iter()).all(|(&i1, &i2)| (i1 & i2) == i1) } - pub fn to_array_store(&self) -> ArrayStore { + pub(crate) fn to_array_store(&self) -> ArrayStore { let mut vec = Vec::with_capacity(self.len as usize); for (index, mut bit) in self.bits.iter().cloned().enumerate() { while bit != 0 { @@ -336,7 +336,7 @@ impl BitmapStore { self.bits.iter().zip(other.bits.iter()).map(|(&a, &b)| (a & b).count_ones() as u64).sum() } - pub fn intersection_len_array(&self, other: &ArrayStore) -> u64 { + pub(crate) fn intersection_len_array(&self, other: &ArrayStore) -> u64 { other .iter() .map(|&index| { @@ -356,6 +356,7 @@ impl BitmapStore { BitmapIter::new(self.bits) } + #[cfg(feature = "std")] pub fn as_array(&self) -> &[u64; BITMAP_LENGTH] { &self.bits } diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index cb9e3e3e..384b7fe8 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -11,7 +11,7 @@ use core::slice; pub use self::bitmap_store::BITMAP_LENGTH; use self::Store::{Array, Bitmap}; -pub use self::array_store::ArrayStore; +pub(crate) use self::array_store::ArrayStore; pub use self::bitmap_store::{BitmapIter, BitmapStore}; use crate::bitmap::container::ARRAY_LIMIT; @@ -20,13 +20,13 @@ use crate::bitmap::container::ARRAY_LIMIT; use alloc::boxed::Box; #[derive(Clone)] -pub enum Store { +pub(crate) enum Store { Array(ArrayStore), Bitmap(BitmapStore), } #[derive(Clone)] -pub enum Iter<'a> { +pub(crate) enum Iter<'a> { Array(slice::Iter<'a, u16>), Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), @@ -38,6 +38,7 @@ impl Store { Store::Array(ArrayStore::new()) } + #[cfg(feature = "std")] pub fn with_capacity(capacity: usize) -> Store { if capacity <= ARRAY_LIMIT as usize { Store::Array(ArrayStore::with_capacity(capacity)) diff --git a/roaring/src/lib.rs b/roaring/src/lib.rs index b44c19c2..1a78f890 100644 --- a/roaring/src/lib.rs +++ b/roaring/src/lib.rs @@ -13,6 +13,8 @@ #![warn(unsafe_op_in_unsafe_fn)] #![warn(variant_size_differences)] #![allow(unknown_lints)] // For clippy +#![allow(clippy::doc_overindented_list_items)] +#![deny(unnameable_types)] #[cfg(feature = "std")] extern crate byteorder; diff --git a/roaring/src/treemap/iter.rs b/roaring/src/treemap/iter.rs index f8094a37..57b39b90 100644 --- a/roaring/src/treemap/iter.rs +++ b/roaring/src/treemap/iter.rs @@ -1,5 +1,6 @@ use alloc::collections::{btree_map, BTreeMap}; use core::iter; +use core::ops::Add; use super::util; use crate::bitmap::IntoIter as IntoIter32; @@ -11,12 +12,26 @@ struct To64Iter<'a> { inner: Iter32<'a>, } +impl To64Iter<'_> { + fn advance_to(&mut self, n: u32) { + self.inner.advance_to(n) + } + + fn advance_back_to(&mut self, n: u32) { + self.inner.advance_back_to(n) + } +} + impl Iterator for To64Iter<'_> { type Item = u64; fn next(&mut self) -> Option { self.inner.next().map(|n| util::join(self.hi, n)) } + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + #[inline] fn fold(self, init: B, mut f: F) -> B where @@ -42,8 +57,8 @@ impl DoubleEndedIterator for To64Iter<'_> { } } -fn to64iter<'a>(t: (&'a u32, &'a RoaringBitmap)) -> To64Iter<'a> { - To64Iter { hi: *t.0, inner: t.1.iter() } +fn to64iter(t: (u32, &RoaringBitmap)) -> To64Iter<'_> { + To64Iter { hi: t.0, inner: t.1.iter() } } struct To64IntoIter { @@ -86,11 +101,6 @@ fn to64intoiter(t: (u32, RoaringBitmap)) -> To64IntoIter { To64IntoIter { hi: t.0, inner: t.1.into_iter() } } -type InnerIter<'a> = iter::FlatMap< - btree_map::Iter<'a, u32, RoaringBitmap>, - To64Iter<'a>, - fn((&'a u32, &'a RoaringBitmap)) -> To64Iter<'a>, ->; type InnerIntoIter = iter::FlatMap< btree_map::IntoIter, To64IntoIter, @@ -99,8 +109,9 @@ type InnerIntoIter = iter::FlatMap< /// An iterator for `RoaringTreemap`. pub struct Iter<'a> { - inner: InnerIter<'a>, - size_hint: u64, + outer: BitmapIter<'a>, + front: Option>, + back: Option>, } /// An iterator for `RoaringTreemap`. @@ -111,9 +122,84 @@ pub struct IntoIter { impl Iter<'_> { fn new(map: &BTreeMap) -> Iter { - let size_hint: u64 = map.iter().map(|(_, r)| r.len()).sum(); - let i = map.iter().flat_map(to64iter as _); - Iter { inner: i, size_hint } + let outer = BitmapIter::new(map); + Iter { outer, front: None, back: None } + } + + /// Advance the iterator to the first position where the item has a value >= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringTreemap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.iter(); + /// iter.advance_to(2); + /// + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn advance_to(&mut self, n: u64) { + let (key, index) = util::split(n); + + self.outer.advance_to(key); + + if self.front.is_none() { + let Some(next) = self.outer.next() else { + // if the current front iterator is empty or not yet initialized, + // but the outer bitmap iterator is empty, then consume the back + // iterator from the front if it is not also exhausted + if let Some(ref mut back) = self.back { + back.advance_to(index); + } + return; + }; + self.front = Some(to64iter(next)); + } + + if let Some(ref mut front) = self.front { + front.advance_to(index); + } + } + + /// Advance the back of the iterator to the first position where the item has a value <= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringTreemap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.iter(); + /// iter.advance_back_to(1); + /// + /// assert_eq!(iter.next_back(), Some(1)); + /// assert_eq!(iter.next_back(), None); + /// ``` + pub fn advance_back_to(&mut self, n: u64) { + let (key, index) = util::split(n); + + self.outer.advance_back_to(key); + + if self.back.is_none() { + let Some(next_back) = self.outer.next_back() else { + // if the current back iterator is empty or not yet initialized, + // but the outer bitmap iterator is empty, then consume the front + // iterator from the back if it is not also exhausted + if let Some(ref mut front) = self.front { + front.advance_back_to(index); + } + return; + }; + self.back = Some(to64iter(next_back)); + } + + if let Some(ref mut back) = self.back { + back.advance_back_to(index); + } } } @@ -129,47 +215,69 @@ impl Iterator for Iter<'_> { type Item = u64; fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() + if let Some(ref mut front) = &mut self.front { + if let Some(inner) = front.next() { + return Some(inner); + } + } + + let Some(outer_next) = self.outer.next() else { + // if the current front iterator is empty or not yet initialized, + // but the outer bitmap iterator is empty, then consume the back + // iterator from the front if it is not also exhausted + if let Some(ref mut back) = &mut self.back { + if let Some(next) = back.next() { + return Some(next); + } + } + return None; + }; + + self.front = Some(to64iter(outer_next)); + self.next() } fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) - } - } + let front_size_hint = self.front.as_ref().map_or(0, |f| f.size_hint().0); + let back_size_hint = self.back.as_ref().map_or(0, |b| b.size_hint().0); - #[inline] - fn fold(self, init: B, f: F) -> B - where - Self: Sized, - F: FnMut(B, Self::Item) -> B, - { - self.inner.fold(init, f) + let size_hint = front_size_hint + .saturating_add(back_size_hint) + .saturating_add(self.outer.remaining() as usize); + + (size_hint, Some(size_hint)) } } impl DoubleEndedIterator for Iter<'_> { fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() - } + if let Some(ref mut back) = &mut self.back { + if let Some(inner) = back.next_back() { + return Some(inner); + } + } - #[inline] - fn rfold(self, init: Acc, fold: Fold) -> Acc - where - Fold: FnMut(Acc, Self::Item) -> Acc, - { - self.inner.rfold(init, fold) + let Some(outer_next_back) = self.outer.next_back() else { + // if the current back iterator is empty or not yet initialized, + // but the outer bitmap iterator is empty, then consume the front + // iterator from the back if it is not also exhausted + if let Some(ref mut front) = &mut self.front { + if let Some(next_back) = front.next_back() { + return Some(next_back); + } + } + return None; + }; + + self.back = Some(to64iter(outer_next_back)); + self.next_back() } } #[cfg(target_pointer_width = "64")] impl ExactSizeIterator for Iter<'_> { fn len(&self) -> usize { - self.size_hint as usize + self.size_hint().0 } } @@ -258,7 +366,7 @@ impl RoaringTreemap { /// assert_eq!(bitmaps.next(), None); /// ``` pub fn bitmaps(&self) -> BitmapIter { - BitmapIter(self.map.iter()) + BitmapIter::new(&self.map) } /// Construct a RoaringTreemap from an iterator of partition number and RoaringBitmap pairs. @@ -415,17 +523,97 @@ impl RoaringTreemap { } } -pub struct BitmapIter<'a>(btree_map::Iter<'a, u32, RoaringBitmap>); +/// An iterator of `RoaringBitmap`s for `RoaringTreemap`. +pub struct BitmapIter<'a> { + treemap: &'a BTreeMap, + range: btree_map::Range<'a, u32, RoaringBitmap>, + latest_front_idx: Option, + latest_back_idx: Option, +} + +impl<'a> BitmapIter<'a> { + fn new(treemap: &'a BTreeMap) -> Self { + let range = treemap.range(..); + Self { treemap, range, latest_back_idx: None, latest_front_idx: None } + } + + fn advance_to(&mut self, new_front_idx: u32) { + match self.latest_back_idx { + Some(latest_back_idx) => match self.latest_front_idx { + Some(last_idx) if last_idx >= new_front_idx => {} + _ => { + // if asked to advance to beyond the back iterator, + // update the self.range iterator to be empty + if new_front_idx >= latest_back_idx { + self.range = self.treemap.range(0..1); + self.range.next_back(); + } else { + // otherwise shrink the remaining range from the front + self.range = self.treemap.range(new_front_idx..latest_back_idx); + } + + // self.range = self.treemap.range(new_front_idx..latest_back_idx); + } + }, + None => match self.latest_front_idx { + Some(latest_idx) if latest_idx >= new_front_idx => {} + _ => { + self.range = self.treemap.range(new_front_idx..); + } + }, + } + } + + fn advance_back_to(&mut self, new_back_idx: u32) { + match self.latest_front_idx { + Some(latest_front_idx) => match self.latest_back_idx { + // do nothing if asked to advance back to a higher index than the back is already at + Some(latest_back_idx) if latest_back_idx <= new_back_idx => {} + _ => { + // if asked to advance back to beyond the front iterator, + // update the self.range iterator to be empty + if new_back_idx <= latest_front_idx { + self.range = self.treemap.range(0..1); + self.range.next_back(); + } else { + // otherwise shrink the remaining range from the back + self.range = self.treemap.range((latest_front_idx + 1)..new_back_idx); + } + } + }, + None => match self.latest_back_idx { + Some(latest_back_idx) if latest_back_idx <= new_back_idx => {} + _ => { + self.range = self.treemap.range(..=new_back_idx); + } + }, + } + } + + fn remaining(&self) -> u64 { + let range = self.range.clone(); + range.fold(0, |acc, (_, bitmap)| acc.add(bitmap.len())) + } +} impl<'a> Iterator for BitmapIter<'a> { type Item = (u32, &'a RoaringBitmap); fn next(&mut self) -> Option { - self.0.next().map(|(&p, b)| (p, b)) + match self.range.next().map(|(&p, b)| (p, b)) { + None => { + self.latest_front_idx = None; + None + } + Some((next_idx, next_map)) => { + self.latest_front_idx = Some(next_idx); + Some((next_idx, next_map)) + } + } } fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() + self.range.size_hint() } } @@ -434,3 +622,18 @@ impl FromIterator<(u32, RoaringBitmap)> for RoaringTreemap { Self::from_bitmaps(iterator) } } + +impl DoubleEndedIterator for BitmapIter<'_> { + fn next_back(&mut self) -> Option { + match self.range.next_back().map(|(&p, b)| (p, b)) { + None => { + self.latest_back_idx = None; + None + } + Some((next_back_idx, next_back_map)) => { + self.latest_back_idx = Some(next_back_idx); + Some((next_back_idx, next_back_map)) + } + } + } +} diff --git a/roaring/src/treemap/mod.rs b/roaring/src/treemap/mod.rs index 45eea164..765f8482 100644 --- a/roaring/src/treemap/mod.rs +++ b/roaring/src/treemap/mod.rs @@ -17,7 +17,7 @@ mod serde; #[cfg(feature = "std")] mod serialization; -pub use self::iter::{IntoIter, Iter}; +pub use self::iter::{BitmapIter, IntoIter, Iter}; /// A compressed bitmap with u64 values. /// Implemented as a `BTreeMap` of `RoaringBitmap`s. diff --git a/roaring/src/treemap/serde.rs b/roaring/src/treemap/serde.rs index 46e7c2aa..6cec735f 100644 --- a/roaring/src/treemap/serde.rs +++ b/roaring/src/treemap/serde.rs @@ -72,11 +72,11 @@ mod test { } #[test] - fn test_bincode( + fn test_postcard( treemap in RoaringTreemap::arbitrary(), ) { - let buffer = bincode::serialize(&treemap).unwrap(); - prop_assert_eq!(treemap, bincode::deserialize(&buffer).unwrap()); + let buffer = postcard::to_allocvec(&treemap).unwrap(); + prop_assert_eq!(treemap, postcard::from_bytes(&buffer).unwrap()); } } } diff --git a/roaring/tests/treemap_iter_advance_to.rs b/roaring/tests/treemap_iter_advance_to.rs new file mode 100644 index 00000000..bead176b --- /dev/null +++ b/roaring/tests/treemap_iter_advance_to.rs @@ -0,0 +1,257 @@ +extern crate roaring; +use roaring::RoaringTreemap; + +#[test] +fn iter_basic() { + let bm = RoaringTreemap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter(); + i.advance_to(10); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)) + } + assert_eq!(i.next(), None); +} + +#[test] +fn to_missing_container() { + let bm = RoaringTreemap::from([1, 0x2_0001, 0x2_0002]); + let mut i = bm.iter(); + i.advance_to(0x1_0000); + assert_eq!(i.next(), Some(0x2_0001)); + assert_eq!(i.next(), Some(0x2_0002)); + assert_eq!(i.next(), None); +} + +#[test] +fn to_next_bitmap() { + let bm = + RoaringTreemap::from([1u64, 0x2_0001u64 + u32::MAX as u64, 0x2_0002u64 + u32::MAX as u64]); + let mut i = bm.iter(); + i.advance_to(0x1_0000); + assert_eq!(i.next(), Some(0x2_0001u64 + u32::MAX as u64)); + assert_eq!(i.next(), Some(0x2_0002u64 + u32::MAX as u64)); + assert_eq!(i.next(), None); +} + +#[test] +fn iter_back_basic() { + let bm = RoaringTreemap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter(); + i.advance_back_to(10); + assert_eq!(i.next(), Some(1)); + assert_eq!(i.next(), Some(2)); + assert_eq!(i.next_back(), Some(4)); + assert_eq!(i.next_back(), Some(3)); + + assert_eq!(i.next(), None); + assert_eq!(i.next_back(), None); +} + +#[test] +fn iter_advance_past_end() { + let bm = RoaringTreemap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter(); + i.advance_to(15); + assert_eq!(i.next(), None); + assert_eq!(i.size_hint(), (0, Some(0))); +} + +#[test] +fn iter_multi_container() { + let bm = RoaringTreemap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter(); + i.advance_to(3); + assert_eq!(i.size_hint(), (3, Some(3))); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.size_hint(), (2, Some(2))); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (1, Some(1))); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None); + assert_eq!(i.size_hint(), (0, Some(0))); +} + +#[test] +fn iter_multi_container_multi_bitmap() { + let bm = RoaringTreemap::from([ + 1, + 2, + 3, + 100000, + 100001, + 1u64 + u32::MAX as u64, + 2u64 + u32::MAX as u64, + 3u64 + u32::MAX as u64, + 100000u64 + u32::MAX as u64, + 100001u64 + u32::MAX as u64, + ]); + let mut i = bm.iter(); + i.advance_to(3); + assert_eq!(i.size_hint(), (8, Some(8))); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.size_hint(), (7, Some(7))); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (6, Some(6))); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.size_hint(), (5, Some(5))); + assert_eq!(i.next(), Some(1u64 + u32::MAX as u64)); + assert_eq!(i.size_hint(), (4, Some(4))); + assert_eq!(i.next(), Some(2u64 + u32::MAX as u64)); + assert_eq!(i.size_hint(), (3, Some(3))); + assert_eq!(i.next(), Some(3u64 + u32::MAX as u64)); + assert_eq!(i.size_hint(), (2, Some(2))); + assert_eq!(i.next(), Some(100000u64 + u32::MAX as u64)); + assert_eq!(i.size_hint(), (1, Some(1))); + assert_eq!(i.next(), Some(100001u64 + u32::MAX as u64)); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None); + assert_eq!(i.size_hint(), (0, Some(0))); +} + +#[test] +fn iter_empty() { + let bm = RoaringTreemap::new(); + let mut i = bm.iter(); + i.advance_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +} + +#[test] +fn iter_back_empty() { + let bm = RoaringTreemap::new(); + let mut i = bm.iter(); + i.advance_back_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +} + +/*#[test] +fn into_iter_basic() { + let bm = RoaringTreemap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.into_iter(); + i.advance_to(10); + let mut expected_size_hint = 4; + assert_eq!(i.size_hint(), (expected_size_hint, Some(expected_size_hint))); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)); + expected_size_hint -= 1; + assert_eq!(i.size_hint(), (expected_size_hint, Some(expected_size_hint))); + } + assert_eq!(i.next(), None); +}*/ + +/*#[test] +fn into_iter_multi_container() { + let bm = RoaringTreemap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.into_iter(); + i.advance_to(3); + assert_eq!(i.size_hint(), (3, Some(3))); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.next(), None); +}*/ + +/*#[test] +fn into_iter_empty() { + let bm = RoaringTreemap::new(); + let mut i = bm.into_iter(); + i.advance_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +}*/ + +/*#[test] +fn into_iter_back_empty() { + let bm = RoaringTreemap::new(); + let mut i = bm.into_iter(); + i.advance_back_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +}*/ + +#[test] +fn advance_to_with_tail_iter() { + let bm = RoaringTreemap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter(); + i.next_back(); + i.advance_to(100000); + assert_eq!(i.size_hint(), (1, Some(1))); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None); +} + +#[test] +fn advance_to_end() { + let bitmap = RoaringTreemap::from([u64::MAX]); + let mut iter = bitmap.iter(); + iter.advance_to(u64::MAX); + assert_eq!(Some(u64::MAX), iter.next()); + assert_eq!(None, iter.next()); +} + +#[test] +fn advance_bitset() { + let mut bitmap = RoaringTreemap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_to(0x1_0000 - 4); + // 0x1_0000 + 5 is not in the bitmap, so the next value will be the first value less than that + iter.advance_back_to(0x1_0000 + 5); + assert_eq!(iter.next(), Some(0x1_0000 - 4)); + assert_eq!(iter.next_back(), Some(0x1_0000 + 4)); + + assert_eq!(iter.next(), Some(0x1_0000 - 2)); + assert_eq!(iter.next(), Some(0x1_0000)); + assert_eq!(iter.next(), Some(0x1_0000 + 2)); + assert_eq!(iter.next(), None); + assert_eq!(iter.next_back(), None); +} + +#[test] +fn advance_bitset_current_word() { + let mut bitmap = RoaringTreemap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_to(4); + iter.advance_back_to(0x2_0000 - 4); + for i in (4..=(0x2_0000 - 4)).step_by(2) { + assert_eq!(iter.next(), Some(i)); + } + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_bitset_to_end_word() { + let mut bitmap = RoaringTreemap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_to(0x1_0000 - 4); + for i in ((0x1_0000 - 4)..=0x2_0000).step_by(2) { + assert_eq!(iter.next(), Some(i)); + } + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_bitset_back_to_start_word() { + let mut bitmap = RoaringTreemap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_back_to(0x1_0000 - 4); + for i in (0..=(0x1_0000 - 4)).step_by(2) { + assert_eq!(iter.next(), Some(i)); + } + assert_eq!(iter.next(), None); +}