From 24a0c64097f3dec5136a2700d2a3a8a50fe1411c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 9 Dec 2024 22:50:46 +0100 Subject: [PATCH 1/8] Introduce the RoaringBitmap::insert_many method --- roaring/src/bitmap/inherent.rs | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index 06d293e4..25bfcf04 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -61,8 +61,8 @@ impl RoaringBitmap { container.insert(index) } - /// Search for the specific container by the given key. - /// Create a new container if not exist. + /// Searches for the specific container by the given key. + /// Creates a new container if it doesn't exist. /// /// Return the index of the target container. fn find_container_by_key(&mut self, key: u16) -> usize { @@ -75,6 +75,40 @@ impl RoaringBitmap { } } + /// Inserts multiple values and returns the count of new additions. + /// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value. + /// + /// The provided integers values don't have to be in sorted order, but it may be preferable + /// to sort them from a performance point of view. 
+ #[inline] + pub fn insert_many(&mut self, values: &[u32]) -> u64 { + let (&value, values) = match values.split_first() { + Some(split) => split, + None => return 0, + }; + + let mut inserted = 0; + let (mut currenthb, lowbit) = util::split(value); + let mut current_container_index = self.find_container_by_key(currenthb); + let mut current_cont = &mut self.containers[current_container_index]; + inserted += current_cont.insert(lowbit) as u64; + + for val in values.iter().copied() { + let (newhb, lowbit) = util::split(val); + if currenthb == newhb { + // easy case, this could be quite frequent + inserted += current_cont.insert(lowbit) as u64; + } else { + currenthb = newhb; + current_container_index = self.find_container_by_key(currenthb); + current_cont = &mut self.containers[current_container_index]; + inserted += current_cont.insert(lowbit) as u64; + } + } + + inserted + } + /// Inserts a range of values. /// Returns the number of inserted values. /// From fcb7dfbc398f258c18330d433fbed9a3e8a76652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Jan 2025 15:26:25 +0100 Subject: [PATCH 2/8] Make the insert_many method accept an Iterator --- roaring/src/bitmap/inherent.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index 25bfcf04..c20bad42 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -80,10 +80,26 @@ impl RoaringBitmap { /// /// The provided integers values don't have to be in sorted order, but it may be preferable /// to sort them from a performance point of view. 
+ /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::new(); + /// rb.insert_many([1, 2, 3, 4, 1500, 1508, 1507, 1509]); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(1508)); + /// assert!(!rb.contains(5)); + /// ``` #[inline] - pub fn insert_many(&mut self, values: &[u32]) -> u64 { - let (&value, values) = match values.split_first() { - Some(split) => split, + pub fn insert_many(&mut self, values: I) -> u64 + where + I: IntoIterator, + { + let mut values = values.into_iter(); + let value = match values.next() { + Some(value) => value, None => return 0, }; @@ -93,7 +109,7 @@ impl RoaringBitmap { let mut current_cont = &mut self.containers[current_container_index]; inserted += current_cont.insert(lowbit) as u64; - for val in values.iter().copied() { + for val in values { let (newhb, lowbit) = util::split(val); if currenthb == newhb { // easy case, this could be quite frequent From 245d6041ce86dea4a810e0506d2f9306d34ec241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Jan 2025 15:29:19 +0100 Subject: [PATCH 3/8] Mark many inherent method inline --- roaring/src/bitmap/inherent.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index c20bad42..f3644d25 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -65,6 +65,7 @@ impl RoaringBitmap { /// Creates a new container if it doesn't exist. /// /// Return the index of the target container. 
+ #[inline] fn find_container_by_key(&mut self, key: u16) -> usize { match self.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => loc, @@ -139,6 +140,7 @@ impl RoaringBitmap { /// assert!(rb.contains(3)); /// assert!(!rb.contains(4)); /// ``` + #[inline] pub fn insert_range(&mut self, range: R) -> u64 where R: RangeBounds, @@ -203,6 +205,7 @@ impl RoaringBitmap { /// /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); /// ``` + #[inline] pub fn push(&mut self, value: u32) -> bool { let (key, index) = util::split(value); @@ -225,6 +228,7 @@ impl RoaringBitmap { /// # Panics /// /// If debug_assertions enabled and index is > self.max() + #[inline] pub(crate) fn push_unchecked(&mut self, value: u32) { let (key, index) = util::split(value); @@ -254,6 +258,7 @@ impl RoaringBitmap { /// assert_eq!(rb.remove(3), false); /// assert_eq!(rb.contains(3), false); /// ``` + #[inline] pub fn remove(&mut self, value: u32) -> bool { let (key, index) = util::split(value); match self.containers.binary_search_by_key(&key, |c| c.key) { @@ -284,6 +289,7 @@ impl RoaringBitmap { /// rb.insert(3); /// assert_eq!(rb.remove_range(2..4), 2); /// ``` + #[inline] pub fn remove_range(&mut self, range: R) -> u64 where R: RangeBounds, @@ -327,6 +333,7 @@ impl RoaringBitmap { /// assert_eq!(rb.contains(1), true); /// assert_eq!(rb.contains(100), false); /// ``` + #[inline] pub fn contains(&self, value: u32) -> bool { let (key, index) = util::split(value); match self.containers.binary_search_by_key(&key, |c| c.key) { @@ -354,6 +361,7 @@ impl RoaringBitmap { /// // 0xFFF is not contained /// assert!(!rb.contains_range(1..=0xFFF)); /// ``` + #[inline] pub fn contains_range(&self, range: R) -> bool where R: RangeBounds, @@ -414,6 +422,7 @@ impl RoaringBitmap { /// assert_eq!(rb.range_cardinality(0x10000..0x10000), 0); /// assert_eq!(rb.range_cardinality(0x50000..=u32::MAX), 3); /// ``` + #[inline] pub fn range_cardinality(&self, range: R) -> u64 where R: RangeBounds, @@ -473,6 +482,7 @@ 
impl RoaringBitmap { /// rb.clear(); /// assert_eq!(rb.contains(1), false); /// ``` + #[inline] pub fn clear(&mut self) { self.containers.clear(); } @@ -490,6 +500,7 @@ impl RoaringBitmap { /// rb.insert(3); /// assert_eq!(rb.is_empty(), false); /// ``` + #[inline] pub fn is_empty(&self) -> bool { self.containers.is_empty() } @@ -505,6 +516,7 @@ impl RoaringBitmap { /// assert!(!rb.is_empty()); /// assert!(rb.is_full()); /// ``` + #[inline] pub fn is_full(&self) -> bool { self.containers.len() == (u16::MAX as usize + 1) && self.containers.iter().all(Container::is_full) @@ -527,6 +539,7 @@ impl RoaringBitmap { /// rb.insert(4); /// assert_eq!(rb.len(), 2); /// ``` + #[inline] pub fn len(&self) -> u64 { self.containers.iter().map(|container| container.len()).sum() } @@ -545,6 +558,7 @@ impl RoaringBitmap { /// rb.insert(4); /// assert_eq!(rb.min(), Some(3)); /// ``` + #[inline] pub fn min(&self) -> Option { self.containers.first().and_then(|tail| tail.min().map(|min| util::join(tail.key, min))) } @@ -583,6 +597,7 @@ impl RoaringBitmap { /// assert_eq!(rb.rank(3), 1); /// assert_eq!(rb.rank(10), 2) /// ``` + #[inline] pub fn rank(&self, value: u32) -> u64 { // if len becomes cached for RoaringBitmap: return len if len > value @@ -617,6 +632,7 @@ impl RoaringBitmap { /// assert_eq!(rb.select(2), Some(100)); /// assert_eq!(rb.select(3), None); /// ``` + #[inline] pub fn select(&self, n: u32) -> Option { let mut n = n as u64; @@ -648,6 +664,7 @@ impl RoaringBitmap { /// let mut rb = RoaringBitmap::from_iter([1, 3, 7, 9]); /// rb.remove_smallest(2); /// assert_eq!(rb, RoaringBitmap::from_iter([7, 9])); + #[inline] pub fn remove_smallest(&mut self, mut n: u64) { // remove containers up to the front of the target let position = self.containers.iter().position(|container| { @@ -682,6 +699,7 @@ impl RoaringBitmap { /// assert_eq!(rb, RoaringBitmap::from_iter([1, 5])); /// rb.remove_biggest(1); /// assert_eq!(rb, RoaringBitmap::from_iter([1])); + #[inline] pub fn 
remove_biggest(&mut self, mut n: u64) { // remove containers up to the back of the target let position = self.containers.iter().rposition(|container| { From 1eb31a0855db35b9ef522b76b124cc729a1095f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Jan 2025 15:48:43 +0100 Subject: [PATCH 4/8] Replace insert_many by the Extend trait --- roaring/src/bitmap/inherent.rs | 52 +-------------------------- roaring/src/bitmap/iter.rs | 66 ++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index f3644d25..8e1fb803 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -66,7 +66,7 @@ impl RoaringBitmap { /// /// Return the index of the target container. #[inline] - fn find_container_by_key(&mut self, key: u16) -> usize { + pub(crate) fn find_container_by_key(&mut self, key: u16) -> usize { match self.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => loc, Err(loc) => { @@ -76,56 +76,6 @@ impl RoaringBitmap { } } - /// Inserts multiple values and returns the count of new additions. - /// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value. - /// - /// The provided integers values don't have to be in sorted order, but it may be preferable - /// to sort them from a performance point of view. 
- /// - /// # Examples - /// - /// ```rust - /// use roaring::RoaringBitmap; - /// - /// let mut rb = RoaringBitmap::new(); - /// rb.insert_many([1, 2, 3, 4, 1500, 1508, 1507, 1509]); - /// assert!(rb.contains(2)); - /// assert!(rb.contains(1508)); - /// assert!(!rb.contains(5)); - /// ``` - #[inline] - pub fn insert_many(&mut self, values: I) -> u64 - where - I: IntoIterator, - { - let mut values = values.into_iter(); - let value = match values.next() { - Some(value) => value, - None => return 0, - }; - - let mut inserted = 0; - let (mut currenthb, lowbit) = util::split(value); - let mut current_container_index = self.find_container_by_key(currenthb); - let mut current_cont = &mut self.containers[current_container_index]; - inserted += current_cont.insert(lowbit) as u64; - - for val in values { - let (newhb, lowbit) = util::split(val); - if currenthb == newhb { - // easy case, this could be quite frequent - inserted += current_cont.insert(lowbit) as u64; - } else { - currenthb = newhb; - current_container_index = self.find_container_by_key(currenthb); - current_cont = &mut self.containers[current_container_index]; - inserted += current_cont.insert(lowbit) as u64; - } - } - - inserted - } - /// Inserts a range of values. /// Returns the number of inserted values. /// diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 31273cfc..ae8d3e4d 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -715,18 +715,70 @@ impl<'a> FromIterator<&'a u32> for RoaringBitmap { } impl Extend for RoaringBitmap { - fn extend>(&mut self, iterator: I) { - for value in iterator { - self.insert(value); + /// Inserts multiple values into the bitmap. + /// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value. + /// + /// The provided integer values don't have to be in sorted order, but it may be preferable + /// to sort them from a performance point of view.
+ /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::new(); + /// rb.extend([1, 2, 3, 4, 1500, 1508, 1507, 1509]); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(1508)); + /// assert!(!rb.contains(5)); + /// ``` + fn extend>(&mut self, values: I) { + let mut values = values.into_iter(); + let value = match values.next() { + Some(value) => value, + None => return, + }; + + let (mut currenthb, lowbit) = util::split(value); + let mut current_container_index = self.find_container_by_key(currenthb); + let mut current_cont = &mut self.containers[current_container_index]; + current_cont.insert(lowbit) as u64; + + for val in values { + let (newhb, lowbit) = util::split(val); + if currenthb == newhb { + // easy case, this could be quite frequent + current_cont.insert(lowbit) as u64; + } else { + currenthb = newhb; + current_container_index = self.find_container_by_key(currenthb); + current_cont = &mut self.containers[current_container_index]; + current_cont.insert(lowbit) as u64; + } } } } impl<'a> Extend<&'a u32> for RoaringBitmap { - fn extend>(&mut self, iterator: I) { - for value in iterator { - self.insert(*value); - } + /// Inserts multiple values, given by reference, into the bitmap. + /// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value. + /// + /// The provided integer values don't have to be in sorted order, but it may be preferable + /// to sort them from a performance point of view.
+ /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::new(); + /// rb.extend(&[1, 2, 3, 4, 1500, 1508, 1507, 1509]); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(1508)); + /// assert!(!rb.contains(5)); + /// ``` + fn extend>(&mut self, values: I) { + self.extend(values.into_iter().copied()); } } From d5dca9a9e75c91e11b53d8f451ab5de6114fd82b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Jan 2025 15:54:47 +0100 Subject: [PATCH 5/8] Make clippy happy --- roaring/src/bitmap/iter.rs | 8 +++++--- roaring/src/bitmap/store/bitmap_store.rs | 6 +++--- roaring/src/treemap/inherent.rs | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index ae8d3e4d..537acc3a 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -732,6 +732,7 @@ impl Extend for RoaringBitmap { /// assert!(rb.contains(1508)); /// assert!(!rb.contains(5)); /// ``` + #[inline] fn extend>(&mut self, values: I) { let mut values = values.into_iter(); let value = match values.next() { @@ -742,18 +743,18 @@ impl Extend for RoaringBitmap { let (mut currenthb, lowbit) = util::split(value); let mut current_container_index = self.find_container_by_key(currenthb); let mut current_cont = &mut self.containers[current_container_index]; - current_cont.insert(lowbit) as u64; + current_cont.insert(lowbit); for val in values { let (newhb, lowbit) = util::split(val); if currenthb == newhb { // easy case, this could be quite frequent - current_cont.insert(lowbit) as u64; + current_cont.insert(lowbit); } else { currenthb = newhb; current_container_index = self.find_container_by_key(currenthb); current_cont = &mut self.containers[current_container_index]; - current_cont.insert(lowbit) as u64; + current_cont.insert(lowbit); } } } @@ -777,6 +778,7 @@ impl<'a> Extend<&'a u32> for RoaringBitmap { /// assert!(rb.contains(1508)); ///
assert!(!rb.contains(5)); /// ``` + #[inline] fn extend>(&mut self, values: I) { self.extend(values.into_iter().copied()); } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index cf4649c1..aec2404a 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -102,7 +102,7 @@ impl BitmapStore { pub fn insert(&mut self, index: u16) -> bool { let (key, bit) = (key(index), bit(index)); let old_w = self.bits[key]; - let new_w = old_w | 1 << bit; + let new_w = old_w | (1 << bit); let inserted = (old_w ^ new_w) >> bit; // 1 or 0 self.bits[key] = new_w; self.len += inserted; @@ -634,7 +634,7 @@ impl BitOrAssign<&ArrayStore> for BitmapStore { for &index in rhs.iter() { let (key, bit) = (key(index), bit(index)); let old_w = self.bits[key]; - let new_w = old_w | 1 << bit; + let new_w = old_w | (1 << bit); self.len += (old_w ^ new_w) >> bit; self.bits[key] = new_w; } @@ -679,7 +679,7 @@ impl BitXorAssign<&ArrayStore> for BitmapStore { for &index in rhs.iter() { let (key, bit) = (key(index), bit(index)); let old_w = self.bits[key]; - let new_w = old_w ^ 1 << bit; + let new_w = old_w ^ (1 << bit); len += 1 - 2 * (((1 << bit) & old_w) >> bit) as i64; // +1 or -1 self.bits[key] = new_w; } diff --git a/roaring/src/treemap/inherent.rs b/roaring/src/treemap/inherent.rs index 9db01ebf..63e2cb3b 100644 --- a/roaring/src/treemap/inherent.rs +++ b/roaring/src/treemap/inherent.rs @@ -409,7 +409,7 @@ impl RoaringTreemap { for (&key, bitmap) in &self.map { let len = bitmap.len(); if len > n { - return Some((key as u64) << 32 | bitmap.select(n as u32).unwrap() as u64); + return Some(((key as u64) << 32) | bitmap.select(n as u32).unwrap() as u64); } n -= len; } From 33827058052aae3fceaa6ea13cdb3bff4d35a80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Jan 2025 15:38:27 +0100 Subject: [PATCH 6/8] Upgrade compatible dependencies --- Cargo.toml | 10 +++++----- 1 file 
changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 945fa1c2..c6ee9061 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,16 +6,16 @@ resolver = "2" roaring = { path = "roaring" } bincode = "1.3.3" -bytemuck = "1.16.1" +bytemuck = "1.21.0" byteorder = "1.5.0" criterion = "0.3" git2 = { version = "0.19", default-features = false } indicatif = "0.17" itertools = "0.13" -once_cell = "1.9" -proptest = "1.5.0" -serde = "1.0.203" -serde_json = "1.0.120" +once_cell = "1.20" +proptest = "1.6.0" +serde = "1.0.217" +serde_json = "1.0.135" zip = { version = "0.6", default-features = false } [profile.test] From fce866f4c782e15b9eb6137eeb3ee4ca9cdf095a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Jan 2025 15:41:26 +0100 Subject: [PATCH 7/8] Upgrade incompatible dependencies --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c6ee9061..3256f873 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,10 @@ roaring = { path = "roaring" } bincode = "1.3.3" bytemuck = "1.21.0" byteorder = "1.5.0" -criterion = "0.3" -git2 = { version = "0.19", default-features = false } +criterion = "0.5" +git2 = { version = "0.20", default-features = false } indicatif = "0.17" -itertools = "0.13" +itertools = "0.14" once_cell = "1.20" proptest = "1.6.0" serde = "1.0.217" From 625f66e8cc15c37e7ed658ee12e9fb988516a2dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 8 Jan 2025 11:09:58 +0100 Subject: [PATCH 8/8] Bump version to v0.10.10 --- roaring/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roaring/Cargo.toml b/roaring/Cargo.toml index 74e8c2cb..746384a7 100644 --- a/roaring/Cargo.toml +++ b/roaring/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "roaring" -version = "0.10.9" +version = "0.10.10" rust-version = "1.65.0" authors = ["Wim Looman ", "Kerollmops "] description = "A better compressed bitset - pure Rust 
implementation"