Skip to content

Commit 27743fb

Browse files
authored
Merge pull request #306 from RoaringBitmap/insert-many
Improve the performance of the `Extend::extend` implementation
2 parents d2ec04f + d5dca9a commit 27743fb

File tree

4 files changed

+86
-14
lines changed

4 files changed

+86
-14
lines changed

roaring/src/bitmap/inherent.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,12 @@ impl RoaringBitmap {
6161
container.insert(index)
6262
}
6363

64-
/// Search for the specific container by the given key.
65-
/// Create a new container if not exist.
64+
/// Searches for the specific container by the given key.
65+
/// Creates a new container if it doesn't exist.
6666
///
6767
/// Returns the index of the target container.
68-
fn find_container_by_key(&mut self, key: u16) -> usize {
68+
#[inline]
69+
pub(crate) fn find_container_by_key(&mut self, key: u16) -> usize {
6970
match self.containers.binary_search_by_key(&key, |c| c.key) {
7071
Ok(loc) => loc,
7172
Err(loc) => {
@@ -89,6 +90,7 @@ impl RoaringBitmap {
8990
/// assert!(rb.contains(3));
9091
/// assert!(!rb.contains(4));
9192
/// ```
93+
#[inline]
9294
pub fn insert_range<R>(&mut self, range: R) -> u64
9395
where
9496
R: RangeBounds<u32>,
@@ -153,6 +155,7 @@ impl RoaringBitmap {
153155
///
154156
/// assert_eq!(rb.iter().collect::<Vec<u32>>(), vec![1, 3, 5]);
155157
/// ```
158+
#[inline]
156159
pub fn push(&mut self, value: u32) -> bool {
157160
let (key, index) = util::split(value);
158161

@@ -175,6 +178,7 @@ impl RoaringBitmap {
175178
/// # Panics
176179
///
177180
/// If debug_assertions are enabled and index is > self.max()
181+
#[inline]
178182
pub(crate) fn push_unchecked(&mut self, value: u32) {
179183
let (key, index) = util::split(value);
180184

@@ -204,6 +208,7 @@ impl RoaringBitmap {
204208
/// assert_eq!(rb.remove(3), false);
205209
/// assert_eq!(rb.contains(3), false);
206210
/// ```
211+
#[inline]
207212
pub fn remove(&mut self, value: u32) -> bool {
208213
let (key, index) = util::split(value);
209214
match self.containers.binary_search_by_key(&key, |c| c.key) {
@@ -234,6 +239,7 @@ impl RoaringBitmap {
234239
/// rb.insert(3);
235240
/// assert_eq!(rb.remove_range(2..4), 2);
236241
/// ```
242+
#[inline]
237243
pub fn remove_range<R>(&mut self, range: R) -> u64
238244
where
239245
R: RangeBounds<u32>,
@@ -277,6 +283,7 @@ impl RoaringBitmap {
277283
/// assert_eq!(rb.contains(1), true);
278284
/// assert_eq!(rb.contains(100), false);
279285
/// ```
286+
#[inline]
280287
pub fn contains(&self, value: u32) -> bool {
281288
let (key, index) = util::split(value);
282289
match self.containers.binary_search_by_key(&key, |c| c.key) {
@@ -304,6 +311,7 @@ impl RoaringBitmap {
304311
/// // 0xFFF is not contained
305312
/// assert!(!rb.contains_range(1..=0xFFF));
306313
/// ```
314+
#[inline]
307315
pub fn contains_range<R>(&self, range: R) -> bool
308316
where
309317
R: RangeBounds<u32>,
@@ -364,6 +372,7 @@ impl RoaringBitmap {
364372
/// assert_eq!(rb.range_cardinality(0x10000..0x10000), 0);
365373
/// assert_eq!(rb.range_cardinality(0x50000..=u32::MAX), 3);
366374
/// ```
375+
#[inline]
367376
pub fn range_cardinality<R>(&self, range: R) -> u64
368377
where
369378
R: RangeBounds<u32>,
@@ -423,6 +432,7 @@ impl RoaringBitmap {
423432
/// rb.clear();
424433
/// assert_eq!(rb.contains(1), false);
425434
/// ```
435+
#[inline]
426436
pub fn clear(&mut self) {
427437
self.containers.clear();
428438
}
@@ -440,6 +450,7 @@ impl RoaringBitmap {
440450
/// rb.insert(3);
441451
/// assert_eq!(rb.is_empty(), false);
442452
/// ```
453+
#[inline]
443454
pub fn is_empty(&self) -> bool {
444455
self.containers.is_empty()
445456
}
@@ -455,6 +466,7 @@ impl RoaringBitmap {
455466
/// assert!(!rb.is_empty());
456467
/// assert!(rb.is_full());
457468
/// ```
469+
#[inline]
458470
pub fn is_full(&self) -> bool {
459471
self.containers.len() == (u16::MAX as usize + 1)
460472
&& self.containers.iter().all(Container::is_full)
@@ -477,6 +489,7 @@ impl RoaringBitmap {
477489
/// rb.insert(4);
478490
/// assert_eq!(rb.len(), 2);
479491
/// ```
492+
#[inline]
480493
pub fn len(&self) -> u64 {
481494
self.containers.iter().map(|container| container.len()).sum()
482495
}
@@ -495,6 +508,7 @@ impl RoaringBitmap {
495508
/// rb.insert(4);
496509
/// assert_eq!(rb.min(), Some(3));
497510
/// ```
511+
#[inline]
498512
pub fn min(&self) -> Option<u32> {
499513
self.containers.first().and_then(|tail| tail.min().map(|min| util::join(tail.key, min)))
500514
}
@@ -533,6 +547,7 @@ impl RoaringBitmap {
533547
/// assert_eq!(rb.rank(3), 1);
534548
/// assert_eq!(rb.rank(10), 2)
535549
/// ```
550+
#[inline]
536551
pub fn rank(&self, value: u32) -> u64 {
537552
// if len becomes cached for RoaringBitmap: return len if len > value
538553

@@ -567,6 +582,7 @@ impl RoaringBitmap {
567582
/// assert_eq!(rb.select(2), Some(100));
568583
/// assert_eq!(rb.select(3), None);
569584
/// ```
585+
#[inline]
570586
pub fn select(&self, n: u32) -> Option<u32> {
571587
let mut n = n as u64;
572588

@@ -598,6 +614,7 @@ impl RoaringBitmap {
598614
/// let mut rb = RoaringBitmap::from_iter([1, 3, 7, 9]);
599615
/// rb.remove_smallest(2);
600616
/// assert_eq!(rb, RoaringBitmap::from_iter([7, 9]));
617+
#[inline]
601618
pub fn remove_smallest(&mut self, mut n: u64) {
602619
// remove containers up to the front of the target
603620
let position = self.containers.iter().position(|container| {
@@ -632,6 +649,7 @@ impl RoaringBitmap {
632649
/// assert_eq!(rb, RoaringBitmap::from_iter([1, 5]));
633650
/// rb.remove_biggest(1);
634651
/// assert_eq!(rb, RoaringBitmap::from_iter([1]));
652+
#[inline]
635653
pub fn remove_biggest(&mut self, mut n: u64) {
636654
// remove containers up to the back of the target
637655
let position = self.containers.iter().rposition(|container| {

roaring/src/bitmap/iter.rs

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -715,18 +715,72 @@ impl<'a> FromIterator<&'a u32> for RoaringBitmap {
715715
}
716716

717717
impl Extend<u32> for RoaringBitmap {
718-
fn extend<I: IntoIterator<Item = u32>>(&mut self, iterator: I) {
719-
for value in iterator {
720-
self.insert(value);
718+
/// Inserts multiple values into the bitmap.
719+
/// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value.
720+
///
721+
/// The provided integer values don't have to be in sorted order, but it may be preferable
722+
/// to sort them from a performance point of view.
723+
///
724+
/// # Examples
725+
///
726+
/// ```rust
727+
/// use roaring::RoaringBitmap;
728+
///
729+
/// let mut rb = RoaringBitmap::new();
730+
/// rb.extend([1, 2, 3, 4, 1500, 1508, 1507, 1509]);
731+
/// assert!(rb.contains(2));
732+
/// assert!(rb.contains(1508));
733+
/// assert!(!rb.contains(5));
734+
/// ```
735+
#[inline]
736+
fn extend<I: IntoIterator<Item = u32>>(&mut self, values: I) {
737+
let mut values = values.into_iter();
738+
let value = match values.next() {
739+
Some(value) => value,
740+
None => return,
741+
};
742+
743+
let (mut currenthb, lowbit) = util::split(value);
744+
let mut current_container_index = self.find_container_by_key(currenthb);
745+
let mut current_cont = &mut self.containers[current_container_index];
746+
current_cont.insert(lowbit);
747+
748+
for val in values {
749+
let (newhb, lowbit) = util::split(val);
750+
if currenthb == newhb {
751+
// easy case, this could be quite frequent
752+
current_cont.insert(lowbit);
753+
} else {
754+
currenthb = newhb;
755+
current_container_index = self.find_container_by_key(currenthb);
756+
current_cont = &mut self.containers[current_container_index];
757+
current_cont.insert(lowbit);
758+
}
721759
}
722760
}
723761
}
724762

725763
impl<'a> Extend<&'a u32> for RoaringBitmap {
726-
fn extend<I: IntoIterator<Item = &'a u32>>(&mut self, iterator: I) {
727-
for value in iterator {
728-
self.insert(*value);
729-
}
764+
/// Inserts multiple values into the bitmap.
765+
/// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value.
766+
///
767+
/// The provided integer values don't have to be in sorted order, but it may be preferable
768+
/// to sort them from a performance point of view.
769+
///
770+
/// # Examples
771+
///
772+
/// ```rust
773+
/// use roaring::RoaringBitmap;
774+
///
775+
/// let mut rb = RoaringBitmap::new();
776+
/// rb.extend([1, 2, 3, 4, 1500, 1508, 1507, 1509]);
777+
/// assert!(rb.contains(2));
778+
/// assert!(rb.contains(1508));
779+
/// assert!(!rb.contains(5));
780+
/// ```
781+
#[inline]
782+
fn extend<I: IntoIterator<Item = &'a u32>>(&mut self, values: I) {
783+
self.extend(values.into_iter().copied());
730784
}
731785
}
732786

roaring/src/bitmap/store/bitmap_store.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ impl BitmapStore {
102102
pub fn insert(&mut self, index: u16) -> bool {
103103
let (key, bit) = (key(index), bit(index));
104104
let old_w = self.bits[key];
105-
let new_w = old_w | 1 << bit;
105+
let new_w = old_w | (1 << bit);
106106
let inserted = (old_w ^ new_w) >> bit; // 1 or 0
107107
self.bits[key] = new_w;
108108
self.len += inserted;
@@ -634,7 +634,7 @@ impl BitOrAssign<&ArrayStore> for BitmapStore {
634634
for &index in rhs.iter() {
635635
let (key, bit) = (key(index), bit(index));
636636
let old_w = self.bits[key];
637-
let new_w = old_w | 1 << bit;
637+
let new_w = old_w | (1 << bit);
638638
self.len += (old_w ^ new_w) >> bit;
639639
self.bits[key] = new_w;
640640
}
@@ -679,7 +679,7 @@ impl BitXorAssign<&ArrayStore> for BitmapStore {
679679
for &index in rhs.iter() {
680680
let (key, bit) = (key(index), bit(index));
681681
let old_w = self.bits[key];
682-
let new_w = old_w ^ 1 << bit;
682+
let new_w = old_w ^ (1 << bit);
683683
len += 1 - 2 * (((1 << bit) & old_w) >> bit) as i64; // +1 or -1
684684
self.bits[key] = new_w;
685685
}

roaring/src/treemap/inherent.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ impl RoaringTreemap {
409409
for (&key, bitmap) in &self.map {
410410
let len = bitmap.len();
411411
if len > n {
412-
return Some((key as u64) << 32 | bitmap.select(n as u32).unwrap() as u64);
412+
return Some(((key as u64) << 32) | bitmap.select(n as u32).unwrap() as u64);
413413
}
414414
n -= len;
415415
}

0 commit comments

Comments
 (0)