From 9b67893b7dbfe5091b6fd3352d3c60ac651af2a1 Mon Sep 17 00:00:00 2001 From: Joseph Glanville Date: Mon, 23 Mar 2020 10:57:08 +0700 Subject: [PATCH 01/83] WIP: Run container --- src/bitmap/container.rs | 24 ++- src/bitmap/fmt.rs | 5 +- src/bitmap/inherent.rs | 18 ++ src/bitmap/serialization.rs | 171 +++++++++++++--- src/bitmap/store.rs | 387 +++++++++++++++++++++++++++++++++--- tests/bitmapwithruns.bin | Bin 0 -> 48056 bytes tests/clone.rs | 13 ++ tests/lib.rs | 22 ++ tests/serialization.rs | 34 +++- tests/size_hint.rs | 28 +++ 10 files changed, 635 insertions(+), 67 deletions(-) create mode 100644 tests/bitmapwithruns.bin diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index e80cdef23..22f04427e 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -3,7 +3,8 @@ use std::fmt; use super::store::{self, Store}; use super::util; -const ARRAY_LIMIT: u64 = 4096; +pub const ARRAY_LIMIT: u64 = 4096; +pub const RUN_MAX_SIZE: u64 = 2048; #[derive(PartialEq, Clone)] pub struct Container { @@ -103,7 +104,7 @@ impl Container { self.store.max() } - fn ensure_correct_store(&mut self) { + fn ensure_correct_store(&mut self) -> bool { let new_store = match (&self.store, self.len) { (store @ &Store::Bitmap(..), len) if len <= ARRAY_LIMIT => Some(store.to_array()), (store @ &Store::Array(..), len) if len > ARRAY_LIMIT => Some(store.to_bitmap()), @@ -111,6 +112,25 @@ impl Container { }; if let Some(new_store) = new_store { self.store = new_store; + true + } else { + false + } + } + + pub fn optimize(&mut self) -> bool { + match self.store { + Store::Array(..) | Store::Bitmap(..) => { + let num_runs = self.store.count_runs(); + if num_runs <= RUN_MAX_SIZE && num_runs <= self.len / 2 { + // convert to run container + self.store = self.store.to_run(); + true + } else { + self.ensure_correct_store() + } + } + Store::Run(..) 
=> self.ensure_correct_store(), } } } diff --git a/src/bitmap/fmt.rs b/src/bitmap/fmt.rs index 7dca81705..bb4c66be7 100644 --- a/src/bitmap/fmt.rs +++ b/src/bitmap/fmt.rs @@ -9,10 +9,11 @@ impl fmt::Debug for RoaringBitmap { } else { write!( f, - "RoaringBitmap<{:?} values between {:?} and {:?}>", + "RoaringBitmap<{:?} values between {:?} and {:?} in {:?} containers>", self.len(), self.min().unwrap(), - self.max().unwrap() + self.max().unwrap(), + self.containers.len(), ) } } diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index c1ef81c9d..935c7fd60 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -247,6 +247,24 @@ impl RoaringBitmap { .last() .map(|tail| util::join(tail.key, tail.max())) } + + // TODO(jpg) actually come up with example that illustrates creation of run containers + /// Optimizes the container storage for this bitmap. + /// Returns true if the container storage was modified, false if not. + /// + /// # Examples + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::from_iter(1000..100000) + /// rb.optimize() + /// ``` + pub fn optimize(&mut self) -> bool { + let mut changed = false; + for container in &mut self.containers { + changed |= container.optimize() + } + changed + } } impl Default for RoaringBitmap { diff --git a/src/bitmap/serialization.rs b/src/bitmap/serialization.rs index 3ecff3948..2689ed41e 100644 --- a/src/bitmap/serialization.rs +++ b/src/bitmap/serialization.rs @@ -3,12 +3,25 @@ use std::io; use super::container::Container; use super::store::Store; +use crate::bitmap::container::ARRAY_LIMIT; +use crate::bitmap::store::{Interval, BITMAP_LENGTH}; use crate::RoaringBitmap; const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; const SERIAL_COOKIE: u16 = 12347; -// TODO: Need this once run containers are supported -// const NO_OFFSET_THRESHOLD: u8 = 4; +const NO_OFFSET_THRESHOLD: usize = 4; + +// Sizes of header structures +const COOKIE_BYTES: usize = 4; +const SIZE_BYTES: usize = 4; +const 
DESCRIPTION_BYTES: usize = 4; +const OFFSET_BYTES: usize = 4; + +// Sizes of container structures +const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; +const ARRAY_ELEMENT_BYTES: usize = 2; +const RUN_NUM_BYTES: usize = 2; +const RUN_ELEMENT_BYTES: usize = 4; impl RoaringBitmap { /// Return the size in bytes of the serialized output. @@ -27,17 +40,23 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialized_size(&self) -> usize { + let mut has_run_containers = false; + let size = self.containers.len(); let container_sizes: usize = self .containers .iter() .map(|container| match container.store { - Store::Array(ref values) => 8 + values.len() * 2, - Store::Bitmap(..) => 8 + 8 * 1024, + Store::Array(ref values) => values.len() * ARRAY_ELEMENT_BYTES, + Store::Bitmap(..) => BITMAP_BYTES, + Store::Run(ref intervals) => { + has_run_containers = true; + RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * intervals.len()) + } }) .sum(); // header + container sizes - 8 + container_sizes + header_size(size, has_run_containers) + container_sizes } /// Serialize this bitmap into [the standard Roaring on-disk format][format]. 
@@ -58,27 +77,61 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialize_into(&self, mut writer: W) -> io::Result<()> { - writer.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; - writer.write_u32::(self.containers.len() as u32)?; + let has_run_containers = self.containers.iter().any(|c| { + if let Store::Run(_) = c.store { + true + } else { + false + } + }); + let size = self.containers.len(); + + // Depending on if run containers are present or not write the appropriate header + if has_run_containers { + // The new format stores the container count in the most significant bits of the header + let cookie = SERIAL_COOKIE as u32 | ((size as u32 - 1) << 16); + writer.write_u32::(cookie)?; + // It is then followed by a bitset indicating which containers are run containers + let run_container_bitmap_size = (size + 7) / 8; + let mut run_container_bitmap = vec![0; run_container_bitmap_size]; + for (i, container) in self.containers.iter().enumerate() { + if let Store::Run(_) = container.store { + run_container_bitmap[i / 8] |= 1 << (i % 8); + } + } + writer.write_all(&run_container_bitmap)?; + } else { + // Write old format, cookie followed by container count + writer.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; + writer.write_u32::(size as u32)?; + } + // Write the container descriptions for container in &self.containers { writer.write_u16::(container.key)?; writer.write_u16::((container.len - 1) as u16)?; } - let mut offset = 8 + 8 * self.containers.len() as u32; - for container in &self.containers { - writer.write_u32::(offset)?; - match container.store { - Store::Array(ref values) => { - offset += values.len() as u32 * 2; - } - Store::Bitmap(..) 
=> { - offset += 8 * 1024; + // Write offsets if there are no runs or NO_OFFSET_THRESHOLD containers is reached + if !has_run_containers || size >= NO_OFFSET_THRESHOLD { + let mut offset = header_size(size, has_run_containers) as u32; + for container in &self.containers { + writer.write_u32::(offset)?; + match container.store { + Store::Array(ref values) => { + offset += (values.len() * ARRAY_ELEMENT_BYTES) as u32; + } + Store::Bitmap(..) => { + offset += BITMAP_BYTES as u32; + } + Store::Run(ref intervals) => { + offset += (RUN_NUM_BYTES + (intervals.len() * RUN_ELEMENT_BYTES)) as u32; + } } } } + // Finally serialize each of the containers for container in &self.containers { match container.store { Store::Array(ref values) => { @@ -91,6 +144,13 @@ impl RoaringBitmap { writer.write_u64::(value)?; } } + Store::Run(ref intervals) => { + writer.write_u16::(intervals.len() as u16)?; + for iv in intervals { + writer.write_u16::(iv.start)?; + writer.write_u16::(iv.end - iv.start)?; + } + } } } @@ -116,20 +176,28 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn deserialize_from(mut reader: R) -> io::Result { - let (size, has_offsets) = { + // First read the cookie to determine which version of the format we are reading + let (size, has_offsets, has_run_containers) = { let cookie = reader.read_u32::()?; if cookie == SERIAL_COOKIE_NO_RUNCONTAINER { - (reader.read_u32::()? as usize, true) + (reader.read_u32::()? 
as usize, true, false) } else if (cookie as u16) == SERIAL_COOKIE { - return Err(io::Error::new( - io::ErrorKind::Other, - "run containers are unsupported", - )); + let size = ((cookie >> 16) + 1) as usize; + (size, size >= NO_OFFSET_THRESHOLD, true) } else { return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value")); } }; + // Read the run container bitmap if necessary + let run_container_bitmap = if has_run_containers { + let mut bitmap = vec![0u8; (size + 7) / 8]; + reader.read_exact(&mut bitmap)?; + Some(bitmap) + } else { + None + }; + if size > u16::max_value() as usize { return Err(io::Error::new( io::ErrorKind::Other, @@ -137,39 +205,80 @@ impl RoaringBitmap { )); } - let mut description_bytes = vec![0u8; size * 4]; + // Read the container descriptions + let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES]; reader.read_exact(&mut description_bytes)?; let description_bytes = &mut &description_bytes[..]; + // Read the offsets if present if has_offsets { - let mut offsets = vec![0u8; size * 4]; + let mut offsets = vec![0u8; size * OFFSET_BYTES]; reader.read_exact(&mut offsets)?; drop(offsets); // Not useful when deserializing into memory } let mut containers = Vec::with_capacity(size); - for _ in 0..size { + // Read each of the containers + for i in 0..size { let key = description_bytes.read_u16::()?; - let len = u64::from(description_bytes.read_u16::()?) + 1; + let cardinality = u64::from(description_bytes.read_u16::()?) 
+ 1; - let store = if len <= 4096 { - let mut values = Vec::with_capacity(len as usize); - for _ in 0..len { + // If the run container bitmap is present, check if this container is a run container + let is_run_container = match run_container_bitmap { + Some(ref bm) => bm[i / 8] & (1 << (i % 8)) != 0, + None => false, + }; + + let store = if is_run_container { + let runs = reader.read_u16::()?; + let mut intervals = Vec::with_capacity(runs as usize); + for _ in 0..runs { + let start = reader.read_u16::()?; + let run_len = reader.read_u16::()?; + let end = start + run_len; + intervals.push(Interval { start, end }) + } + Store::Run(intervals) + } else if cardinality <= ARRAY_LIMIT { + let mut values = Vec::with_capacity(cardinality as usize); + for _ in 0..cardinality { values.push(reader.read_u16::()?); } Store::Array(values) } else { - let mut values = Box::new([0; 1024]); + let mut values = Box::new([0; BITMAP_LENGTH]); for value in values.iter_mut() { *value = reader.read_u64::()?; } Store::Bitmap(values) }; - containers.push(Container { key, len, store }); + containers.push(Container { + key, + len: cardinality, + store, + }); } Ok(RoaringBitmap { containers }) } } + +fn header_size(size: usize, has_run_containers: bool) -> usize { + if has_run_containers { + // New format encodes the size (number of containers) into the 4 byte cookie + // Additionally a bitmap is included marking which containers are run containers + let run_container_bitmap_size = (size + 7) / 8; + // New format conditionally includes offsets if there are 4 or more containers + if size >= NO_OFFSET_THRESHOLD { + COOKIE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + run_container_bitmap_size + } else { + COOKIE_BYTES + (DESCRIPTION_BYTES * size) + run_container_bitmap_size + } + } else { + // Old format encodes cookie followed by container count + // It also always includes the offsets + COOKIE_BYTES + SIZE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + } +} diff --git 
a/src/bitmap/store.rs b/src/bitmap/store.rs index 3aaa31107..1e21a1d72 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,14 +1,32 @@ use std::borrow::Borrow; use std::cmp::Ordering::{Equal, Greater, Less}; -use std::slice; use std::vec; +use std::{fmt, slice}; -const BITMAP_LENGTH: usize = 1024; +use self::Store::{Array, Bitmap, Run}; + +pub const BITMAP_LENGTH: usize = 1024; + +#[derive(PartialEq, Clone, Debug)] +pub struct Interval { + pub start: u16, + pub end: u16, +} + +impl Interval { + pub fn new(start: u16, end: u16) -> Interval { + Interval { start, end } + } + + pub fn run_len(&self) -> u64 { + (self.end - self.start) as u64 + 1 + } +} -use self::Store::{Array, Bitmap}; pub enum Store { Array(Vec), Bitmap(Box<[u64; BITMAP_LENGTH]>), + Run(Vec), } pub enum Iter<'a> { @@ -16,6 +34,7 @@ pub enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), + Run(RunIter), } pub struct BitmapIter> { @@ -24,6 +43,12 @@ pub struct BitmapIter> { bits: B, } +pub struct RunIter { + run: usize, + offset: u64, + intervals: Vec, +} + impl Store { pub fn insert(&mut self, index: u16) -> bool { match *self { @@ -40,6 +65,43 @@ impl Store { false } } + Run(ref mut vec) => { + vec.binary_search_by_key(&index, |iv| iv.start) + .map_err(|loc| { + // Value is beyond end of interval + if vec[loc].end < index { + // If immediately follows this interval + if index == vec[loc].end - 1 { + if loc < vec.len() && index == vec[loc + 1].start { + // Merge with following interval + vec[loc].end = vec[loc + 1].end; + vec.remove(loc + 1); + return; + } + // Extend end of this interval by 1 + vec[loc].end += 1 + } else { + // Otherwise create new standalone interval + vec.insert(loc, Interval::new(index, index)); + } + } else if vec[loc].start == index + 1 { + // Value immediately precedes interval + if loc > 0 && vec[loc - 1].end == &index - 1 { + // Merge with preceding interval + vec[loc - 1].end = vec[loc].end; + 
vec.remove(loc); + return; + } + vec[loc].start -= 1; + } else if loc > 0 && index - 1 == vec[loc - 1].end { + // Immediately follows the previous interval + vec[loc - 1].end += 1 + } else { + vec.insert(loc, Interval::new(index, index)); + } + }) + .is_err() + } } } @@ -55,6 +117,27 @@ impl Store { false } } + Run(ref mut vec) => vec + .binary_search_by_key(&index, |iv| iv.start) + .map(|loc| { + if index == vec[loc].start && index == vec[loc].end { + // Remove entire run if it only contains this value + vec.remove(loc); + } else if index == vec[loc].end { + // Value is last in this interval + vec[loc].end -= 1; + } else if index == vec[loc].start { + // Value is first in this interval + vec[loc].start += 1; + } else { + // Value lies inside the interval, we need to split it + // First shrink the current interval + vec[loc].end = index - 1; + // Then insert a new index leaving gap where value was removed + vec.insert(loc + 1, Interval::new(index + 1, vec[loc].end)); + } + }) + .is_ok(), } } @@ -105,6 +188,8 @@ impl Store { bits[end_key] &= !(!0u64).wrapping_shr(64 - end_bit); u64::from(removed) } + // TODO(jpg): Remove range + Run(ref mut _intervals) => unimplemented!(), } } @@ -112,6 +197,9 @@ impl Store { match *self { Array(ref vec) => vec.binary_search(&index).is_ok(), Bitmap(ref bits) => bits[key(index)] & (1 << bit(index)) != 0, + Run(ref intervals) => intervals + .binary_search_by_key(&index, |iv| iv.start) + .is_ok(), } } @@ -136,6 +224,13 @@ impl Store { (&Array(ref vec), store @ &Bitmap(..)) | (store @ &Bitmap(..), &Array(ref vec)) => { vec.iter().all(|&i| !store.contains(i)) } + // TODO(jpg) is_disjoint + (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&Run(ref _intervals), &Array(ref _vec)) | (&Array(ref _vec), &Run(ref _intervals)) => { + unimplemented!() + } + (&Run(ref _intervals), _store @ &Bitmap(..)) + | (_store @ &Bitmap(..), &Run(ref _intervals)) => unimplemented!(), } } @@ -159,12 +254,22 @@ impl Store { } } } + 
(&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), + // TODO(jpg) is_subset array, run + (&Array(ref _vec), &Run(ref _intervals)) => unimplemented!(), + (&Bitmap(ref bits1), &Bitmap(ref bits2)) => bits1 .iter() .zip(bits2.iter()) .all(|(&i1, &i2)| (i1 & i2) == i1), - (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), (&Bitmap(..), &Array(..)) => false, + // TODO(jpg) is subset bitmap, run + (&Bitmap(..), &Run(ref _vec)) => unimplemented!(), + + // TODO(jpg) is_subset run, * + (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&Run(ref _intervals), &Array(ref _vec)) => unimplemented!(), + (&Run(ref _intervals), _store @ &Bitmap(..)) => unimplemented!(), } } @@ -182,6 +287,13 @@ impl Store { } Array(vec) } + Run(ref intervals) => Array( + intervals + .iter() + .map(|iv| iv.start..iv.end) + .flatten() + .collect(), + ), } } @@ -195,6 +307,78 @@ impl Store { Bitmap(bits) } Bitmap(..) => panic!("Cannot convert bitmap to bitmap"), + Run(ref intervals) => { + let mut bits = Box::new([0; BITMAP_LENGTH]); + for iv in intervals { + for index in iv.start..iv.end { + bits[key(index)] |= 1 << bit(index); + } + } + Bitmap(bits) + } + } + } + + pub fn to_run(&self) -> Self { + match *self { + Array(ref vec) => { + let mut intervals = Vec::new(); + let mut start = *vec.first().unwrap(); + for (idx, &v) in vec[1..].iter().enumerate() { + if v - vec[idx] > 1 { + intervals.push(Interval::new(start, vec[idx])); + start = v + } + } + intervals.push(Interval::new(start, *vec.last().unwrap())); + Run(intervals) + } + Bitmap(ref bits) => { + let mut current = bits[0]; + let mut i = 0u16; + let mut start; + let mut last; + + let mut intervals = Vec::new(); + + loop { + // Skip over empty words + while current == 0 && i < BITMAP_LENGTH as u16 - 1 { + i += 1; + current = bits[i as usize]; + } + // Reached end of the bitmap without finding anymore bits set + if current == 0 { + break; + } + let current_start = 
current.trailing_zeros() as u16; + start = 64 * i + current_start; + + // Pad LSBs with 1s + current |= current - 1; + + // Find next 0 + while current == std::u64::MAX && i < BITMAP_LENGTH as u16 - 1 { + i += 1; + current = bits[i as usize]; + } + + // Run continues until end of this container + if current == std::u64::MAX { + intervals.push(Interval::new(start, std::u16::MAX)); + break; + } + + let current_last = (!current).trailing_zeros() as u16; + last = 64 * i + current_last; + intervals.push(Interval::new(start, last - 1)); + + // pad LSBs with 0s + current &= current + 1; + } + Run(intervals) + } + Run(ref _intervals) => panic!("Cannot convert run to run"), } } @@ -216,17 +400,28 @@ impl Store { } vec1.extend(iter2); } - (ref mut this @ &mut Bitmap(..), &Array(ref vec)) => { - for &index in vec { - this.insert(index); - } + (this @ &mut Array(..), &Bitmap(..)) => { + *this = this.to_bitmap(); + this.union_with(other); } + // TODO(jpg) union_with array, run + (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 |= index2; } } - (this @ &mut Array(..), &Bitmap(..)) => { + (ref mut this @ &mut Bitmap(..), &Array(ref vec)) => { + for &index in vec { + this.insert(index); + } + } + // TODO(jpg) union_with bitmap, run + (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), + // TODO(jpg) union_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); this.union_with(other); } @@ -254,11 +449,6 @@ impl Store { } } } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { - *index1 &= index2; - } - } (&mut Array(ref mut vec), store @ &Bitmap(..)) => { for i in (0..(vec.len())).rev() { if 
!store.contains(vec[i]) { @@ -266,11 +456,24 @@ impl Store { } } } + // TODO(jpg) intersect_with array, run + (&mut Array(ref mut _intervals1), &Run(ref _intervals2)) => {} + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { + *index1 &= index2; + } + } (this @ &mut Bitmap(..), &Array(..)) => { let mut new = other.clone(); new.intersect_with(this); *this = new; } + // TODO(jpg) intersect_with bitmap, run + (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), + // TODO(jpg) intersect_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (&mut Run(ref mut _intervals), _store @ &Bitmap(..)) => unimplemented!(), } } @@ -296,6 +499,16 @@ impl Store { } } } + (&mut Array(ref mut vec), store @ &Bitmap(..)) => { + for i in (0..vec.len()).rev() { + if store.contains(vec[i]) { + vec.remove(i); + } + } + } + // TODO(jpg) difference_with array, run + (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), + (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { for index in vec2.iter() { this.remove(*index); @@ -306,13 +519,13 @@ impl Store { *index1 &= !*index2; } } - (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - for i in (0..vec.len()).rev() { - if store.contains(vec[i]) { - vec.remove(i); - } - } - } + // TODO(jpg) difference_with bitmap, run + (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), + + // TODO(jpg) difference_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } } @@ -344,6 +557,18 @@ impl Store { vec1.extend(iter2.cloned()); } } + (this @ &mut Array(..), &Bitmap(..)) => { + let mut new = other.clone(); + new.symmetric_difference_with(this); + *this = new; + 
} + // TODO(jpg) symmetric_difference_with array, run + (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { + *index1 ^= index2; + } + } (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { for index in vec2.iter() { if this.contains(*index) { @@ -353,16 +578,12 @@ impl Store { } } } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { - *index1 ^= index2; - } - } - (this @ &mut Array(..), &Bitmap(..)) => { - let mut new = other.clone(); - new.symmetric_difference_with(this); - *this = new; - } + // TODO(jpg) symmetric_difference_with bitmap, run + (ref mut _this @ &mut Bitmap(..), &Run(ref _vec)) => unimplemented!(), + // TODO(jpg) symmetric_difference_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (_this @ &mut Run(..), &Bitmap(..)) => unimplemented!(), } } @@ -370,6 +591,7 @@ impl Store { match *self { Array(ref vec) => vec.len() as u64, Bitmap(ref bits) => bits.iter().map(|bit| u64::from(bit.count_ones())).sum(), + Run(ref intervals) => intervals.iter().map(|iv| iv.run_len() as u64).sum(), } } @@ -382,6 +604,7 @@ impl Store { .find(|&(_, &bit)| bit != 0) .map(|(index, bit)| index * 64 + (bit.trailing_zeros() as usize)) .unwrap() as u16, + Run(ref intervals) => intervals.first().unwrap().start, } } @@ -395,6 +618,39 @@ impl Store { .find(|&(_, &bit)| bit != 0) .map(|(index, bit)| index * 64 + (63 - bit.leading_zeros() as usize)) .unwrap() as u16, + Run(ref intervals) => intervals.last().unwrap().end, + } + } + + pub fn count_runs(&self) -> u64 { + match *self { + Array(ref vec) => { + vec.iter() + .fold((-2, 0u64), |(prev, runs), &v| { + let new = v as i32; + if prev + 1 != new { + (new, runs + 1) + } else { + (new, runs) + } + }) + .1 + } + Bitmap(ref bits) 
=> { + let mut num_runs = 0u64; + + for i in 0..BITMAP_LENGTH - 1 { + let word = bits[i]; + let next_word = bits[i + 1]; + num_runs += + ((word << 1) & !word).count_ones() as u64 + ((word >> 63) & !next_word); + } + + let last = bits[BITMAP_LENGTH - 1]; + num_runs += ((last << 1) & !last).count_ones() as u64 + (last >> 63); + num_runs + } + Run(ref intervals) => intervals.len() as u64, } } } @@ -406,6 +662,7 @@ impl<'a> IntoIterator for &'a Store { match *self { Array(ref vec) => Iter::Array(vec.iter()), Bitmap(ref bits) => Iter::BitmapBorrowed(BitmapIter::new(&**bits)), + Run(ref intervals) => Iter::Run(RunIter::new(intervals.to_vec())), } } } @@ -417,6 +674,7 @@ impl IntoIterator for Store { match self { Array(vec) => Iter::Vec(vec.into_iter()), Bitmap(bits) => Iter::BitmapOwned(BitmapIter::new(bits)), + Run(intervals) => Iter::Run(RunIter::new(intervals)), } } } @@ -428,6 +686,7 @@ impl PartialEq for Store { (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) } + (&Run(ref intervals1), &Run(ref intervals2)) => intervals1 == intervals2, _ => false, } } @@ -438,7 +697,43 @@ impl Clone for Store { match *self { Array(ref vec) => Array(vec.clone()), Bitmap(ref bits) => Bitmap(Box::new(**bits)), + Run(ref intervals) => Run(intervals.clone().to_vec()), + } + } +} + +impl RunIter { + fn new(intervals: Vec) -> RunIter { + RunIter { + run: 0, + offset: 0, + intervals, + } + } + + fn move_next(&mut self) { + self.offset += 1; + if self.offset == self.intervals[self.run].run_len() { + self.offset = 0; + self.run += 1; + } + } +} + +impl Iterator for RunIter { + type Item = u16; + + fn next(&mut self) -> Option { + if self.run == self.intervals.len() { + return None; } + let result = self.intervals[self.run].start + self.offset as u16; + self.move_next(); + Some(result) + } + + fn size_hint(&self) -> (usize, Option) { + panic!("Should never be called (roaring::Iter caches the size_hint itself)") } } @@ -493,6 +788,7 @@ 
impl<'a> Iterator for Iter<'a> { Iter::Vec(ref mut inner) => inner.next(), Iter::BitmapBorrowed(ref mut inner) => inner.next(), Iter::BitmapOwned(ref mut inner) => inner.next(), + Iter::Run(ref mut inner) => inner.next(), } } @@ -510,3 +806,32 @@ fn key(index: u16) -> usize { fn bit(index: u16) -> usize { index as usize % 64 } + +impl fmt::Debug for Store { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self { + Array(_) => format!( + "Array<{} values from {} to {}>", + self.len(), + self.min(), + self.max() + ) + .fmt(formatter), + Bitmap(_) => format!( + "Bitmap<{} bits set from {} to {}>", + self.len(), + self.min(), + self.max() + ) + .fmt(formatter), + Run(intervals) => format!( + "Run<{} runs totalling {} values from {} to {}>", + intervals.len(), + self.len(), + self.min(), + self.max() + ) + .fmt(formatter), + } + } +} diff --git a/tests/bitmapwithruns.bin b/tests/bitmapwithruns.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed243753e169295a32d6251db66180f23ceac06 GIT binary patch literal 48056 zcmeIuQyb)3w5Z{vVpUYBB$bL&skUv~wr$(CZQHhO+qPG)cJFo0f7tsf^BLb5*YnMt zfdl{y015&iU;{8fdI(UZDhgDsiv4%-|000=3<=DyNCAKU(!k#=^#A~P^j|*sU-o|$ z{I7=pHSoU{0>HA}0I=r2HvJa>ynrrXJE#Th2(g1)Ln~mWaJl5SR1eaf$h0Bbj9dfq zwJ21fSdLOwIlR(`syAvLsJo)!gr+@OHfUR-V}`CuPoVFM0T+hs7_nf?hzT90)R<9V zPKpKGl5fi&tUj@R!{!;=2kdUKzrx`h$F0-&`45+FTsv@U#k~oSdOU0Js>HhtpOmk_ zj~Bl$fIE-{#0aJXUqdROrqDV2oA)5ai8LEB%*ZkzM~gfa3gjqamB7nU@R~Jm<&t1Go*`10DiTfaky~;4Sb0_zZjnz5~C2zd#5? 
zKpdpNFvx=uPzGb54qBiK`d~^h4VWIx1ZD+ufVshZU_r15SR5<`mIW(-mBDIYO|TAF zA8Z6R1zUiv!FFIrunX87>;?7(2Y`dYVch zsvW8qY8Yw~Y94A8Y8&bh>Ky78>KW=2>K_^u8X6iA8XX!Jni!fAnjV@Jnj2aWS{zyy zS{YgsS|8dJ+8Wvs+8x>#Iv6?H+nJ`auJsA<%GW6f_o^08NIbK{KH_(0phS zv=mwat%lY?8=)=Gc4!y07dikPhK@lep)=5V=n`}lx&hsW?m-WsC(v`~74#PR0DXqO zLf@fZ&|fG7BQOqAa2V#{2rR=fScfgxg?%_BoCZ!0XM(fBIpExIKDZ!U1TGGjg3H1c z;L30{xF%c&t`9eYo5C&N)^IzxBise<4)=ol!UN#J@Gy8JJO&;QPlBhyGvL|qJa{3z z1YQoWg4e(i!Q7^hEj~{gFY)P-Fx$8X1R7M5Z9qky*%GWC5}m zS%$1c)*$PVO~_Vc2eKR4ha5zXAjgqY$XVn9av8aX+(hmm_mM})Q{)Bm8hM9&M7|*Z zB0rGd2!KK;iV`S|vZ#PcsDf&!f!e5tCec)AS~LTi8O?^~MDw8e(L!iZv;;r(I93WPi&elX zW7V*lSRJfB)(C5gwZK|q?XZqm7pyzh3+sywzy@Q(u#wmpY&|73?~83%iRwz#e1Iu$R~y>^=4g`v?1m z{lxxYAP(agPT~yC;UXTzRXmQHxPvF~6nJVp9i9=-f@jBb;d$`_cwxL4UJ@^Zm&YsN zRq+~lZM+`d5O0Dv$6MiT@eX)ryc^yV?}PWp2jN5U5%_3)96k}Bf=|b1;dAi?_+oq+ zz7k)9ug5pxTk#$EZhRkp5I=$+$4}vB@eBB6{2G1}zk}b$AK_2&7x-)Z9sUvjg8z&E zz<=Wa0TC!c5H!IO0wEC!p%DgQ6CRNyQW0s13`Axk8vMJeuY)!T!JCa?FOpZt>*Ou+F8P3bOgr4~?&sb$nk zY7MoX+C*)oc2K*iebhnf2z8t~MV+NCP?xD|)J^IRb)R}fJ*8eyuc>#`N9qgpFZF}^ zO#w7SqclO&G)oJ#L@TsL8?;S(bdpX*r=>H{ndxkFPC5^rpDsifrAyGI>2h>Mx(Z#L zu0_|S8_2>r*dJDas-bL@F5739{WAsV-41J!yL|>(E(6{M(^h5dy{hWS9zokFWpXsmkclsCo zmku!qgEJHpW_TvT$V`mU8H;flpGnE2VbU|1n5;|=CO4ChDaaIIiZi8{vP=c0GECE6T?%&)5EjEbHfY5 zi^I#pE5mET>%*JETf;lTyTkj!2g66g$HS+>XTul5m&4b>H^X)aV-413JvPavV$-r2*vxD;HYb~h&CeEMi?Suy z(rh`lB3p&6&eme@oHvdxkyFUShAZH`v?kJ@z5{gniDwV&AeK z*w5@&_B;EF{mX_pgu^+C3v)ad;bbnx>72#6oX@4?(s1dyOk7qj2bY`6#}(v?aK*V& zTv@IHSDCBE)#U1M^|?k|Q?3Qqnrp{(~#&F}gN!(O!1~;3V z$1UWRaLc(>+*)n}x0&0FZ0*=W^Y8eN z{1^UT{s;e?2Lwn!1wx<&RuBY9Py|gd1Y7Wgq>xHTD`XHd3)zI6LLMQ%P)H~$ln_b_ z<%Eht6`{INOQD@+h33)6&|!W?0~ zut-=ctPoZU>x7NM7Gb-vOV}$M5Dp8+gphr$!#x$sJOD|`?> z3txrr!Y|>k5E2m)7b!6;@?u1k#h9pzmgtJUm{Lq5rWZ4bS;ZV;ZZV%&P%I)A7fXp{ z#R_6&v6@&@tRvPJ8;MQD7Gi6$o!C+AB6b&hiG9TZ;$U%@I8q!Vju$71Q^gtLY;m5r zP+TG|7gvdE#SP+SahteP+#~K64~a*`6XI#{oOn^ZB3>77iFd^Z;$!id_)>f$z861< 
z|A^njpW+`8jKC2rLPnSf7ZD@Th#HAU%!m_7L{dajN76+yMzTb*M{-5-MhZja*aMs`GYNA^VyMvg>|M@~h~MlM7yN3KO~M(#xJM;=9< zMqWf-N8Uv~M!rP;jr@rGjsOxQp%Nj{5-SOkBq@?68Imn|Qc_AKrIj*BnWbz}PAQL+ zUn(ROl}bpZrE*e5sftuxswLHx8c2<$W>QP3jnrQ1Bz2W~NWG?`qo1PxM88FUM*l=X8J01blo^?mML8;~a$GiLM^4Bo>Xd_+DjpOVkY7v#(G zHTkA|N4_sVlAp>ij{qEbbvuGCWMDh-szN;9RU(ne{obW*x1J(S)`KV_gY zL>aD(QpPG1l*!68Wu`JmnXfETmMSZh)yg_$qq0TWuIy6wDhHIq$}#1naz;6?TvDzo zH{YDM0u{fQr;>bl+Vgn<-77r`KyFfM8#D~4XeBwQDrrz>Z+x>s;{P0)2Qjy zOlnp&hnic>rxsL;sKwP%YFV{{T3M~8)>P}L_0>jdQ?-TKT5YFxRJ*9%)m~~}b$~ip z9j1;{$Ef4gN$ON}hB{lFr!G{NsLRz=>RNS!x>?<(?o{`v`_)70QT2p+T0N&;RIjMl z)m!Ra^?~|WeWt!t->C1^PwGGFH}$9bM+IYW42zL5CdS3YSTv@_;xRMk#1gR-vDC42 zv5c`SvFx#2vAnSYvBI%pv68VevGTD>v8u5evD&eEv4*iGvF5Q>v9_@evCgq>v7WI$ zvHr0^v7xaMvC*+{v5B!MvFWi{vAMAYvBj}vv6ZnkvGuV{v8}NkvE8wKv4gQAvE#8* zv9qxYvCFY*v750wvHP({v8S;YvDdM8v5&DYv43MfV!va625G29Xtc&^f+lH-rfG&| zYo3Ro7~1b+rasW38FiQfs5N*E(rk zwH{h;t)Dhf8=?)@MrmWU3EE_Bnl@9Lqs`YAX-l;g+G=f`wo%)nZP#{bd$j}FVeOc9 zQahua*Dh&SwHw-P?Vk2fd!jwpUTJT&587w#tM*;{rTx`HaU_n%sdzZf$0Kn$9*gU7 zEAGbqc*=O1c=~vzc-DB1cQ>zc++@`c*-`11Iw_}ch}_~!Vw_|EvA z`2P5z_|f=@`04n$_{I2@`1Sa$_}%z}_~ZDq_{;d4`1|;$_&@P)@t^TOaZrbKOeb|l z=X6nz>Z%^sP2JHGdI~+Yo=(rGXVJ6kx%9kx0llzZOfRXI(aYTrX zo9nIgwt5G>v))bbsrS+Q>x1;6`UrirK2D#gPtm9Av-G+80)4T*Okb(5(bwyn^sV|1 zeYd_(Kd2wkkL#!Ov-$=7vVKj!so&A>>yPxO`V0NF{!ag>f6@Qdf9St;z<>5V|#sp)sG0m81%rWL0i;Shl3S+gg&e&*dF}54K zjJ?JIdRd-Id|kNM5~Y5p-m3$`$ev>1!C zL@R2kR@^cz$4Xc!tkhOIE2EXg%5LSd@>&I~!d5Y>q*cZ$Z&k9YS~aZNRz0hs)x>IU zwX)h;9jwk)H>;=B$LenlvW8kCtkKpuYoay9nr_Xq=2{D^#nv)wrM1RdZ*8)+T05-W z);{Z?b;LSuowCka7p%+HHS4Bz$GUGlvYuKmtk>2%>!bC>`q%nl{k8xbvQe9`X`8hL zTe1~fvklv}Jv(WqveVib?96sHJExt;&Tkj8i`pgZ(snt!qFu$VZr8Hw+70Z+b~C%B z-NtTjce1-H|*Q?J^P{k#C~qSvftVt?9cXB`@8+i{%eOE#K9fP2|K(K zabzdv=#J&Mj_;&&(m3gzOiorOhm+gM=M;2`IK`b(PFbgdQ`xEJ)O6}N^_@meQ>TT~ z+G*!>bh~!`x z`<+A1QRjqn+BxT3bgnqpom;ihm?yXoAFZWcGYo6F7X7H|u@#oUr^8MnM!$*tV$JGW*+nyW`x6?i6>rJIkHxE^rsS%iNXj8h5?B$=&MiaCf`= z+=K2B_qcn?J?ma@FT2;=o9-R=zWd01>b`JayYJkO?icr8_lNu21w6<@J;I|s))PF* 
zQ#{QxJlpfUq?gJ|>t*mVd)d64ULG&MSI8^smGDY?<-Ces6|cHi%d6`(@EUu~yp~=Y zuf5mF>+1FJdVBr6f!+{rxHrlh>rL<`d(*s`-W+efx5!)St?*WR>%5KL7H_+^%iHT6 z@D6*&yp!G;@4R=(yXxKWZhQBwWM(dtbfp-Y@U37fK)rJV7PG2|f`? z$cb1&Pgn^z;U`ii(j?L+G9|JmawKvm@+Ar;iX@6BN+rrBDkLf=swHYB>LltX8YP-0 zS|nO0+9f(Bx+J|;LZGd|~we$-d}xNrK7pYT)osr__*Mn8+6-OuIc^$Ylg{bGJezl>krujE(tYxuSO zdVWK{iQn9B<+t@a_?`W3eow!T-`^kP5A{d*qy2IIM1P7u-Jj*p^%wYy{bl}2e~rK1 z-{f!gclf*gef~lJh=1Ha<)8I0_?P`_{!Ramf8T%PKlNYuul;xaNB@ieum8jU?E^_D zi6)68on(_jQc5aGEomg}q?b%4Qzg?TGbA%7vn6vT^Ca^p3nhytOC(Dt%Oxu&t0b!@ zYbEO@8zdVin69`4vF`0RH~{|2xb70P7F}00000 literal 0 HcmV?d00001 diff --git a/tests/clone.rs b/tests/clone.rs index fca6e1993..b70307bc0 100644 --- a/tests/clone.rs +++ b/tests/clone.rs @@ -42,3 +42,16 @@ fn bitmaps() { assert_eq!(clone, original); } + +#[test] +fn runs() { + let mut original = RoaringBitmap::from_iter( + (0..6000) + .chain(1_000_000..1_012_000) + .chain(2_000_000..2_010_000), + ); + original.optimize(); + let clone = original.clone(); + + assert_eq!(clone, original); +} diff --git a/tests/lib.rs b/tests/lib.rs index 88b6932cc..c3610c80a 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -139,3 +139,25 @@ fn to_array() { assert_eq!(bitmap.contains(i), false); } } + +#[test] +fn optimize_array() { + let mut bitmap = RoaringBitmap::from_iter(0..1000); + assert!(bitmap.optimize()); + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); +} + +#[test] +fn optimize_bitmap() { + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); +} + +#[test] +fn optimize_run() { + let mut bitmap = RoaringBitmap::from_iter(0..1000); + assert!(bitmap.optimize()); + // Calling optimize a second time should return false as no changes will be made + assert!(!bitmap.optimize()); +} diff --git a/tests/serialization.rs b/tests/serialization.rs index bb20b4f8b..987798154 100644 --- a/tests/serialization.rs +++ b/tests/serialization.rs @@ -6,6 +6,7 @@ use roaring::RoaringBitmap; // Test data from 
https://github.com/RoaringBitmap/RoaringFormatSpec/tree/master/testdata static BITMAP_WITHOUT_RUNS: &[u8] = include_bytes!("bitmapwithoutruns.bin"); +static BITMAP_WITH_RUNS: &[u8] = include_bytes!("bitmapwithruns.bin"); fn test_data_bitmap() -> RoaringBitmap { RoaringBitmap::from_iter( @@ -24,13 +25,24 @@ fn serialize_and_deserialize(bitmap: &RoaringBitmap) -> RoaringBitmap { } #[test] -fn test_deserialize_from_provided_data() { +fn test_deserialize_without_runs_from_provided_data() { assert_eq!( RoaringBitmap::deserialize_from(&mut &BITMAP_WITHOUT_RUNS[..]).unwrap(), test_data_bitmap() ); } +#[test] +fn test_deserialize_with_runs_from_provided_data() { + let mut expected = test_data_bitmap(); + // Call optimize to create run containers + expected.optimize(); + assert_eq!( + RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), + expected + ); +} + #[test] fn test_serialize_into_provided_data() { let bitmap = test_data_bitmap(); @@ -39,6 +51,16 @@ fn test_serialize_into_provided_data() { assert!(BITMAP_WITHOUT_RUNS == &buffer[..]); } +#[test] +fn test_serialize_with_runs_into_provided_data() { + let mut bitmap = test_data_bitmap(); + // Call optimize to create run containers + bitmap.optimize(); + let mut buffer = vec![]; + bitmap.serialize_into(&mut buffer).unwrap(); + assert!(BITMAP_WITH_RUNS == &buffer[..]); +} + #[test] fn test_empty() { let original = RoaringBitmap::new(); @@ -520,3 +542,13 @@ fn test_strange() { let new = serialize_and_deserialize(&original); assert_eq!(original, new); } + +#[test] +fn test_runs() { + let mut original = RoaringBitmap::from_iter((1000..3000).chain(70000..77000)); + original.optimize(); + let new = serialize_and_deserialize(&original); + assert_eq!(original.len(), new.len()); + assert_eq!(original.min(), new.min()); + assert_eq!(original.max(), new.max()); +} diff --git a/tests/size_hint.rs b/tests/size_hint.rs index 30a30822b..061c1ea23 100644 --- a/tests/size_hint.rs +++ b/tests/size_hint.rs @@ -25,6 +25,18 
@@ fn bitmap() { assert_eq!((0, Some(0)), iter.size_hint()); } +#[test] +fn run() { + let mut bitmap = RoaringBitmap::from_iter(0..6000); + bitmap.optimize(); + let mut iter = bitmap.iter(); + assert_eq!((6000, Some(6000)), iter.size_hint()); + iter.by_ref().take(3000).for_each(drop); + assert_eq!((3000, Some(3000)), iter.size_hint()); + iter.by_ref().for_each(drop); + assert_eq!((0, Some(0)), iter.size_hint()); +} + #[test] fn arrays() { let bitmap = RoaringBitmap::from_iter( @@ -58,3 +70,19 @@ fn bitmaps() { iter.by_ref().for_each(drop); assert_eq!((0, Some(0)), iter.size_hint()); } + +#[test] +fn runs() { + let mut bitmap = RoaringBitmap::from_iter( + (0..2000) + .chain(1_000_000..1_002_000) + .chain(2_000_000..2_001_000), + ); + bitmap.optimize(); + let mut iter = bitmap.iter(); + assert_eq!((5000, Some(5000)), iter.size_hint()); + iter.by_ref().take(3000).for_each(drop); + assert_eq!((2000, Some(2000)), iter.size_hint()); + iter.by_ref().for_each(drop); + assert_eq!((0, Some(0)), iter.size_hint()); +} From 3124aa4d5a259dd205b6c7c1423a8ff1192f2022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 17:58:04 +0200 Subject: [PATCH 02/83] Fix some bugs in the run container implementation --- src/bitmap/store.rs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 1e21a1d72..6d99eb9be 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,5 +1,5 @@ use std::borrow::Borrow; -use std::cmp::Ordering::{Equal, Greater, Less}; +use std::cmp::Ordering::{self, Equal, Greater, Less}; use std::vec; use std::{fmt, slice}; @@ -7,12 +7,22 @@ use self::Store::{Array, Bitmap, Run}; pub const BITMAP_LENGTH: usize = 1024; -#[derive(PartialEq, Clone, Debug)] +#[derive(PartialEq, Copy, Clone, Debug)] pub struct Interval { pub start: u16, pub end: u16, } +fn cmp_index_interval(index: u16, iv: Interval) -> Ordering { + if index < iv.start { + 
Less + } else if index > iv.end { + Greater + } else { + Less + } +} + impl Interval { pub fn new(start: u16, end: u16) -> Interval { Interval { start, end } @@ -66,7 +76,7 @@ impl Store { } } Run(ref mut vec) => { - vec.binary_search_by_key(&index, |iv| iv.start) + vec.binary_search_by(|iv| cmp_index_interval(index, *iv)) .map_err(|loc| { // Value is beyond end of interval if vec[loc].end < index { @@ -118,23 +128,25 @@ impl Store { } } Run(ref mut vec) => vec - .binary_search_by_key(&index, |iv| iv.start) + .binary_search_by(|iv| cmp_index_interval(index, *iv)) .map(|loc| { if index == vec[loc].start && index == vec[loc].end { // Remove entire run if it only contains this value vec.remove(loc); } else if index == vec[loc].end { // Value is last in this interval - vec[loc].end -= 1; + vec[loc].end = index - 1; } else if index == vec[loc].start { // Value is first in this interval - vec[loc].start += 1; + vec[loc].start = index + 1; } else { // Value lies inside the interval, we need to split it - // First shrink the current interval + // First construct a new interval with the right part + let new_interval = Interval::new(index + 1, vec[loc].end); + // Then shrink the current interval vec[loc].end = index - 1; - // Then insert a new index leaving gap where value was removed - vec.insert(loc + 1, Interval::new(index + 1, vec[loc].end)); + // Then insert the new interval leaving gap where value was removed + vec.insert(loc + 1, new_interval); } }) .is_ok(), @@ -198,7 +210,7 @@ impl Store { Array(ref vec) => vec.binary_search(&index).is_ok(), Bitmap(ref bits) => bits[key(index)] & (1 << bit(index)) != 0, Run(ref intervals) => intervals - .binary_search_by_key(&index, |iv| iv.start) + .binary_search_by(|iv| cmp_index_interval(index, *iv)) .is_ok(), } } From 2068bb6809c17c47052d26d1e1e29f26695b8906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:01:14 +0200 Subject: [PATCH 03/83] Fix the to_array/bitmap impl for runs, the end 
bound is inclusive --- src/bitmap/store.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 6d99eb9be..31fcc90c0 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -300,11 +300,7 @@ impl Store { Array(vec) } Run(ref intervals) => Array( - intervals - .iter() - .map(|iv| iv.start..iv.end) - .flatten() - .collect(), + intervals.iter().flat_map(|iv| iv.start..=iv.end).collect() ), } } @@ -322,7 +318,7 @@ impl Store { Run(ref intervals) => { let mut bits = Box::new([0; BITMAP_LENGTH]); for iv in intervals { - for index in iv.start..iv.end { + for index in iv.start..=iv.end { bits[key(index)] |= 1 << bit(index); } } From e605f640b18eded117810ca9cd950778a6e491e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:18:12 +0200 Subject: [PATCH 04/83] Rework the array bitmap intersect_with using Vec::retain --- src/bitmap/store.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 31fcc90c0..ee65b61b8 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -458,11 +458,7 @@ impl Store { } } (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - for i in (0..(vec.len())).rev() { - if !store.contains(vec[i]) { - vec.remove(i); - } - } + vec.retain(|i| store.contains(*i)); } // TODO(jpg) intersect_with array, run (&mut Array(ref mut _intervals1), &Run(ref _intervals2)) => {} From 9321618e732ff287a7dc3e10b2bfbe22f25c9efd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:19:52 +0200 Subject: [PATCH 05/83] Implement the array run intersect_with operation --- src/bitmap/store.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index ee65b61b8..38c974d3f 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -460,8 +460,9 @@ impl Store { (&mut Array(ref mut vec), store @ 
&Bitmap(..)) => { vec.retain(|i| store.contains(*i)); } - // TODO(jpg) intersect_with array, run - (&mut Array(ref mut _intervals1), &Run(ref _intervals2)) => {} + (&mut Array(ref mut vec), run @ &Run(..)) => { + vec.retain(|i| run.contains(*i)); + } (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 &= index2; From a62fc7d059ed0d9cfea273f9f4b3c26a3e4e818a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:57:04 +0200 Subject: [PATCH 06/83] Implement the run array intersect_with operation --- src/bitmap/store.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 38c974d3f..4c1251057 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -477,7 +477,11 @@ impl Store { (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), // TODO(jpg) intersect_with run, * (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (this @ &mut Run(..), &Array(..)) => { + let mut new = other.clone(); + new.intersect_with(this); + *this = new; + }, (&mut Run(ref mut _intervals), _store @ &Bitmap(..)) => unimplemented!(), } } From d658f2851f5c05cce422268968637ea2ad9bffd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 20:38:58 +0200 Subject: [PATCH 07/83] Implement the run run union_with operation --- src/bitmap/store.rs | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 4c1251057..d6ed9327b 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,7 +1,6 @@ use std::borrow::Borrow; use std::cmp::Ordering::{self, Equal, Greater, Less}; -use std::vec; -use std::{fmt, slice}; +use std::{cmp, fmt, vec, slice}; use self::Store::{Array, Bitmap, Run}; @@ -426,8 
+425,39 @@ impl Store { } // TODO(jpg) union_with bitmap, run (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), - // TODO(jpg) union_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { + let mut merged = Vec::new(); + + let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + // Iterate over two iterators and return the lowest value at each step. + let iv = match (iv1, iv2) { + (None, None) => break, + (Some(v1), None) => { iv1 = i1.next(); v1 }, + (None, Some(v2)) => { iv2 = i2.next(); v2 }, + (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { + Equal => { iv1 = i1.next(); iv2 = i2.next(); v1 }, + Less => { iv1 = i1.next(); v1 }, + Greater => { iv2 = i2.next(); v2 }, + }, + }; + + match merged.last_mut() { + // If the list of merged intervals is empty, append the interval. + None => merged.push(*iv), + Some(last) => if last.end < iv.start { + // If the interval does not overlap with the previous, append it. + merged.push(*iv); + } else { + // If there is overlap, so we merge the current and previous intervals. 
+ last.end = cmp::max(last.end, iv.end); + }, + } + } + + *intervals1 = merged; + }, (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); From 0ded0285a08e8aa433e8d83e8e6a1274957274dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 10:57:08 +0200 Subject: [PATCH 08/83] Implement the run array union_with operation --- src/bitmap/store.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index d6ed9327b..5f94cb19c 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -458,7 +458,11 @@ impl Store { *intervals1 = merged; }, - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (ref mut this @ &mut Run(..), &Array(ref vec)) => { + for i in vec { + this.insert(*i); + } + }, (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); this.union_with(other); From fe8a4ab7137f2102e822279c03bb65166c85d278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:01:50 +0200 Subject: [PATCH 09/83] Implement the array run union_with operation --- src/bitmap/store.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 5f94cb19c..618eccc07 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -411,8 +411,11 @@ impl Store { *this = this.to_bitmap(); this.union_with(other); } - // TODO(jpg) union_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} + (this @ &mut Array(..), run @ &Run(..)) => { + let mut new = run.clone(); + new.union_with(this); + *this = new; + } (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 |= index2; @@ -457,12 +460,12 @@ impl Store { } *intervals1 = merged; - }, + } (ref mut this @ &mut Run(..), &Array(ref vec)) => { for i in vec { 
this.insert(*i); } - }, + } (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); this.union_with(other); From 613163fc0c0547231b8e9cacf0f90d944c17dfb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:08:23 +0200 Subject: [PATCH 10/83] Implement the bitmap run union_with operation --- src/bitmap/store.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 618eccc07..fd94b1e14 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -411,8 +411,8 @@ impl Store { *this = this.to_bitmap(); this.union_with(other); } - (this @ &mut Array(..), run @ &Run(..)) => { - let mut new = run.clone(); + (this @ &mut Array(..), &Run(..)) => { + let mut new = other.clone(); new.union_with(this); *this = new; } @@ -426,8 +426,10 @@ impl Store { this.insert(index); } } - // TODO(jpg) union_with bitmap, run - (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), + (this @ &mut Bitmap(..), &Run(..)) => { + let other = other.to_bitmap(); + this.union_with(&other); + } (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { let mut merged = Vec::new(); From 0a664836772ba30a58ae1224c6169c560f524367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:26:44 +0200 Subject: [PATCH 11/83] Implement the run run intersect_with operation --- src/bitmap/store.rs | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index fd94b1e14..6ad93793f 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -514,8 +514,36 @@ impl Store { } // TODO(jpg) intersect_with bitmap, run (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), - // TODO(jpg) intersect_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { + let mut 
merged = Vec::new(); + + let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + if let (Some(v1), Some(v2)) = (iv1, iv2) { + let start = cmp::max(v1.start, v2.start); + let end = cmp::min(v1.end, v2.end); + let iv = Interval::new(start, end); + if iv.run_len() > 0 { + merged.push(iv); + } + } + + // Iterate over two iterators, consuming the lowest first, like merge join. + match (iv1, iv2) { + (None, None) => break, + (Some(v1), None) => iv1 = i1.next(), + (None, Some(v2)) => iv2 = i2.next(), + (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { + Equal => { iv1 = i1.next(); iv2 = i2.next(); }, + Less => iv1 = i1.next(), + Greater => iv2 = i2.next(), + }, + } + } + + *intervals1 = merged; + }, (this @ &mut Run(..), &Array(..)) => { let mut new = other.clone(); new.intersect_with(this); From 9af436678706efc495d543a71a5ccd9fd70f11a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:33:12 +0200 Subject: [PATCH 12/83] Implement the bitmap run intersect_with operation --- src/bitmap/store.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 6ad93793f..515b7889b 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -512,8 +512,10 @@ impl Store { new.intersect_with(this); *this = new; } - // TODO(jpg) intersect_with bitmap, run - (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), + (this @ &mut Bitmap(..), &Run(..)) => { + let other = other.to_bitmap(); + this.intersect_with(&other); + } (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { let mut merged = Vec::new(); From 9612ae98ffe8b0e193d533a71548e7210aafdd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:33:27 +0200 Subject: [PATCH 13/83] Implement the run bitmap intersect_with operation --- src/bitmap/store.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 
deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 515b7889b..94e1559cc 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -545,13 +545,17 @@ impl Store { } *intervals1 = merged; - }, + } (this @ &mut Run(..), &Array(..)) => { let mut new = other.clone(); new.intersect_with(this); *this = new; - }, - (&mut Run(ref mut _intervals), _store @ &Bitmap(..)) => unimplemented!(), + } + (this @ &mut Run(..), &Bitmap(..)) => { + let mut new = other.clone(); + new.intersect_with(this); + *this = new; + } } } From 4ae8986a9cec36239c99dea5e29ff57b03b32af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 18:33:19 +0200 Subject: [PATCH 14/83] Simplify the run run intersect_with operation --- src/bitmap/store.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 94e1559cc..9566d8db1 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -521,26 +521,23 @@ impl Store { let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); let (mut iv1, mut iv2) = (i1.next(), i2.next()); - loop { - if let (Some(v1), Some(v2)) = (iv1, iv2) { + + // Iterate over both iterators. + while let (Some(v1), Some(v2)) = (iv1, iv2) { + if v2.start <= v1.end && v1.start <= v2.end { let start = cmp::max(v1.start, v2.start); let end = cmp::min(v1.end, v2.end); let iv = Interval::new(start, end); - if iv.run_len() > 0 { - merged.push(iv); - } + merged.push(iv); } - // Iterate over two iterators, consuming the lowest first, like merge join. 
- match (iv1, iv2) { - (None, None) => break, - (Some(v1), None) => iv1 = i1.next(), - (None, Some(v2)) => iv2 = i2.next(), - (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { - Equal => { iv1 = i1.next(); iv2 = i2.next(); }, - Less => iv1 = i1.next(), - Greater => iv2 = i2.next(), - }, + if v1.end < v2.end { + iv1 = i1.next(); + } else if v1.end > v2.end { + iv2 = i2.next(); + } else { + iv1 = i1.next(); + iv2 = i2.next(); } } From 924d4dbeea361174ffc20705b78c710c74919850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 19:47:58 +0200 Subject: [PATCH 15/83] Implement the remove_range operation for the run store type --- src/bitmap/store.rs | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 9566d8db1..d06771623 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -199,8 +199,44 @@ impl Store { bits[end_key] &= !(!0u64).wrapping_shr(64 - end_bit); u64::from(removed) } - // TODO(jpg): Remove range - Run(ref mut _intervals) => unimplemented!(), + // TODO we must test that algorithm + Run(ref mut intervals) => { + let mut count = 0; + let mut search_end = false; + + for iv in intervals.iter_mut() { + if !search_end && cmp_index_interval(start as u16, *iv) == Equal { + count += Interval::new(iv.end, start as u16).run_len(); + iv.end = start as u16; + search_end = true; + } + + if search_end { + // The end bound is non-inclusive therefore we must search for end - 1. + match cmp_index_interval(end as u16 - 1, *iv) { + Less => { + // We invalidate the intervals that are contained in + // the start and end but doesn't touch the bounds. + count += iv.run_len(); + *iv = Interval::new(u16::max_value(), 0); + }, + Equal => { + // We shrink this interval by moving the start of it to be + // the end bound which is non-inclusive. 
+ count += Interval::new(end as u16, iv.start).run_len(); + iv.start = end as u16; + }, + Greater => break, + } + } + } + + // We invalidated the intervals to remove, + // the start is greater than the end. + intervals.retain(|iv| iv.start <= iv.end); + + count + }, } } From d7bcad321b41c392acc12d29b20de2954c6777bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 19:50:34 +0200 Subject: [PATCH 16/83] Implement the run array and array run is_disjoint operation --- src/bitmap/store.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index d06771623..36bcede08 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -273,8 +273,8 @@ impl Store { } // TODO(jpg) is_disjoint (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&Run(ref _intervals), &Array(ref _vec)) | (&Array(ref _vec), &Run(ref _intervals)) => { - unimplemented!() + (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { + vec.iter().all(|&i| !run.contains(i)) } (&Run(ref _intervals), _store @ &Bitmap(..)) | (_store @ &Bitmap(..), &Run(ref _intervals)) => unimplemented!(), From cb69d80b5073729066a6d8e586c02a496e7d00d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 13:20:55 +0200 Subject: [PATCH 17/83] Implement the run run is_disjoint operation --- src/bitmap/store.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 36bcede08..2ebf6106c 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -272,7 +272,29 @@ impl Store { vec.iter().all(|&i| !store.contains(i)) } // TODO(jpg) is_disjoint - (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&Run(ref intervals1), &Run(ref intervals2)) => { + let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + 
loop { + match (iv1, iv2) { + (Some(v1), Some(v2)) => { + if v2.start <= v1.end && v1.start <= v2.end { + return false; + } + + if v1.end < v2.end { + iv1 = i1.next(); + } else if v1.end > v2.end { + iv2 = i2.next(); + } else { + iv1 = i1.next(); + iv2 = i2.next(); + } + }, + (_, _) => return true, + } + } + }, (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { vec.iter().all(|&i| !run.contains(i)) } From c77c0f867469838288f78acf3a8f91542494725b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 15:30:24 +0200 Subject: [PATCH 18/83] Simplify the array bitmap difference_with operation --- src/bitmap/store.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 2ebf6106c..8981c1e79 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -637,11 +637,7 @@ impl Store { } } (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - for i in (0..vec.len()).rev() { - if store.contains(vec[i]) { - vec.remove(i); - } - } + vec.retain(|i| !store.contains(*i)); } // TODO(jpg) difference_with array, run (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), From 3a9eefdecb5503dd1b96ceb2fd226517b010423b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 15:30:47 +0200 Subject: [PATCH 19/83] Implement the array run difference_with operation --- src/bitmap/store.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 8981c1e79..f80d22abd 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -639,9 +639,9 @@ impl Store { (&mut Array(ref mut vec), store @ &Bitmap(..)) => { vec.retain(|i| !store.contains(*i)); } - // TODO(jpg) difference_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), - + (&mut Array(ref mut vec), run @ &Run(..)) => { + vec.retain(|i| !run.contains(*i)); + } (ref mut this @ 
&mut Bitmap(..), &Array(ref vec2)) => { for index in vec2.iter() { this.remove(*index); From 183c1bbb76ad8351f1089abfd219bcb55d906baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 14:58:45 +0200 Subject: [PATCH 20/83] Implement the bitmap run difference_with operation --- src/bitmap/store.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index f80d22abd..02e8b1426 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -652,9 +652,11 @@ impl Store { *index1 &= !*index2; } } - // TODO(jpg) difference_with bitmap, run - (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), - + (ref mut this @ &mut Bitmap(..), &Run(ref intervals)) => { + for iv in intervals { + this.remove_range(iv.start as u32, iv.end as u32 + 1); + } + } // TODO(jpg) difference_with run, * (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), From 07d0fcc36ae31659740678b273bae3afa12a3eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 15:35:40 +0200 Subject: [PATCH 21/83] Clippy and fmt pass --- src/bitmap/store.rs | 88 +++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 02e8b1426..97121c0ba 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,6 +1,6 @@ use std::borrow::Borrow; use std::cmp::Ordering::{self, Equal, Greater, Less}; -use std::{cmp, fmt, vec, slice}; +use std::{cmp, fmt, slice, vec}; use self::Store::{Array, Bitmap, Run}; @@ -219,13 +219,13 @@ impl Store { // the start and end but doesn't touch the bounds. count += iv.run_len(); *iv = Interval::new(u16::max_value(), 0); - }, + } Equal => { // We shrink this interval by moving the start of it to be // the end bound which is non-inclusive. 
count += Interval::new(end as u16, iv.start).run_len(); iv.start = end as u16; - }, + } Greater => break, } } @@ -236,7 +236,7 @@ impl Store { intervals.retain(|iv| iv.start <= iv.end); count - }, + } } } @@ -282,19 +282,19 @@ impl Store { return false; } - if v1.end < v2.end { - iv1 = i1.next(); - } else if v1.end > v2.end { - iv2 = i2.next(); - } else { - iv1 = i1.next(); - iv2 = i2.next(); + match v1.end.cmp(&v2.end) { + Less => iv1 = i1.next(), + Greater => iv2 = i2.next(), + Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + } } - }, + } (_, _) => return true, } } - }, + } (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { vec.iter().all(|&i| !run.contains(i)) } @@ -356,9 +356,9 @@ impl Store { } Array(vec) } - Run(ref intervals) => Array( - intervals.iter().flat_map(|iv| iv.start..=iv.end).collect() - ), + Run(ref intervals) => { + Array(intervals.iter().flat_map(|iv| iv.start..=iv.end).collect()) + } } } @@ -497,25 +497,43 @@ impl Store { // Iterate over two iterators and return the lowest value at each step. let iv = match (iv1, iv2) { (None, None) => break, - (Some(v1), None) => { iv1 = i1.next(); v1 }, - (None, Some(v2)) => { iv2 = i2.next(); v2 }, + (Some(v1), None) => { + iv1 = i1.next(); + v1 + } + (None, Some(v2)) => { + iv2 = i2.next(); + v2 + } (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { - Equal => { iv1 = i1.next(); iv2 = i2.next(); v1 }, - Less => { iv1 = i1.next(); v1 }, - Greater => { iv2 = i2.next(); v2 }, + Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + v1 + } + Less => { + iv1 = i1.next(); + v1 + } + Greater => { + iv2 = i2.next(); + v2 + } }, }; match merged.last_mut() { // If the list of merged intervals is empty, append the interval. None => merged.push(*iv), - Some(last) => if last.end < iv.start { - // If the interval does not overlap with the previous, append it. - merged.push(*iv); - } else { - // If there is overlap, so we merge the current and previous intervals. 
- last.end = cmp::max(last.end, iv.end); - }, + Some(last) => { + if last.end < iv.start { + // If the interval does not overlap with the previous, append it. + merged.push(*iv); + } else { + // If there is overlap, so we merge the current and previous intervals. + last.end = cmp::max(last.end, iv.end); + } + } } } @@ -589,13 +607,13 @@ impl Store { merged.push(iv); } - if v1.end < v2.end { - iv1 = i1.next(); - } else if v1.end > v2.end { - iv2 = i2.next(); - } else { - iv1 = i1.next(); - iv2 = i2.next(); + match v1.end.cmp(&v2.end) { + Less => iv1 = i1.next(), + Greater => iv2 = i2.next(), + Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + } } } From 3c99804e0758798e84a3cf2f2fdc1306b1fca542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 18:07:05 +0200 Subject: [PATCH 22/83] Implement the run array difference_with operation --- src/bitmap/store.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 97121c0ba..18d18ff89 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -661,7 +661,7 @@ impl Store { vec.retain(|i| !run.contains(*i)); } (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { - for index in vec2.iter() { + for index in vec2 { this.remove(*index); } } @@ -677,7 +677,11 @@ impl Store { } // TODO(jpg) difference_with run, * (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (ref mut this @ &mut Run(..), &Array(ref vec)) => { + for i in vec { + this.remove(*i); + } + } (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } } From c762f938a6bbec52f6d38f42f504b913c95315db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 16:36:47 +0200 Subject: [PATCH 23/83] Mark array run symmetric_difference_with operation as unimplemented --- src/bitmap/store.rs | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 18d18ff89..ad6285960 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -720,7 +720,7 @@ impl Store { *this = new; } // TODO(jpg) symmetric_difference_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} + (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 ^= index2; From 9744f12a9f0a829b57aa75fef11ba45a4db3c8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 16:57:54 +0200 Subject: [PATCH 24/83] Implement the array run is_subset operation --- src/bitmap/store.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index ad6285960..949aff541 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -324,8 +324,7 @@ impl Store { } } (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), - // TODO(jpg) is_subset array, run - (&Array(ref _vec), &Run(ref _intervals)) => unimplemented!(), + (&Array(ref vec), run @ &Run(..)) => vec.iter().all(|&i| run.contains(i)), (&Bitmap(ref bits1), &Bitmap(ref bits2)) => bits1 .iter() From 67784ad109cafbbb5104d3b76a0511875ab1c997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 18:32:16 +0200 Subject: [PATCH 25/83] Implement the run run difference_with operation --- src/bitmap/store.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 949aff541..930ba83ad 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -674,13 +674,17 @@ impl Store { this.remove_range(iv.start as u32, iv.end as u32 + 1); } } - // TODO(jpg) difference_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (ref mut this @ 
&mut Run(..), &Run(ref intervals2)) => { + for iv in intervals2 { + this.remove_range(iv.start as u32, iv.end as u32 + 1); + } + } (ref mut this @ &mut Run(..), &Array(ref vec)) => { for i in vec { this.remove(*i); } } + // TODO(jpg) difference_with run bitmap (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } } From a57aff16f2a0838e6aa19992d440334231c921a3 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Thu, 10 Apr 2025 19:49:36 +0200 Subject: [PATCH 26/83] feat: insert and insert_range on runs Implements and tests `insert` and `insert_range` methods on runs. --- roaring/src/bitmap/store/interval_store.rs | 551 +++++++++++++++++++++ roaring/src/bitmap/store/mod.rs | 39 +- 2 files changed, 559 insertions(+), 31 deletions(-) create mode 100644 roaring/src/bitmap/store/interval_store.rs diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs new file mode 100644 index 000000000..dadade0c6 --- /dev/null +++ b/roaring/src/bitmap/store/interval_store.rs @@ -0,0 +1,551 @@ +use core::cmp::Ordering; +use core::ops::RangeInclusive; + +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct IntervalStore(Vec); + +impl IntervalStore { + pub fn new() -> Self { + Self(Default::default()) + } + + #[inline] + pub fn insert(&mut self, index: u16) -> bool { + self.0 + .binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()) + .map_err(|loc| { + // loc may be equal to self.0.len() + let loc_or_last = if loc < self.0.len() { + Some(loc) + } else if self.0.len() != 0 { + Some(self.0.len() - 1) + } else { + None + }; + // There exists an interval at or before the location we should insert + if let Some(loc_or_last) = loc_or_last { + if index == self.0[loc_or_last].end + 1 { + // index immediately follows an interval + // Checking for sandwiched intervals is not needed because of binary search loc + // i.e. 
when the index is sandwiched between two intervals we always + // get the right most interval, which puts us in the different if + self.0[loc_or_last].end += 1; + } else if index + .checked_add(1) + .map(|f| f == self.0[loc_or_last].start) + .unwrap_or(false) + { + // checked_add required for if u16::MAX is added + // Value immediately precedes interval + if loc > 0 && self.0[loc - 1].end == index - 1 { + // Merge with preceding interval + self.0[loc - 1].end = self.0[loc].end; + self.0.remove(loc); + return; + } + self.0[loc].start -= 1; + } else { + // The value stands alone + self.0.insert(loc, Interval::new(index, index)); + } + } else { + // there does not exist a single interval + self.0.insert(loc, Interval::new(index, index)); + } + }) + .is_err() + } + + fn drain_overlapping(&mut self, start_index: usize, interval: &Interval) -> u64 { + let value = self.drain_overlapping_range(start_index, interval); + if let Some(to_drain) = value.1 { + self.0.drain(start_index..to_drain); + } + value.0 + } + + fn drain_overlapping_range( + &mut self, + start_index: usize, + interval: &Interval, + ) -> (u64, Option) { + let mut drain_loc = None; + let mut amount = 0; + let mut intervals = dbg!(&self.0[start_index..]).iter().enumerate().peekable(); + while let Some((i, cur_interval)) = intervals.next() { + if !interval.contains_interval(cur_interval) { + drain_loc = Some(start_index + i); + break; + } + amount += u64::from(cur_interval.run_len()); + if intervals.peek().is_none() { + drain_loc = Some(start_index + i + 1); + } + } + (amount, drain_loc) + } + + #[inline] + pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + let interval = Interval { start: *range.start(), end: *range.end() }; + let first_interval = + self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); + let end_interval = + self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); + match (first_interval, end_interval) { + // both start and end 
index are contained in intervals + (Ok(begin), Ok(end)) => { + if begin == end { + return 0; + } + let drained_amount: u64 = + self.0[begin + 1..end].iter().map(|f| u64::from(f.run_len())).sum(); + let amount = u64::from( + Interval::new(self.0[begin].end + 1, self.0[end].start - 1).run_len(), + ) - drained_amount; + self.0[begin].end = self.0[end].end; + self.0.drain(begin + 1..=end); + return amount; + } + // start index is contained in an interval, + // end index is not + (Ok(begin), Err(to_insert)) => { + let (new_end, drain_id) = + // if there is a next interval, check if these intervals are consecutive + if to_insert < self.0.len() && self.0[to_insert].start - 1 == interval.end { + // The intervals are consecutive! Adjust new end of interval, and how far + // we drain + (self.0[to_insert].start, to_insert + 1) + } else { + (interval.end, to_insert) + }; + let drained_amount: u64 = + self.0[begin + 1..to_insert].iter().map(|f| u64::from(f.run_len())).sum(); + let amount = + u64::from(Interval::new(self.0[begin].end + 1, interval.end).run_len()) + - drained_amount; + self.0[begin].end = new_end; + self.0.drain(begin + 1..drain_id); + return amount; + } + // there is no interval that contains the start index, + // there is an interval that contains the end index, + (Err(to_begin), Ok(end)) => { + let consecutive_begin = + to_begin > 0 && self.0[to_begin - 1].end + 1 == interval.start; + let (drain_id, interval_id) = + // check if begin interval is consecutive with new interval + if consecutive_begin { + // The intervals are consecutive! 
Adjust how much we remove, and how + // which interval we end up keeping + (end + 1, to_begin - 1) + } else { + (end, end) + }; + let drained_amount: u64 = + self.0[to_begin..end].iter().map(|f| u64::from(f.run_len())).sum(); + let amount = + u64::from(Interval::new(interval.start, self.0[end].start - 1).run_len()) + - drained_amount; + if consecutive_begin { + self.0[interval_id].end = self.0[end].end; + } else { + self.0[interval_id].start = interval.start; + } + self.0.drain(to_begin..drain_id); + return amount; + } + (Err(to_begin), Err(to_end)) => { + if self.0.len() == 0 { + self.0.insert(to_begin, interval); + return interval.run_len().into(); + } + let consec_begin = to_begin > 0 && self.0[to_begin - 1].end + 1 == interval.start; + let conces_end = to_end < self.0.len() + && self.0[to_end] + .start + .checked_sub(1) + .map(|f| f == interval.end) + .unwrap_or(false); + if !consec_begin && !conces_end && to_begin == to_end { + // an arbitrary range with no consecutive intervals, unable to reuse existing interval + self.0.insert(to_begin, interval); + return interval.run_len().into(); + } + let (drain_id_begin, drain_id_end, interval_id) = { + if conces_end && consec_begin { + // Both intervals are consecutive! 
Adjust how much we remove, and + // which interval we end up keeping + // + // keep begin interval and remove end + // NOTE: to_begin - 1 since the interval we actually care about is one to + // the left e.g.: + // [3..=5, 9..=20] add 6..=8 -> + // to_begin = 1 + // to_end = 1 + (to_begin, to_end + 1, to_begin - 1) + } else if consec_begin { + // Remove end interval, keep begin to overwrite + // + // NOTE: to_begin - 1 since the interval we actually care about is one to + // the left e.g.: + // [3..=5] add 6..=8 -> + // to_begin = 1 + // to_end = 1 + (to_begin, to_end, to_begin - 1) + } else if conces_end { + // Remove begin interval, keep end to overwrite + // + // NOTE: no -1 since the interval we actually care about is one to + // the left e.g.: + // [8..=10] add 6..=7 -> + // to_begin = 0 + // to_end = 1 + (to_begin, to_end, to_end) + } else { + // keep end interval to overwrite + ( + to_begin, + to_end.min(self.0.len() - 1), + if to_end != self.0.len() { + to_begin + } else { + to_end.min(self.0.len() - 1) + }, + ) + } + }; + let drained_amount: u64 = + self.0[to_begin..to_end].iter().map(|f| u64::from(f.run_len())).sum(); + let end_amount_interval = + if conces_end { self.0[to_end].start - 1 } else { interval.end }; + let amount = + u64::from(Interval::new(interval.start, end_amount_interval).run_len()) + - drained_amount; + let end_interval = if conces_end { self.0[to_end].end } else { interval.end }; + + dbg!(end_interval); + dbg!(&self.0[interval_id]); + self.0[interval_id].end = end_interval; + if !consec_begin { + self.0[interval_id].start = interval.start; + } + dbg!(&self.0[interval_id]); + dbg!(interval_id); + self.0.drain(dbg!(drain_id_begin..drain_id_end)); + return amount; + } + }; + } +} + +/// This interval is inclusive to end. 
+#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] +pub(crate) struct Interval { + pub start: u16, + pub end: u16, +} + +impl IntoIterator for Interval { + type Item = u16; + type IntoIter = RangeInclusive; + + fn into_iter(self) -> Self::IntoIter { + self.start..=self.end + } +} + +impl IntoIterator for &'_ Interval { + type Item = u16; + type IntoIter = RangeInclusive; + + fn into_iter(self) -> Self::IntoIter { + self.start..=self.end + } +} + +pub(crate) fn cmp_index_interval(index: u16, iv: Interval) -> Ordering { + if index < iv.start { + Ordering::Less + } else if index > iv.end { + Ordering::Greater + } else { + Ordering::Equal + } +} + +impl Interval { + pub fn new(start: u16, end: u16) -> Interval { + Interval { start, end } + } + + pub fn contains_index(&self, value: u16) -> bool { + self.start <= value && value <= self.end + } + + pub fn contains_interval(&self, interval: &Interval) -> bool { + self.start <= interval.start && interval.end <= self.end + } + + pub fn run_len(&self) -> u32 { + (self.end - self.start) as u32 + 1 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::u16; + + #[test] + fn insert_empty() { + let mut interval_store = IntervalStore(alloc::vec![]); + assert!(interval_store.insert(1)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 1 }])) + } + + #[test] + fn insert_consecutive_begin() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 0 }, + ]); + assert!(interval_store.insert(1)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 1 }])) + } + + #[test] + fn insert_consecutive_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 1 }, + ]); + assert!(interval_store.insert(0)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 1 }])) + } + + #[test] + fn insert_consecutive_begin_end() { + let mut interval_store = IntervalStore(alloc::vec![ + 
Interval { start: 0, end: 0 }, + Interval { start: 2, end: 2 }, + ]); + interval_store.insert(1); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 2 }])) + } + + #[test] + fn insert_arbitrary() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 3 }, + Interval { start: 9, end: 10 }, + ]); + interval_store.insert(5); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 0, end: 3 }, + Interval { start: 5, end: 5 }, + Interval { start: 9, end: 10 }, + ]) + ) + } + + #[test] + fn insert_u16_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 0, end: 3 },]); + interval_store.insert(u16::MAX); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 0, end: 3 }, + Interval { start: u16::MAX, end: u16::MAX }, + ]) + ) + } + + #[test] + fn insert_u16_max_consecutive() { + let mut interval_store = + IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX - 1 },]); + interval_store.insert(u16::MAX); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX },]) + ) + } + + #[test] + fn insert_range_empty() { + let mut interval_store = IntervalStore(alloc::vec![ + ]); + assert_eq!( + interval_store.insert_range(1..=2), + (Interval::new(1, 2).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 1, end: 2 }, + ])); + } + + #[test] + fn insert_range_overlap_begin() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 20 } + ]); + assert_eq!( + interval_store.insert_range(5..=50), + (Interval::new(21, 50).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 1, end: 50 }, + ])); + } + + #[test] + fn insert_range_overlap_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 } + ]); + assert_eq!( + interval_store.insert_range(5..=15), 
+ (Interval::new(5, 9).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 5, end: 20 }, + ])); + } + + #[test] + fn insert_range_overlap_begin_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 40, end: 60 }, + ]); + assert_eq!( + interval_store.insert_range(15..=50), + (Interval::new(21, 39).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 10, end: 60 }, + ])); + } + + #[test] + fn insert_range_concescutive_begin() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + ]); + assert_eq!( + interval_store.insert_range(21..=50), + (Interval::new(21, 50).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 10, end: 50 }, + ])); + } + + #[test] + fn insert_range_concescutive_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(21..=49), + (Interval::new(21, 49).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 21, end: 70 }, + ])); + } + + #[test] + fn insert_range_concescutive_begin_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(21..=49), + (Interval::new(21, 49).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 10, end: 70 }, + ])); + } + + #[test] + fn insert_range_no_overlap() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(25..=30), + (Interval::new(25, 30).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 
10, end: 20 }, + Interval { start: 25, end: 30 }, + Interval { start: 50, end: 70 }, + ])); + } + + #[test] + fn insert_range_u16_max_no_overlap() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(90..=u16::MAX), + (Interval::new(90, u16::MAX).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + Interval { start: 90, end: u16::MAX }, + ])); + } + + #[test] + fn insert_range_u16_max_overlap_begin() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(70..=u16::MAX), + (Interval::new(71, u16::MAX).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: u16::MAX }, + ])); + } + + #[test] + fn insert_range_u16_max_overlap_all() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(0..=u16::MAX), + (Interval::new(0, u16::MAX).run_len() + - Interval::new(10, 20).run_len() - Interval::new(50, 70).run_len()) + .into() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![ + Interval { start: 0, end: u16::MAX }, + ])); + } +} diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 7237a50d2..5d4156cd7 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -1,5 +1,6 @@ mod array_store; mod bitmap_store; +mod interval_store; use alloc::vec; use core::cmp::Ordering; @@ -14,38 +15,14 @@ use self::Store::{Array, Bitmap, Run}; pub(crate) use self::array_store::ArrayStore; pub use self::bitmap_store::{BitmapIter, BitmapStore}; +pub(crate) use 
self::interval_store::Interval; +use self::interval_store::cmp_index_interval; use crate::bitmap::container::ARRAY_LIMIT; #[cfg(not(feature = "std"))] use alloc::boxed::Box; -#[derive(PartialEq, Copy, Clone, Debug)] -pub struct Interval { - pub start: u16, - pub end: u16, -} - -fn cmp_index_interval(index: u16, iv: Interval) -> Ordering { - if index < iv.start { - Ordering::Less - } else if index > iv.end { - Ordering::Greater - } else { - Ordering::Less - } -} - -impl Interval { - pub fn new(start: u16, end: u16) -> Interval { - Interval { start, end } - } - - pub fn run_len(&self) -> u64 { - (self.end - self.start) as u64 + 1 - } -} - #[derive(Clone)] pub(crate) enum Store { Array(ArrayStore), @@ -242,12 +219,12 @@ impl Store { Run(ref mut intervals) => { let start = *range.start(); let end = *range.end(); - let mut count = 0; + let mut count: u64 = 0; let mut search_end = false; for iv in intervals.iter_mut() { if !search_end && cmp_index_interval(start as u16, *iv) == Ordering::Equal { - count += Interval::new(iv.end, start as u16).run_len(); + count += u64::from(Interval::new(iv.end, start as u16).run_len()); iv.end = start as u16; search_end = true; } @@ -258,13 +235,13 @@ impl Store { Ordering::Less => { // We invalidate the intervals that are contained in // the start and end but doesn't touch the bounds. - count += iv.run_len(); + count += u64::from(iv.run_len()); *iv = Interval::new(u16::max_value(), 0); } Ordering::Equal => { // We shrink this interval by moving the start of it to be // the end bound which is non-inclusive. 
- count += Interval::new(end as u16, iv.start).run_len(); + count += u64::from(Interval::new(end as u16, iv.start).run_len()); iv.start = end as u16; } Ordering::Greater => break, @@ -984,7 +961,7 @@ impl RunIter { fn move_next(&mut self) { self.offset += 1; - if self.offset == self.intervals[self.run].run_len() { + if self.offset == self.intervals[self.run].run_len().into() { self.offset = 0; self.run += 1; } From 47b7cbfb2fea72091555f6dafc0432c52ebb9350 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 11 Apr 2025 15:04:10 +0200 Subject: [PATCH 27/83] fix: fixes ci failures introduced in a57aff16 This fixes some failing tests and adds some `#[allow(todo]` and `#[allow(unused]`. --- roaring/src/bitmap/arbitrary.rs | 1 + roaring/src/bitmap/container.rs | 2 + roaring/src/bitmap/multiops.rs | 1 + roaring/src/bitmap/serialization.rs | 50 ++--- roaring/src/bitmap/statistics.rs | 1 + roaring/src/bitmap/store/array_store/mod.rs | 2 + roaring/src/bitmap/store/bitmap_store.rs | 1 - roaring/src/bitmap/store/interval_store.rs | 200 +++++++------------- roaring/src/bitmap/store/mod.rs | 89 +++++---- roaring/tests/clone.rs | 7 +- roaring/tests/serialization.rs | 17 +- roaring/tests/size_hint.rs | 7 +- 12 files changed, 162 insertions(+), 216 deletions(-) diff --git a/roaring/src/bitmap/arbitrary.rs b/roaring/src/bitmap/arbitrary.rs index 4746ea536..7a3d2eb3f 100644 --- a/roaring/src/bitmap/arbitrary.rs +++ b/roaring/src/bitmap/arbitrary.rs @@ -126,6 +126,7 @@ mod test { } impl Debug for Store { + #[allow(clippy::todo)] fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { match self { Store::Array(a) => write!(f, "Store({a:?})"), diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 7097bc0f5..cfee0feb3 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -108,6 +108,7 @@ impl Container { result } + #[allow(clippy::todo)] pub fn remove_smallest(&mut self, n: u64) { match &self.store { 
Store::Bitmap(bits) => { @@ -124,6 +125,7 @@ impl Container { }; } + #[allow(clippy::todo)] pub fn remove_biggest(&mut self, n: u64) { match &self.store { Store::Bitmap(bits) => { diff --git a/roaring/src/bitmap/multiops.rs b/roaring/src/bitmap/multiops.rs index 833525c92..48fbe23fb 100644 --- a/roaring/src/bitmap/multiops.rs +++ b/roaring/src/bitmap/multiops.rs @@ -385,6 +385,7 @@ fn try_multi_xor_ref<'a, E: 'a>( Ok(RoaringBitmap { containers }) } +#[allow(clippy::todo)] fn merge_container_ref<'a>( containers: &mut Vec>, rhs: &'a [Container], diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 85892e55a..41033d3ea 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -1,5 +1,5 @@ use crate::bitmap::container::{Container, ARRAY_LIMIT}; -use crate::bitmap::store::{Interval, ArrayStore, BitmapStore, Store, BITMAP_LENGTH}; +use crate::bitmap::store::{ArrayStore, BitmapStore, Interval, Store, BITMAP_LENGTH}; use crate::RoaringBitmap; use bytemuck::cast_slice_mut; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -46,8 +46,8 @@ impl RoaringBitmap { .containers .iter() .map(|container| match container.store { - Store::Array(ref values) => 8 + values.len() as usize * ARRAY_ELEMENT_BYTES, - Store::Bitmap(..) => 8 + BITMAP_BYTES, + Store::Array(ref values) => values.len() as usize * ARRAY_ELEMENT_BYTES, + Store::Bitmap(..) 
=> BITMAP_BYTES, Store::Run(ref intervals) => { has_run_containers = true; RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * intervals.len()) @@ -77,13 +77,7 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialize_into(&self, mut writer: W) -> io::Result<()> { - let has_run_containers = self.containers.iter().any(|c| { - if let Store::Run(_) = c.store { - true - } else { - false - } - }); + let has_run_containers = self.containers.iter().any(|c| matches!(c.store, Store::Run(_))); let size = self.containers.len(); // Depending on if run containers are present or not write the appropriate header @@ -113,17 +107,20 @@ impl RoaringBitmap { } let mut offset = header_size(size, has_run_containers) as u32; - for container in &self.containers { - writer.write_u32::(offset)?; - match container.store { - Store::Array(ref values) => { - offset += values.len() as u32 * 2; - } - Store::Bitmap(..) => { - offset += 8 * 1024; - } - Store::Run(ref intervals) => { - offset += (RUN_NUM_BYTES + (intervals.len() * RUN_ELEMENT_BYTES)) as u32; + let has_offsets = if has_run_containers { size > OFFSET_BYTES } else { true }; + if has_offsets { + for container in &self.containers { + writer.write_u32::(offset)?; + match container.store { + Store::Array(ref values) => { + offset += values.len() as u32 * 2; + } + Store::Bitmap(..) 
=> { + offset += 8 * 1024; + } + Store::Run(ref intervals) => { + offset += (RUN_NUM_BYTES + (intervals.len() * RUN_ELEMENT_BYTES)) as u32; + } } } } @@ -273,10 +270,13 @@ impl RoaringBitmap { *len = u16::from_le(*len); }); - let intervals = intervals.into_iter().map(|[start, len]| -> Result { - let end = start.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; - Ok(Interval { start, end }) - }).collect::>()?; + let intervals = intervals + .into_iter() + .map(|[start, len]| -> Result { + let end = start.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; + Ok(Interval { start, end }) + }) + .collect::>()?; Store::Run(intervals) } else if cardinality <= ARRAY_LIMIT { diff --git a/roaring/src/bitmap/statistics.rs b/roaring/src/bitmap/statistics.rs index fc936d0dc..f62762dd1 100644 --- a/roaring/src/bitmap/statistics.rs +++ b/roaring/src/bitmap/statistics.rs @@ -60,6 +60,7 @@ impl RoaringBitmap { /// assert_eq!(statistics.min_value, Some(1)); /// assert_eq!(statistics.cardinality, 99); /// ``` + #[allow(clippy::todo)] pub fn statistics(&self) -> Statistics { let mut n_containers = 0; let mut n_array_containers = 0; diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 363eb46cc..071440954 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -456,6 +456,7 @@ mod tests { use super::*; use crate::bitmap::store::Store; + #[allow(clippy::todo)] fn into_vec(s: Store) -> Vec { match s { Store::Array(vec) => vec.vec, @@ -464,6 +465,7 @@ mod tests { } } + #[allow(clippy::todo)] fn into_bitmap_store(s: Store) -> Store { match s { Store::Array(vec) => Store::Bitmap(vec.to_bitmap_store()), diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index 5c1ad8b24..1a19d9099 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -356,7 +356,6 @@ impl BitmapStore { 
BitmapIter::new(self.bits) } - #[cfg(feature = "std")] pub fn as_array(&self) -> &[u64; BITMAP_LENGTH] { &self.bits } diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index dadade0c6..345166f57 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,3 +1,5 @@ +#![allow(unused)] +use alloc::vec::Vec; use core::cmp::Ordering; use core::ops::RangeInclusive; @@ -17,7 +19,7 @@ impl IntervalStore { // loc may be equal to self.0.len() let loc_or_last = if loc < self.0.len() { Some(loc) - } else if self.0.len() != 0 { + } else if !self.0.is_empty() { Some(self.0.len() - 1) } else { None @@ -71,13 +73,13 @@ impl IntervalStore { ) -> (u64, Option) { let mut drain_loc = None; let mut amount = 0; - let mut intervals = dbg!(&self.0[start_index..]).iter().enumerate().peekable(); + let mut intervals = self.0[start_index..].iter().enumerate().peekable(); while let Some((i, cur_interval)) = intervals.next() { if !interval.contains_interval(cur_interval) { drain_loc = Some(start_index + i); break; } - amount += u64::from(cur_interval.run_len()); + amount += cur_interval.run_len(); if intervals.peek().is_none() { drain_loc = Some(start_index + i + 1); } @@ -98,14 +100,12 @@ impl IntervalStore { if begin == end { return 0; } - let drained_amount: u64 = - self.0[begin + 1..end].iter().map(|f| u64::from(f.run_len())).sum(); - let amount = u64::from( - Interval::new(self.0[begin].end + 1, self.0[end].start - 1).run_len(), - ) - drained_amount; + let drained_amount: u64 = self.0[begin + 1..end].iter().map(|f| f.run_len()).sum(); + let amount = Interval::new(self.0[begin].end + 1, self.0[end].start - 1).run_len() + - drained_amount; self.0[begin].end = self.0[end].end; self.0.drain(begin + 1..=end); - return amount; + amount } // start index is contained in an interval, // end index is not @@ -120,13 +120,12 @@ impl IntervalStore { (interval.end, to_insert) }; let drained_amount: u64 
= - self.0[begin + 1..to_insert].iter().map(|f| u64::from(f.run_len())).sum(); + self.0[begin + 1..to_insert].iter().map(|f| f.run_len()).sum(); let amount = - u64::from(Interval::new(self.0[begin].end + 1, interval.end).run_len()) - - drained_amount; + Interval::new(self.0[begin].end + 1, interval.end).run_len() - drained_amount; self.0[begin].end = new_end; self.0.drain(begin + 1..drain_id); - return amount; + amount } // there is no interval that contains the start index, // there is an interval that contains the end index, @@ -142,23 +141,21 @@ impl IntervalStore { } else { (end, end) }; - let drained_amount: u64 = - self.0[to_begin..end].iter().map(|f| u64::from(f.run_len())).sum(); + let drained_amount: u64 = self.0[to_begin..end].iter().map(|f| f.run_len()).sum(); let amount = - u64::from(Interval::new(interval.start, self.0[end].start - 1).run_len()) - - drained_amount; + Interval::new(interval.start, self.0[end].start - 1).run_len() - drained_amount; if consecutive_begin { self.0[interval_id].end = self.0[end].end; } else { self.0[interval_id].start = interval.start; } self.0.drain(to_begin..drain_id); - return amount; + amount } (Err(to_begin), Err(to_end)) => { - if self.0.len() == 0 { + if self.0.is_empty() { self.0.insert(to_begin, interval); - return interval.run_len().into(); + return interval.run_len(); } let consec_begin = to_begin > 0 && self.0[to_begin - 1].end + 1 == interval.start; let conces_end = to_end < self.0.len() @@ -170,7 +167,7 @@ impl IntervalStore { if !consec_begin && !conces_end && to_begin == to_end { // an arbitrary range with no consecutive intervals, unable to reuse existing interval self.0.insert(to_begin, interval); - return interval.run_len().into(); + return interval.run_len(); } let (drain_id_begin, drain_id_end, interval_id) = { if conces_end && consec_begin { @@ -216,26 +213,21 @@ impl IntervalStore { } }; let drained_amount: u64 = - self.0[to_begin..to_end].iter().map(|f| u64::from(f.run_len())).sum(); + 
self.0[to_begin..to_end].iter().map(|f| f.run_len()).sum(); let end_amount_interval = if conces_end { self.0[to_end].start - 1 } else { interval.end }; let amount = - u64::from(Interval::new(interval.start, end_amount_interval).run_len()) - - drained_amount; + Interval::new(interval.start, end_amount_interval).run_len() - drained_amount; let end_interval = if conces_end { self.0[to_end].end } else { interval.end }; - dbg!(end_interval); - dbg!(&self.0[interval_id]); self.0[interval_id].end = end_interval; if !consec_begin { self.0[interval_id].start = interval.start; } - dbg!(&self.0[interval_id]); - dbg!(interval_id); - self.0.drain(dbg!(drain_id_begin..drain_id_end)); - return amount; + self.0.drain(drain_id_begin..drain_id_end); + amount } - }; + } } } @@ -287,15 +279,14 @@ impl Interval { self.start <= interval.start && interval.end <= self.end } - pub fn run_len(&self) -> u32 { - (self.end - self.start) as u32 + 1 + pub fn run_len(&self) -> u64 { + u64::from(self.end - self.start) + 1 } } #[cfg(test)] mod tests { use super::*; - use core::u16; #[test] fn insert_empty() { @@ -306,18 +297,14 @@ mod tests { #[test] fn insert_consecutive_begin() { - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 0, end: 0 }, - ]); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 0, end: 0 },]); assert!(interval_store.insert(1)); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 1 }])) } #[test] fn insert_consecutive_end() { - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 1, end: 1 }, - ]); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 1 },]); assert!(interval_store.insert(0)); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 1 }])) } @@ -375,46 +362,23 @@ mod tests { #[test] fn insert_range_empty() { - let mut interval_store = IntervalStore(alloc::vec![ - ]); - assert_eq!( - interval_store.insert_range(1..=2), - 
(Interval::new(1, 2).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 1, end: 2 }, - ])); + let mut interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.insert_range(1..=2), Interval::new(1, 2).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 2 },])); } #[test] fn insert_range_overlap_begin() { - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 1, end: 20 } - ]); - assert_eq!( - interval_store.insert_range(5..=50), - (Interval::new(21, 50).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 1, end: 50 }, - ])); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 20 }]); + assert_eq!(interval_store.insert_range(5..=50), Interval::new(21, 50).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 50 },])); } #[test] fn insert_range_overlap_end() { - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 10, end: 20 } - ]); - assert_eq!( - interval_store.insert_range(5..=15), - (Interval::new(5, 9).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 5, end: 20 }, - ])); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 10, end: 20 }]); + assert_eq!(interval_store.insert_range(5..=15), Interval::new(5, 9).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 5, end: 20 },])); } #[test] @@ -423,44 +387,22 @@ mod tests { Interval { start: 10, end: 20 }, Interval { start: 40, end: 60 }, ]); - assert_eq!( - interval_store.insert_range(15..=50), - (Interval::new(21, 39).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 10, end: 60 }, - ])); + assert_eq!(interval_store.insert_range(15..=50), Interval::new(21, 39).run_len()); + assert_eq!(interval_store, 
IntervalStore(alloc::vec![Interval { start: 10, end: 60 },])); } #[test] fn insert_range_concescutive_begin() { - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 10, end: 20 }, - ]); - assert_eq!( - interval_store.insert_range(21..=50), - (Interval::new(21, 50).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 10, end: 50 }, - ])); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 10, end: 20 },]); + assert_eq!(interval_store.insert_range(21..=50), Interval::new(21, 50).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 50 },])); } #[test] fn insert_range_concescutive_end() { - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 50, end: 70 }, - ]); - assert_eq!( - interval_store.insert_range(21..=49), - (Interval::new(21, 49).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 21, end: 70 }, - ])); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert_eq!(interval_store.insert_range(21..=49), Interval::new(21, 49).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 21, end: 70 },])); } #[test] @@ -469,14 +411,8 @@ mod tests { Interval { start: 10, end: 20 }, Interval { start: 50, end: 70 }, ]); - assert_eq!( - interval_store.insert_range(21..=49), - (Interval::new(21, 49).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 10, end: 70 }, - ])); + assert_eq!(interval_store.insert_range(21..=49), Interval::new(21, 49).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 70 },])); } #[test] @@ -485,16 +421,15 @@ mod tests { Interval { start: 10, end: 20 }, Interval { start: 50, end: 70 }, ]); + assert_eq!(interval_store.insert_range(25..=30), Interval::new(25, 30).run_len()); assert_eq!( - 
interval_store.insert_range(25..=30), - (Interval::new(25, 30).run_len()) - .into() - ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ + interval_store, + IntervalStore(alloc::vec![ Interval { start: 10, end: 20 }, Interval { start: 25, end: 30 }, Interval { start: 50, end: 70 }, - ])); + ]) + ); } #[test] @@ -505,14 +440,16 @@ mod tests { ]); assert_eq!( interval_store.insert_range(90..=u16::MAX), - (Interval::new(90, u16::MAX).run_len()) - .into() + Interval::new(90, u16::MAX).run_len() ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ Interval { start: 10, end: 20 }, Interval { start: 50, end: 70 }, Interval { start: 90, end: u16::MAX }, - ])); + ]) + ); } #[test] @@ -523,13 +460,15 @@ mod tests { ]); assert_eq!( interval_store.insert_range(70..=u16::MAX), - (Interval::new(71, u16::MAX).run_len()) - .into() + Interval::new(71, u16::MAX).run_len() ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ Interval { start: 10, end: 20 }, Interval { start: 50, end: u16::MAX }, - ])); + ]) + ); } #[test] @@ -540,12 +479,13 @@ mod tests { ]); assert_eq!( interval_store.insert_range(0..=u16::MAX), - (Interval::new(0, u16::MAX).run_len() - - Interval::new(10, 20).run_len() - Interval::new(50, 70).run_len()) - .into() + Interval::new(0, u16::MAX).run_len() + - Interval::new(10, 20).run_len() + - Interval::new(50, 70).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX },]) ); - assert_eq!(interval_store, IntervalStore(alloc::vec![ - Interval { start: 0, end: u16::MAX }, - ])); } } diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 5d4156cd7..4f10f2075 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -3,6 +3,7 @@ mod bitmap_store; mod interval_store; use alloc::vec; +use alloc::vec::Vec; use 
core::cmp::Ordering; use core::mem; use core::ops::{ @@ -15,8 +16,8 @@ use self::Store::{Array, Bitmap, Run}; pub(crate) use self::array_store::ArrayStore; pub use self::bitmap_store::{BitmapIter, BitmapStore}; -pub(crate) use self::interval_store::Interval; use self::interval_store::cmp_index_interval; +pub(crate) use self::interval_store::Interval; use crate::bitmap::container::ARRAY_LIMIT; @@ -138,6 +139,7 @@ impl Store { } } + #[allow(clippy::todo)] pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { // A Range is defined as being of size 0 if start >= end. if range.is_empty() { @@ -154,6 +156,7 @@ impl Store { /// Push `index` at the end of the store only if `index` is the new max. /// /// Returns whether `index` was effectively pushed. + #[allow(clippy::todo)] pub fn push(&mut self, index: u16) -> bool { match self { Array(vec) => vec.push(index), @@ -169,6 +172,7 @@ impl Store { /// # Panics /// /// If debug_assertions enabled and index is > self.max() + #[allow(clippy::todo)] pub(crate) fn push_unchecked(&mut self, index: u16) { match self { Array(vec) => vec.push_unchecked(index), @@ -223,9 +227,9 @@ impl Store { let mut search_end = false; for iv in intervals.iter_mut() { - if !search_end && cmp_index_interval(start as u16, *iv) == Ordering::Equal { - count += u64::from(Interval::new(iv.end, start as u16).run_len()); - iv.end = start as u16; + if !search_end && cmp_index_interval(start, *iv) == Ordering::Equal { + count += Interval::new(iv.end, start).run_len(); + iv.end = start; search_end = true; } @@ -235,14 +239,14 @@ impl Store { Ordering::Less => { // We invalidate the intervals that are contained in // the start and end but doesn't touch the bounds. - count += u64::from(iv.run_len()); - *iv = Interval::new(u16::max_value(), 0); + count += iv.run_len(); + *iv = Interval::new(u16::MAX, 0); } Ordering::Equal => { // We shrink this interval by moving the start of it to be // the end bound which is non-inclusive. 
- count += u64::from(Interval::new(end as u16, iv.start).run_len()); - iv.start = end as u16; + count += Interval::new(end, iv.start).run_len(); + iv.start = end; } Ordering::Greater => break, } @@ -258,6 +262,7 @@ impl Store { } } + #[allow(clippy::todo)] pub fn remove_smallest(&mut self, index: u64) { match self { Array(vec) => vec.remove_smallest(index), @@ -266,6 +271,7 @@ impl Store { } } + #[allow(clippy::todo)] pub fn remove_biggest(&mut self, index: u64) { match self { Array(vec) => vec.remove_biggest(index), @@ -278,12 +284,13 @@ impl Store { match self { Array(vec) => vec.contains(index), Bitmap(bits) => bits.contains(index), - Run(ref intervals) => intervals - .binary_search_by(|iv| cmp_index_interval(index, *iv)) - .is_ok(), + Run(ref intervals) => { + intervals.binary_search_by(|iv| cmp_index_interval(index, *iv)).is_ok() + } } } + #[allow(clippy::todo)] pub fn contains_range(&self, range: RangeInclusive) -> bool { match self { Array(vec) => vec.contains_range(range), @@ -304,7 +311,7 @@ impl Store { vec.iter().all(|&i| !bits.contains(i)) } // TODO(jpg) is_disjoint - (&Run(ref intervals1), &Run(ref intervals2)) => { + (Run(intervals1), Run(intervals2)) => { let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); let (mut iv1, mut iv2) = (i1.next(), i2.next()); loop { @@ -341,17 +348,18 @@ impl Store { (Bitmap(bits1), Bitmap(bits2)) => bits1.is_subset(bits2), (Array(vec), Bitmap(bits)) => vec.iter().all(|&i| bits.contains(i)), (Bitmap(..), &Array(..)) => false, - (&Array(ref vec), run @ &Run(..)) => vec.iter().all(|&i| run.contains(i)), + (Array(vec), run @ &Run(..)) => vec.iter().all(|&i| run.contains(i)), // TODO(jpg) is subset bitmap, run - (&Bitmap(..), &Run(ref _vec)) => unimplemented!(), + (Bitmap(..), Run(_vec)) => unimplemented!(), // TODO(jpg) is_subset run, * - (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&Run(ref _intervals), &Array(ref _vec)) => unimplemented!(), - (&Run(ref _intervals), _store @ &Bitmap(..)) 
=> unimplemented!(), + (Run(_intervals1), Run(_intervals2)) => unimplemented!(), + (Run(_intervals), Array(_vec)) => unimplemented!(), + (Run(_intervals), _store @ &Bitmap(..)) => unimplemented!(), } } + #[allow(clippy::todo)] pub fn intersection_len(&self, other: &Self) -> u64 { match (self, other) { (Array(vec1), Array(vec2)) => vec1.intersection_len(vec2), @@ -367,10 +375,11 @@ impl Store { match self { Array(vec) => vec.len(), Bitmap(bits) => bits.len(), - Run(ref intervals) => intervals.iter().map(|iv| iv.run_len() as u64).sum(), + Run(intervals) => intervals.iter().map(|iv| iv.run_len()).sum(), } } + #[allow(clippy::todo)] pub fn is_empty(&self) -> bool { match self { Array(vec) => vec.is_empty(), @@ -396,6 +405,7 @@ impl Store { } } + #[allow(clippy::todo)] pub fn rank(&self, index: u16) -> u64 { match self { Array(vec) => vec.rank(index), @@ -404,6 +414,7 @@ impl Store { } } + #[allow(clippy::todo)] pub fn select(&self, n: u16) -> Option { match self { Array(vec) => vec.select(n), @@ -499,14 +510,14 @@ impl Store { current |= current - 1; // Find next 0 - while current == std::u64::MAX && i < BITMAP_LENGTH as u16 - 1 { + while current == u64::MAX && i < BITMAP_LENGTH as u16 - 1 { i += 1; current = bits.as_array()[i as usize]; } // Run continues until end of this container - if current == std::u64::MAX { - intervals.push(Interval::new(start, std::u16::MAX)); + if current == u64::MAX { + intervals.push(Interval::new(start, u16::MAX)); break; } @@ -533,6 +544,7 @@ impl Default for Store { impl BitOr<&Store> for &Store { type Output = Store; + #[allow(clippy::todo)] fn bitor(self, rhs: &Store) -> Store { match (self, rhs) { (Array(vec1), Array(vec2)) => Array(BitOr::bitor(vec1, vec2)), @@ -646,6 +658,7 @@ impl BitOrAssign for Store { } impl BitOrAssign<&Store> for Store { + #[allow(clippy::todo)] fn bitor_assign(&mut self, rhs: &Store) { match (self, rhs) { (&mut Array(ref mut vec1), Array(vec2)) => { @@ -757,6 +770,7 @@ impl BitAndAssign for Store { impl 
BitAndAssign<&Store> for Store { #[allow(clippy::suspicious_op_assign_impl)] + #[allow(clippy::todo)] fn bitand_assign(&mut self, rhs: &Store) { match (self, rhs) { (&mut Array(ref mut vec1), Array(vec2)) => { @@ -819,17 +833,17 @@ impl SubAssign<&Store> for Store { (&mut Array(ref mut vec1), Bitmap(bits2)) => { SubAssign::sub_assign(vec1, bits2); } - (ref mut this @ &mut Bitmap(..), &Run(ref intervals)) => { + (ref mut this @ &mut Bitmap(..), Run(intervals)) => { for iv in intervals { this.remove_range(iv.start..=iv.end); } } - (ref mut this @ &mut Run(..), &Run(ref intervals2)) => { + (ref mut this @ &mut Run(..), Run(intervals2)) => { for iv in intervals2 { this.remove_range(iv.start..=iv.end); } } - (ref mut this @ &mut Run(..), &Array(ref vec)) => { + (ref mut this @ &mut Run(..), Array(vec)) => { for i in vec.iter() { this.remove(*i); } @@ -861,6 +875,7 @@ impl BitXor<&Store> for &Store { } impl BitXorAssign for Store { + #[allow(clippy::todo)] fn bitxor_assign(&mut self, mut rhs: Store) { match (self, &mut rhs) { (&mut Array(ref mut vec1), &mut Array(ref vec2)) => { @@ -890,7 +905,7 @@ impl BitXorAssign<&Store> for Store { *vec1 = BitXor::bitxor(&this, vec2); } // TODO(jpg) symmetric_difference_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), + (&mut Array(ref mut _vec), Run(_intervals)) => unimplemented!(), (&mut Bitmap(ref mut bits1), Array(vec2)) => { BitXorAssign::bitxor_assign(bits1, vec2); } @@ -898,10 +913,10 @@ impl BitXorAssign<&Store> for Store { BitXorAssign::bitxor_assign(bits1, bits2); } // TODO(jpg) symmetric_difference_with bitmap, run - (ref mut _this @ &mut Bitmap(..), &Run(ref _vec)) => unimplemented!(), + (ref mut _this @ &mut Bitmap(..), Run(_vec)) => unimplemented!(), // TODO(jpg) symmetric_difference_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (&mut Run(ref mut _intervals1), 
Run(_intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), Array(_vec)) => unimplemented!(), (_this @ &mut Run(..), &Bitmap(..)) => unimplemented!(), (this @ &mut Array(..), Bitmap(bits2)) => { let mut lhs: Store = Bitmap(bits2.clone()); @@ -944,7 +959,7 @@ impl PartialEq for Store { bits1.len() == bits2.len() && bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) } - (&Run(ref intervals1), &Run(ref intervals2)) => intervals1 == intervals2, + (Run(intervals1), Run(ref intervals2)) => intervals1 == intervals2, _ => false, } } @@ -952,16 +967,12 @@ impl PartialEq for Store { impl RunIter { fn new(intervals: Vec) -> RunIter { - RunIter { - run: 0, - offset: 0, - intervals, - } + RunIter { run: 0, offset: 0, intervals } } fn move_next(&mut self) { self.offset += 1; - if self.offset == self.intervals[self.run].run_len().into() { + if self.offset == self.intervals[self.run].run_len() { self.offset = 0; self.run += 1; } @@ -981,11 +992,15 @@ impl Iterator for RunIter { } fn size_hint(&self) -> (usize, Option) { - panic!("Should never be called (roaring::Iter caches the size_hint itself)") + let remaining_size = + self.intervals[self.run..].iter().map(|f| f.run_len()).sum::() - self.offset; + let as_usize: Result = remaining_size.try_into(); + (as_usize.unwrap_or(usize::MAX), as_usize.ok()) } } impl DoubleEndedIterator for RunIter { + #[allow(clippy::todo)] fn next_back(&mut self) -> Option { todo!() } @@ -993,6 +1008,7 @@ impl DoubleEndedIterator for RunIter { impl Iter<'_> { /// Advance the iterator to the first value greater than or equal to `n`. 
+ #[allow(clippy::todo)] pub(crate) fn advance_to(&mut self, n: u16) { match self { Iter::Array(inner) => { @@ -1013,6 +1029,7 @@ impl Iter<'_> { } } + #[allow(clippy::todo)] pub(crate) fn advance_back_to(&mut self, n: u16) { match self { Iter::Array(inner) => { diff --git a/roaring/tests/clone.rs b/roaring/tests/clone.rs index 2e8082ae2..c5e9c15f4 100644 --- a/roaring/tests/clone.rs +++ b/roaring/tests/clone.rs @@ -46,11 +46,8 @@ fn bitmaps() { #[test] #[allow(clippy::redundant_clone)] fn runs() { - let mut original = RoaringBitmap::from_iter( - (0..6000) - .chain(1_000_000..1_012_000) - .chain(2_000_000..2_010_000), - ); + let mut original = + RoaringBitmap::from_iter((0..6000).chain(1_000_000..1_012_000).chain(2_000_000..2_010_000)); original.optimize(); let clone = original.clone(); diff --git a/roaring/tests/serialization.rs b/roaring/tests/serialization.rs index ce47bbc87..f47725dcd 100644 --- a/roaring/tests/serialization.rs +++ b/roaring/tests/serialization.rs @@ -30,21 +30,10 @@ fn test_deserialize_without_runs_from_provided_data() { #[test] fn test_deserialize_with_runs_from_provided_data() { - assert_eq!( - RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), - test_data_bitmap() - ); -} - -#[test] -fn test_deserialize_with_run_containers_from_provided_data() { let mut expected = test_data_bitmap(); // Call optimize to create run containers expected.optimize(); - assert_eq!( - RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), - expected - ); + assert_eq!(RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), expected); } #[test] @@ -83,7 +72,7 @@ fn test_one() { fn test_array() { let original = (1000..3000).collect::(); let new = serialize_and_deserialize(&original); - assert_eq!(original, new); + assert_eq!(dbg!(original), dbg!(new)); } #[test] @@ -126,7 +115,7 @@ fn test_bitmap() { #[test] fn test_arrays() { let original = (1000..3000).chain(70000..74000).collect::(); -let new = 
serialize_and_deserialize(&original); + let new = serialize_and_deserialize(&original); assert_eq!(original, new); } diff --git a/roaring/tests/size_hint.rs b/roaring/tests/size_hint.rs index db22e2660..14dc5d142 100644 --- a/roaring/tests/size_hint.rs +++ b/roaring/tests/size_hint.rs @@ -69,11 +69,8 @@ fn bitmaps() { #[test] fn runs() { - let mut bitmap = RoaringBitmap::from_iter( - (0..2000) - .chain(1_000_000..1_002_000) - .chain(2_000_000..2_001_000), - ); + let mut bitmap = + RoaringBitmap::from_iter((0..2000).chain(1_000_000..1_002_000).chain(2_000_000..2_001_000)); bitmap.optimize(); let mut iter = bitmap.iter(); assert_eq!((5000, Some(5000)), iter.size_hint()); From 71c6679b49f0b7b61c9c100ab336d16389cda158 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 11 Apr 2025 15:20:36 +0200 Subject: [PATCH 28/83] feat: run store push --- roaring/src/bitmap/store/interval_store.rs | 69 ++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 345166f57..fde3c97a8 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -229,6 +229,23 @@ impl IntervalStore { } } } + + pub fn push(&mut self, index: u16) -> bool { + if let Some(last_interval) = self.0.last_mut() { + if last_interval.end.checked_add(1).map(|f| f == index).unwrap_or(false) { + last_interval.end = index; + true + } else if last_interval.end < index { + self.0.push(Interval::new(index, index)); + true + } else { + false + } + } else { + self.0.push(Interval::new(index, index)); + true + } + } } /// This interval is inclusive to end. 
@@ -488,4 +505,56 @@ mod tests { IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX },]) ); } + + #[test] + fn push_new_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(interval_store.push(80)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 50, end: 70 }, + Interval { start: 80, end: 80 }, + ]) + ); + } + + #[test] + fn push_new_max_consecutive() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(interval_store.push(71)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 50, end: 71 },])); + } + + #[test] + fn push_existing() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(!interval_store.push(60)); + assert_eq!(interval_store, interval_store); + } + + #[test] + fn push_non_existing_non_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(!interval_store.push(10)); + assert_eq!(interval_store, interval_store); + } + + #[test] + fn push_existing_u16_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]); + assert!(!interval_store.push(u16::MAX)); + assert_eq!(interval_store, interval_store); + } + + #[test] + fn push_new_u16_max() { + let mut interval_store = + IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX - 1 },]); + assert!(interval_store.push(u16::MAX)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]) + ); + } } From 7dfdd92880383a64ab88b8c945fbd26df4f7ac43 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 11 Apr 2025 15:46:58 +0200 Subject: [PATCH 29/83] feat: run store remove index --- roaring/src/bitmap/store/interval_store.rs | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs 
b/roaring/src/bitmap/store/interval_store.rs index fde3c97a8..1aa01b3b5 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -246,6 +246,35 @@ impl IntervalStore { true } } + + pub fn remove(&mut self, index: u16) -> bool { + self.0 + .binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()) + .map(|loc| { + // loc always points to an interval + let equal_to_start = self.0[loc].start == index; + let equal_to_end = self.0[loc].end == index; + if index == self.0[loc].start && index == self.0[loc].end { + // Remove entire run if it only contains this value + self.0.remove(loc); + } else if index == self.0[loc].end { + // Value is last in this interval + self.0[loc].end = index - 1; + } else if index == self.0[loc].start { + // Value is first in this interval + self.0[loc].start = index + 1; + } else { + // Value lies inside the interval, we need to split it + // First construct a new interval with the right part + let new_interval = Interval::new(index + 1, self.0[loc].end); + // Then shrink the current interval + self.0[loc].end = index - 1; + // Then insert the new interval leaving gap where value was removed + self.0.insert(loc + 1, new_interval); + } + }) + .is_ok() + } } /// This interval is inclusive to end. 
@@ -557,4 +586,48 @@ mod tests { IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]) ); } + + #[test] + fn remove_end_of_interval() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 60 },]); + assert!(interval_store.remove(60)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 50, end: 59 },])); + } + + #[test] + fn remove_begin_of_interval() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 60 },]); + assert!(interval_store.remove(50)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 51, end: 60 },])); + } + + #[test] + fn remove_middle() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 3 },]); + assert!(interval_store.remove(2)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 1, end: 1 }, + Interval { start: 3, end: 3 }, + ]) + ); + } + + #[test] + fn remove_nothing() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 60 },]); + assert!(!interval_store.remove(90)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 50, end: 60 },])); + } + + #[test] + fn remove_u16_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]); + assert!(interval_store.remove(u16::MAX)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX - 1 },]) + ); + } } From f8df21cb70f0fb6046e9405e7ded4d0dd6acedac Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 11 Apr 2025 20:59:39 +0200 Subject: [PATCH 30/83] feat: run store remove range --- roaring/src/bitmap/store/interval_store.rs | 248 +++++++++++++++++++++ 1 file changed, 248 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 1aa01b3b5..b10733a52 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ 
b/roaring/src/bitmap/store/interval_store.rs @@ -275,6 +275,117 @@ impl IntervalStore { }) .is_ok() } + + pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + let interval = Interval { start: *range.start(), end: *range.end() }; + let first_interval = + self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); + let end_interval = + self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); + struct IdValue { + index: usize, + value: u16, + } + struct IntervalRange { + drain_range: core::ops::Range, + begin_value: Option, + end_value: Option, + residual_count: u64, + } + let todo = match (first_interval, end_interval) { + // both start and end index are contained in intervals + (Ok(begin), Ok(end)) => { + if self.0[begin].start == interval.start && self.0[end].end == interval.end { + IntervalRange { + drain_range: begin..end + 1, + begin_value: None, + end_value: None, + residual_count: 0, + } + } else if self.0[begin].start == interval.start { + IntervalRange { + drain_range: begin..end, + begin_value: None, + end_value: Some(IdValue { index: end, value: interval.end + 1 }), + residual_count: Interval::new(self.0[end].start, interval.end).run_len(), + } + } else if self.0[end].end == interval.end { + IntervalRange { + drain_range: begin + 1..end + 1, + begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), + end_value: None, + residual_count: Interval::new(interval.start, self.0[begin].end).run_len(), + } + } else { + IntervalRange { + drain_range: begin + 1..end, + begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), + end_value: Some(IdValue { index: end, value: interval.end + 1 }), + residual_count: Interval::new(self.0[end].start, interval.end).run_len() + + Interval::new(interval.start, self.0[begin].end).run_len(), + } + } + } + // start index is contained in an interval, + // end index is not + (Ok(begin), Err(to_insert)) => { + let end = if to_insert == self.0.len() 
{ self.0.len() - 1 } else { to_insert }; + if self.0[begin].start == interval.start { + IntervalRange { + drain_range: begin..end, + begin_value: None, + end_value: None, + residual_count: 0, + } + } else { + IntervalRange { + drain_range: begin + 1..end + 1, + begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), + end_value: None, + residual_count: Interval::new(interval.start, self.0[begin].end).run_len(), + } + } + } + // there is no interval that contains the start index, + // there is an interval that contains the end index, + (Err(begin), Ok(end)) => { + if self.0[begin].end == interval.end { + IntervalRange { + drain_range: begin..end + 1, + begin_value: None, + end_value: None, + residual_count: 0, + } + } else { + IntervalRange { + drain_range: begin..end, + begin_value: None, + end_value: Some(IdValue { index: end, value: interval.end + 1 }), + residual_count: Interval::new(self.0[end].start, interval.end).run_len(), + } + } + } + (Err(begin), Err(to_end)) => { + let end = if to_end == self.0.len() { self.0.len() - 1 } else { to_end }; + IntervalRange { + drain_range: begin..end + 1, + begin_value: None, + end_value: None, + residual_count: 0, + } + } + }; + let count = self.0[todo.drain_range.clone()].iter().map(|f| f.run_len()).sum::() + + todo.residual_count; + if let Some(IdValue { index, value }) = todo.begin_value { + self.0[index].end = value; + } + if let Some(IdValue { index, value }) = todo.end_value { + self.0[index].start = value; + } + self.0.drain(todo.drain_range); + count + } } /// This interval is inclusive to end. 
@@ -630,4 +741,141 @@ mod tests { IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX - 1 },]) ); } + + #[test] + fn remove_range_exact_one() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(40..=60), 21); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_exact_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 80, end: 90 }, + Interval { start: 100, end: 200 }, + ]); + assert_eq!( + interval_store.remove_range(40..=200), + Interval::new(40, 60).run_len() + + Interval::new(80, 90).run_len() + + Interval::new(100, 200).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_begin_exact_overlap_end_one() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(40..=80), + Interval::new(40, 60).run_len() + Interval::new(70, 80).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 81, end: 90 },])); + } + + #[test] + fn remove_range_begin_overlap_end_exact_one() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(50..=90), + Interval::new(70, 90).run_len() + Interval::new(50, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); + } + + #[test] + fn remove_range_both_overlap() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(50..=80), + Interval::new(70, 80).run_len() + Interval::new(50, 60).run_len() + ); + assert_eq!( + interval_store, + 
IntervalStore(alloc::vec![ + Interval { start: 40, end: 49 }, + Interval { start: 81, end: 90 }, + ]) + ); + } + + #[test] + fn remove_range_begin_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(50..=100), Interval::new(50, 60).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); + } + + #[test] + fn remove_range_begin_overlap_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 80, end: 100 }, + Interval { start: 200, end: 500 }, + ]); + assert_eq!( + interval_store.remove_range(50..=1000), + Interval::new(50, 60).run_len() + + Interval::new(80, 100).run_len() + + Interval::new(200, 500).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); + } + + #[test] + fn remove_range_end_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(20..=50), Interval::new(40, 50).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 51, end: 60 },])); + } + + #[test] + fn remove_range_end_overlap_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 100, end: 500 }, + Interval { start: 800, end: 900 }, + ]); + assert_eq!( + interval_store.remove_range(20..=850), + Interval::new(40, 60).run_len() + + Interval::new(100, 500).run_len() + + Interval::new(800, 850).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 851, end: 900 },])); + } + + #[test] + fn remove_range_no_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(20..=80), Interval::new(40, 60).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); 
+ } + + #[test] + fn remove_range_no_overlap_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 400, end: 600 }, + Interval { start: 4000, end: 6000 }, + ]); + assert_eq!( + interval_store.remove_range(20..=60000), + Interval::new(40, 60).run_len() + + Interval::new(400, 600).run_len() + + Interval::new(4000, 6000).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } } From 41580e44aed25c7776bc4a7b064790ac7a621f84 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 11 Apr 2025 21:15:07 +0200 Subject: [PATCH 31/83] feat: run store remove smallest --- roaring/src/bitmap/store/interval_store.rs | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index b10733a52..ce3a80fe0 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -386,6 +386,30 @@ impl IntervalStore { self.0.drain(todo.drain_range); count } + + pub fn remove_smallest(&mut self, mut amount: u64) { + let mut remove_to = 0; + let mut last_interval = None; + for (i, interval) in self.0.iter_mut().enumerate() { + let too_much = interval.run_len() < amount; + if too_much { + amount -= interval.run_len(); + } + remove_to = i; + last_interval = Some(interval); + if !too_much { + break; + } + } + if let Some(last_interval) = last_interval { + if last_interval.run_len() < amount { + remove_to += 1; + } else { + last_interval.start += amount as u16; + } + } + self.0.drain(..remove_to); + } } /// This interval is inclusive to end. 
@@ -878,4 +902,36 @@ mod tests { ); assert_eq!(interval_store, IntervalStore(alloc::vec![])); } + + #[test] + fn remove_smallest_one() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + interval_store.remove_smallest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_smallest_many_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 99 }, + Interval { start: 400, end: 600 }, + Interval { start: 4000, end: 6000 }, + ]); + interval_store.remove_smallest(200); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new(500, 600), Interval::new(4000, 6000),]) + ); + } + + #[test] + fn remove_smallest_many_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 99 }, + Interval { start: 400, end: 599 }, + Interval { start: 4000, end: 6000 }, + ]); + interval_store.remove_smallest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(4200, 6000),])); + } } From f4bb02556234268342e4d48a16358d4b4f1afb58 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 11 Apr 2025 22:45:44 +0200 Subject: [PATCH 32/83] feat: run store remove biggest --- roaring/src/bitmap/store/interval_store.rs | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index ce3a80fe0..76b134a18 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -410,6 +410,29 @@ impl IntervalStore { } self.0.drain(..remove_to); } + + pub fn remove_biggest(&mut self, mut amount: u64) { + let mut remove_to = 0; + let mut last_interval = None; + for (i, interval) in self.0.iter_mut().enumerate().rev() { + let too_much = interval.run_len() < amount; + if too_much { + amount -= interval.run_len(); + } + remove_to = i; + last_interval = Some(interval); + if !too_much { + break; + 
} + } + if let Some(last_interval) = last_interval { + if last_interval.run_len() >= amount { + remove_to += 1; + last_interval.end -= amount as u16; + } + } + self.0.drain(remove_to..); + } } /// This interval is inclusive to end. @@ -934,4 +957,36 @@ mod tests { interval_store.remove_smallest(500); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(4200, 6000),])); } + + #[test] + fn remove_biggest_one() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + interval_store.remove_biggest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_biggest_many_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 99 }, + Interval { start: 400, end: 600 }, + Interval { start: 5901, end: 6000 }, + ]); + interval_store.remove_biggest(200); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new(0, 99), Interval::new(400, 500),]) + ); + } + + #[test] + fn remove_biggest_many_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 6000 }, + Interval { start: 8401, end: 8600 }, + Interval { start: 9901, end: 10000 }, + ]); + interval_store.remove_biggest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(1, 5800),])); + } } From 38e203cce01299587aa06186a5d6be513780df04 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 10:45:55 +0200 Subject: [PATCH 33/83] feat: store run contains --- roaring/src/bitmap/store/interval_store.rs | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 76b134a18..8fb5f593a 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -433,6 +433,10 @@ impl IntervalStore { } self.0.drain(remove_to..); } + + pub fn contains(&self, index: u16) -> bool { + 
self.0.binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()).is_ok() + } } /// This interval is inclusive to end. @@ -989,4 +993,25 @@ mod tests { interval_store.remove_biggest(500); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(1, 5800),])); } + + #[test] + fn contains_index_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 6000 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store.contains(5)); + assert!(interval_store.contains(16000)); + } + + #[test] + fn contains_index_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 6000 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(!interval_store.contains(0)); + } } From 5f885430adb03aad35feb7dca685afed4b930f01 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 11:08:55 +0200 Subject: [PATCH 34/83] feat: runs store contains range --- roaring/src/bitmap/store/interval_store.rs | 47 +++++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 8fb5f593a..03a3aa00f 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -437,6 +437,19 @@ impl IntervalStore { pub fn contains(&self, index: u16) -> bool { self.0.binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()).is_ok() } + + pub fn contains_range(&self, range: RangeInclusive) -> bool { + let interval = Interval::new(*range.start(), *range.end()); + let start = self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); + let end = self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); + match (start, end) { + // both start and end are inside an interval, + // check if this interval is that same interval. 
+ // If this is not the case then this range is not contained in this store + (Ok(start_id), Ok(end_id)) => start_id == end_id, + _ => false, + } + } } /// This interval is inclusive to end. @@ -997,7 +1010,7 @@ mod tests { #[test] fn contains_index_1() { let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 1, end: 6000 }, + Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); @@ -1008,10 +1021,40 @@ mod tests { #[test] fn contains_index_2() { let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 1, end: 6000 }, + Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); assert!(!interval_store.contains(0)); } + + #[test] + fn contains_range_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store.contains_range(1..=500)); + } + + #[test] + fn contains_range_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(!interval_store.contains_range(1..=1500)); + } + + #[test] + fn contains_range_3() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store.contains_range(1..=1)); + } } From 2302e2c904663150756ef351ad53d84af47ff6d2 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 12:45:42 +0200 Subject: [PATCH 35/83] feat: runs store is disjoint --- roaring/src/bitmap/store/interval_store.rs | 147 +++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 03a3aa00f..3ccf8759e 
100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -3,6 +3,8 @@ use alloc::vec::Vec; use core::cmp::Ordering; use core::ops::RangeInclusive; +use super::{ArrayStore, BitmapStore, Store}; + #[derive(PartialEq, Eq, Clone, Debug)] pub struct IntervalStore(Vec); @@ -450,6 +452,41 @@ impl IntervalStore { _ => false, } } + + pub fn is_disjoint(&self, other: &Self) -> bool { + let (mut i1, mut i2) = (self.0.iter(), other.0.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + match (iv1, iv2) { + (Some(v1), Some(v2)) => { + if v1.overlaps(v2) { + return false; + } + + // We increase the iterator based on which one is furthest behind. + // Or both if they are equal to each other. + match v1.end.cmp(&v2.end) { + Ordering::Less => iv1 = i1.next(), + Ordering::Greater => iv2 = i2.next(), + Ordering::Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + } + } + } + (_, _) => return true, + } + } + } + + pub fn is_disjoint_array(&self, array: &ArrayStore) -> bool { + array.iter().all(|&i| !self.contains(i)) + } + + pub fn is_disjoint_bitmap(&self, array: &BitmapStore) -> bool { + // TODO: make this better + array.iter().all(|i| !self.contains(i)) + } } /// This interval is inclusive to end. 
@@ -500,6 +537,10 @@ impl Interval { self.start <= interval.start && interval.end <= self.end } + pub fn overlaps(&self, interval: &Interval) -> bool { + interval.start <= self.end && self.start <= interval.end + } + pub fn run_len(&self) -> u64 { u64::from(self.end - self.start) + 1 } @@ -507,6 +548,8 @@ impl Interval { #[cfg(test)] mod tests { + use proptest::bits::BitSetLike; + use super::*; #[test] @@ -1057,4 +1100,108 @@ mod tests { ]); assert!(interval_store.contains_range(1..=1)); } + + #[test] + fn is_disjoint_1() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 601, end: 1200 }, + ]); + assert!(!interval_store_1.is_disjoint(&interval_store_1)); + assert!(!interval_store_2.is_disjoint(&interval_store_2)); + assert!(interval_store_1.is_disjoint(&interval_store_2)); + assert!(interval_store_2.is_disjoint(&interval_store_1)); + } + + #[test] + fn is_disjoint_2() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 600, end: 1200 }, + ]); + assert!(!interval_store_1.is_disjoint(&interval_store_1)); + assert!(!interval_store_2.is_disjoint(&interval_store_2)); + assert!(!interval_store_1.is_disjoint(&interval_store_2)); + assert!(!interval_store_2.is_disjoint(&interval_store_1)); + } + + #[test] + fn is_disjoint_3() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 15800, end: 15905 }, + ]); + assert!(!interval_store_1.is_disjoint(&interval_store_1)); + 
assert!(!interval_store_2.is_disjoint(&interval_store_2)); + assert!(!interval_store_1.is_disjoint(&interval_store_2)); + assert!(!interval_store_2.is_disjoint(&interval_store_1)); + } + + #[test] + fn is_disjoint_array_store_1() { + let mut array_store = ArrayStore::from_vec_unchecked( + alloc::vec![ + 0, 60, 200, 500, + ] + ); + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 70, end: 199 }, + ]); + assert!(interval_store.is_disjoint_array(&array_store)); + } + + #[test] + fn is_disjoint_array_store_2() { + let mut array_store = ArrayStore::from_vec_unchecked( + alloc::vec![ + 0, 60, 200, 500, + ] + ); + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 400 }, + ]); + assert!(!interval_store.is_disjoint_array(&array_store)); + } + + #[test] + fn is_disjoint_bitmap_store_1() { + let mut bitmap_store = BitmapStore::new(); + for to_set in [ + 500, 5001, 20, 40 + ] { + bitmap_store.set(to_set); + } + dbg!(&bitmap_store); + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1000, end: 4000 }, + Interval { start: 8000, end: 10000 }, + ]); + assert!(interval_store.is_disjoint_bitmap(&bitmap_store)); + } + + #[test] + fn is_disjoint_bitmap_store_2() { + let mut bitmap_store = BitmapStore::new(); + for to_set in [ + 500, 5001, 20, 40 + ] { + bitmap_store.set(to_set); + } + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 400 }, + ]); + assert!(!interval_store.is_disjoint_bitmap(&bitmap_store)); + } } From 12aad5fc859a05756212ed9b8d52f77bd1775e84 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 13:28:10 +0200 Subject: [PATCH 36/83] feat: runs store is subset --- roaring/src/bitmap/store/interval_store.rs | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 3ccf8759e..74600c3a0 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ 
b/roaring/src/bitmap/store/interval_store.rs @@ -487,6 +487,18 @@ impl IntervalStore { // TODO: make this better array.iter().all(|i| !self.contains(i)) } + + pub fn is_subset(&self, other: &Self) -> bool { + self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) + } + + pub fn is_subset_array(&self, other: &ArrayStore) -> bool { + self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) + } + + pub fn is_subset_bitmap(&self, other: &BitmapStore) -> bool { + self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) + } } /// This interval is inclusive to end. @@ -1204,4 +1216,36 @@ mod tests { ]); assert!(!interval_store.is_disjoint_bitmap(&bitmap_store)); } + + #[test] + fn is_subset_1() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1500, end: 1600 }, + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store_1.is_subset(&interval_store_1)); + assert!(interval_store_2.is_subset(&interval_store_2)); + assert!(interval_store_1.is_subset(&interval_store_2)); + assert!(!interval_store_2.is_subset(&interval_store_1)); + } + + #[test] + fn is_subset_2() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 50, end: 700 }, + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store_1.is_subset(&interval_store_1)); + assert!(interval_store_2.is_subset(&interval_store_2)); + assert!(!interval_store_1.is_subset(&interval_store_2)); + assert!(!interval_store_2.is_subset(&interval_store_1)); + } } From 1a3a46541f29003e06151e5ffdad4ff51db258c4 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 18:51:49 +0200 Subject: [PATCH 37/83] feat: 
runs store intersection len --- roaring/src/bitmap/store/bitmap_store.rs | 17 +- roaring/src/bitmap/store/interval_store.rs | 231 ++++++++++++++++----- 2 files changed, 200 insertions(+), 48 deletions(-) diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index 1a19d9099..260a29220 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -4,7 +4,7 @@ use core::fmt::{Display, Formatter}; use core::mem::size_of; use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign}; -use super::ArrayStore; +use super::{ArrayStore, Interval}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; @@ -336,6 +336,21 @@ impl BitmapStore { self.bits.iter().zip(other.bits.iter()).map(|(&a, &b)| (a & b).count_ones() as u64).sum() } + pub fn intersection_len_interval(&self, interval: &Interval) -> u64 { + let (start_id, start_bit) = (key(interval.start), bit(interval.start)); + let (end_id, end_bit) = (key(interval.end), bit(interval.end)); + let mut amount: u64 = 0; + for (i, mut cur_bit) in self.bits[start_id..=end_id].iter().copied().enumerate() { + if i == start_id { + cur_bit &= u64::MAX << start_bit; + } else if i == end_id { + cur_bit &= !(u64::MAX << (u64::BITS - end_bit as u32)); + } + amount += u64::from(cur_bit.count_ones()); + } + amount + } + pub(crate) fn intersection_len_array(&self, other: &ArrayStore) -> u64 { other .iter() diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 74600c3a0..48b0b5b6d 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,7 +1,7 @@ #![allow(unused)] use alloc::vec::Vec; -use core::cmp::Ordering; use core::ops::RangeInclusive; +use core::{cmp::Ordering, ops::ControlFlow}; use super::{ArrayStore, BitmapStore, Store}; @@ -453,14 +453,25 @@ impl IntervalStore { } } - pub fn is_disjoint(&self, other: &Self) -> bool { + fn 
step_walk< + R, + C: FnMut(Interval, Interval, R) -> ControlFlow, + E: FnMut(Option, Option, R) -> R, + >( + &self, + other: &Self, + mut calc: C, + mut else_op: E, + mut buffer: R, + ) -> R { let (mut i1, mut i2) = (self.0.iter(), other.0.iter()); let (mut iv1, mut iv2) = (i1.next(), i2.next()); loop { match (iv1, iv2) { (Some(v1), Some(v2)) => { - if v1.overlaps(v2) { - return false; + match calc(*v1, *v2, buffer) { + ControlFlow::Continue(new_buffer) => buffer = new_buffer, + ControlFlow::Break(end) => return end, } // We increase the iterator based on which one is furthest behind. @@ -474,11 +485,26 @@ impl IntervalStore { } } } - (_, _) => return true, + (value1, value2) => return else_op(value1.copied(), value2.copied(), buffer), } } } + pub fn is_disjoint(&self, other: &Self) -> bool { + self.step_walk( + other, + |interval1, interval2, _| { + if interval1.overlaps(&interval2) { + ControlFlow::Break(false) + } else { + ControlFlow::Continue(true) + } + }, + |_, _, _| true, + false, + ) + } + pub fn is_disjoint_array(&self, array: &ArrayStore) -> bool { array.iter().all(|&i| !self.contains(i)) } @@ -499,6 +525,28 @@ impl IntervalStore { pub fn is_subset_bitmap(&self, other: &BitmapStore) -> bool { self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) } + + pub fn intersection_len(&self, other: &Self) -> u64 { + self.step_walk( + other, + |interval1, interval2, buffer| { + ControlFlow::Continue( + interval1.overlapping_interval(&interval2).map(|f| f.run_len()).unwrap_or(0) + + buffer, + ) + }, + |_, _, buffer| buffer, + 0, + ) + } + + pub fn intersection_len_bitmap(&self, other: &BitmapStore) -> u64 { + self.0.iter().map(|f| other.intersection_len_interval(f)).sum() + } + + pub fn intersection_len_array(&self, other: &ArrayStore) -> u64 { + other.iter().map(|&f| self.contains(f) as u64).sum() + } } /// This interval is inclusive to end. 
@@ -550,7 +598,15 @@ impl Interval { } pub fn overlaps(&self, interval: &Interval) -> bool { - interval.start <= self.end && self.start <= interval.end + interval.start <= self.end && self.start <= interval.end + } + + pub fn overlapping_interval(&self, other: &Interval) -> Option { + if self.overlaps(other) { + Some(Interval::new(self.start.max(other.start), self.end.min(other.end))) + } else { + None + } } pub fn run_len(&self) -> u64 { @@ -1120,9 +1176,7 @@ mod tests { Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let mut interval_store_2 = IntervalStore(alloc::vec![ - Interval { start: 601, end: 1200 }, - ]); + let mut interval_store_2 = IntervalStore(alloc::vec![Interval { start: 601, end: 1200 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); assert!(interval_store_1.is_disjoint(&interval_store_2)); @@ -1136,9 +1190,7 @@ mod tests { Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let mut interval_store_2 = IntervalStore(alloc::vec![ - Interval { start: 600, end: 1200 }, - ]); + let mut interval_store_2 = IntervalStore(alloc::vec![Interval { start: 600, end: 1200 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); assert!(!interval_store_1.is_disjoint(&interval_store_2)); @@ -1152,9 +1204,8 @@ mod tests { Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let mut interval_store_2 = IntervalStore(alloc::vec![ - Interval { start: 15800, end: 15905 }, - ]); + let mut interval_store_2 = + IntervalStore(alloc::vec![Interval { start: 15800, end: 15905 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); assert!(!interval_store_1.is_disjoint(&interval_store_2)); @@ -1163,39 +1214,24 @@ mod tests { #[test] fn is_disjoint_array_store_1() { - let mut 
array_store = ArrayStore::from_vec_unchecked( - alloc::vec![ - 0, 60, 200, 500, - ] - ); - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 70, end: 199 }, - ]); + let mut array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 70, end: 199 },]); assert!(interval_store.is_disjoint_array(&array_store)); } #[test] fn is_disjoint_array_store_2() { - let mut array_store = ArrayStore::from_vec_unchecked( - alloc::vec![ - 0, 60, 200, 500, - ] - ); - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 1, end: 400 }, - ]); + let mut array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); assert!(!interval_store.is_disjoint_array(&array_store)); } #[test] fn is_disjoint_bitmap_store_1() { let mut bitmap_store = BitmapStore::new(); - for to_set in [ - 500, 5001, 20, 40 - ] { + for to_set in [500, 5001, 20, 40] { bitmap_store.set(to_set); } - dbg!(&bitmap_store); let mut interval_store = IntervalStore(alloc::vec![ Interval { start: 1000, end: 4000 }, Interval { start: 8000, end: 10000 }, @@ -1206,22 +1242,16 @@ mod tests { #[test] fn is_disjoint_bitmap_store_2() { let mut bitmap_store = BitmapStore::new(); - for to_set in [ - 500, 5001, 20, 40 - ] { + for to_set in [500, 5001, 20, 40] { bitmap_store.set(to_set); } - let mut interval_store = IntervalStore(alloc::vec![ - Interval { start: 1, end: 400 }, - ]); + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); assert!(!interval_store.is_disjoint_bitmap(&bitmap_store)); } #[test] fn is_subset_1() { - let mut interval_store_1 = IntervalStore(alloc::vec![ - Interval { start: 1500, end: 1600 }, - ]); + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1500, end: 1600 },]); let mut interval_store_2 = IntervalStore(alloc::vec![ Interval 
{ start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, @@ -1235,9 +1265,7 @@ mod tests { #[test] fn is_subset_2() { - let mut interval_store_1 = IntervalStore(alloc::vec![ - Interval { start: 50, end: 700 }, - ]); + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 50, end: 700 },]); let mut interval_store_2 = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, @@ -1248,4 +1276,113 @@ mod tests { assert!(!interval_store_1.is_subset(&interval_store_2)); assert!(!interval_store_2.is_subset(&interval_store_1)); } + + #[test] + fn overlapping_interval_1() { + let interval1 = Interval::new(0, 100); + let interval2 = Interval::new(50, 300); + + assert_eq!(interval1.overlapping_interval(&interval2), Some(Interval::new(50, 100))) + } + + #[test] + fn overlapping_interval_2() { + let interval1 = Interval::new(50, 300); + let interval2 = Interval::new(0, 100); + + assert_eq!(interval1.overlapping_interval(&interval2), Some(Interval::new(50, 100))) + } + + #[test] + fn overlapping_interval_3() { + let interval1 = Interval::new(0, 100); + let interval2 = Interval::new(500, 700); + + assert_eq!(interval1.overlapping_interval(&interval2), None) + } + + #[test] + fn intersection_len_1() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 11, end: 20 }, + Interval { start: 51, end: 80 }, + Interval { start: 111, end: 120 }, + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 20 }, + Interval { start: 41, end: 80 }, + Interval { start: 101, end: 120 }, + ]); + assert_eq!( + interval_store_1.intersection_len(&interval_store_2), + Interval::new(11, 20).run_len() + + Interval::new(51, 80).run_len() + + Interval::new(111, 120).run_len() + ) + } + + #[test] + fn intersection_len_2() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 11, end: 20 }, + Interval { start: 51, end: 80 }, + Interval { start: 111, end: 120 }, + ]); + 
let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 80 }, + Interval { start: 101, end: 120 }, + ]); + let intersect_len = Interval::new(11, 20).run_len() + + Interval::new(51, 80).run_len() + + Interval::new(111, 120).run_len(); + assert_eq!(interval_store_1.intersection_len(&interval_store_2), intersect_len); + assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); + } + + #[test] + fn intersection_len_3() { + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1, end: 2000 },]); + let mut interval_store_2 = IntervalStore(alloc::vec![Interval { start: 1001, end: 3000 },]); + let intersect_len = Interval::new(1001, 2000).run_len(); + assert_eq!(interval_store_1.intersection_len(&interval_store_2), intersect_len); + assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); + } + + #[test] + fn intersection_len_bitmap_1() { + let mut bitmap_store = BitmapStore::new(); + for to_set in [500, 5001, 20, 40, 60] { + bitmap_store.set(to_set); + } + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 4; + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_2() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..200 { + bitmap_store.set(to_set); + } + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 200 - 20; + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_array_1() { + let mut array_store = ArrayStore::from_vec_unchecked(alloc::vec![20, 40, 60, 500, 5001]); + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 4; + assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); + } + + #[test] + fn 
intersection_len_array_2() { + let mut array_store = ArrayStore::from_vec_unchecked(Vec::from_iter(0..200)); + let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 200 - 20; + assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); + } } From b0771c7021ee4410cdb0a00356df97b55053b074 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 18:54:28 +0200 Subject: [PATCH 38/83] feat: runs store len --- roaring/src/bitmap/store/interval_store.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 48b0b5b6d..f33c9e458 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -547,6 +547,10 @@ impl IntervalStore { pub fn intersection_len_array(&self, other: &ArrayStore) -> u64 { other.iter().map(|&f| self.contains(f) as u64).sum() } + + pub fn len(&self) -> u64 { + self.0.iter().map(|iv| iv.run_len()).sum() + } } /// This interval is inclusive to end. 
@@ -1385,4 +1389,16 @@ mod tests { let intersect_len = 200 - 20; assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); } + + #[test] + fn len_1() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 20, end: 600 }, + Interval { start: 5000, end: 8000 }, + ]); + assert_eq!( + interval_store_1.len(), + Interval::new(20, 600).run_len() + Interval::new(5000, 8000).run_len() + ); + } } From 3a2654f70db994168b8092640db8a0327af625cd Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 18:57:44 +0200 Subject: [PATCH 39/83] feat: runs store is_empty --- roaring/src/bitmap/store/interval_store.rs | 83 +++++++++++++--------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index f33c9e458..b41659322 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -551,6 +551,10 @@ impl IntervalStore { pub fn len(&self) -> u64 { self.0.iter().map(|iv| iv.run_len()).sum() } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } /// This interval is inclusive to end. 
@@ -1124,7 +1128,7 @@ mod tests { #[test] fn contains_index_1() { - let mut interval_store = IntervalStore(alloc::vec![ + let interval_store = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1135,7 +1139,7 @@ mod tests { #[test] fn contains_index_2() { - let mut interval_store = IntervalStore(alloc::vec![ + let interval_store = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1145,7 +1149,7 @@ mod tests { #[test] fn contains_range_1() { - let mut interval_store = IntervalStore(alloc::vec![ + let interval_store = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1155,7 +1159,7 @@ mod tests { #[test] fn contains_range_2() { - let mut interval_store = IntervalStore(alloc::vec![ + let interval_store = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1165,7 +1169,7 @@ mod tests { #[test] fn contains_range_3() { - let mut interval_store = IntervalStore(alloc::vec![ + let interval_store = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1175,12 +1179,12 @@ mod tests { #[test] fn is_disjoint_1() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let mut interval_store_2 = IntervalStore(alloc::vec![Interval { start: 601, end: 1200 },]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 601, end: 1200 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); 
assert!(interval_store_1.is_disjoint(&interval_store_2)); @@ -1189,12 +1193,12 @@ mod tests { #[test] fn is_disjoint_2() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let mut interval_store_2 = IntervalStore(alloc::vec![Interval { start: 600, end: 1200 },]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 600, end: 1200 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); assert!(!interval_store_1.is_disjoint(&interval_store_2)); @@ -1203,12 +1207,12 @@ mod tests { #[test] fn is_disjoint_3() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let mut interval_store_2 = + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 15800, end: 15905 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); @@ -1218,15 +1222,15 @@ mod tests { #[test] fn is_disjoint_array_store_1() { - let mut array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); - let mut interval_store = IntervalStore(alloc::vec![Interval { start: 70, end: 199 },]); + let array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); + let interval_store = IntervalStore(alloc::vec![Interval { start: 70, end: 199 },]); assert!(interval_store.is_disjoint_array(&array_store)); } #[test] fn is_disjoint_array_store_2() { - let mut array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); - let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); + let array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 
200, 500,]); + let interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); assert!(!interval_store.is_disjoint_array(&array_store)); } @@ -1236,7 +1240,7 @@ mod tests { for to_set in [500, 5001, 20, 40] { bitmap_store.set(to_set); } - let mut interval_store = IntervalStore(alloc::vec![ + let interval_store = IntervalStore(alloc::vec![ Interval { start: 1000, end: 4000 }, Interval { start: 8000, end: 10000 }, ]); @@ -1249,14 +1253,14 @@ mod tests { for to_set in [500, 5001, 20, 40] { bitmap_store.set(to_set); } - let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); + let interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); assert!(!interval_store.is_disjoint_bitmap(&bitmap_store)); } #[test] fn is_subset_1() { - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1500, end: 1600 },]); - let mut interval_store_2 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1500, end: 1600 },]); + let interval_store_2 = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1269,8 +1273,8 @@ mod tests { #[test] fn is_subset_2() { - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 50, end: 700 },]); - let mut interval_store_2 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 50, end: 700 },]); + let interval_store_2 = IntervalStore(alloc::vec![ Interval { start: 1, end: 600 }, Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, @@ -1307,12 +1311,12 @@ mod tests { #[test] fn intersection_len_1() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval { start: 11, end: 20 }, Interval { start: 51, end: 80 }, Interval { start: 111, end: 120 }, ]); - let mut interval_store_2 = 
IntervalStore(alloc::vec![ + let interval_store_2 = IntervalStore(alloc::vec![ Interval { start: 1, end: 20 }, Interval { start: 41, end: 80 }, Interval { start: 101, end: 120 }, @@ -1327,12 +1331,12 @@ mod tests { #[test] fn intersection_len_2() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval { start: 11, end: 20 }, Interval { start: 51, end: 80 }, Interval { start: 111, end: 120 }, ]); - let mut interval_store_2 = IntervalStore(alloc::vec![ + let interval_store_2 = IntervalStore(alloc::vec![ Interval { start: 1, end: 80 }, Interval { start: 101, end: 120 }, ]); @@ -1345,8 +1349,8 @@ mod tests { #[test] fn intersection_len_3() { - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1, end: 2000 },]); - let mut interval_store_2 = IntervalStore(alloc::vec![Interval { start: 1001, end: 3000 },]); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1, end: 2000 },]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 1001, end: 3000 },]); let intersect_len = Interval::new(1001, 2000).run_len(); assert_eq!(interval_store_1.intersection_len(&interval_store_2), intersect_len); assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); @@ -1358,7 +1362,7 @@ mod tests { for to_set in [500, 5001, 20, 40, 60] { bitmap_store.set(to_set); } - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); let intersect_len = 4; assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1369,30 +1373,30 @@ mod tests { for to_set in 0..200 { bitmap_store.set(to_set); } - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); let intersect_len = 200 - 20; 
assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } #[test] fn intersection_len_array_1() { - let mut array_store = ArrayStore::from_vec_unchecked(alloc::vec![20, 40, 60, 500, 5001]); - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let array_store = ArrayStore::from_vec_unchecked(alloc::vec![20, 40, 60, 500, 5001]); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); let intersect_len = 4; assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); } #[test] fn intersection_len_array_2() { - let mut array_store = ArrayStore::from_vec_unchecked(Vec::from_iter(0..200)); - let mut interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let array_store = ArrayStore::from_vec_unchecked(Vec::from_iter(0..200)); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); let intersect_len = 200 - 20; assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); } #[test] fn len_1() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval { start: 20, end: 600 }, Interval { start: 5000, end: 8000 }, ]); @@ -1401,4 +1405,15 @@ mod tests { Interval::new(20, 600).run_len() + Interval::new(5000, 8000).run_len() ); } + + #[test] + fn is_empty() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 20, end: 600 }, + Interval { start: 5000, end: 8000 }, + ]); + assert!(!interval_store.is_empty()); + interval_store.remove_range(0..=u16::MAX); + assert!(interval_store.is_empty()); + } } From 37ee97486bd532587f5277cd53d8833f1b5ba19c Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 19:01:28 +0200 Subject: [PATCH 40/83] feat: runs store min max --- roaring/src/bitmap/store/interval_store.rs | 35 ++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff 
--git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index b41659322..d9c477816 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -555,6 +555,14 @@ impl IntervalStore { pub fn is_empty(&self) -> bool { self.0.is_empty() } + + pub fn min(&self) -> Option { + self.0.first().map(|f| f.start) + } + + pub fn max(&self) -> Option { + self.0.last().map(|f| f.end) + } } /// This interval is inclusive to end. @@ -1212,8 +1220,7 @@ mod tests { Interval { start: 1401, end: 1600 }, Interval { start: 15901, end: 16000 }, ]); - let interval_store_2 = - IntervalStore(alloc::vec![Interval { start: 15800, end: 15905 },]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 15800, end: 15905 },]); assert!(!interval_store_1.is_disjoint(&interval_store_1)); assert!(!interval_store_2.is_disjoint(&interval_store_2)); assert!(!interval_store_1.is_disjoint(&interval_store_2)); @@ -1416,4 +1423,28 @@ mod tests { interval_store.remove_range(0..=u16::MAX); assert!(interval_store.is_empty()); } + + #[test] + fn min_0() { + let interval_store = IntervalStore(alloc::vec![Interval::new(20, u16::MAX)]); + assert_eq!(interval_store.min(), Some(20)); + } + + #[test] + fn min_1() { + let interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.min(), None); + } + + #[test] + fn max_0() { + let interval_store = IntervalStore(alloc::vec![Interval::new(20, u16::MAX)]); + assert_eq!(interval_store.max(), Some(u16::MAX)); + } + + #[test] + fn max_1() { + let interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.max(), None); + } } From 561780f454d9b4d110b0fa6bb00c04ac06d27710 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 19:11:04 +0200 Subject: [PATCH 41/83] feat: runs store rank --- roaring/src/bitmap/store/interval_store.rs | 38 ++++++++++++++++++---- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git 
a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index d9c477816..6aa51dd3d 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -563,6 +563,20 @@ impl IntervalStore { pub fn max(&self) -> Option { self.0.last().map(|f| f.end) } + + pub fn rank(&self, value: u16) -> u64 { + let mut rank = 0; + for iv in self.0.iter() { + if iv.end <= value { + rank += iv.run_len(); + } else if iv.start <= value { + rank += Interval::new(iv.start, value).run_len(); + } else { + break; + } + } + rank + } } /// This interval is inclusive to end. @@ -632,8 +646,6 @@ impl Interval { #[cfg(test)] mod tests { - use proptest::bits::BitSetLike; - use super::*; #[test] @@ -1245,7 +1257,7 @@ mod tests { fn is_disjoint_bitmap_store_1() { let mut bitmap_store = BitmapStore::new(); for to_set in [500, 5001, 20, 40] { - bitmap_store.set(to_set); + bitmap_store.insert(to_set); } let interval_store = IntervalStore(alloc::vec![ Interval { start: 1000, end: 4000 }, @@ -1258,7 +1270,7 @@ mod tests { fn is_disjoint_bitmap_store_2() { let mut bitmap_store = BitmapStore::new(); for to_set in [500, 5001, 20, 40] { - bitmap_store.set(to_set); + bitmap_store.insert(to_set); } let interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); assert!(!interval_store.is_disjoint_bitmap(&bitmap_store)); @@ -1367,7 +1379,7 @@ mod tests { fn intersection_len_bitmap_1() { let mut bitmap_store = BitmapStore::new(); for to_set in [500, 5001, 20, 40, 60] { - bitmap_store.set(to_set); + bitmap_store.insert(to_set); } let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); let intersect_len = 4; @@ -1378,7 +1390,7 @@ mod tests { fn intersection_len_bitmap_2() { let mut bitmap_store = BitmapStore::new(); for to_set in 0..200 { - bitmap_store.set(to_set); + bitmap_store.insert(to_set); } let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); let 
intersect_len = 200 - 20; @@ -1447,4 +1459,18 @@ mod tests { let interval_store = IntervalStore(alloc::vec![]); assert_eq!(interval_store.max(), None); } + + #[test] + fn rank() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 200), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + ]); + assert_eq!( + interval_store.rank(5020), + Interval::new(0, 200).run_len() + Interval::new(5000, 5020).run_len() + ); + assert_eq!(interval_store.rank(u16::MAX), interval_store.len()); + } } From 966ac1445603744226925e582eaf4997cc205cab Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 20:21:44 +0200 Subject: [PATCH 42/83] feat: runs store select --- roaring/src/bitmap/store/interval_store.rs | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 6aa51dd3d..f0402efd7 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -577,6 +577,18 @@ impl IntervalStore { } rank } + + pub fn select(&self, mut n: u16) -> Option { + for iv in self.0.iter() { + let run_len = (iv.run_len() as u16); + if run_len <= n { + n -= iv.run_len() as u16; + } else { + return Some(iv.start + n); + } + } + None + } } /// This interval is inclusive to end. 
@@ -1473,4 +1485,20 @@ mod tests { ); assert_eq!(interval_store.rank(u16::MAX), interval_store.len()); } + + #[test] + fn select() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 11), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + ]); + assert_eq!(interval_store.select(0), Some(0)); + assert_eq!(interval_store.select(1), Some(2)); + assert_eq!(interval_store.select(10), Some(11)); + assert_eq!(interval_store.select(11), Some(5000)); + assert_eq!(interval_store.select(11 + 3), Some(5003)); + assert_eq!(interval_store.select(11 + 2001), Some(8000)); + } } From 5b01331dd4052e3934a829e60a55ded5698e804b Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 13 Apr 2025 08:11:43 +0200 Subject: [PATCH 43/83] fix: failing ci --- roaring/src/bitmap/store/bitmap_store.rs | 2 +- roaring/src/bitmap/store/interval_store.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index 260a29220..c507f9dce 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -336,7 +336,7 @@ impl BitmapStore { self.bits.iter().zip(other.bits.iter()).map(|(&a, &b)| (a & b).count_ones() as u64).sum() } - pub fn intersection_len_interval(&self, interval: &Interval) -> u64 { + pub(crate) fn intersection_len_interval(&self, interval: &Interval) -> u64 { let (start_id, start_bit) = (key(interval.start), bit(interval.start)); let (end_id, end_bit) = (key(interval.end), bit(interval.end)); let mut amount: u64 = 0; diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index f0402efd7..34d750516 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -505,11 +505,11 @@ impl IntervalStore { ) } - pub fn is_disjoint_array(&self, array: &ArrayStore) -> bool { + pub(crate) fn 
is_disjoint_array(&self, array: &ArrayStore) -> bool { array.iter().all(|&i| !self.contains(i)) } - pub fn is_disjoint_bitmap(&self, array: &BitmapStore) -> bool { + pub(crate) fn is_disjoint_bitmap(&self, array: &BitmapStore) -> bool { // TODO: make this better array.iter().all(|i| !self.contains(i)) } @@ -518,11 +518,11 @@ impl IntervalStore { self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) } - pub fn is_subset_array(&self, other: &ArrayStore) -> bool { + pub(crate) fn is_subset_array(&self, other: &ArrayStore) -> bool { self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) } - pub fn is_subset_bitmap(&self, other: &BitmapStore) -> bool { + pub(crate) fn is_subset_bitmap(&self, other: &BitmapStore) -> bool { self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) } @@ -540,11 +540,11 @@ impl IntervalStore { ) } - pub fn intersection_len_bitmap(&self, other: &BitmapStore) -> u64 { + pub(crate) fn intersection_len_bitmap(&self, other: &BitmapStore) -> u64 { self.0.iter().map(|f| other.intersection_len_interval(f)).sum() } - pub fn intersection_len_array(&self, other: &ArrayStore) -> u64 { + pub(crate) fn intersection_len_array(&self, other: &ArrayStore) -> u64 { other.iter().map(|&f| self.contains(f) as u64).sum() } From 99393170afa2494bcdf9218c597723d658247acf Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 12 Apr 2025 21:36:06 +0200 Subject: [PATCH 44/83] fix: run store wrong insert range with begin overlap and concescutive end --- roaring/src/bitmap/store/interval_store.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 34d750516..e2040d931 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -117,7 +117,7 @@ impl IntervalStore { if to_insert < self.0.len() && self.0[to_insert].start - 1 == 
interval.end { // The intervals are consecutive! Adjust new end of interval, and how far // we drain - (self.0[to_insert].start, to_insert + 1) + (self.0[to_insert].end, to_insert + 1) } else { (interval.end, to_insert) }; @@ -861,6 +861,14 @@ mod tests { ); } + #[test] + fn insert_range_begin_overlap_concescutive_end() { + let mut interval_store = + IntervalStore(alloc::vec![Interval::new(2, 10), Interval::new(12, 700),]); + assert_eq!(interval_store.insert_range(2..=11), 1); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(2, 700)])); + } + #[test] fn push_new_max() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); From ada90cdea7b8a2245992f0ab7f4b4f1c30ea0c55 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 13 Apr 2025 08:44:02 +0200 Subject: [PATCH 45/83] fix: llvm-cov uncovered bugs --- roaring/src/bitmap/store/interval_store.rs | 132 +++++++++++++++------ 1 file changed, 99 insertions(+), 33 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index e2040d931..0f0fcfd57 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -60,35 +60,6 @@ impl IntervalStore { .is_err() } - fn drain_overlapping(&mut self, start_index: usize, interval: &Interval) -> u64 { - let value = self.drain_overlapping_range(start_index, interval); - if let Some(to_drain) = value.1 { - self.0.drain(start_index..to_drain); - } - value.0 - } - - fn drain_overlapping_range( - &mut self, - start_index: usize, - interval: &Interval, - ) -> (u64, Option) { - let mut drain_loc = None; - let mut amount = 0; - let mut intervals = self.0[start_index..].iter().enumerate().peekable(); - while let Some((i, cur_interval)) = intervals.next() { - if !interval.contains_interval(cur_interval) { - drain_loc = Some(start_index + i); - break; - } - amount += cur_interval.run_len(); - if intervals.peek().is_none() { - drain_loc = 
Some(start_index + i + 1); - } - } - (amount, drain_loc) - } - #[inline] pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { let interval = Interval { start: *range.start(), end: *range.end() }; @@ -202,9 +173,10 @@ impl IntervalStore { // to_end = 1 (to_begin, to_end, to_end) } else { - // keep end interval to overwrite + // keep end interval to overwrite if it exists, + // otherwise overwrite begin interval ( - to_begin, + if to_end != self.0.len() { to_begin + 1 } else { to_begin }, to_end.min(self.0.len() - 1), if to_end != self.0.len() { to_begin @@ -334,7 +306,7 @@ impl IntervalStore { let end = if to_insert == self.0.len() { self.0.len() - 1 } else { to_insert }; if self.0[begin].start == interval.start { IntervalRange { - drain_range: begin..end, + drain_range: begin..to_insert, begin_value: None, end_value: None, residual_count: 0, @@ -351,7 +323,7 @@ impl IntervalStore { // there is no interval that contains the start index, // there is an interval that contains the end index, (Err(begin), Ok(end)) => { - if self.0[begin].end == interval.end { + if self.0[end].end == interval.end { IntervalRange { drain_range: begin..end + 1, begin_value: None, @@ -770,6 +742,16 @@ mod tests { assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 50 },])); } + #[test] + fn insert_range_concescutive_begin_overlap_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 40, end: 60 }, + ]); + assert_eq!(interval_store.insert_range(21..=50), Interval::new(21, 39).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 60 },])); + } + #[test] fn insert_range_concescutive_end() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); @@ -861,6 +843,28 @@ mod tests { ); } + #[test] + fn insert_range_overlap_some() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval 
{ start: 50, end: 70 }, + Interval { start: 500, end: 700 }, + ]); + assert_eq!( + interval_store.insert_range(0..=100), + Interval::new(0, 100).run_len() + - Interval::new(10, 20).run_len() + - Interval::new(50, 70).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 0, end: 100 }, + Interval { start: 500, end: 700 }, + ]) + ); + } + #[test] fn insert_range_begin_overlap_concescutive_end() { let mut interval_store = @@ -869,6 +873,13 @@ mod tests { assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(2, 700)])); } + #[test] + fn push_empty() { + let mut interval_store = IntervalStore(alloc::vec![]); + assert!(interval_store.push(80)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 80, end: 80 },])); + } + #[test] fn push_new_max() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); @@ -965,6 +976,13 @@ mod tests { ); } + #[test] + fn remove_interval() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 50 },]); + assert!(interval_store.remove(50)); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + #[test] fn remove_range_exact_one() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); @@ -972,6 +990,27 @@ mod tests { assert_eq!(interval_store, IntervalStore(alloc::vec![])); } + #[test] + fn remove_range_one_with_extra_1() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(40..=70), 21); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_one_with_extra_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 400, end: 600 }, + Interval { start: 4000, end: 6000 }, + ]); + assert_eq!(interval_store.remove_range(40..=70), 21); + assert_eq!( + interval_store, + 
IntervalStore(alloc::vec![Interval::new(400, 600), Interval::new(4000, 6000),]) + ); + } + #[test] fn remove_range_exact_many() { let mut interval_store = IntervalStore(alloc::vec![ @@ -1014,6 +1053,33 @@ mod tests { assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); } + #[test] + fn remove_range_begin_no_overlap_end_exact_one_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(30..=90), + Interval::new(70, 90).run_len() + Interval::new(40, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_begin_no_overlap_end_exact_one_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + Interval { start: 700, end: 900 }, + ]); + assert_eq!( + interval_store.remove_range(30..=90), + Interval::new(70, 90).run_len() + Interval::new(40, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(700, 900),])); + } + #[test] fn remove_range_both_overlap() { let mut interval_store = IntervalStore(alloc::vec![ From 04a8d038802c8c73efc54aca93ce45668ea462e8 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 13 Apr 2025 09:41:31 +0200 Subject: [PATCH 46/83] feat: run store union and miscellaneous stuff --- roaring/src/bitmap/store/interval_store.rs | 93 +++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 0f0fcfd57..d629ac6c4 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,6 +1,6 @@ #![allow(unused)] use alloc::vec::Vec; -use core::ops::RangeInclusive; +use core::ops::{BitOr, BitOrAssign, RangeInclusive}; use core::{cmp::Ordering, ops::ControlFlow}; use 
super::{ArrayStore, BitmapStore, Store}; @@ -561,6 +561,47 @@ impl IntervalStore { } None } + + pub fn run_amount(&self) -> u64 { + self.0.len() as u64 + } + + pub fn to_bitmap(&self) -> BitmapStore { + let mut bits = BitmapStore::new(); + for iv in self.0.iter() { + bits.insert_range(iv.start..=iv.end); + } + bits + } + + pub(crate) fn iter(&self) -> core::slice::Iter { + self.0.iter() + } + + pub(crate) fn iter_mut(&mut self) -> core::slice::IterMut { + self.0.iter_mut() + } +} + +impl BitOrAssign for IntervalStore { + fn bitor_assign(&mut self, mut rhs: Self) { + let (add_intervals, take_intervals, self_is_add) = + if self.len() > rhs.len() { (self, &mut rhs, true) } else { (&mut rhs, self, false) }; + for iv in take_intervals.iter() { + add_intervals.insert_range(iv.start..=iv.end); + } + if !self_is_add { + core::mem::swap(add_intervals, take_intervals); + } + } +} + +impl BitOrAssign<&ArrayStore> for IntervalStore { + fn bitor_assign(&mut self, mut rhs: &ArrayStore) { + for &i in rhs.iter() { + self.insert(i); + } + } } /// This interval is inclusive to end. 
@@ -1575,4 +1616,54 @@ mod tests { assert_eq!(interval_store.select(11 + 3), Some(5003)); assert_eq!(interval_store.select(11 + 2001), Some(8000)); } + + #[test] + fn union_1() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 11), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + ]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 10), + Interval::new(12, 7000), + Interval::new(65000, 65050), + ]); + interval_store_1 |= interval_store_2; + assert_eq!( + interval_store_1, + IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 7000), + Interval::new(8000, 10000), + Interval::new(65000, 65050), + ]) + ) + } + + #[test] + fn union_array() { + let mut values = alloc::vec![0, 1, 2, 3, 4, 2000, 5000, u16::MAX]; + values.sort(); + let array = ArrayStore::from_vec_unchecked(values); + let mut interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 11), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + ]); + interval_store |= &array; + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval::new(0, 11), + Interval::new(2000, 2000), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + Interval::new(u16::MAX, u16::MAX), + ]) + ) + } } From b901e419128a42abc54b757edebc5526cbd5704c Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 13 Apr 2025 10:07:55 +0200 Subject: [PATCH 47/83] feat: run store intersection --- roaring/src/bitmap/store/interval_store.rs | 54 +++++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index d629ac6c4..81531bd5c 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,12 +1,12 @@ #![allow(unused)] use alloc::vec::Vec; -use core::ops::{BitOr, BitOrAssign, RangeInclusive}; +use 
core::ops::{BitAnd, BitOr, BitOrAssign, RangeInclusive}; use core::{cmp::Ordering, ops::ControlFlow}; use super::{ArrayStore, BitmapStore, Store}; #[derive(PartialEq, Eq, Clone, Debug)] -pub struct IntervalStore(Vec); +pub(crate) struct IntervalStore(Vec); impl IntervalStore { pub fn new() -> Self { @@ -604,6 +604,32 @@ impl BitOrAssign<&ArrayStore> for IntervalStore { } } +impl BitOrAssign<&Self> for IntervalStore { + fn bitor_assign(&mut self, mut rhs: &Self) { + for iv in rhs.iter() { + self.insert_range(iv.start..=iv.end); + } + } +} + +impl BitAnd for &IntervalStore { + type Output = IntervalStore; + + fn bitand(self, rhs: Self) -> Self::Output { + self.step_walk( + rhs, + |iv1, iv2, mut buf: IntervalStore| { + if let Some(new_iv) = iv1.overlapping_interval(&iv2) { + buf.insert_range(new_iv.start..=new_iv.end); + } + ControlFlow::Continue(buf) + }, + |_, _, buf| buf, + IntervalStore::new(), + ) + } +} + /// This interval is inclusive to end. #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] pub(crate) struct Interval { @@ -1666,4 +1692,28 @@ mod tests { ]) ) } + + #[test] + fn intersection() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 11), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(5, 50), + Interval::new(4000, 10000), + ]); + assert_eq!( + &interval_store_1 & &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(5, 11), + Interval::new(5000, 7000), + Interval::new(8000, 10000), + ]) + ) + } } From bd04ab7feb3ceb515096cd27e4a0b22725ce14e4 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 13 Apr 2025 10:10:01 +0200 Subject: [PATCH 48/83] feat: run store difference --- roaring/src/bitmap/store/interval_store.rs | 36 +++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/roaring/src/bitmap/store/interval_store.rs 
b/roaring/src/bitmap/store/interval_store.rs index 81531bd5c..a8b5c0d60 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,6 +1,6 @@ #![allow(unused)] use alloc::vec::Vec; -use core::ops::{BitAnd, BitOr, BitOrAssign, RangeInclusive}; +use core::ops::{BitAnd, BitOr, BitOrAssign, RangeInclusive, SubAssign}; use core::{cmp::Ordering, ops::ControlFlow}; use super::{ArrayStore, BitmapStore, Store}; @@ -8,6 +8,12 @@ use super::{ArrayStore, BitmapStore, Store}; #[derive(PartialEq, Eq, Clone, Debug)] pub(crate) struct IntervalStore(Vec); +impl Default for IntervalStore { + fn default() -> Self { + Self::new() + } +} + impl IntervalStore { pub fn new() -> Self { Self(Default::default()) @@ -630,6 +636,14 @@ impl BitAnd for &IntervalStore { } } +impl SubAssign<&Self> for IntervalStore { + fn sub_assign(&mut self, rhs: &Self) { + for iv in rhs.iter() { + self.remove_range(iv.start..=iv.end); + } + } +} + /// This interval is inclusive to end. 
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] pub(crate) struct Interval { @@ -1716,4 +1730,24 @@ mod tests { ]) ) } + + #[test] + fn difference() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 11), + Interval::new(5000, 7000), + Interval::new(8000, 11000), + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(5, 50), + Interval::new(4000, 10000), + ]); + interval_store_1 -= &interval_store_2; + assert_eq!( + interval_store_1, + IntervalStore(alloc::vec![Interval::new(2, 4), Interval::new(10001, 11000),]) + ) + } } From 892fe993aa40f91082efed344d52ddc21431c55e Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Mon, 14 Apr 2025 22:06:55 +0200 Subject: [PATCH 49/83] feat: run store symmetric difference --- roaring/src/bitmap/store/interval_store.rs | 127 +++++++++++++++++++-- 1 file changed, 119 insertions(+), 8 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index a8b5c0d60..4adf7d2d8 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,6 +1,7 @@ #![allow(unused)] use alloc::vec::Vec; -use core::ops::{BitAnd, BitOr, BitOrAssign, RangeInclusive, SubAssign}; +use core::ops::{BitAnd, BitOr, BitOrAssign, BitXor, Deref, RangeInclusive, SubAssign}; +use core::slice::Iter; use core::{cmp::Ordering, ops::ControlFlow}; use super::{ArrayStore, BitmapStore, Store}; @@ -68,6 +69,9 @@ impl IntervalStore { #[inline] pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } let interval = Interval { start: *range.start(), end: *range.end() }; let first_interval = self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); @@ -257,6 +261,9 @@ impl IntervalStore { } pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } let interval = 
Interval { start: *range.start(), end: *range.end() }; let first_interval = self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); @@ -296,6 +303,15 @@ impl IntervalStore { end_value: None, residual_count: Interval::new(interval.start, self.0[begin].end).run_len(), } + } else if begin == end { + let new_interval = Interval::new(interval.end + 1, self.0[begin].end); + self.0.insert(begin + 1, new_interval); + IntervalRange { + drain_range: begin..end, + begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), + end_value: None, + residual_count: interval.run_len(), + } } else { IntervalRange { drain_range: begin + 1..end, @@ -355,15 +371,20 @@ impl IntervalStore { } } }; - let count = self.0[todo.drain_range.clone()].iter().map(|f| f.run_len()).sum::() - + todo.residual_count; + let count = if todo.drain_range.is_empty() { + 0 + } else { + self.0[todo.drain_range.clone()].iter().map(|f| f.run_len()).sum::() + } + todo.residual_count; if let Some(IdValue { index, value }) = todo.begin_value { self.0[index].end = value; } if let Some(IdValue { index, value }) = todo.end_value { self.0[index].start = value; } - self.0.drain(todo.drain_range); + if !todo.drain_range.is_empty() { + self.0.drain(todo.drain_range); + } count } @@ -432,12 +453,17 @@ impl IntervalStore { } fn step_walk< + 'a, R, C: FnMut(Interval, Interval, R) -> ControlFlow, - E: FnMut(Option, Option, R) -> R, + E: FnMut( + (Option, Option), + (Iter<'a, Interval>, Iter<'a, Interval>), + R, + ) -> R, >( - &self, - other: &Self, + &'a self, + other: &'a Self, mut calc: C, mut else_op: E, mut buffer: R, @@ -463,7 +489,9 @@ impl IntervalStore { } } } - (value1, value2) => return else_op(value1.copied(), value2.copied(), buffer), + (value1, value2) => { + return else_op((value1.copied(), value2.copied()), (i1, i2), buffer) + } } } } @@ -644,6 +672,18 @@ impl SubAssign<&Self> for IntervalStore { } } +impl BitXor for &IntervalStore { + type Output = IntervalStore; + + fn 
bitxor(self, rhs: Self) -> Self::Output { + let mut union = self.clone(); + union |= rhs; + let intersection = self & rhs; + union -= &intersection; + union + } +} + /// This interval is inclusive to end. #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] pub(crate) struct Interval { @@ -651,6 +691,12 @@ pub(crate) struct Interval { pub end: u16, } +impl From> for Interval { + fn from(value: RangeInclusive) -> Self { + Interval::new(*value.start(), *value.end()) + } +} + impl IntoIterator for Interval { type Item = u16; type IntoIter = RangeInclusive; @@ -1249,6 +1295,16 @@ mod tests { assert_eq!(interval_store, IntervalStore(alloc::vec![])); } + #[test] + fn remove_range_complete_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 51, end: 6000 },]); + assert_eq!(interval_store.remove_range(500..=600), Interval::new(500, 600).run_len()); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new(51, 499), Interval::new(601, 6000),]) + ); + } + #[test] fn remove_smallest_one() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); @@ -1750,4 +1806,59 @@ mod tests { IntervalStore(alloc::vec![Interval::new(2, 4), Interval::new(10001, 11000),]) ) } + + #[test] + fn symmetric_difference_0() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(2, 11), + Interval::new(5000, 7000), + Interval::new(8000, 11000), + Interval::new(40000, 50000), + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![ + Interval::new(0, 0), + Interval::new(5, 50), + Interval::new(4000, 10000), + ]); + assert_eq!( + &interval_store_1 ^ &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new(2, 4), + Interval::new(12, 50), + Interval::new(4000, 4999), + Interval::new(7001, 7999), + Interval::new(10001, 11000), + Interval::new(40000, 50000), + ]) + ); + } + + #[test] + fn symmetric_difference_1() { + let mut interval_store_1 = 
IntervalStore(alloc::vec![Interval::new(0, 50),]); + let mut interval_store_2 = IntervalStore(alloc::vec![Interval::new(100, 200),]); + assert_eq!( + &interval_store_1 ^ &interval_store_2, + IntervalStore(alloc::vec![Interval::new(0, 50), Interval::new(100, 200),]) + ); + } + + #[test] + fn symmetric_difference_2() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut interval_store_2 = IntervalStore(alloc::vec![Interval::new(0, 6000),]); + assert_eq!( + &interval_store_1 ^ &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new(51, 499), + Interval::new(601, 799), + Interval::new(1001, 6000), + ]) + ); + } } From 28b9cb7980041c6fea61dc29eeec8b82e0adf97b Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 15 Apr 2025 23:42:16 -0400 Subject: [PATCH 50/83] dont deserialize a run container with zero runs --- roaring/src/bitmap/serialization.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index fa90cd131..6947d3d76 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -216,6 +216,12 @@ impl RoaringBitmap { let store = if is_run_container { let runs = reader.read_u16::()?; + if runs == 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "run container with zero runs", + )); + } let mut intervals = vec![[0, 0]; runs as usize]; reader.read_exact(cast_slice_mut(&mut intervals))?; intervals.iter_mut().for_each(|[s, len]| { From 004e11e90f1a0f077aacaeea4f781a683272fac0 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 18 Apr 2025 17:48:16 +0200 Subject: [PATCH 51/83] feat: run store iterator --- roaring/src/bitmap/store/interval_store.rs | 539 ++++++++++++++++++++- 1 file changed, 534 insertions(+), 5 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 
4adf7d2d8..2f8a671a4 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,5 +1,7 @@ #![allow(unused)] use alloc::vec::Vec; +use core::borrow::Borrow; +use core::iter::Peekable; use core::ops::{BitAnd, BitOr, BitOrAssign, BitXor, Deref, RangeInclusive, SubAssign}; use core::slice::Iter; use core::{cmp::Ordering, ops::ControlFlow}; @@ -608,11 +610,15 @@ impl IntervalStore { bits } - pub(crate) fn iter(&self) -> core::slice::Iter { + pub(crate) fn iter(&self) -> RunIterBorrowed { + self.into_iter() + } + + pub(crate) fn iter_intervals(&self) -> core::slice::Iter { self.0.iter() } - pub(crate) fn iter_mut(&mut self) -> core::slice::IterMut { + pub(crate) fn iter_intervals_mut(&mut self) -> core::slice::IterMut { self.0.iter_mut() } } @@ -621,7 +627,7 @@ impl BitOrAssign for IntervalStore { fn bitor_assign(&mut self, mut rhs: Self) { let (add_intervals, take_intervals, self_is_add) = if self.len() > rhs.len() { (self, &mut rhs, true) } else { (&mut rhs, self, false) }; - for iv in take_intervals.iter() { + for iv in take_intervals.iter_intervals() { add_intervals.insert_range(iv.start..=iv.end); } if !self_is_add { @@ -640,7 +646,7 @@ impl BitOrAssign<&ArrayStore> for IntervalStore { impl BitOrAssign<&Self> for IntervalStore { fn bitor_assign(&mut self, mut rhs: &Self) { - for iv in rhs.iter() { + for iv in rhs.iter_intervals() { self.insert_range(iv.start..=iv.end); } } @@ -666,7 +672,7 @@ impl BitAnd for &IntervalStore { impl SubAssign<&Self> for IntervalStore { fn sub_assign(&mut self, rhs: &Self) { - for iv in rhs.iter() { + for iv in rhs.iter_intervals() { self.remove_range(iv.start..=iv.end); } } @@ -684,6 +690,207 @@ impl BitXor for &IntervalStore { } } +pub(crate) type RunIterOwned = RunIter>; +pub(crate) type RunIterBorrowed<'a> = RunIter>; + +impl IntoIterator for IntervalStore { + type Item = u16; + type IntoIter = RunIter>; + + fn into_iter(self) -> Self::IntoIter { + RunIter::new(self.0.into_iter()) 
+ } +} + +impl<'a> IntoIterator for &'a IntervalStore { + type Item = u16; + type IntoIter = RunIter>; + + fn into_iter(self) -> Self::IntoIter { + RunIter::new(self.0.iter()) + } +} + +pub(crate) trait SliceIterator: Iterator + DoubleEndedIterator { + fn as_slice(&self) -> &[I]; +} + +impl SliceIterator for alloc::vec::IntoIter { + fn as_slice(&self) -> &[I] { + alloc::vec::IntoIter::as_slice(self) + } +} + +impl<'a, I> SliceIterator for core::slice::Iter<'a, I> { + fn as_slice(&self) -> &'a [I] { + core::slice::Iter::as_slice(self) + } +} + +#[derive(Clone)] +pub(crate) struct RunIter> { + forward_offset: u16, + backward_offset: u16, + intervals: I, +} + +impl> RunIter { + fn new(intervals: I) -> Self { + Self { forward_offset: 0, backward_offset: 0, intervals } + } + + fn move_next(&mut self) { + if let Some(value) = self.forward_offset.checked_add(1) { + self.forward_offset = value; + } else { + return; + } + if Some(self.forward_offset as u64) + >= self.intervals.as_slice().first().map(|f| f.run_len()) + { + self.intervals.next(); + self.forward_offset = 0; + } + } + + fn move_next_back(&mut self) { + if let Some(value) = self.backward_offset.checked_add(1) { + self.backward_offset = value; + } else { + return; + } + if Some(self.backward_offset as u64) + >= self.intervals.as_slice().last().map(|f| f.run_len()) + { + self.intervals.next_back(); + self.backward_offset = 0; + } + } + + fn remaining_size(&self) -> usize { + (self.intervals.as_slice().iter().map(|f| f.run_len()).sum::() + - self.forward_offset as u64 + - self.backward_offset as u64) as usize + } + + /// Advance the iterator to the first value greater than or equal to `n`. 
+ pub(crate) fn advance_to(&mut self, n: u16) { + if n == 0 { + return; + } + if self + .intervals + .as_slice() + .first() + .map(|f| f.start + self.forward_offset > n) + .unwrap_or(true) + { + return; + } + match self.intervals.as_slice().binary_search_by(|iv| cmp_index_interval(n, *iv).reverse()) + { + Ok(index) => { + if let Some(value) = index.checked_sub(1) { + self.intervals.nth(value); + } + self.forward_offset = n - self.intervals.as_slice().first().unwrap().start; + } + Err(index) => { + if index == self.intervals.as_slice().len() { + return; + } + if let Some(value) = index.checked_sub(1) { + self.intervals.nth(value); + self.forward_offset = 0; + } + } + } + } + + /// Advance the back of iterator to the first value less than or equal to `n`. + pub(crate) fn advance_back_to(&mut self, n: u16) { + if n == u16::MAX { + return; + } + if self + .intervals + .as_slice() + .last() + .map(|f| f.end - self.backward_offset < n) + .unwrap_or(true) + { + return; + } + match self.intervals.as_slice().binary_search_by(|iv| cmp_index_interval(n, *iv).reverse()) + { + Ok(index) => { + let backward_index = self.intervals.as_slice().len() - index - 1; + if let Some(value) = backward_index.checked_sub(1) { + self.intervals.nth_back(value); + } + self.backward_offset = self.intervals.as_slice().last().unwrap().end - n; + } + Err(index) => { + if index == 0 { + return; + } + let backward_index = self.intervals.as_slice().len() - index; + if let Some(value) = backward_index.checked_sub(1) { + self.intervals.nth_back(value); + self.backward_offset = 0; + } + } + } + } +} + +impl> Iterator for RunIter { + type Item = u16; + + fn next(&mut self) -> Option { + let result = self.intervals.as_slice().first()?.start + self.forward_offset; + self.move_next(); + Some(result) + } + + fn size_hint(&self) -> (usize, Option) { + let remaining_size = self.remaining_size(); + (remaining_size, Some(remaining_size)) + } + + fn count(self) -> usize { + self.remaining_size() + } + + fn 
nth(&mut self, n: usize) -> Option { + if let Some(skip) = n.checked_sub(1) { + let mut to_skip = skip as u64; + loop { + let to_remove = (self.intervals.as_slice().first()?.run_len() + - self.forward_offset as u64) + .min(to_skip); + to_skip -= to_remove; + self.forward_offset += to_remove as u16; + self.move_next(); + if to_skip == 0 { + break; + } + } + } + self.next() + } +} + +impl> DoubleEndedIterator for RunIter { + fn next_back(&mut self) -> Option { + let result = self.intervals.as_slice().last()?.end - self.backward_offset; + self.move_next_back(); + Some(result) + } +} + +impl> ExactSizeIterator for RunIter {} + /// This interval is inclusive to end. #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] pub(crate) struct Interval { @@ -1861,4 +2068,326 @@ mod tests { ]) ); } + + #[test] + fn iter_next() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut iter = interval_store.into_iter(); + + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i, value as usize); + i += 1; + if i >= 51 { + break; + } + let size = (Interval::new(i as u16, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = + (Interval::new(500, 600).run_len() + Interval::new(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i + 500, value as usize); + i += 1; + if i >= 101 { + break; + } + let size = (Interval::new((i + 500) as u16, 600).run_len() + + Interval::new(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = 
Interval::new(800, 1000).run_len() as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + if i >= 201 { + break; + } + assert_eq!(i + 800, value as usize); + i += 1; + if i >= 201 { + break; + } + let size = (Interval::new((i + 800) as u16, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn iter_next_back() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut iter = interval_store.into_iter(); + + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(1000 - i, value as usize); + i += 1; + if i >= 201 { + break; + } + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, (1000 - i) as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(600 - i, value as usize); + i += 1; + if i >= 101 { + break; + } + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, (600 - i) as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(50 - i, value as usize); + i += 1; + if i >= 51 { + break; + } + let size = (Interval::new(0, (50 - i) as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn 
iter_next_and_next_back() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut iter = interval_store.into_iter(); + + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(1000 - i, value as usize); + i += 1; + if i >= 201 { + break; + } + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, (1000 - i) as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = (Interval::new(0, 50).run_len() + Interval::new(500, 600).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(600 - i, value as usize); + i += 1; + if i >= 101 { + break; + } + let size = (Interval::new(0, 50).run_len() + + Interval::new(500, (600 - i) as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = (Interval::new(0, 50).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i, value as usize); + i += 1; + if i >= 51 { + break; + } + let size = (Interval::new(i as u16, 50).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn iter_u16_max() { + let interval_store = IntervalStore(alloc::vec![Interval::new(0, u16::MAX),]); + let mut iter = interval_store.iter(); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i, value as usize); + i += 1; + if i >= u16::MAX as usize { + break; + } + let size = 
(Interval::new(i as u16, u16::MAX).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let mut iter = interval_store.iter(); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(u16::MAX as usize - i, value as usize); + i += 1; + if i >= u16::MAX as usize { + break; + } + let size = (Interval::new(0, u16::MAX - i as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(u16::MAX as usize), Some(u16::MAX)); + } + + #[test] + fn iter_nth() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(50), Some(50)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(51), Some(500)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(100), Some(549)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(152), Some(800)); + + let mut iter = interval_store.iter(); + assert_eq!( + iter.nth( + (Interval::new(0, 50).run_len() + + Interval::new(500, 600).run_len() + + Interval::new(800, 1000).run_len() + - 1) as usize + ), + Some(1000) + ); + + let mut iter = interval_store.iter(); + iter.next(); + iter.next(); + iter.next(); + assert_eq!(iter.nth(152), Some(803)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(u16::MAX as usize), None); + } + + #[test] + fn iter_advance_to() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut iter = interval_store.iter(); + iter.advance_to(20); + assert_eq!(iter.next(), Some(20)); + iter.advance_to(800); + assert_eq!(iter.next(), Some(800)); + iter.advance_to(u16::MAX); + assert_eq!(iter.next(), Some(801)); + + let mut iter = interval_store.iter(); + iter.advance_to(100); + assert_eq!(iter.next(), Some(500)); + 
iter.advance_to(800); + assert_eq!(iter.next(), Some(800)); + iter.advance_to(900); + assert_eq!(iter.next(), Some(900)); + iter.advance_to(800); + assert_eq!(iter.next(), Some(901)); + let mut iter = interval_store.iter(); + iter.next(); + iter.next(); + iter.next(); + iter.advance_to(499); + assert_eq!(iter.next(), Some(500)); + + let mut iter = interval_store.iter(); + iter.advance_to(100); + assert_eq!(iter.next(), Some(500)); + } + + #[test] + fn iter_advance_back_to() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new(0, 50), + Interval::new(500, 600), + Interval::new(800, 1000), + ]); + let mut iter = interval_store.iter(); + iter.advance_back_to(u16::MAX); + assert_eq!(iter.next_back(), Some(1000)); + iter.advance_back_to(800); + assert_eq!(iter.next_back(), Some(800)); + iter.advance_back_to(20); + assert_eq!(iter.next_back(), Some(20)); + + let mut iter = interval_store.iter(); + iter.advance_back_to(800); + assert_eq!(iter.next_back(), Some(800)); + iter.advance_back_to(900); + assert_eq!(iter.next_back(), Some(600)); + iter.advance_back_to(550); + assert_eq!(iter.next_back(), Some(550)); + iter.advance_back_to(20); + assert_eq!(iter.next_back(), Some(20)); + let mut iter = interval_store.iter(); + iter.next_back(); + iter.next_back(); + iter.next_back(); + iter.advance_back_to(700); + assert_eq!(iter.next_back(), Some(600)); + let mut iter = interval_store.iter(); + iter.advance_back_to(400); + assert_eq!(iter.next_back(), Some(50)); + } } From 251d961a33ecf1c7f19ab043ee16a5558b3450da Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 18 Apr 2025 21:58:53 +0200 Subject: [PATCH 52/83] feat: add run store --- roaring/src/bitmap/serialization.rs | 29 +- roaring/src/bitmap/store/bitmap_store.rs | 5 - roaring/src/bitmap/store/interval_store.rs | 27 +- roaring/src/bitmap/store/mod.rs | 640 ++++++++------------- 4 files changed, 287 insertions(+), 414 deletions(-) diff --git a/roaring/src/bitmap/serialization.rs 
b/roaring/src/bitmap/serialization.rs index 41033d3ea..61392c76b 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -7,6 +7,8 @@ use core::convert::Infallible; use std::error::Error; use std::io; +use super::store::IntervalStore; + pub(crate) const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; pub(crate) const SERIAL_COOKIE: u16 = 12347; pub(crate) const NO_OFFSET_THRESHOLD: usize = 4; @@ -50,7 +52,7 @@ impl RoaringBitmap { Store::Bitmap(..) => BITMAP_BYTES, Store::Run(ref intervals) => { has_run_containers = true; - RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * intervals.len()) + RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * intervals.run_amount() as usize) } }) .sum(); @@ -119,7 +121,9 @@ impl RoaringBitmap { offset += 8 * 1024; } Store::Run(ref intervals) => { - offset += (RUN_NUM_BYTES + (intervals.len() * RUN_ELEMENT_BYTES)) as u32; + offset += (RUN_NUM_BYTES + + (intervals.run_amount() as usize * RUN_ELEMENT_BYTES)) + as u32; } } } @@ -138,8 +142,8 @@ impl RoaringBitmap { } } Store::Run(ref intervals) => { - writer.write_u16::(intervals.len() as u16)?; - for iv in intervals { + writer.write_u16::(intervals.run_amount() as u16)?; + for iv in intervals.iter_intervals() { writer.write_u16::(iv.start)?; writer.write_u16::(iv.end - iv.start)?; } @@ -270,13 +274,16 @@ impl RoaringBitmap { *len = u16::from_le(*len); }); - let intervals = intervals - .into_iter() - .map(|[start, len]| -> Result { - let end = start.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; - Ok(Interval { start, end }) - }) - .collect::>()?; + let intervals = IntervalStore::from_vec_unchecked( + intervals + .into_iter() + .map(|[start, len]| -> Result { + let end = start.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; + // TODO: easy safe way of constructing an `IntervalStore` + Ok(Interval { start, end }) + }) + .collect::>()?, + ); Store::Run(intervals) } else if cardinality <= ARRAY_LIMIT { diff --git a/roaring/src/bitmap/store/bitmap_store.rs 
b/roaring/src/bitmap/store/bitmap_store.rs index c507f9dce..def2ed057 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -375,11 +375,6 @@ impl BitmapStore { &self.bits } - // NOTE: escape hatch for runs - pub fn mut_array(&mut self) -> &mut [u64; BITMAP_LENGTH] { - &mut self.bits - } - pub fn clear(&mut self) { self.bits.fill(0); self.len = 0; diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 2f8a671a4..8e86526dc 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -2,7 +2,9 @@ use alloc::vec::Vec; use core::borrow::Borrow; use core::iter::Peekable; -use core::ops::{BitAnd, BitOr, BitOrAssign, BitXor, Deref, RangeInclusive, SubAssign}; +use core::ops::{ + BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, Deref, RangeInclusive, SubAssign, +}; use core::slice::Iter; use core::{cmp::Ordering, ops::ControlFlow}; @@ -22,6 +24,23 @@ impl IntervalStore { Self(Default::default()) } + pub fn from_vec_unchecked(vec: Vec) -> Self { + #[cfg(debug_assertions)] + { + for (i, cur_interval) in vec.iter().enumerate() { + if let Some(next) = vec.get(i + 1) { + assert!(cur_interval.end < next.start); + } + } + } + Self(vec) + } + + pub(crate) fn push_interval_unchecked(&mut self, interval: Interval) { + debug_assert!(self.0.last().map(|f| f.end < interval.start).unwrap_or(true)); + self.0.push(interval) + } + #[inline] pub fn insert(&mut self, index: u16) -> bool { self.0 @@ -670,6 +689,12 @@ impl BitAnd for &IntervalStore { } } +impl BitAndAssign<&IntervalStore> for ArrayStore { + fn bitand_assign(&mut self, rhs: &IntervalStore) { + self.retain(|f| rhs.contains(f)); + } +} + impl SubAssign<&Self> for IntervalStore { fn sub_assign(&mut self, rhs: &Self) { for iv in rhs.iter_intervals() { diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 4f10f2075..3779baecd 100644 --- 
a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -3,8 +3,6 @@ mod bitmap_store; mod interval_store; use alloc::vec; -use alloc::vec::Vec; -use core::cmp::Ordering; use core::mem; use core::ops::{ BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, @@ -16,8 +14,8 @@ use self::Store::{Array, Bitmap, Run}; pub(crate) use self::array_store::ArrayStore; pub use self::bitmap_store::{BitmapIter, BitmapStore}; -use self::interval_store::cmp_index_interval; pub(crate) use self::interval_store::Interval; +pub(crate) use interval_store::{IntervalStore, RunIterBorrowed, RunIterOwned}; use crate::bitmap::container::ARRAY_LIMIT; @@ -28,7 +26,7 @@ use alloc::boxed::Box; pub(crate) enum Store { Array(ArrayStore), Bitmap(BitmapStore), - Run(Vec), + Run(IntervalStore), } #[derive(Clone)] @@ -37,14 +35,8 @@ pub(crate) enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), - Run(RunIter), -} - -#[derive(Clone)] -pub struct RunIter { - run: usize, - offset: u64, - intervals: Vec, + RunBorrowed(RunIterBorrowed<'a>), + RunOwned(RunIterOwned), } impl Store { @@ -99,47 +91,10 @@ impl Store { match self { Array(vec) => vec.insert(index), Bitmap(bits) => bits.insert(index), - Run(ref mut vec) => { - vec.binary_search_by(|iv| cmp_index_interval(index, *iv)) - .map_err(|loc| { - // Value is beyond end of interval - if vec[loc].end < index { - // If immediately follows this interval - if index == vec[loc].end - 1 { - if loc < vec.len() && index == vec[loc + 1].start { - // Merge with following interval - vec[loc].end = vec[loc + 1].end; - vec.remove(loc + 1); - return; - } - // Extend end of this interval by 1 - vec[loc].end += 1 - } else { - // Otherwise create new standalone interval - vec.insert(loc, Interval::new(index, index)); - } - } else if vec[loc].start == index + 1 { - // Value immediately precedes interval - if loc > 0 && vec[loc - 1].end == &index - 1 { 
- // Merge with preceding interval - vec[loc - 1].end = vec[loc].end; - vec.remove(loc); - return; - } - vec[loc].start -= 1; - } else if loc > 0 && index - 1 == vec[loc - 1].end { - // Immediately follows the previous interval - vec[loc - 1].end += 1 - } else { - vec.insert(loc, Interval::new(index, index)); - } - }) - .is_err() - } + Run(runs) => runs.insert(index), } } - #[allow(clippy::todo)] pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { // A Range is defined as being of size 0 if start >= end. if range.is_empty() { @@ -149,7 +104,7 @@ impl Store { match self { Array(vec) => vec.insert_range(range), Bitmap(bits) => bits.insert_range(range), - Run(..) => todo!(), + Run(runs) => runs.insert_range(range), } } @@ -161,7 +116,7 @@ impl Store { match self { Array(vec) => vec.push(index), Bitmap(bits) => bits.push(index), - Run(..) => todo!(), + Run(runs) => runs.push(index), } } @@ -172,12 +127,15 @@ impl Store { /// # Panics /// /// If debug_assertions enabled and index is > self.max() - #[allow(clippy::todo)] pub(crate) fn push_unchecked(&mut self, index: u16) { match self { Array(vec) => vec.push_unchecked(index), Bitmap(bits) => bits.push_unchecked(index), - Run(..) 
=> todo!(), + Run(runs) => { + // push unchecked for intervals doesn't make since we have to check anyways to + // merge ends with the index if these are consecutive + runs.push(index); + } } } @@ -185,29 +143,7 @@ impl Store { match self { Array(vec) => vec.remove(index), Bitmap(bits) => bits.remove(index), - Run(ref mut vec) => vec - .binary_search_by(|iv| cmp_index_interval(index, *iv)) - .map(|loc| { - if index == vec[loc].start && index == vec[loc].end { - // Remove entire run if it only contains this value - vec.remove(loc); - } else if index == vec[loc].end { - // Value is last in this interval - vec[loc].end = index - 1; - } else if index == vec[loc].start { - // Value is first in this interval - vec[loc].start = index + 1; - } else { - // Value lies inside the interval, we need to split it - // First construct a new interval with the right part - let new_interval = Interval::new(index + 1, vec[loc].end); - // Then shrink the current interval - vec[loc].end = index - 1; - // Then insert the new interval leaving gap where value was removed - vec.insert(loc + 1, new_interval); - } - }) - .is_ok(), + Run(runs) => runs.remove(index), } } @@ -219,64 +155,23 @@ impl Store { match self { Array(vec) => vec.remove_range(range), Bitmap(bits) => bits.remove_range(range), - // TODO we must test that algorithm - Run(ref mut intervals) => { - let start = *range.start(); - let end = *range.end(); - let mut count: u64 = 0; - let mut search_end = false; - - for iv in intervals.iter_mut() { - if !search_end && cmp_index_interval(start, *iv) == Ordering::Equal { - count += Interval::new(iv.end, start).run_len(); - iv.end = start; - search_end = true; - } - - if search_end { - // The end bound is non-inclusive therefore we must search for end - 1. - match cmp_index_interval(end, *iv) { - Ordering::Less => { - // We invalidate the intervals that are contained in - // the start and end but doesn't touch the bounds. 
- count += iv.run_len(); - *iv = Interval::new(u16::MAX, 0); - } - Ordering::Equal => { - // We shrink this interval by moving the start of it to be - // the end bound which is non-inclusive. - count += Interval::new(end, iv.start).run_len(); - iv.start = end; - } - Ordering::Greater => break, - } - } - } - - // We invalidated the intervals to remove, - // the start is greater than the end. - intervals.retain(|iv| iv.start <= iv.end); - - count - } + Run(runs) => runs.remove_range(range), } } - #[allow(clippy::todo)] pub fn remove_smallest(&mut self, index: u64) { match self { Array(vec) => vec.remove_smallest(index), Bitmap(bits) => bits.remove_smallest(index), - Run(..) => todo!(), + Run(runs) => runs.remove_smallest(index), } } - #[allow(clippy::todo)] pub fn remove_biggest(&mut self, index: u64) { match self { Array(vec) => vec.remove_biggest(index), Bitmap(bits) => bits.remove_biggest(index), - Run(..) => todo!(), + Run(runs) => runs.remove_biggest(index), } } @@ -284,18 +179,15 @@ impl Store { match self { Array(vec) => vec.contains(index), Bitmap(bits) => bits.contains(index), - Run(ref intervals) => { - intervals.binary_search_by(|iv| cmp_index_interval(index, *iv)).is_ok() - } + Run(intervals) => intervals.contains(index), } } - #[allow(clippy::todo)] pub fn contains_range(&self, range: RangeInclusive) -> bool { match self { Array(vec) => vec.contains_range(range), Bitmap(bits) => bits.contains_range(range), - Run(..) 
=> todo!(), + Run(runs) => runs.contains_range(range), } } @@ -310,35 +202,11 @@ impl Store { (Array(vec), Bitmap(bits)) | (Bitmap(bits), Array(vec)) => { vec.iter().all(|&i| !bits.contains(i)) } - // TODO(jpg) is_disjoint - (Run(intervals1), Run(intervals2)) => { - let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); - let (mut iv1, mut iv2) = (i1.next(), i2.next()); - loop { - match (iv1, iv2) { - (Some(v1), Some(v2)) => { - if v2.start <= v1.end && v1.start <= v2.end { - return false; - } - - match v1.end.cmp(&v2.end) { - Ordering::Less => iv1 = i1.next(), - Ordering::Greater => iv2 = i2.next(), - Ordering::Equal => { - iv1 = i1.next(); - iv2 = i2.next(); - } - } - } - (_, _) => return true, - } - } + (Run(intervals1), Run(intervals2)) => intervals1.is_disjoint(intervals2), + (Run(runs), Array(vec)) | (Array(vec), Run(runs)) => runs.is_disjoint_array(vec), + (Run(intervals), Bitmap(bitmap)) | (Bitmap(bitmap), Run(intervals)) => { + intervals.is_disjoint_bitmap(bitmap) } - (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { - vec.iter().all(|&i| !run.contains(i)) - } - (&Run(ref _intervals), _store @ &Bitmap(..)) - | (_store @ &Bitmap(..), &Run(ref _intervals)) => unimplemented!(), } } @@ -348,26 +216,25 @@ impl Store { (Bitmap(bits1), Bitmap(bits2)) => bits1.is_subset(bits2), (Array(vec), Bitmap(bits)) => vec.iter().all(|&i| bits.contains(i)), (Bitmap(..), &Array(..)) => false, - (Array(vec), run @ &Run(..)) => vec.iter().all(|&i| run.contains(i)), - // TODO(jpg) is subset bitmap, run - (Bitmap(..), Run(_vec)) => unimplemented!(), - - // TODO(jpg) is_subset run, * - (Run(_intervals1), Run(_intervals2)) => unimplemented!(), - (Run(_intervals), Array(_vec)) => unimplemented!(), - (Run(_intervals), _store @ &Bitmap(..)) => unimplemented!(), + (Array(vec), Run(runs)) => vec.iter().all(|&i| runs.contains(i)), + (Bitmap(bitmap), Run(runs)) => bitmap.iter().all(|i| runs.contains(i)), + (Run(intervals1), Run(intervals2)) => 
intervals1.is_subset(intervals2), + (Run(intervals), Array(vec)) => intervals.is_subset_array(vec), + (Run(intervals), Bitmap(bitmap)) => intervals.is_subset_bitmap(bitmap), } } - #[allow(clippy::todo)] pub fn intersection_len(&self, other: &Self) -> u64 { match (self, other) { (Array(vec1), Array(vec2)) => vec1.intersection_len(vec2), (Bitmap(bits1), Bitmap(bits2)) => bits1.intersection_len_bitmap(bits2), (Array(vec), Bitmap(bits)) => bits.intersection_len_array(vec), (Bitmap(bits), Array(vec)) => bits.intersection_len_array(vec), - (Run(..), _) => todo!(), - (_, Run(..)) => todo!(), + (Run(runs), Array(vec)) | (Array(vec), Run(runs)) => runs.intersection_len_array(vec), + (Run(runs), Bitmap(bitmap)) | (Bitmap(bitmap), Run(runs)) => { + runs.intersection_len_bitmap(bitmap) + } + (Run(runs1), Run(runs2)) => runs1.intersection_len(runs2), } } @@ -375,16 +242,15 @@ impl Store { match self { Array(vec) => vec.len(), Bitmap(bits) => bits.len(), - Run(intervals) => intervals.iter().map(|iv| iv.run_len()).sum(), + Run(intervals) => intervals.len(), } } - #[allow(clippy::todo)] pub fn is_empty(&self) -> bool { match self { Array(vec) => vec.is_empty(), Bitmap(bits) => bits.is_empty(), - Run(..) => todo!(), + Run(runs) => runs.is_empty(), } } @@ -392,7 +258,7 @@ impl Store { match self { Array(vec) => vec.min(), Bitmap(bits) => bits.min(), - Run(ref intervals) => intervals.first().map(|f| f.start), + Run(runs) => runs.min(), } } @@ -401,31 +267,29 @@ impl Store { match self { Array(vec) => vec.max(), Bitmap(bits) => bits.max(), - Run(ref intervals) => intervals.last().map(|f| f.end), + Run(runs) => runs.max(), } } - #[allow(clippy::todo)] pub fn rank(&self, index: u16) -> u64 { match self { Array(vec) => vec.rank(index), Bitmap(bits) => bits.rank(index), - Run(..) => todo!(), + Run(runs) => runs.rank(index), } } - #[allow(clippy::todo)] pub fn select(&self, n: u16) -> Option { match self { Array(vec) => vec.select(n), Bitmap(bits) => bits.select(n), - Run(..) 
=> todo!(), + Run(runs) => runs.select(n), } } pub fn count_runs(&self) -> u64 { - match *self { - Array(ref vec) => { + match self { + Array(vec) => { vec.iter() .fold((-2, 0u64), |(prev, runs), &v| { let new = v as i32; @@ -437,7 +301,7 @@ impl Store { }) .1 } - Bitmap(ref bits) => { + Bitmap(bits) => { let mut num_runs = 0u64; for i in 0..BITMAP_LENGTH - 1 { @@ -451,7 +315,7 @@ impl Store { num_runs += ((last << 1) & !last).count_ones() as u64 + (last >> 63); num_runs } - Run(ref intervals) => intervals.len() as u64, + Run(intervals) => intervals.run_amount(), } } @@ -459,39 +323,37 @@ impl Store { match self { Array(arr) => Bitmap(arr.to_bitmap_store()), Bitmap(_) => self.clone(), - Run(ref intervals) => { - let mut bits = BitmapStore::new(); - for iv in intervals { - for index in iv.start..=iv.end { - bits.mut_array()[bitmap_store::key(index)] |= 1 << bitmap_store::bit(index); - } - } - Bitmap(bits) - } + Run(intervals) => Bitmap(intervals.to_bitmap()), } } pub(crate) fn to_run(&self) -> Self { - match *self { - Array(ref vec) => { - let mut intervals = Vec::new(); - let mut start = *vec.as_slice().first().unwrap(); - for (idx, &v) in vec.as_slice()[1..].iter().enumerate() { - if v - vec.as_slice()[idx] > 1 { - intervals.push(Interval::new(start, vec.as_slice()[idx])); - start = v + match self { + Array(vec) => { + let mut intervals = IntervalStore::new(); + if let Some(mut start) = vec.as_slice().first().copied() { + for (idx, &v) in vec.as_slice()[1..].iter().enumerate() { + // TODO: why are we subtracting the same number here? 
+ if v - vec.as_slice()[idx] > 1 { + intervals + .push_interval_unchecked(Interval::new(start, vec.as_slice()[idx])); + start = v + } } + intervals.push_interval_unchecked(Interval::new( + start, + *vec.as_slice().last().unwrap(), + )); } - intervals.push(Interval::new(start, *vec.as_slice().last().unwrap())); Run(intervals) } - Bitmap(ref bits) => { + Bitmap(bits) => { let mut current = bits.as_array()[0]; let mut i = 0u16; let mut start; let mut last; - let mut intervals = Vec::new(); + let mut intervals = IntervalStore::new(); loop { // Skip over empty words @@ -517,20 +379,20 @@ impl Store { // Run continues until end of this container if current == u64::MAX { - intervals.push(Interval::new(start, u16::MAX)); + intervals.push_interval_unchecked(Interval::new(start, u16::MAX)); break; } let current_last = (!current).trailing_zeros() as u16; last = 64 * i + current_last; - intervals.push(Interval::new(start, last - 1)); + intervals.push_interval_unchecked(Interval::new(start, last - 1)); // pad LSBs with 0s current &= current + 1; } Run(intervals) } - Run(ref _intervals) => panic!("Cannot convert run to run"), + Run(intervals) => Run(intervals.clone()), } } } @@ -544,7 +406,6 @@ impl Default for Store { impl BitOr<&Store> for &Store { type Output = Store; - #[allow(clippy::todo)] fn bitor(self, rhs: &Store) -> Store { match (self, rhs) { (Array(vec1), Array(vec2)) => Array(BitOr::bitor(vec1, vec2)), @@ -563,93 +424,58 @@ impl BitOr<&Store> for &Store { BitOrAssign::bitor_assign(&mut rhs, self); rhs } - (Run(..), _) => todo!(), - (_, Run(..)) => todo!(), + (Run(left), Run(right)) => { + let (smallest, biggest) = if left.run_amount() > right.run_amount() { + (right, left) + } else { + (left, right) + }; + let mut res = biggest.clone(); + BitOrAssign::bitor_assign(&mut res, smallest); + Run(res) + } + (Run(runs), Array(array)) | (Array(array), Run(runs)) => { + let mut ret = runs.clone(); + BitOrAssign::bitor_assign(&mut ret, array); + Run(ret) + } + (Run(runs), 
Bitmap(bitmap)) | (Bitmap(bitmap), Run(runs)) => { + let mut ret = runs.to_bitmap(); + BitOrAssign::bitor_assign(&mut ret, bitmap); + Bitmap(ret) + } } } } impl BitOrAssign for Store { - fn bitor_assign(&mut self, mut rhs: Store) { - match (self, &mut rhs) { - (&mut Array(ref mut vec1), &mut Array(ref vec2)) => { + fn bitor_assign(&mut self, rhs: Store) { + match (self, rhs) { + (&mut Array(ref mut vec1), Array(ref vec2)) => { *vec1 = BitOr::bitor(&*vec1, vec2); } - (&mut Bitmap(ref mut bits1), &mut Array(ref vec2)) => { + (&mut Bitmap(ref mut bits1), Array(ref vec2)) => { BitOrAssign::bitor_assign(bits1, vec2); } - (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { + (&mut Bitmap(ref mut bits1), Bitmap(ref bits2)) => { BitOrAssign::bitor_assign(bits1, bits2); } - (this @ &mut Array(..), Run(..)) => { - let new = rhs.clone(); - BitOrAssign::bitor_assign(this, new); - } - (this @ &mut Bitmap(..), Run(..)) => { + (this @ &mut Bitmap(..), rhs @ Run(..)) => { let other = rhs.to_bitmap(); BitOrAssign::bitor_assign(this, other); } - (&mut Run(ref mut intervals1), Run(ref intervals2)) => { - let mut merged = Vec::new(); - - let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); - let (mut iv1, mut iv2) = (i1.next(), i2.next()); - loop { - // Iterate over two iterators and return the lowest value at each step. - let iv = match (iv1, iv2) { - (None, None) => break, - (Some(v1), None) => { - iv1 = i1.next(); - v1 - } - (None, Some(v2)) => { - iv2 = i2.next(); - v2 - } - (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { - Ordering::Equal => { - iv1 = i1.next(); - iv2 = i2.next(); - v1 - } - Ordering::Less => { - iv1 = i1.next(); - v1 - } - Ordering::Greater => { - iv2 = i2.next(); - v2 - } - }, - }; - - match merged.last_mut() { - // If the list of merged intervals is empty, append the interval. - None => merged.push(*iv), - Some(last) => { - if last.end < iv.start { - // If the interval does not overlap with the previous, append it. 
- merged.push(*iv); - } else { - // If there is overlap, so we merge the current and previous intervals. - last.end = core::cmp::max(last.end, iv.end); - } - } - } - } - - *intervals1 = merged; - } - (ref mut this @ &mut Run(..), Array(ref vec)) => { - for i in vec.iter() { - this.insert(*i); - } + (Run(intervals1), Run(intervals2)) => BitOrAssign::bitor_assign(intervals1, intervals2), + (Run(intervals1), Array(ref vec)) => BitOrAssign::bitor_assign(intervals1, vec), + (this @ Array(..), Run(mut intervals)) => { + let Array(vec) = &this else { unreachable!() }; + BitOrAssign::bitor_assign(&mut intervals, vec); + *this = Run(intervals); } - (this @ &mut Run(..), Bitmap(..)) => { + (this @ Run(..), rhs @ Bitmap(..)) => { *this = this.to_bitmap(); BitOrAssign::bitor_assign(this, rhs); } - (this @ &mut Array(..), &mut Bitmap(..)) => { + (this @ &mut Array(..), mut rhs @ Bitmap(..)) => { mem::swap(this, &mut rhs); BitOrAssign::bitor_assign(this, rhs); } @@ -658,7 +484,6 @@ impl BitOrAssign for Store { } impl BitOrAssign<&Store> for Store { - #[allow(clippy::todo)] fn bitor_assign(&mut self, rhs: &Store) { match (self, rhs) { (&mut Array(ref mut vec1), Array(vec2)) => { @@ -676,8 +501,27 @@ impl BitOrAssign<&Store> for Store { BitOrAssign::bitor_assign(&mut lhs, &*this); *this = lhs; } - (Run(..), _) => todo!(), - (_, Run(..)) => todo!(), + (Run(runs1), Run(runs2)) => { + BitOrAssign::bitor_assign(runs1, runs2); + } + (Run(runs), Array(array)) => { + BitOrAssign::bitor_assign(runs, array); + } + (this @ Array(..), Run(runs)) => { + let mut runs = runs.clone(); + let Array(array) = &this else { unreachable!() }; + BitOrAssign::bitor_assign(&mut runs, array); + *this = Run(runs); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new = runs.to_bitmap(); + BitOrAssign::bitor_assign(&mut new, bitmap); + *this = Bitmap(new); + } + (Bitmap(bitmap), Run(runs)) => { + BitOrAssign::bitor_assign(bitmap, &runs.to_bitmap()); + } } 
} } @@ -712,53 +556,31 @@ impl BitAndAssign for Store { } BitAndAssign::bitand_assign(vec1, &*vec2); } - (&mut Array(ref mut vec), run @ Run(..)) => { - vec.retain(|i| run.contains(i)); - } (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { BitAndAssign::bitand_assign(bits1, bits2); } (&mut Array(ref mut vec1), &mut Bitmap(ref bits2)) => { BitAndAssign::bitand_assign(vec1, bits2); } - (this @ &mut Bitmap(..), Run(..)) => { - let other = rhs.to_bitmap(); - BitAndAssign::bitand_assign(this, other); + (Run(intervals1), Run(intervals2)) => { + *intervals1 = BitAnd::bitand(&*intervals1, &*intervals2); } - (&mut Run(ref mut intervals1), Run(ref intervals2)) => { - let mut merged = Vec::new(); - - let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); - let (mut iv1, mut iv2) = (i1.next(), i2.next()); - - // Iterate over both iterators. - while let (Some(v1), Some(v2)) = (iv1, iv2) { - if v2.start <= v1.end && v1.start <= v2.end { - let start = core::cmp::max(v1.start, v2.start); - let end = core::cmp::min(v1.end, v2.end); - let iv = Interval::new(start, end); - merged.push(iv); - } - - match v1.end.cmp(&v2.end) { - Ordering::Less => iv1 = i1.next(), - Ordering::Greater => iv2 = i2.next(), - Ordering::Equal => { - iv1 = i1.next(); - iv2 = i2.next(); - } - } - } - - *intervals1 = merged; + (this @ &mut Run(..), Array(array)) => { + let Run(runs) = &this else { unreachable!() }; + BitAndAssign::bitand_assign(array, runs); + *this = rhs; } - (this @ &mut Run(..), other @ Array(..)) => { - let new = other.clone(); - BitAndAssign::bitand_assign(this, new); + (Array(array), Run(runs)) => { + BitAndAssign::bitand_assign(array, &*runs); } - (this @ &mut Run(..), other @ Bitmap(..)) => { - let new = other.clone(); - BitAndAssign::bitand_assign(this, new); + (this @ &mut Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + BitAndAssign::bitand_assign(&mut new_bitmap, &*bitmap); + *this = 
Bitmap(new_bitmap); + } + (Bitmap(bitmap), Run(runs)) => { + BitAndAssign::bitand_assign(bitmap, &runs.to_bitmap()); } (this @ &mut Bitmap(..), &mut Array(..)) => { mem::swap(this, &mut rhs); @@ -770,7 +592,6 @@ impl BitAndAssign for Store { impl BitAndAssign<&Store> for Store { #[allow(clippy::suspicious_op_assign_impl)] - #[allow(clippy::todo)] fn bitand_assign(&mut self, rhs: &Store) { match (self, rhs) { (&mut Array(ref mut vec1), Array(vec2)) => { @@ -794,8 +615,25 @@ impl BitAndAssign<&Store> for Store { BitAndAssign::bitand_assign(&mut new, &*this); *this = new; } - (Run(..), _) => todo!(), - (_, Run(..)) => todo!(), + (Run(runs1), Run(runs2)) => { + *runs1 = BitAnd::bitand(&*runs1, runs2); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + BitAndAssign::bitand_assign(&mut new_bitmap, bitmap); + *this = Bitmap(new_bitmap); + } + (Bitmap(bitmap), Run(runs)) => { + BitAndAssign::bitand_assign(bitmap, &runs.to_bitmap()); + } + (this @ Run(..), Array(array)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_array = array.clone(); + new_array.retain(|f| runs.contains(f)); + *this = Array(new_array); + } + (Array(array), Run(runs)) => array.retain(|f| runs.contains(f)), } } } @@ -821,9 +659,6 @@ impl SubAssign<&Store> for Store { (&mut Array(ref mut vec1), Array(vec2)) => { SubAssign::sub_assign(vec1, vec2); } - (&mut Array(ref mut vec), run @ &Run(..)) => { - vec.retain(|i| !run.contains(i)); - } (&mut Bitmap(ref mut bits1), Array(vec2)) => { SubAssign::sub_assign(bits1, vec2); } @@ -833,23 +668,27 @@ impl SubAssign<&Store> for Store { (&mut Array(ref mut vec1), Bitmap(bits2)) => { SubAssign::sub_assign(vec1, bits2); } - (ref mut this @ &mut Bitmap(..), Run(intervals)) => { - for iv in intervals { - this.remove_range(iv.start..=iv.end); - } + (Run(runs1), Run(runs2)) => { + SubAssign::sub_assign(runs1, runs2); } - (ref mut this @ &mut Run(..), Run(intervals2)) 
=> { - for iv in intervals2 { - this.remove_range(iv.start..=iv.end); - } + (Run(runs), Array(array)) => { + array.iter().for_each(|&f| { + runs.remove(f); + }); } - (ref mut this @ &mut Run(..), Array(vec)) => { - for i in vec.iter() { - this.remove(*i); - } + (Array(array), Run(runs)) => { + array.retain(|f| !runs.contains(f)); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + SubAssign::sub_assign(&mut new_bitmap, bitmap); + *this = Bitmap(new_bitmap); + } + (Bitmap(bitmap), Run(runs)) => { + let new_bitmap = runs.to_bitmap(); + SubAssign::sub_assign(bitmap, &new_bitmap); } - // TODO(jpg) difference_with run bitmap - (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } } } @@ -875,7 +714,6 @@ impl BitXor<&Store> for &Store { } impl BitXorAssign for Store { - #[allow(clippy::todo)] fn bitxor_assign(&mut self, mut rhs: Store) { match (self, &mut rhs) { (&mut Array(ref mut vec1), &mut Array(ref vec2)) => { @@ -891,8 +729,31 @@ impl BitXorAssign for Store { mem::swap(this, &mut rhs); BitXorAssign::bitxor_assign(this, rhs); } - (Run(..), _) => todo!(), - (_, Run(..)) => todo!(), + (Run(runs1), Run(runs2)) => { + *runs1 = BitXor::bitxor(&*runs1, &*runs2); + } + (Run(runs1), Array(array)) => array.iter().for_each(|&f| { + if runs1.contains(f) { + runs1.remove(f); + } + }), + (this @ Array(..), Run(runs1)) => { + let Array(array) = &this else { unreachable!() }; + array.iter().for_each(|&f| { + if runs1.contains(f) { + runs1.remove(f); + } + }); + *this = rhs; + } + (Bitmap(bitmap), Run(runs)) => { + BitXorAssign::bitxor_assign(bitmap, &runs.to_bitmap()); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + BitXorAssign::bitxor_assign(bitmap, &runs.to_bitmap()); + *this = rhs; + } } } } @@ -904,25 +765,44 @@ impl BitXorAssign<&Store> for Store { let this = mem::take(vec1); *vec1 = BitXor::bitxor(&this, vec2); } - // TODO(jpg) 
symmetric_difference_with array, run - (&mut Array(ref mut _vec), Run(_intervals)) => unimplemented!(), - (&mut Bitmap(ref mut bits1), Array(vec2)) => { - BitXorAssign::bitxor_assign(bits1, vec2); - } (&mut Bitmap(ref mut bits1), Bitmap(bits2)) => { BitXorAssign::bitxor_assign(bits1, bits2); } - // TODO(jpg) symmetric_difference_with bitmap, run - (ref mut _this @ &mut Bitmap(..), Run(_vec)) => unimplemented!(), - // TODO(jpg) symmetric_difference_with run, * - (&mut Run(ref mut _intervals1), Run(_intervals2)) => unimplemented!(), - (&mut Run(ref mut _intervals), Array(_vec)) => unimplemented!(), - (_this @ &mut Run(..), &Bitmap(..)) => unimplemented!(), (this @ &mut Array(..), Bitmap(bits2)) => { let mut lhs: Store = Bitmap(bits2.clone()); BitXorAssign::bitxor_assign(&mut lhs, &*this); *this = lhs; } + (&mut Bitmap(ref mut bits1), Array(vec2)) => { + BitXorAssign::bitxor_assign(bits1, vec2); + } + (Run(runs1), Run(runs2)) => { + *runs1 = BitXor::bitxor(&*runs1, runs2); + } + (Run(runs1), Array(array)) => array.iter().for_each(|&f| { + if runs1.contains(f) { + runs1.remove(f); + } + }), + (this @ Array(..), Run(runs1)) => { + let Array(array) = &this else { unreachable!() }; + let mut runs1 = runs1.clone(); + array.iter().for_each(|&f| { + if runs1.contains(f) { + runs1.remove(f); + } + }); + *this = Run(runs1); + } + (Bitmap(bitmap), Run(runs)) => { + BitXorAssign::bitxor_assign(bitmap, &runs.to_bitmap()); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + BitXorAssign::bitxor_assign(&mut new_bitmap, bitmap); + *this = Bitmap(new_bitmap); + } } } } @@ -934,7 +814,7 @@ impl<'a> IntoIterator for &'a Store { match self { Array(vec) => Iter::Array(vec.iter()), Bitmap(bits) => Iter::BitmapBorrowed(bits.iter()), - Run(ref intervals) => Iter::Run(RunIter::new(intervals.to_vec())), + Run(intervals) => Iter::RunBorrowed(intervals.iter()), } } } @@ -946,7 +826,7 @@ impl IntoIterator for 
Store { match self { Array(vec) => Iter::Vec(vec.into_iter()), Bitmap(bits) => Iter::BitmapOwned(bits.into_iter()), - Run(intervals) => Iter::Run(RunIter::new(intervals)), + Run(intervals) => Iter::RunOwned(intervals.into_iter()), } } } @@ -959,56 +839,15 @@ impl PartialEq for Store { bits1.len() == bits2.len() && bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) } + // TODO: Run containers should be checked against other types of containers right? (Run(intervals1), Run(ref intervals2)) => intervals1 == intervals2, _ => false, } } } -impl RunIter { - fn new(intervals: Vec) -> RunIter { - RunIter { run: 0, offset: 0, intervals } - } - - fn move_next(&mut self) { - self.offset += 1; - if self.offset == self.intervals[self.run].run_len() { - self.offset = 0; - self.run += 1; - } - } -} - -impl Iterator for RunIter { - type Item = u16; - - fn next(&mut self) -> Option { - if self.run == self.intervals.len() { - return None; - } - let result = self.intervals[self.run].start + self.offset as u16; - self.move_next(); - Some(result) - } - - fn size_hint(&self) -> (usize, Option) { - let remaining_size = - self.intervals[self.run..].iter().map(|f| f.run_len()).sum::() - self.offset; - let as_usize: Result = remaining_size.try_into(); - (as_usize.unwrap_or(usize::MAX), as_usize.ok()) - } -} - -impl DoubleEndedIterator for RunIter { - #[allow(clippy::todo)] - fn next_back(&mut self) -> Option { - todo!() - } -} - impl Iter<'_> { /// Advance the iterator to the first value greater than or equal to `n`. - #[allow(clippy::todo)] pub(crate) fn advance_to(&mut self, n: u16) { match self { Iter::Array(inner) => { @@ -1025,7 +864,8 @@ impl Iter<'_> { } Iter::BitmapBorrowed(inner) => inner.advance_to(n), Iter::BitmapOwned(inner) => inner.advance_to(n), - Iter::Run(..) 
=> todo!(), + Iter::RunOwned(inner) => inner.advance_to(n), + Iter::RunBorrowed(inner) => inner.advance_to(n), } } @@ -1050,7 +890,8 @@ impl Iter<'_> { } Iter::BitmapBorrowed(inner) => inner.advance_back_to(n), Iter::BitmapOwned(inner) => inner.advance_back_to(n), - Iter::Run(..) => todo!(), + Iter::RunOwned(inner) => inner.advance_back_to(n), + Iter::RunBorrowed(inner) => inner.advance_back_to(n), } } } @@ -1064,7 +905,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.next(), Iter::BitmapBorrowed(inner) => inner.next(), Iter::BitmapOwned(inner) => inner.next(), - Iter::Run(ref mut inner) => inner.next(), + Iter::RunOwned(inner) => inner.next(), + Iter::RunBorrowed(inner) => inner.next(), } } @@ -1074,7 +916,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.size_hint(), Iter::BitmapBorrowed(inner) => inner.size_hint(), Iter::BitmapOwned(inner) => inner.size_hint(), - Iter::Run(inner) => inner.size_hint(), + Iter::RunOwned(inner) => inner.size_hint(), + Iter::RunBorrowed(inner) => inner.size_hint(), } } @@ -1087,7 +930,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.count(), Iter::BitmapBorrowed(inner) => inner.count(), Iter::BitmapOwned(inner) => inner.count(), - Iter::Run(inner) => inner.count(), + Iter::RunOwned(inner) => inner.count(), + Iter::RunBorrowed(inner) => inner.count(), } } @@ -1097,7 +941,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.nth(n), Iter::BitmapBorrowed(inner) => inner.nth(n), Iter::BitmapOwned(inner) => inner.nth(n), - Iter::Run(inner) => inner.nth(n), + Iter::RunOwned(inner) => inner.nth(n), + Iter::RunBorrowed(inner) => inner.nth(n), } } } @@ -1109,7 +954,8 @@ impl DoubleEndedIterator for Iter<'_> { Iter::Vec(inner) => inner.next_back(), Iter::BitmapBorrowed(inner) => inner.next_back(), Iter::BitmapOwned(inner) => inner.next_back(), - Iter::Run(inner) => inner.next_back(), + Iter::RunOwned(inner) => inner.next_back(), + Iter::RunBorrowed(inner) => inner.next_back(), } } } From 
948b7bd2c91b82e2ce49dad550ab11197c2890b9 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Wed, 16 Apr 2025 00:18:13 -0400 Subject: [PATCH 53/83] dont deserialize a run container with overlapping ranges --- roaring/src/bitmap/serialization.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 6947d3d76..615bf82ee 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -231,8 +231,15 @@ impl RoaringBitmap { let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum(); let mut store = Store::with_capacity(cardinality); + let mut last_end = None::; intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> { let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; + if let Some(last_end) = last_end.replace(end) { + if s <= last_end.saturating_add(1) { + // Range overlaps or would be contiguous with the previous range + return Err(io::ErrorKind::InvalidData); + } + } store.insert_range(RangeInclusive::new(s, end)); Ok(()) })?; From a21ecf6e86caf56dedbe9991c7d87f4a4675074e Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Wed, 16 Apr 2025 00:18:45 -0400 Subject: [PATCH 54/83] require strictly increasing container keys --- roaring/src/bitmap/serialization.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 615bf82ee..f1033b8f7 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -205,9 +205,18 @@ impl RoaringBitmap { let mut containers = Vec::with_capacity(size); + let mut last_key = None::; // Read each container for i in 0..size { let key = description_bytes.read_u16::()?; + if let Some(last_key) = last_key.replace(key) { + if key <= last_key { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "container keys are not sorted", + )); + } + } let cardinality = 
u64::from(description_bytes.read_u16::()?) + 1; // If the run container bitmap is present, check if this container is a run container From c348cbc8fd5fa9f227e2365f75d9f9e46ca1c0c1 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Wed, 16 Apr 2025 00:22:05 -0400 Subject: [PATCH 55/83] add tests for deserialization errors Based on tests added in CRoaring at --- roaring/tests/serialization.rs | 185 +++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) diff --git a/roaring/tests/serialization.rs b/roaring/tests/serialization.rs index 78325017c..3993d3ade 100644 --- a/roaring/tests/serialization.rs +++ b/roaring/tests/serialization.rs @@ -541,3 +541,188 @@ fn test_strange() { let new = serialize_and_deserialize(&original); assert_eq!(original, new); } + +fn assert_invalid_serialization(serialized: &[u8], msg: &str) { + let result = RoaringBitmap::deserialize_from(serialized); + if let Ok(res) = result { + panic!("Expected error: {}. Got: {:?}", msg, res); + } +} + +#[test] +fn deserialize_negative_container_count() { + let data = [ + 0x3A, 0x30, 0, 0, // Serial cookie, no run containers + 0x00, 0x00, 0x00, 0x80, // Container count (NEGATIVE) + ]; + assert_invalid_serialization(&data, "Negative container count"); +} + +#[test] +fn deserialize_huge_container_count() { + const MAX_CONTAINERS: usize = 0xFFFF; + let data = [ + 0x3A, 0x30, 0, 0, // Serial cookie, no run containers + 0x01, 0x00, 0x01, 0x00, // Container count (MAX_CONTAINERS + 1) + ]; + assert_invalid_serialization(&data, "Huge container count"); + + // For each container, 32 bits for container offset, 16 bits for a key, cardinality - 1, and a + // single array value + let full_size = data.len() + (MAX_CONTAINERS + 1) * (4 + 3 * 2); + let mut full_data = vec![0; full_size]; + full_data[..data.len()].copy_from_slice(&data); + assert_invalid_serialization(&full_data, "Huge container count"); +} + +#[test] +fn deserialize_empty_run_container() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 
0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 0, 0, // Cardinality - 1 of the first container + 0, 0, // First Container - Number of runs + ]; + assert_invalid_serialization(&data, "Empty run container"); +} + +#[test] +fn deserialize_run_container_contiguous_ranges() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 1, 0, // Cardinality - 1 of the first container + 2, 0, // First Container - Number of runs + 0, 0, // First run start + 0, 0, // First run length - 1 + 1, 0, // Second run start (STARTS AT THE END OF THE FIRST) + 0, 0, // Second run length - 1 + ]; + + assert_invalid_serialization(&data, "Contiguous ranges in run container"); +} + +#[test] +fn deserialize_run_container_overlap() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 4, 0, // Cardinality - 1 of the first container + 2, 0, // First Container - Number of runs + 0, 0, // First run start + 4, 0, // First run length - 1 + 1, 0, // Second run start (STARTS INSIDE THE FIRST) + 0, 0, // Second run length - 1 + ]; + + assert_invalid_serialization(&data, "Overlapping ranges in run container"); +} + +#[test] +fn deserialize_run_container_overflow() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 4, 0, // Cardinality - 1 of the first container + 1, 0, // First Container - Number of runs + 0xFE, 0xFF, // First run start + 4, 0, // First run length - 1 (OVERFLOW) + ]; + + assert_invalid_serialization(&data, "Overflow in run container"); +} + +#[test] +fn deserialize_duplicate_keys() { + let data = 
[ + 0x3B, 0x30, // Serial Cookie + 0x01, 0x00, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 0, 0, // Cardinality - 1 of the first container + 0, 0, // Upper 16 bits of the second container - DUPLICATE + 0, 0, // Cardinality - 1 of the second container + 0, 0, // Only value of first container + 0, 0, // Only value of second container + ]; + + assert_invalid_serialization(&data, "Duplicate keys in containers"); +} + +#[test] +fn deserialize_unsorted_keys() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 1, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 1, 0, // Upper 16 bits of the first container + 0, 0, // Cardinality - 1 of the first container + 0, 0, // Upper 16 bits of the second container (LESS THAN FIRST) + 0, 0, // Cardinality - 1 of the second container + 0, 0, // Only value of first container + 0, 0, // Only value of second container + ]; + + assert_invalid_serialization(&data, "Unsorted keys in containers"); +} + +#[test] +fn deserialize_array_duplicate_value() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 1, 0, // Cardinality - 1 of the first container + 0, 0, // first value of first container + 0, 0, // second value of first container (DUPLICATE) + ]; + + assert_invalid_serialization(&data, "Duplicate values in array container"); +} + +#[test] +fn deserialize_array_unsorted_values() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 1, 0, // Cardinality - 1 of the first container + 1, 0, // first value of first container + 0, 0, // second value of first container (LESS THAN FIRST) + ]; + + assert_invalid_serialization(&data, "Unsorted values in array container"); +} + +#[test] +fn deserialize_bitset_incorrect_cardinality() { + let data_start = [ + 
0x3B, 0x30, // Serial Cookie + 0, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 0xFF, + 0xFF, // Cardinality - 1 of the first container. + + // First container is a bitset, should be followed by 1 << 16 bits + ]; + let mut data = vec![0xFF; data_start.len() + (1 << 16) / 8]; + data[..data_start.len()].copy_from_slice(&data_start); + // Bitset filled with 0xFF will have the correct cardinality + let result = RoaringBitmap::deserialize_from(&data[..]).unwrap(); + assert_eq!(result.len(), 0x1_0000); + + // Bitset will no longer have the correct cardinality + data[data_start.len()] = 0x0; + assert_invalid_serialization(&data, "Bitset incorrect cardinality"); +} From f6c70ac30100500ebb2203d6ca52e013275656cd Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 15 Apr 2025 23:28:51 -0400 Subject: [PATCH 56/83] add fuzzer --- fuzz/.gitignore | 4 + fuzz/Cargo.lock | 196 +++++++++++++++ fuzz/Cargo.toml | 22 ++ fuzz/fuzz_targets/against_croaring.rs | 38 +++ fuzz/fuzz_targets/arbitrary_ops/mod.rs | 325 +++++++++++++++++++++++++ fuzz/rust-toolchain.toml | 2 + 6 files changed, 587 insertions(+) create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.lock create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/against_croaring.rs create mode 100644 fuzz/fuzz_targets/arbitrary_ops/mod.rs create mode 100644 fuzz/rust-toolchain.toml diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 000000000..1a45eee77 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock new file mode 100644 index 000000000..3afac7aca --- /dev/null +++ b/fuzz/Cargo.lock @@ -0,0 +1,196 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + +[[package]] +name = "bytemuck" +version = "1.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "croaring" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1583a0c6ed2e2fe1a948e23d62ca42e0f2f3b45c59276c884a947c0dab47a20d" +dependencies = [ + "croaring-sys", +] + +[[package]] +name = "croaring-sys" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3124cf04e54f50ecc5f53874e1b1e3a803e35523221bd2864851977b48ba7d00" +dependencies = [ + "cc", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "getrandom" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + +[[package]] +name = "jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "libc" +version = "0.2.172" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + +[[package]] +name = "roaring" +version = "0.10.12" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "roaring-fuzz" +version = "0.0.0" +dependencies = [ + "croaring", + "libfuzzer-sys", + "roaring", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 000000000..8d9b34655 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "roaring-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = { version = "0.4.9", features = ["arbitrary-derive"] } +roaring = { path = "../roaring" } +croaring = "2.0" + +[[bin]] +name = "against_croaring" +path = "fuzz_targets/against_croaring.rs" +test = false +doc = false +bench = false + +[workspace] diff --git a/fuzz/fuzz_targets/against_croaring.rs b/fuzz/fuzz_targets/against_croaring.rs new file mode 100644 index 000000000..bdaa58524 --- /dev/null +++ b/fuzz/fuzz_targets/against_croaring.rs @@ -0,0 +1,38 @@ +#![no_main] + +mod arbitrary_ops; + +use libfuzzer_sys::arbitrary::{self, Arbitrary}; +use libfuzzer_sys::fuzz_target; + +use 
crate::arbitrary_ops::{check_equal, Operation}; + +#[derive(Arbitrary, Debug)] +struct FuzzInput<'a> { + ops: Vec, + initial_input: &'a [u8], +} + +fuzz_target!(|input: FuzzInput| { + let lhs_c = croaring::Bitmap::try_deserialize::(input.initial_input); + let lhs_r = roaring::RoaringBitmap::deserialize_from(input.initial_input).ok(); + + let (mut lhs_c, mut lhs_r) = match (lhs_c, lhs_r) { + (Some(lhs_c), Some(lhs_r)) => { + check_equal(&lhs_c, &lhs_r); + (lhs_c, lhs_r) + } + (None, None) => Default::default(), + (Some(_), None) => panic!("croaring deserialized, but roaring failed"), + (None, Some(_)) => panic!("roaring deserialized, but croaring failed"), + }; + + let mut rhs_c = croaring::Bitmap::new(); + let mut rhs_r = roaring::RoaringBitmap::new(); + + for op in input.ops { + op.apply(&mut lhs_c, &mut rhs_c, &mut lhs_r, &mut rhs_r); + } + check_equal(&lhs_c, &lhs_r); + check_equal(&rhs_c, &rhs_r); +}); diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs new file mode 100644 index 000000000..fad72f3d9 --- /dev/null +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -0,0 +1,325 @@ +use libfuzzer_sys::arbitrary::{self, Arbitrary, Unstructured}; +use std::mem; +use std::ops::RangeInclusive; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Num(pub u32); + +pub const MAX_NUM: u32 = 0x1_0000 * 4; + +impl<'a> Arbitrary<'a> for Num { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + Ok(Self(u.int_in_range(0..=(MAX_NUM - 1))?)) + } +} + +#[derive(Arbitrary, Debug)] +pub enum Operation { + Binary(BitmapBinaryOperation), + MutateLhs(MutableBitmapOperation), + Read(ReadBitmapOperation), + SwapSides, +} + +impl Operation { + pub fn apply( + &self, + lhs_c: &mut croaring::Bitmap, + rhs_c: &mut croaring::Bitmap, + lhs_r: &mut roaring::RoaringBitmap, + rhs_r: &mut roaring::RoaringBitmap, + ) { + match self { + Operation::Binary(op) => op.apply(lhs_c, rhs_c, lhs_r, rhs_r), + 
Operation::MutateLhs(op) => op.apply(lhs_c, lhs_r), + Operation::Read(op) => op.apply(lhs_c, lhs_r), + Operation::SwapSides => { + mem::swap(lhs_c, rhs_c); + mem::swap(lhs_r, rhs_r); + } + } + } +} + +#[derive(Arbitrary, Debug)] +pub enum MutableBitmapOperation { + Insert(Num), + InsertRange(RangeInclusive), + Push(Num), + Remove(Num), + RemoveRange(RangeInclusive), + Clear, + Extend(Vec), + // Probably turn it into a bitmap + MakeBitmap { key: u16 }, +} + +#[derive(Arbitrary, Debug)] +pub enum ReadBitmapOperation { + ContainsRange(RangeInclusive), + Contains(Num), + RangeCardinality(RangeInclusive), + Cardinality, + IsEmpty, + IsFull, + Minimum, + Maximum, + Rank(Num), + Select(Num), + Statistics, + Clone, + Debug, + SerializedSize, + Serialize, +} + +#[derive(Arbitrary, Debug)] +pub enum BitmapBinaryOperation { + Eq, + IsSubset, + And, + Or, + Xor, + AndNot, +} + +impl ReadBitmapOperation { + pub fn apply(&self, x: &mut croaring::Bitmap, y: &roaring::RoaringBitmap) { + match *self { + ReadBitmapOperation::ContainsRange(ref range) => { + let range = range.start().0..=range.end().0; + let expected = x.contains_range(range.clone()); + let actual = y.contains_range(range); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Contains(Num(n)) => { + let expected = x.contains(n); + let actual = y.contains(n); + assert_eq!(expected, actual); + } + ReadBitmapOperation::RangeCardinality(ref range) => { + let range = range.start().0..=range.end().0; + let expected = x.range_cardinality(range.clone()); + let actual = y.range_cardinality(range); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Cardinality => { + let expected = x.cardinality(); + let actual = y.len(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::IsEmpty => { + let expected = x.is_empty(); + let actual = y.is_empty(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::IsFull => { + let expected = x.contains_range(..); + let actual = y.is_full(); + assert_eq!(expected, 
actual); + } + ReadBitmapOperation::Minimum => { + let expected = x.minimum(); + let actual = y.min(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Maximum => { + let expected = x.maximum(); + let actual = y.max(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Rank(Num(n)) => { + let expected = x.rank(n); + let actual = y.rank(n); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Select(Num(n)) => { + let expected = x.select(n); + let actual = y.select(n); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Statistics => { + // roaring-rs doesn't support range containers (yet) + x.remove_run_compression(); + let expected = x.statistics(); + let actual = y.statistics(); + // Convert to the same statistics struct + let expected = { + let mut v = actual; + v.n_containers = expected.n_containers; + v.n_array_containers = expected.n_array_containers; + v.n_run_containers = expected.n_run_containers; + v.n_bitset_containers = expected.n_bitset_containers; + v.n_values_array_containers = expected.n_values_array_containers; + v.n_values_run_containers = expected.n_values_run_containers; + v.n_values_bitset_containers = expected.n_values_bitset_containers.into(); + // The n_bytes_* fields are not directly comparable: + // they are based on the number of bytes of _capacity_ of the + // containers, which depends on the allocation strategy. 
+ // v.n_bytes_array_containers = expected.n_bytes_array_containers.into(); + // v.n_bytes_run_containers = expected.n_bytes_run_containers.into(); + // v.n_bytes_bitset_containers = expected.n_bytes_bitset_containers.into(); + v.max_value = x.maximum(); + v.min_value = x.minimum(); + v.cardinality = x.cardinality(); + v + }; + assert_eq!(expected, actual); + } + ReadBitmapOperation::Clone => { + assert_eq!(*y, y.clone()); + } + ReadBitmapOperation::Debug => { + use std::io::Write; + write!(std::io::sink(), "{:?}", y).unwrap(); + } + ReadBitmapOperation::SerializedSize => { + // roaring-rs doesn't support range containers (yet) + x.remove_run_compression(); + let expected = x.get_serialized_size_in_bytes::(); + let actual = y.serialized_size(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Serialize => { + // roaring-rs doesn't support range containers (yet) + x.remove_run_compression(); + let expected = x.serialize::(); + let mut actual = Vec::new(); + y.serialize_into(&mut actual).unwrap(); + assert_eq!(expected, actual); + } + } + } +} + +impl MutableBitmapOperation { + pub fn apply(&self, x: &mut croaring::Bitmap, y: &mut roaring::RoaringBitmap) { + match *self { + MutableBitmapOperation::Insert(Num(n)) => { + let expected = x.add_checked(n); + let actual = y.insert(n); + assert_eq!(expected, actual); + } + MutableBitmapOperation::InsertRange(ref range) => { + let range = range.start().0..=range.end().0; + let expected_added = u64::try_from(range.clone().count()).unwrap() + - x.range_cardinality(range.clone()); + x.add_range(range.clone()); + assert_eq!(expected_added, y.insert_range(range)); + } + MutableBitmapOperation::Push(Num(n)) => { + let should_push = y.max().is_none_or(|max| n > max); + if should_push { + x.add(n); + } + assert_eq!(should_push, y.push(n)); + } + MutableBitmapOperation::Remove(Num(n)) => { + let expected = x.remove_checked(n); + let actual = y.remove(n); + assert_eq!(expected, actual); + } + 
MutableBitmapOperation::RemoveRange(ref range) => { + let range = range.start().0..=range.end().0; + let expected_removed = x.range_cardinality(range.clone()); + x.remove_range(range.clone()); + assert_eq!(expected_removed, y.remove_range(range)); + } + MutableBitmapOperation::Clear => { + x.clear(); + y.clear(); + } + MutableBitmapOperation::Extend(ref items) => { + // Safety - Num is repr(transparent) over u32 + let items: &[u32] = unsafe { mem::transmute(&items[..]) }; + x.add_many(items); + y.extend(items); + } + MutableBitmapOperation::MakeBitmap { key } => { + let key = u32::from(key); + let start = key * 0x1_0000; + let end = start + 9 * 1024; + for i in (start..end).step_by(2) { + x.add(i); + y.insert(i); + } + } + } + } +} + +impl BitmapBinaryOperation { + pub fn apply( + &self, + lhs_c: &mut croaring::Bitmap, + rhs_c: &croaring::Bitmap, + lhs_r: &mut roaring::RoaringBitmap, + rhs_r: &roaring::RoaringBitmap, + ) { + match *self { + BitmapBinaryOperation::Eq => { + let expected = lhs_c == rhs_c; + let actual = lhs_r == rhs_r; + assert_eq!(expected, actual); + } + BitmapBinaryOperation::IsSubset => { + let expected = lhs_c.is_subset(rhs_c); + let actual = lhs_r.is_subset(rhs_r); + assert_eq!(expected, actual); + } + BitmapBinaryOperation::And => { + let expected_len = lhs_r.intersection_len(rhs_r); + let actual_len = lhs_c.and_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r &= rhs_r; + *lhs_c &= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + BitmapBinaryOperation::Or => { + let expected_len = lhs_r.union_len(rhs_r); + let actual_len = lhs_c.or_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r |= rhs_r; + *lhs_c |= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + BitmapBinaryOperation::Xor => { + let expected_len = lhs_r.symmetric_difference_len(rhs_r); + let actual_len = lhs_c.xor_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r ^= rhs_r; + *lhs_c ^= rhs_c; + 
assert_eq!(lhs_r.len(), expected_len); + } + BitmapBinaryOperation::AndNot => { + let expected_len = lhs_r.difference_len(rhs_r); + let actual_len = lhs_c.andnot_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r -= rhs_r; + *lhs_c -= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + } + } +} + +pub(crate) fn check_equal(c: &croaring::Bitmap, r: &roaring::RoaringBitmap) { + let mut lhs = c.iter(); + let mut rhs = r.iter(); + + loop { + match (lhs.next(), rhs.next()) { + (Some(l), Some(r)) => { + assert_eq!(l, r); + } + (None, None) => break, + (Some(n), None) => panic!("croaring has more elements: {n}"), + (None, Some(n)) => panic!("roaring has more elements: {n}"), + } + } +} diff --git a/fuzz/rust-toolchain.toml b/fuzz/rust-toolchain.toml new file mode 100644 index 000000000..5d56faf9a --- /dev/null +++ b/fuzz/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" From 340dff2e8c2f8aac9e739c8492339d6254f1da0b Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 20 Apr 2025 11:43:56 +0200 Subject: [PATCH 57/83] test: interval store Integrates interval stores in the current tests by implementing `BitSetLike` for `IntervalStore`. This commit also fixes all the bugs these tests discovered. 
--- roaring/src/bitmap/arbitrary.rs | 50 ++- roaring/src/bitmap/container.rs | 6 +- roaring/src/bitmap/multiops.rs | 12 +- roaring/src/bitmap/serialization.rs | 10 +- roaring/src/bitmap/store/array_store/mod.rs | 6 +- roaring/src/bitmap/store/bitmap_store.rs | 7 +- roaring/src/bitmap/store/interval_store.rs | 349 ++++++++++++-------- roaring/src/bitmap/store/mod.rs | 49 ++- 8 files changed, 303 insertions(+), 186 deletions(-) diff --git a/roaring/src/bitmap/arbitrary.rs b/roaring/src/bitmap/arbitrary.rs index 7a3d2eb3f..88eb7d520 100644 --- a/roaring/src/bitmap/arbitrary.rs +++ b/roaring/src/bitmap/arbitrary.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod test { - use crate::bitmap::container::Container; - use crate::bitmap::store::{ArrayStore, BitmapStore, Store}; + use crate::bitmap::container::{Container, RUN_MAX_SIZE}; + use crate::bitmap::store::{ArrayStore, BitmapStore, IntervalStore, Store}; use crate::RoaringBitmap; use core::fmt::{Debug, Formatter}; use proptest::bits::{BitSetLike, SampledBitSetStrategy}; @@ -68,6 +68,47 @@ mod test { } } + impl BitSetLike for IntervalStore { + fn new_bitset(max: usize) -> Self { + assert!(max <= IntervalStore::MAX + 1); + IntervalStore::new() + } + + fn len(&self) -> usize { + IntervalStore::MAX + 1 + } + + fn test(&self, bit: usize) -> bool { + assert!(bit <= IntervalStore::MAX); + self.contains(bit as u16) + } + + fn set(&mut self, bit: usize) { + assert!(bit <= IntervalStore::MAX); + self.insert(bit as u16); + } + + fn clear(&mut self, bit: usize) { + assert!(bit <= IntervalStore::MAX); + self.remove(bit as u16); + } + + fn count(&self) -> usize { + self.len() as usize + } + } + + impl IntervalStore { + const MAX: usize = u16::MAX as usize; + + pub fn sampled( + size: impl Into, + bits: impl Into, + ) -> SampledBitSetStrategy { + SampledBitSetStrategy::new(size.into(), bits.into()) + } + } + impl Debug for ArrayStore { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { if self.len() < 16 { @@ -126,12 +167,11 @@ mod test { 
} impl Debug for Store { - #[allow(clippy::todo)] fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { match self { Store::Array(a) => write!(f, "Store({a:?})"), Store::Bitmap(b) => write!(f, "Store({b:?})"), - Store::Run(_) => todo!(), + Store::Run(c) => write!(f, "Store({c:?})"), } } } @@ -142,6 +182,8 @@ mod test { ArrayStore::sampled(1..=4096, ..=u16::MAX as usize).prop_map(Store::Array), BitmapStore::sampled(4097..u16::MAX as usize, ..=u16::MAX as usize) .prop_map(Store::Bitmap), + IntervalStore::sampled(1..=RUN_MAX_SIZE as usize, ..=u16::MAX as usize) + .prop_map(Store::Run), ] } } diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index cfee0feb3..6708f2ac8 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -108,7 +108,6 @@ impl Container { result } - #[allow(clippy::todo)] pub fn remove_smallest(&mut self, n: u64) { match &self.store { Store::Bitmap(bits) => { @@ -121,11 +120,10 @@ impl Container { } } Store::Array(_) => self.store.remove_smallest(n), - Store::Run(..) => todo!(), + Store::Run(_) => self.store.remove_smallest(n), }; } - #[allow(clippy::todo)] pub fn remove_biggest(&mut self, n: u64) { match &self.store { Store::Bitmap(bits) => { @@ -138,7 +136,7 @@ impl Container { } } Store::Array(_) => self.store.remove_biggest(n), - Store::Run(..) 
=> todo!(), + Store::Run(_) => self.store.remove_biggest(n), }; } diff --git a/roaring/src/bitmap/multiops.rs b/roaring/src/bitmap/multiops.rs index 48fbe23fb..4ba588aa6 100644 --- a/roaring/src/bitmap/multiops.rs +++ b/roaring/src/bitmap/multiops.rs @@ -385,7 +385,6 @@ fn try_multi_xor_ref<'a, E: 'a>( Ok(RoaringBitmap { containers }) } -#[allow(clippy::todo)] fn merge_container_ref<'a>( containers: &mut Vec>, rhs: &'a [Container], @@ -419,8 +418,15 @@ fn merge_container_ref<'a>( // If it was borrowed it will clone-on-write op(&mut lhs.to_mut().store, &rhs.store); } - (Store::Run(..), _) => todo!(), - (_, Store::Run(..)) => todo!(), + (Store::Run(..), Store::Run(..)) => { + op(&mut lhs.to_mut().store, &rhs.store); + } + (Store::Run(..), _) => { + op(&mut lhs.to_mut().store, &rhs.store); + } + (Store::Array(..), Store::Run(..)) => { + op(&mut lhs.to_mut().store, &rhs.store); + } }; } } diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 61392c76b..8f33218f9 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -1,5 +1,7 @@ use crate::bitmap::container::{Container, ARRAY_LIMIT}; -use crate::bitmap::store::{ArrayStore, BitmapStore, Interval, Store, BITMAP_LENGTH}; +use crate::bitmap::store::{ + ArrayStore, BitmapStore, Interval, Store, BITMAP_LENGTH, RUN_ELEMENT_BYTES, RUN_NUM_BYTES, +}; use crate::RoaringBitmap; use bytemuck::cast_slice_mut; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -22,8 +24,6 @@ pub(crate) const OFFSET_BYTES: usize = 4; // Sizes of container structures pub(crate) const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; pub(crate) const ARRAY_ELEMENT_BYTES: usize = 2; -pub(crate) const RUN_NUM_BYTES: usize = 2; -pub(crate) const RUN_ELEMENT_BYTES: usize = 4; impl RoaringBitmap { /// Return the size in bytes of the serialized output. @@ -52,7 +52,7 @@ impl RoaringBitmap { Store::Bitmap(..) 
=> BITMAP_BYTES, Store::Run(ref intervals) => { has_run_containers = true; - RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * intervals.run_amount() as usize) + intervals.byte_size() } }) .sum(); @@ -109,7 +109,7 @@ impl RoaringBitmap { } let mut offset = header_size(size, has_run_containers) as u32; - let has_offsets = if has_run_containers { size > OFFSET_BYTES } else { true }; + let has_offsets = if has_run_containers { size >= OFFSET_BYTES } else { true }; if has_offsets { for container in &self.containers { writer.write_u32::(offset)?; diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 071440954..0d41f4d62 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -456,21 +456,19 @@ mod tests { use super::*; use crate::bitmap::store::Store; - #[allow(clippy::todo)] fn into_vec(s: Store) -> Vec { match s { Store::Array(vec) => vec.vec, Store::Bitmap(bits) => bits.to_array_store().vec, - Store::Run(_) => todo!(), + Store::Run(runs) => runs.iter().collect(), } } - #[allow(clippy::todo)] fn into_bitmap_store(s: Store) -> Store { match s { Store::Array(vec) => Store::Bitmap(vec.to_bitmap_store()), Store::Bitmap(..) 
=> s, - Store::Run(_) => todo!(), + Store::Run(runs) => Store::Bitmap(runs.to_bitmap()), } } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index def2ed057..f90cf596e 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -341,10 +341,11 @@ impl BitmapStore { let (end_id, end_bit) = (key(interval.end), bit(interval.end)); let mut amount: u64 = 0; for (i, mut cur_bit) in self.bits[start_id..=end_id].iter().copied().enumerate() { - if i == start_id { + if i == 0 { cur_bit &= u64::MAX << start_bit; - } else if i == end_id { - cur_bit &= !(u64::MAX << (u64::BITS - end_bit as u32)); + } + if i == end_id - start_id { + cur_bit &= !(u64::MAX.checked_shl(end_bit as u32 + 1).unwrap_or(0)); } amount += u64::from(cur_bit.count_ones()); } diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 8e86526dc..afa1aefca 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -1,18 +1,18 @@ -#![allow(unused)] use alloc::vec::Vec; -use core::borrow::Borrow; -use core::iter::Peekable; use core::ops::{ - BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, Deref, RangeInclusive, SubAssign, + BitAnd, BitAndAssign, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, SubAssign, }; use core::slice::Iter; use core::{cmp::Ordering, ops::ControlFlow}; -use super::{ArrayStore, BitmapStore, Store}; +use super::{ArrayStore, BitmapStore}; #[derive(PartialEq, Eq, Clone, Debug)] pub(crate) struct IntervalStore(Vec); +pub(crate) const RUN_NUM_BYTES: usize = 2; +pub(crate) const RUN_ELEMENT_BYTES: usize = 4; + impl Default for IntervalStore { fn default() -> Self { Self::new() @@ -24,6 +24,14 @@ impl IntervalStore { Self(Default::default()) } + pub fn full() -> Self { + Self(alloc::vec![Interval::new(0, u16::MAX)]) + } + + pub fn byte_size(&self) -> usize { + RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * 
self.run_amount() as usize) + } + pub fn from_vec_unchecked(vec: Vec) -> Self { #[cfg(debug_assertions)] { @@ -56,7 +64,7 @@ impl IntervalStore { }; // There exists an interval at or before the location we should insert if let Some(loc_or_last) = loc_or_last { - if index == self.0[loc_or_last].end + 1 { + if Some(index) == self.0[loc_or_last].end.checked_add(1) { // index immediately follows an interval // Checking for sandwiched intervals is not needed because of binary search loc // i.e. when the index is sandwiched between two intervals we always @@ -76,6 +84,14 @@ impl IntervalStore { return; } self.0[loc].start -= 1; + } else if loc_or_last + .checked_sub(1) + .map(|f| self.0[f].end == index - 1) + .unwrap_or(false) + { + // We are sandwiched between 2 intervals, but the previous interval is + // continuous with the index. If the loc + self.0[loc_or_last - 1].end = index; } else { // The value stands alone self.0.insert(loc, Interval::new(index, index)); @@ -257,8 +273,6 @@ impl IntervalStore { .binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()) .map(|loc| { // loc always points to an interval - let equal_to_start = self.0[loc].start == index; - let equal_to_end = self.0[loc].end == index; if index == self.0[loc].start && index == self.0[loc].end { // Remove entire run if it only contains this value self.0.remove(loc); @@ -285,128 +299,95 @@ impl IntervalStore { if range.is_empty() { return 0; } - let interval = Interval { start: *range.start(), end: *range.end() }; + + let interval = Interval::new(*range.start(), *range.end()); let first_interval = self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); let end_interval = self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); - struct IdValue { - index: usize, - value: u16, - } - struct IntervalRange { - drain_range: core::ops::Range, - begin_value: Option, - end_value: Option, - residual_count: u64, - } - let todo = match (first_interval, 
end_interval) { + match (first_interval, end_interval) { // both start and end index are contained in intervals - (Ok(begin), Ok(end)) => { - if self.0[begin].start == interval.start && self.0[end].end == interval.end { - IntervalRange { - drain_range: begin..end + 1, - begin_value: None, - end_value: None, - residual_count: 0, - } - } else if self.0[begin].start == interval.start { - IntervalRange { - drain_range: begin..end, - begin_value: None, - end_value: Some(IdValue { index: end, value: interval.end + 1 }), - residual_count: Interval::new(self.0[end].start, interval.end).run_len(), + (Ok(first), Ok(end)) => { + if self.0[first].start == interval.start && self.0[end].end == interval.end { + let removed = self.0[first..=end].iter().map(|iv| iv.run_len()).sum(); + self.0.drain(first..=end); + removed + } else if self.0[first].start == interval.start { + if first == end { + self.0[end].start = interval.end + 1; + return interval.run_len(); } + let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum::() + + Interval::new(self.0[end].start, interval.end).run_len(); + self.0[end].start = interval.end + 1; + self.0.drain(first..end); + removed } else if self.0[end].end == interval.end { - IntervalRange { - drain_range: begin + 1..end + 1, - begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), - end_value: None, - residual_count: Interval::new(interval.start, self.0[begin].end).run_len(), - } - } else if begin == end { - let new_interval = Interval::new(interval.end + 1, self.0[begin].end); - self.0.insert(begin + 1, new_interval); - IntervalRange { - drain_range: begin..end, - begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), - end_value: None, - residual_count: interval.run_len(), + if first == end { + self.0[end].end = interval.start - 1; + return interval.run_len(); } + let removed = + self.0[first + 1..=end].iter().map(|iv| iv.run_len()).sum::() + + Interval::new(interval.start, self.0[first].end).run_len(); + 
self.0[first].end = interval.start - 1; + self.0.drain(first + 1..=end); + removed } else { - IntervalRange { - drain_range: begin + 1..end, - begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), - end_value: Some(IdValue { index: end, value: interval.end + 1 }), - residual_count: Interval::new(self.0[end].start, interval.end).run_len() - + Interval::new(interval.start, self.0[begin].end).run_len(), + if first == end { + let old_end = self.0[first].end; + self.0[first].end = interval.start - 1; + self.0.insert(first + 1, Interval::new(interval.end + 1, old_end)); + return interval.run_len(); } + + let removed = self.0[first + 1..end].iter().map(|iv| iv.run_len()).sum::() + + Interval::new(interval.start, self.0[first].end).run_len() + + Interval::new(self.0[end].start, interval.end).run_len(); + self.0[first].end = interval.start - 1; + self.0[end].start = interval.end + 1; + removed } } - // start index is contained in an interval, - // end index is not - (Ok(begin), Err(to_insert)) => { - let end = if to_insert == self.0.len() { self.0.len() - 1 } else { to_insert }; - if self.0[begin].start == interval.start { - IntervalRange { - drain_range: begin..to_insert, - begin_value: None, - end_value: None, - residual_count: 0, - } + // start index is in the interval store, end index is not + (Ok(first), Err(end)) => { + debug_assert!(first < end); + if self.0[first].start == interval.start { + let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum(); + self.0.drain(first..end); + removed } else { - IntervalRange { - drain_range: begin + 1..end + 1, - begin_value: Some(IdValue { index: begin, value: interval.start - 1 }), - end_value: None, - residual_count: Interval::new(interval.start, self.0[begin].end).run_len(), - } + let removed = self.0[first + 1..end].iter().map(|iv| iv.run_len()).sum::() + + Interval::new(interval.start, self.0[first].end).run_len(); + self.0[first].end = interval.start - 1; + self.0.drain(first + 1..end); + 
removed } } - // there is no interval that contains the start index, - // there is an interval that contains the end index, - (Err(begin), Ok(end)) => { + // end index is in the interval store, start index is not + (Err(first), Ok(end)) => { if self.0[end].end == interval.end { - IntervalRange { - drain_range: begin..end + 1, - begin_value: None, - end_value: None, - residual_count: 0, - } + let removed = self.0[first..=end].iter().map(|iv| iv.run_len()).sum(); + self.0.drain(first..=end); + removed } else { - IntervalRange { - drain_range: begin..end, - begin_value: None, - end_value: Some(IdValue { index: end, value: interval.end + 1 }), - residual_count: Interval::new(self.0[end].start, interval.end).run_len(), - } + let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum::() + + Interval::new(self.0[end].start, interval.end).run_len(); + self.0[end].start = interval.end + 1; + self.0.drain(first..end); + removed } } - (Err(begin), Err(to_end)) => { - let end = if to_end == self.0.len() { self.0.len() - 1 } else { to_end }; - IntervalRange { - drain_range: begin..end + 1, - begin_value: None, - end_value: None, - residual_count: 0, + // both indices are not contained in the interval store + (Err(first), Err(end)) => { + if first == end { + return 0; } + let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum(); + self.0.drain(first..end); + removed } - }; - let count = if todo.drain_range.is_empty() { - 0 - } else { - self.0[todo.drain_range.clone()].iter().map(|f| f.run_len()).sum::() - } + todo.residual_count; - if let Some(IdValue { index, value }) = todo.begin_value { - self.0[index].end = value; } - if let Some(IdValue { index, value }) = todo.end_value { - self.0[index].start = value; - } - if !todo.drain_range.is_empty() { - self.0.drain(todo.drain_range); - } - count } pub fn remove_smallest(&mut self, mut amount: u64) { @@ -607,7 +588,7 @@ impl IntervalStore { pub fn select(&self, mut n: u16) -> Option { for iv in self.0.iter() { - 
let run_len = (iv.run_len() as u16); + let run_len = iv.run_len() as u16; if run_len <= n { n -= iv.run_len() as u16; } else { @@ -636,10 +617,6 @@ impl IntervalStore { pub(crate) fn iter_intervals(&self) -> core::slice::Iter { self.0.iter() } - - pub(crate) fn iter_intervals_mut(&mut self) -> core::slice::IterMut { - self.0.iter_mut() - } } impl BitOrAssign for IntervalStore { @@ -656,7 +633,7 @@ impl BitOrAssign for IntervalStore { } impl BitOrAssign<&ArrayStore> for IntervalStore { - fn bitor_assign(&mut self, mut rhs: &ArrayStore) { + fn bitor_assign(&mut self, rhs: &ArrayStore) { for &i in rhs.iter() { self.insert(i); } @@ -664,7 +641,7 @@ impl BitOrAssign<&ArrayStore> for IntervalStore { } impl BitOrAssign<&Self> for IntervalStore { - fn bitor_assign(&mut self, mut rhs: &Self) { + fn bitor_assign(&mut self, rhs: &Self) { for iv in rhs.iter_intervals() { self.insert_range(iv.start..=iv.end); } @@ -715,6 +692,18 @@ impl BitXor for &IntervalStore { } } +impl BitXorAssign<&ArrayStore> for IntervalStore { + fn bitxor_assign(&mut self, rhs: &ArrayStore) { + rhs.iter().for_each(|&f| { + if self.contains(f) { + self.remove(f); + } else { + self.insert(f); + } + }) + } +} + pub(crate) type RunIterOwned = RunIter>; pub(crate) type RunIterBorrowed<'a> = RunIter>; @@ -962,14 +951,6 @@ impl Interval { Interval { start, end } } - pub fn contains_index(&self, value: u16) -> bool { - self.start <= value && value <= self.end - } - - pub fn contains_interval(&self, interval: &Interval) -> bool { - self.start <= interval.start && interval.end <= self.end - } - pub fn overlaps(&self, interval: &Interval) -> bool { interval.start <= self.end && self.start <= interval.end } @@ -1063,6 +1044,22 @@ mod tests { ) } + #[test] + fn insert_consecutive_end_with_extra() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 65079, end: 65079 }, + Interval { start: 65179, end: 65179 }, + ]); + assert!(interval_store.insert(65080)); + assert_eq!( + interval_store, + 
IntervalStore(alloc::vec![ + Interval { start: 65079, end: 65080 }, + Interval { start: 65179, end: 65179 }, + ]) + ) + } + #[test] fn insert_range_empty() { let mut interval_store = IntervalStore(alloc::vec![]); @@ -1232,6 +1229,13 @@ mod tests { assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(2, 700)])); } + #[test] + fn insert_range_pin_1() { + let mut interval_store = IntervalStore(alloc::vec![Interval::new(65079, 65079)]); + assert_eq!(interval_store.insert_range(65080..=65080), 1); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(65079, 65080)])); + } + #[test] fn push_empty() { let mut interval_store = IntervalStore(alloc::vec![]); @@ -1537,6 +1541,24 @@ mod tests { ); } + #[test] + fn remove_range_nothing() { + let mut interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.remove_range(50000..=60000), 0); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_with_extra() { + let mut interval_store = + IntervalStore(alloc::vec![Interval::new(38161, 38162), Interval::new(40562, 40562),]); + assert_eq!(interval_store.remove_range(38162..=38163), 1); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new(38161, 38161), Interval::new(40562, 40562),]) + ); + } + #[test] fn remove_smallest_one() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); @@ -1844,11 +1866,67 @@ mod tests { #[test] fn intersection_len_bitmap_2() { let mut bitmap_store = BitmapStore::new(); - for to_set in 0..200 { + for to_set in 0..=200 { bitmap_store.insert(to_set); } let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); - let intersect_len = 200 - 20; + let intersect_len = Interval::new(20, 200).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_3() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 
0..=20000 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 20, end: 6000 }, + Interval { start: 5000, end: 33333 }, + ]); + let intersect_len = + Interval::new(20, 6000).run_len() + Interval::new(5000, 20000).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_4() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20000 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 64, end: 6400 }, + Interval { start: 7680, end: 64000 }, + ]); + let intersect_len = + Interval::new(64, 6400).run_len() + Interval::new(7680, 20000).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_5() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20005 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 64, end: 6400 }, + Interval { start: 7680, end: 64000 }, + ]); + let intersect_len = + Interval::new(64, 6400).run_len() + Interval::new(7680, 20005).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_6() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20005 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 64, end: 64 },]); + let intersect_len = Interval::new(64, 64).run_len(); assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1997,13 +2075,13 @@ mod tests { #[test] fn intersection() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval::new(0, 0), Interval::new(2, 11), Interval::new(5000, 7000), Interval::new(8000, 10000), ]); - let mut 
interval_store_2 = IntervalStore(alloc::vec![ + let interval_store_2 = IntervalStore(alloc::vec![ Interval::new(0, 0), Interval::new(5, 50), Interval::new(4000, 10000), @@ -2016,7 +2094,8 @@ mod tests { Interval::new(5000, 7000), Interval::new(8000, 10000), ]) - ) + ); + assert_eq!(&interval_store_1 & &interval_store_1, interval_store_1); } #[test] @@ -2027,7 +2106,7 @@ mod tests { Interval::new(5000, 7000), Interval::new(8000, 11000), ]); - let mut interval_store_2 = IntervalStore(alloc::vec![ + let interval_store_2 = IntervalStore(alloc::vec![ Interval::new(0, 0), Interval::new(5, 50), Interval::new(4000, 10000), @@ -2041,14 +2120,14 @@ mod tests { #[test] fn symmetric_difference_0() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval::new(0, 0), Interval::new(2, 11), Interval::new(5000, 7000), Interval::new(8000, 11000), Interval::new(40000, 50000), ]); - let mut interval_store_2 = IntervalStore(alloc::vec![ + let interval_store_2 = IntervalStore(alloc::vec![ Interval::new(0, 0), Interval::new(5, 50), Interval::new(4000, 10000), @@ -2068,8 +2147,8 @@ mod tests { #[test] fn symmetric_difference_1() { - let mut interval_store_1 = IntervalStore(alloc::vec![Interval::new(0, 50),]); - let mut interval_store_2 = IntervalStore(alloc::vec![Interval::new(100, 200),]); + let interval_store_1 = IntervalStore(alloc::vec![Interval::new(0, 50),]); + let interval_store_2 = IntervalStore(alloc::vec![Interval::new(100, 200),]); assert_eq!( &interval_store_1 ^ &interval_store_2, IntervalStore(alloc::vec![Interval::new(0, 50), Interval::new(100, 200),]) @@ -2078,12 +2157,12 @@ mod tests { #[test] fn symmetric_difference_2() { - let mut interval_store_1 = IntervalStore(alloc::vec![ + let interval_store_1 = IntervalStore(alloc::vec![ Interval::new(0, 50), Interval::new(500, 600), Interval::new(800, 1000), ]); - let mut interval_store_2 = IntervalStore(alloc::vec![Interval::new(0, 6000),]); + let interval_store_2 = 
IntervalStore(alloc::vec![Interval::new(0, 6000),]); assert_eq!( &interval_store_1 ^ &interval_store_2, IntervalStore(alloc::vec![ diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 3779baecd..42979f212 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -16,6 +16,8 @@ pub(crate) use self::array_store::ArrayStore; pub use self::bitmap_store::{BitmapIter, BitmapStore}; pub(crate) use self::interval_store::Interval; pub(crate) use interval_store::{IntervalStore, RunIterBorrowed, RunIterOwned}; +#[cfg(feature = "std")] +pub(crate) use interval_store::{RUN_ELEMENT_BYTES, RUN_NUM_BYTES}; use crate::bitmap::container::ARRAY_LIMIT; @@ -54,7 +56,7 @@ impl Store { } pub fn full() -> Store { - Store::Bitmap(BitmapStore::full()) + Store::Run(IntervalStore::full()) } pub fn from_lsb0_bytes(bytes: &[u8], byte_offset: usize) -> Option { @@ -111,7 +113,6 @@ impl Store { /// Push `index` at the end of the store only if `index` is the new max. /// /// Returns whether `index` was effectively pushed. 
- #[allow(clippy::todo)] pub fn push(&mut self, index: u16) -> bool { match self { Array(vec) => vec.push(index), @@ -132,8 +133,9 @@ impl Store { Array(vec) => vec.push_unchecked(index), Bitmap(bits) => bits.push_unchecked(index), Run(runs) => { - // push unchecked for intervals doesn't make since we have to check anyways to - // merge ends with the index if these are consecutive + // push unchecked for intervals doesn't make sense since we have to check anyways to + // intervals and such when the index is consecutive + debug_assert!(runs.max().map(|f| f < index).unwrap_or(true)); runs.push(index); } } @@ -677,7 +679,9 @@ impl SubAssign<&Store> for Store { }); } (Array(array), Run(runs)) => { - array.retain(|f| !runs.contains(f)); + runs.iter_intervals().for_each(|iv| { + array.remove_range(iv.start..=iv.end); + }); } (this @ Run(..), Bitmap(bitmap)) => { let Run(runs) = &this else { unreachable!() }; @@ -732,18 +736,10 @@ impl BitXorAssign for Store { (Run(runs1), Run(runs2)) => { *runs1 = BitXor::bitxor(&*runs1, &*runs2); } - (Run(runs1), Array(array)) => array.iter().for_each(|&f| { - if runs1.contains(f) { - runs1.remove(f); - } - }), + (Run(runs1), Array(array)) => BitXorAssign::bitxor_assign(runs1, array), (this @ Array(..), Run(runs1)) => { let Array(array) = &this else { unreachable!() }; - array.iter().for_each(|&f| { - if runs1.contains(f) { - runs1.remove(f); - } - }); + BitXorAssign::bitxor_assign(runs1, array); *this = rhs; } (Bitmap(bitmap), Run(runs)) => { @@ -779,19 +775,11 @@ impl BitXorAssign<&Store> for Store { (Run(runs1), Run(runs2)) => { *runs1 = BitXor::bitxor(&*runs1, runs2); } - (Run(runs1), Array(array)) => array.iter().for_each(|&f| { - if runs1.contains(f) { - runs1.remove(f); - } - }), + (Run(runs1), Array(array)) => BitXorAssign::bitxor_assign(runs1, array), (this @ Array(..), Run(runs1)) => { let Array(array) = &this else { unreachable!() }; let mut runs1 = runs1.clone(); - array.iter().for_each(|&f| { - if runs1.contains(f) { - 
runs1.remove(f); - } - }); + BitXorAssign::bitxor_assign(&mut runs1, array); *this = Run(runs1); } (Bitmap(bitmap), Run(runs)) => { @@ -839,8 +827,14 @@ impl PartialEq for Store { bits1.len() == bits2.len() && bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) } - // TODO: Run containers should be checked against other types of containers right? - (Run(intervals1), Run(ref intervals2)) => intervals1 == intervals2, + (Run(intervals1), Run(intervals2)) => intervals1 == intervals2, + (Run(run), Array(array)) | (Array(array), Run(run)) => { + run.len() == array.len() && array.iter().all(|&i| run.contains(i)) + } + (Run(run), Bitmap(bitmap)) | (Bitmap(bitmap), Run(run)) => { + run.len() == bitmap.len() + && run.iter_intervals().all(|&iv| bitmap.contains_range(iv.start..=iv.end)) + } _ => false, } } @@ -869,7 +863,6 @@ impl Iter<'_> { } } - #[allow(clippy::todo)] pub(crate) fn advance_back_to(&mut self, n: u16) { match self { Iter::Array(inner) => { From b2d2657a7342ec4761adfe15d77455f1a6589d18 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 20 Apr 2025 12:28:31 +0200 Subject: [PATCH 58/83] feat: run container statistics --- roaring/src/bitmap/statistics.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/roaring/src/bitmap/statistics.rs b/roaring/src/bitmap/statistics.rs index f62762dd1..41ec8d7a7 100644 --- a/roaring/src/bitmap/statistics.rs +++ b/roaring/src/bitmap/statistics.rs @@ -65,10 +65,13 @@ impl RoaringBitmap { let mut n_containers = 0; let mut n_array_containers = 0; let mut n_bitset_containers = 0; + let mut n_run_containers = 0; let mut n_values_array_containers = 0; let mut n_values_bitset_containers = 0; + let mut n_values_run_containers = 0; let mut n_bytes_array_containers = 0; let mut n_bytes_bitset_containers = 0; + let mut n_bytes_run_containers = 0; let mut cardinality = 0; for Container { key: _, store } in &self.containers { @@ -85,7 +88,12 @@ impl RoaringBitmap { n_bytes_bitset_containers += 
bitmap.capacity() as u64; n_bitset_containers += 1; } - Store::Run(..) => todo!(), + Store::Run(runs) => { + cardinality += runs.len(); + n_values_run_containers += runs.len() as u32; + n_bytes_run_containers += runs.byte_size() as u64; + n_run_containers += 1; + } } n_containers += 1; } @@ -93,13 +101,13 @@ impl RoaringBitmap { Statistics { n_containers, n_array_containers, - n_run_containers: 0, + n_run_containers, n_bitset_containers, n_values_array_containers, - n_values_run_containers: 0, + n_values_run_containers, n_values_bitset_containers, n_bytes_array_containers, - n_bytes_run_containers: 0, + n_bytes_run_containers, n_bytes_bitset_containers, max_value: self.max(), min_value: self.min(), From b3cd52f4f8a90e1691531ce39742f4c8d813bc12 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 20 Apr 2025 22:25:18 -0400 Subject: [PATCH 59/83] attempt to run fuzzer in CI --- .github/workflows/test.yml | 47 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 077dcd32b..f47e0140c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -68,11 +68,18 @@ jobs: features: simd env: RUSTFLAGS: "-C target-cpu=native -C opt-level=3" + ROARINGRS_BENCH_OFFLINE: "true" steps: - name: Checkout roaring-rs uses: actions/checkout@v4 + - name: Checkout benchmark datasets + uses: actions/checkout@v4 + with: + repository: "RoaringBitmap/real-roaring-datasets" + path: "benchmarks/real-roaring-datasets" + - name: Initialize rust toolchain uses: dtolnay/rust-toolchain@master with: @@ -82,6 +89,10 @@ jobs: if: matrix.features == 'default' run: cargo test -p roaring --features serde + - name: Test Benches + if: matrix.rust != '1.71.1' && matrix.features == 'default' + run: cargo test -p benchmarks --benches + - name: Test no default features if: matrix.features == 'no-std' run: cargo test -p roaring --no-default-features @@ -113,37 +124,31 @@ jobs: - 
name: Test bit endian run: cargo miri test --target s390x-unknown-linux-gnu -p roaring --lib -- bitmap::serialization::test::test_from_lsb0_bytes - bench: + fuzz: runs-on: ubuntu-latest needs: build - strategy: - matrix: - rust: - - stable - - nightly - features: - - default - include: - - rust: nightly - features: simd env: RUSTFLAGS: "-C target-cpu=native -C opt-level=3" - ROARINGRS_BENCH_OFFLINE: "true" steps: - name: Checkout roaring-rs uses: actions/checkout@v4 - - name: Checkout benchmark datasets - uses: actions/checkout@v4 - with: - repository: "RoaringBitmap/real-roaring-datasets" - path: "benchmarks/real-roaring-datasets" - - name: Initialize rust toolchain uses: dtolnay/rust-toolchain@master with: - toolchain: ${{ matrix.rust }} + toolchain: nightly + + - name: Install cargo fuzz + run: cargo install cargo-fuzz + + - name: Setup Cache for corpus and artifacts + uses: actions/cache@v4 + with: + key: always + path: | + fuzz/artifacts + fuzz/corpus - - name: Bench - run: cargo bench --features "${{ matrix.features }}" + - name: Run Fuzzer vs croaring for 30 minutes + run: cargo fuzz run against_croaring -s none -- -timeout=5 -max_total_time=1800 From 9ba4f8f068af4bdf14549e5da3e4442b708ae5f1 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 20 Apr 2025 23:04:57 -0400 Subject: [PATCH 60/83] add caching to CI --- .github/workflows/test.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f47e0140c..7313a5cbe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,6 +33,9 @@ jobs: toolchain: ${{ matrix.rust }} components: rustfmt, clippy + - name: Caching + uses: Swatinem/rust-cache@v2 + - name: Check # clippy will also do a build check # so we don't need to run `cargo check` or `cargo build` @@ -80,6 +83,9 @@ jobs: repository: "RoaringBitmap/real-roaring-datasets" path: "benchmarks/real-roaring-datasets" + - name: Caching + uses: Swatinem/rust-cache@v2 
+ - name: Initialize rust toolchain uses: dtolnay/rust-toolchain@master with: @@ -118,6 +124,9 @@ jobs: toolchain: nightly components: miri + - name: Caching + uses: Swatinem/rust-cache@v2 + - name: Setup miri run: cargo miri setup @@ -139,6 +148,9 @@ jobs: with: toolchain: nightly + - name: Caching + uses: Swatinem/rust-cache@v2 + - name: Install cargo fuzz run: cargo install cargo-fuzz From fd4c8292b8e593161eb46f149bd7e8bf6686a40b Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 25 Apr 2025 19:38:19 +0200 Subject: [PATCH 61/83] fix: remove unused method --- roaring/src/bitmap/store/bitmap_store.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index f90cf596e..b29541ea0 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -24,10 +24,6 @@ impl BitmapStore { BitmapStore { len: 0, bits: Box::new([0; BITMAP_LENGTH]) } } - pub fn full() -> BitmapStore { - BitmapStore { len: (BITMAP_LENGTH as u64) * 64, bits: Box::new([u64::MAX; BITMAP_LENGTH]) } - } - pub fn capacity(&self) -> usize { BITMAP_LENGTH * u64::BITS as usize } From 0745eda04ad0e1cb51244c2b334a8558a378c6c4 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 25 Apr 2025 19:48:08 +0200 Subject: [PATCH 62/83] fix: improve debug_assertions, remove unused `#[allow(clippy::todo)]` + others --- roaring/src/bitmap/container.rs | 2 +- roaring/src/bitmap/statistics.rs | 1 - roaring/src/bitmap/store/bitmap_store.rs | 2 +- roaring/src/bitmap/store/interval_store.rs | 10 ++++++---- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 6708f2ac8..f56c10104 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -207,7 +207,7 @@ impl Container { self.ensure_correct_store() } } - Store::Run(..) => self.ensure_correct_store(), + Store::Run(..) 
=> false, } } } diff --git a/roaring/src/bitmap/statistics.rs b/roaring/src/bitmap/statistics.rs index 41ec8d7a7..2869c42fd 100644 --- a/roaring/src/bitmap/statistics.rs +++ b/roaring/src/bitmap/statistics.rs @@ -60,7 +60,6 @@ impl RoaringBitmap { /// assert_eq!(statistics.min_value, Some(1)); /// assert_eq!(statistics.cardinality, 99); /// ``` - #[allow(clippy::todo)] pub fn statistics(&self) -> Statistics { let mut n_containers = 0; let mut n_array_containers = 0; diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index b29541ea0..e84b70cd2 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -341,7 +341,7 @@ impl BitmapStore { cur_bit &= u64::MAX << start_bit; } if i == end_id - start_id { - cur_bit &= !(u64::MAX.checked_shl(end_bit as u32 + 1).unwrap_or(0)); + cur_bit &= u64::MAX >> (64 - end_bit - 1); } amount += u64::from(cur_bit.count_ones()); } diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index afa1aefca..2f672bc65 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -32,13 +32,14 @@ impl IntervalStore { RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * self.run_amount() as usize) } + #[cfg(feature = "std")] pub fn from_vec_unchecked(vec: Vec) -> Self { #[cfg(debug_assertions)] { - for (i, cur_interval) in vec.iter().enumerate() { - if let Some(next) = vec.get(i + 1) { - assert!(cur_interval.end < next.start); - } + for win in vec.windows(2) { + let [cur_interval, next] = [win[0], win[1]]; + assert!(cur_interval.end + 1 < next.start); + assert!(cur_interval.start <= cur_interval.end); } } Self(vec) @@ -46,6 +47,7 @@ impl IntervalStore { pub(crate) fn push_interval_unchecked(&mut self, interval: Interval) { debug_assert!(self.0.last().map(|f| f.end < interval.start).unwrap_or(true)); + debug_assert!(interval.start <= interval.end); self.0.push(interval) } From 
41c7f083ec32c3e31ab0f1e7acb13b9beec2656c Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 25 Apr 2025 19:50:06 +0200 Subject: [PATCH 63/83] fix: deobfuscate `IntervalStore::insert` Suggestion from @Dr-Emann in #320. Co-authored-by: Zachary Dremann --- roaring/src/bitmap/serialization.rs | 21 +++--- roaring/src/bitmap/store/interval_store.rs | 86 ++++++++++------------ 2 files changed, 49 insertions(+), 58 deletions(-) diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 836f04dc2..4c0601119 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -291,16 +291,19 @@ impl RoaringBitmap { let mut last_end = None::; let store = IntervalStore::from_vec_unchecked( - intervals.into_iter().map(|[s, len]| -> Result { - let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; - if let Some(last_end) = last_end.replace(end) { - if s <= last_end.saturating_add(1) { - // Range overlaps or would be contiguous with the previous range - return Err(io::ErrorKind::InvalidData); + intervals + .into_iter() + .map(|[s, len]| -> Result { + let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; + if let Some(last_end) = last_end.replace(end) { + if s <= last_end.saturating_add(1) { + // Range overlaps or would be contiguous with the previous range + return Err(io::ErrorKind::InvalidData); + } } - } - Ok(Interval::new(s, end)) - }).collect::>()? 
+ Ok(Interval::new(s, end)) + }) + .collect::>()?, ); Store::Run(store) } else if cardinality <= ARRAY_LIMIT { diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 2f672bc65..4a67fe5eb 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -53,57 +53,45 @@ impl IntervalStore { #[inline] pub fn insert(&mut self, index: u16) -> bool { - self.0 - .binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()) - .map_err(|loc| { - // loc may be equal to self.0.len() - let loc_or_last = if loc < self.0.len() { - Some(loc) - } else if !self.0.is_empty() { - Some(self.0.len() - 1) - } else { - None - }; - // There exists an interval at or before the location we should insert - if let Some(loc_or_last) = loc_or_last { - if Some(index) == self.0[loc_or_last].end.checked_add(1) { - // index immediately follows an interval - // Checking for sandwiched intervals is not needed because of binary search loc - // i.e. when the index is sandwiched between two intervals we always - // get the right most interval, which puts us in the different if - self.0[loc_or_last].end += 1; - } else if index - .checked_add(1) - .map(|f| f == self.0[loc_or_last].start) - .unwrap_or(false) - { - // checked_add required for if u16::MAX is added - // Value immediately precedes interval - if loc > 0 && self.0[loc - 1].end == index - 1 { - // Merge with preceding interval - self.0[loc - 1].end = self.0[loc].end; - self.0.remove(loc); - return; - } - self.0[loc].start -= 1; - } else if loc_or_last - .checked_sub(1) - .map(|f| self.0[f].end == index - 1) - .unwrap_or(false) - { - // We are sandwiched between 2 intervals, but the previous interval is - // continuous with the index. 
If the loc - self.0[loc_or_last - 1].end = index; - } else { - // The value stands alone - self.0.insert(loc, Interval::new(index, index)); + // All intervals before idx are _fully_ before our index (iv.end < index) + let idx = self.0.partition_point(|iv| iv.end < index); + let (before, maybe_after) = self.0.split_at_mut(idx); + if let Some(next) = maybe_after.first_mut() { + // Check if the next interval actually already contains our index + // Because of partition_point, we know already know end >= index + if next.start <= index { + // index is already in the interval + return false; + } + // `next` is instead the first interval _after_ our index, + // check if we should grow that interval down by one + // Because we know from above that next.start > index, adding 1 is safe + if next.start == index + 1 { + next.start -= 1; + + // Check if the previous interval will now be continuous with this interval + if let Some(prev) = before.last_mut() { + // From the partition point: prev.end < index, subtracting 1 is safe + if prev.end == index - 1 { + prev.end = next.end; + self.0.remove(idx); } - } else { - // there does not exist a single interval - self.0.insert(loc, Interval::new(index, index)); } - }) - .is_err() + return true; + } + } + if let Some(prev) = before.last_mut() { + // Because we know from the partition point that prev.end < index, adding 1 is safe + if prev.end + 1 == index { + // Merge with previous interval + prev.end += 1; + // If we had needed to merge with the next interval, we would have handled that in + // the previous if statement, so we're done here + return true; + } + } + self.0.insert(idx, Interval::new(index, index)); + true } #[inline] From b074682e7f2f01973a67e528080499db22f32106 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 25 Apr 2025 20:28:55 +0200 Subject: [PATCH 64/83] fix: deobfuscate `IntervalStore::insert_range` Suggestion from @Dr-Emann in #320. 
Co-authored-by: Zachary Dremann --- roaring/src/bitmap/store/interval_store.rs | 175 +++++---------------- 1 file changed, 38 insertions(+), 137 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 4a67fe5eb..d1799926a 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -99,146 +99,47 @@ impl IntervalStore { if range.is_empty() { return 0; } - let interval = Interval { start: *range.start(), end: *range.end() }; - let first_interval = - self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); - let end_interval = - self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); - match (first_interval, end_interval) { - // both start and end index are contained in intervals - (Ok(begin), Ok(end)) => { - if begin == end { - return 0; - } - let drained_amount: u64 = self.0[begin + 1..end].iter().map(|f| f.run_len()).sum(); - let amount = Interval::new(self.0[begin].end + 1, self.0[end].start - 1).run_len() - - drained_amount; - self.0[begin].end = self.0[end].end; - self.0.drain(begin + 1..=end); - amount - } - // start index is contained in an interval, - // end index is not - (Ok(begin), Err(to_insert)) => { - let (new_end, drain_id) = - // if there is a next interval, check if these intervals are consecutive - if to_insert < self.0.len() && self.0[to_insert].start - 1 == interval.end { - // The intervals are consecutive! 
Adjust new end of interval, and how far - // we drain - (self.0[to_insert].end, to_insert + 1) - } else { - (interval.end, to_insert) - }; - let drained_amount: u64 = - self.0[begin + 1..to_insert].iter().map(|f| f.run_len()).sum(); - let amount = - Interval::new(self.0[begin].end + 1, interval.end).run_len() - drained_amount; - self.0[begin].end = new_end; - self.0.drain(begin + 1..drain_id); - amount - } - // there is no interval that contains the start index, - // there is an interval that contains the end index, - (Err(to_begin), Ok(end)) => { - let consecutive_begin = - to_begin > 0 && self.0[to_begin - 1].end + 1 == interval.start; - let (drain_id, interval_id) = - // check if begin interval is consecutive with new interval - if consecutive_begin { - // The intervals are consecutive! Adjust how much we remove, and how - // which interval we end up keeping - (end + 1, to_begin - 1) - } else { - (end, end) - }; - let drained_amount: u64 = self.0[to_begin..end].iter().map(|f| f.run_len()).sum(); - let amount = - Interval::new(interval.start, self.0[end].start - 1).run_len() - drained_amount; - if consecutive_begin { - self.0[interval_id].end = self.0[end].end; - } else { - self.0[interval_id].start = interval.start; - } - self.0.drain(to_begin..drain_id); - amount + let mut interval = Interval { start: *range.start(), end: *range.end() }; + // All intervals in `start_idx..end_idx` are fully contained in our interval. 
+ let mut start_idx = self.0.partition_point(|iv| iv.start < interval.start); + let mut end_idx = + self.0[start_idx..].partition_point(|iv| iv.end <= interval.end) + start_idx; + + if let Some(prev) = self.0[..start_idx].last() { + // If the previous interval contains our start, or would be contiguous with us, expand + // to include it + // from partition point, we know prev.start < interval.start + if prev.end >= interval.start - 1 { + // We need to merge with the previous interval + interval.start = prev.start; + interval.end = interval.end.max(prev.end); + start_idx -= 1; } - (Err(to_begin), Err(to_end)) => { - if self.0.is_empty() { - self.0.insert(to_begin, interval); - return interval.run_len(); - } - let consec_begin = to_begin > 0 && self.0[to_begin - 1].end + 1 == interval.start; - let conces_end = to_end < self.0.len() - && self.0[to_end] - .start - .checked_sub(1) - .map(|f| f == interval.end) - .unwrap_or(false); - if !consec_begin && !conces_end && to_begin == to_end { - // an arbitrary range with no consecutive intervals, unable to reuse existing interval - self.0.insert(to_begin, interval); - return interval.run_len(); - } - let (drain_id_begin, drain_id_end, interval_id) = { - if conces_end && consec_begin { - // Both intervals are consecutive! 
Adjust how much we remove, and - // which interval we end up keeping - // - // keep begin interval and remove end - // NOTE: to_begin - 1 since the interval we actually care about is one to - // the left e.g.: - // [3..=5, 9..=20] add 6..=8 -> - // to_begin = 1 - // to_end = 1 - (to_begin, to_end + 1, to_begin - 1) - } else if consec_begin { - // Remove end interval, keep begin to overwrite - // - // NOTE: to_begin - 1 since the interval we actually care about is one to - // the left e.g.: - // [3..=5] add 6..=8 -> - // to_begin = 1 - // to_end = 1 - (to_begin, to_end, to_begin - 1) - } else if conces_end { - // Remove begin interval, keep end to overwrite - // - // NOTE: no -1 since the interval we actually care about is one to - // the left e.g.: - // [8..=10] add 6..=7 -> - // to_begin = 0 - // to_end = 1 - (to_begin, to_end, to_end) - } else { - // keep end interval to overwrite if it exists, - // otherwise overwrite begin interval - ( - if to_end != self.0.len() { to_begin + 1 } else { to_begin }, - to_end.min(self.0.len() - 1), - if to_end != self.0.len() { - to_begin - } else { - to_end.min(self.0.len() - 1) - }, - ) - } - }; - let drained_amount: u64 = - self.0[to_begin..to_end].iter().map(|f| f.run_len()).sum(); - let end_amount_interval = - if conces_end { self.0[to_end].start - 1 } else { interval.end }; - let amount = - Interval::new(interval.start, end_amount_interval).run_len() - drained_amount; - let end_interval = if conces_end { self.0[to_end].end } else { interval.end }; - - self.0[interval_id].end = end_interval; - if !consec_begin { - self.0[interval_id].start = interval.start; - } - self.0.drain(drain_id_begin..drain_id_end); - amount + } + if let Some(next) = self.0.get(end_idx) { + // from partition point, we know next.end > interval.end + if next.start <= interval.end + 1 { + // We need to merge with the next interval + interval.end = next.end; + interval.start = interval.start.min(next.start); + end_idx += 1; } } + + let mut added_count = 
interval.run_len(); + // Replace the first interval to be replaced with an interval covering the new range + // and remove the rest + // Otherwise, just insert a new interval + if let [first, rest @ ..] = &mut self.0[start_idx..end_idx] { + added_count -= first.run_len(); + added_count -= rest.iter().map(|iv| iv.run_len()).sum::(); + *first = interval; + self.0.drain(start_idx + 1..end_idx); + } else { + // No intervals to merge with, we can just insert + self.0.insert(start_idx, interval); + } + added_count } pub fn push(&mut self, index: u16) -> bool { From 53c9aa2a28ce9952f35c750d4301199ae05ca063 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Fri, 25 Apr 2025 22:04:58 +0200 Subject: [PATCH 65/83] fix: deobfuscate `IntervalStore::insert_range` Suggestion from @Dr-Emann in #320. --- roaring/src/bitmap/store/interval_store.rs | 125 +++++++-------------- 1 file changed, 41 insertions(+), 84 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index d1799926a..58bc0adbd 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -191,94 +191,51 @@ impl IntervalStore { return 0; } - let interval = Interval::new(*range.start(), *range.end()); - let first_interval = - self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); - let end_interval = - self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); - match (first_interval, end_interval) { - // both start and end index are contained in intervals - (Ok(first), Ok(end)) => { - if self.0[first].start == interval.start && self.0[end].end == interval.end { - let removed = self.0[first..=end].iter().map(|iv| iv.run_len()).sum(); - self.0.drain(first..=end); - removed - } else if self.0[first].start == interval.start { - if first == end { - self.0[end].start = interval.end + 1; - return interval.run_len(); - } - let removed = self.0[first..end].iter().map(|iv| 
iv.run_len()).sum::() - + Interval::new(self.0[end].start, interval.end).run_len(); - self.0[end].start = interval.end + 1; - self.0.drain(first..end); - removed - } else if self.0[end].end == interval.end { - if first == end { - self.0[end].end = interval.start - 1; - return interval.run_len(); - } - let removed = - self.0[first + 1..=end].iter().map(|iv| iv.run_len()).sum::() - + Interval::new(interval.start, self.0[first].end).run_len(); - self.0[first].end = interval.start - 1; - self.0.drain(first + 1..=end); - removed - } else { - if first == end { - let old_end = self.0[first].end; - self.0[first].end = interval.start - 1; - self.0.insert(first + 1, Interval::new(interval.end + 1, old_end)); - return interval.run_len(); - } - - let removed = self.0[first + 1..end].iter().map(|iv| iv.run_len()).sum::() - + Interval::new(interval.start, self.0[first].end).run_len() - + Interval::new(self.0[end].start, interval.end).run_len(); - self.0[first].end = interval.start - 1; - self.0[end].start = interval.end + 1; - removed - } - } - // start index is in the interval store, end index is not - (Ok(first), Err(end)) => { - debug_assert!(first < end); - if self.0[first].start == interval.start { - let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum(); - self.0.drain(first..end); - removed - } else { - let removed = self.0[first + 1..end].iter().map(|iv| iv.run_len()).sum::() - + Interval::new(interval.start, self.0[first].end).run_len(); - self.0[first].end = interval.start - 1; - self.0.drain(first + 1..end); - removed - } - } - // end index is in the interval store, start index is not - (Err(first), Ok(end)) => { - if self.0[end].end == interval.end { - let removed = self.0[first..=end].iter().map(|iv| iv.run_len()).sum(); - self.0.drain(first..=end); - removed - } else { - let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum::() - + Interval::new(self.0[end].start, interval.end).run_len(); - self.0[end].start = interval.end + 1; - 
self.0.drain(first..end); - removed + let mut interval = Interval::new(*range.start(), *range.end()); + // All intervals in `start_idx..end_idx` are fully contained in our interval. + let start_idx = self.0.partition_point(|iv| iv.start < interval.start); + let end_idx = self.0[start_idx..].partition_point(|iv| iv.end <= interval.end) + start_idx; + let mut removed_count = 0; + let mut add_needed = false; + if let Some(prev) = self.0[..start_idx].last_mut() { + // If the previous interval contains our start, remove it + // from partition point, we know prev.start < interval.start + if prev.end >= interval.start { + // We need to remove from the previous interval + removed_count += + Interval::new(interval.start, prev.end.min(interval.end)).run_len(); + let new_end = interval.start - 1; + add_needed = prev.end > interval.end; + if add_needed { + interval.start = interval.end + 1; + interval.end = prev.end; } + prev.end = new_end; } - // both indices are not contained in the interval store - (Err(first), Err(end)) => { - if first == end { - return 0; - } - let removed = self.0[first..end].iter().map(|iv| iv.run_len()).sum(); - self.0.drain(first..end); - removed + } + if let Some(next) = self.0.get_mut(end_idx) { + // from partition point, we know next.end > interval.end + if next.start <= interval.end { + // We need to remove everything til interval.end + removed_count += + Interval::new(next.start.max(interval.start), interval.end).run_len(); + next.start = interval.end + 1; } } + + // Replace the first interval to be replaced with an interval covering the new range + // and remove the rest + // Otherwise, just insert a new interval + if let [first, rest @ ..] 
= &mut self.0[start_idx..end_idx] { + removed_count += first.run_len(); + removed_count += rest.iter().map(|iv| iv.run_len()).sum::(); + self.0.drain(start_idx..end_idx); + } else if add_needed { + // We are removing a range contained in a single interval + // As such we must add a new interval + self.0.insert(start_idx, interval); + } + removed_count } pub fn remove_smallest(&mut self, mut amount: u64) { From 6af1dadccdf5a4873a71e13e45880693b8dfdbd7 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 26 Apr 2025 12:44:02 +0200 Subject: [PATCH 66/83] fix ci --- roaring/tests/serialization.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roaring/tests/serialization.rs b/roaring/tests/serialization.rs index be0afcc21..ac862dcc5 100644 --- a/roaring/tests/serialization.rs +++ b/roaring/tests/serialization.rs @@ -565,7 +565,7 @@ fn test_runs() { fn assert_invalid_serialization(serialized: &[u8], msg: &str) { let result = RoaringBitmap::deserialize_from(serialized); if let Ok(res) = result { - panic!("Expected error: {}. Got: {:?}", msg, res); + panic!("Expected error: {msg}. 
Got: {res:?}"); } } From 4496602fe9fc198f9c5a0c84b88e86025ce0b124 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sat, 26 Apr 2025 22:48:13 -0400 Subject: [PATCH 67/83] add more range-related testing to fuzzing --- fuzz/fuzz_targets/arbitrary_ops/mod.rs | 89 ++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 13 deletions(-) diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs index fad72f3d9..405216a71 100644 --- a/fuzz/fuzz_targets/arbitrary_ops/mod.rs +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -51,8 +51,20 @@ pub enum MutableBitmapOperation { RemoveRange(RangeInclusive), Clear, Extend(Vec), + SwapSerialization, + Optimize, + // TODO: not implemented in roaring-rs yet + // RemoveRunCompression, // Probably turn it into a bitmap MakeBitmap { key: u16 }, + // Probably turn it into a Range + MakeRange { key: u16 }, +} + +#[derive(Arbitrary, Debug, Copy, Clone)] +pub enum RangeOperations { + Optimized, + Removed, } #[derive(Arbitrary, Debug)] @@ -67,11 +79,11 @@ pub enum ReadBitmapOperation { Maximum, Rank(Num), Select(Num), - Statistics, + Statistics(RangeOperations), Clone, Debug, - SerializedSize, - Serialize, + SerializedSize(RangeOperations), + Serialize(RangeOperations), } #[derive(Arbitrary, Debug)] @@ -85,7 +97,7 @@ pub enum BitmapBinaryOperation { } impl ReadBitmapOperation { - pub fn apply(&self, x: &mut croaring::Bitmap, y: &roaring::RoaringBitmap) { + pub fn apply(&self, x: &mut croaring::Bitmap, y: &mut roaring::RoaringBitmap) { match *self { ReadBitmapOperation::ContainsRange(ref range) => { let range = range.start().0..=range.end().0; @@ -139,9 +151,18 @@ impl ReadBitmapOperation { let actual = y.select(n); assert_eq!(expected, actual); } - ReadBitmapOperation::Statistics => { - // roaring-rs doesn't support range containers (yet) - x.remove_run_compression(); + ReadBitmapOperation::Statistics(ranges) => { + match ranges { + RangeOperations::Optimized => { + x.run_optimize(); + y.optimize(); + 
+ } + RangeOperations::Removed => { + // TODO: Not implemented in roaring-rs yet + return; + } + } let expected = x.statistics(); let actual = y.statistics(); // Convert to the same statistics struct @@ -174,16 +195,34 @@ impl ReadBitmapOperation { use std::io::Write; write!(std::io::sink(), "{:?}", y).unwrap(); } - ReadBitmapOperation::SerializedSize => { - // roaring-rs doesn't support range containers (yet) - x.remove_run_compression(); + ReadBitmapOperation::SerializedSize(ranges) => { + match ranges { + RangeOperations::Optimized => { + x.run_optimize(); + y.optimize(); + + } + RangeOperations::Removed => { + // TODO: Not implemented in roaring-rs yet + return; + } + } let expected = x.get_serialized_size_in_bytes::(); let actual = y.serialized_size(); assert_eq!(expected, actual); } - ReadBitmapOperation::Serialize => { - // roaring-rs doesn't support range containers (yet) - x.remove_run_compression(); + ReadBitmapOperation::Serialize(ranges) => { + match ranges { + RangeOperations::Optimized => { + x.run_optimize(); + y.optimize(); + + } + RangeOperations::Removed => { + // TODO: Not implemented in roaring-rs yet + return; + } + } let expected = x.serialize::(); let mut actual = Vec::new(); y.serialize_into(&mut actual).unwrap(); @@ -230,12 +269,29 @@ impl MutableBitmapOperation { x.clear(); y.clear(); } + MutableBitmapOperation::Optimize => { + let expected_changed = x.run_optimize(); + let actual_changed = y.optimize(); + assert_eq!(expected_changed, actual_changed); + } MutableBitmapOperation::Extend(ref items) => { // Safety - Num is repr(transparent) over u32 let items: &[u32] = unsafe { mem::transmute(&items[..]) }; x.add_many(items); y.extend(items); } + MutableBitmapOperation::SwapSerialization => { + let x_serialized = x.serialize::(); + let mut y_serialized = Vec::new(); + y.serialize_into(&mut y_serialized).unwrap(); + + let new_x = croaring::Bitmap::try_deserialize::(&y_serialized).unwrap(); + let new_y = 
roaring::RoaringBitmap::deserialize_from(&x_serialized[..]).unwrap(); + assert_eq!(new_x, *x); + assert_eq!(new_y, *y); + *x = new_x; + *y = new_y; + } MutableBitmapOperation::MakeBitmap { key } => { let key = u32::from(key); let start = key * 0x1_0000; @@ -245,6 +301,13 @@ impl MutableBitmapOperation { y.insert(i); } } + MutableBitmapOperation::MakeRange { key } => { + let key = u32::from(key); + let start = key * 0x1_0000; + let end = start + 9 * 1024; + x.add_range(start..=end); + y.insert_range(start..=end); + } } } } From b88e25824639ff37e20f8e0465a0552f03042be1 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Mon, 5 May 2025 19:30:56 +0200 Subject: [PATCH 68/83] fix: align container optimize with croaring implementation --- roaring/src/bitmap/container.rs | 44 ++++++++++++++++----- roaring/src/bitmap/serialization.rs | 9 ++--- roaring/src/bitmap/store/array_store/mod.rs | 10 +++++ roaring/src/bitmap/store/bitmap_store.rs | 1 + roaring/src/bitmap/store/interval_store.rs | 26 +++++++++++- roaring/src/bitmap/store/mod.rs | 2 +- 6 files changed, 74 insertions(+), 18 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index f56c10104..bcd4124fa 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -3,10 +3,11 @@ use core::ops::{ BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, }; -use super::store::{self, Store}; +use super::store::{self, ArrayStore, IntervalStore, Store, BITMAP_BYTES}; use super::util; pub const ARRAY_LIMIT: u64 = 4096; +#[cfg(test)] pub const RUN_MAX_SIZE: u64 = 2048; #[cfg(not(feature = "std"))] @@ -196,18 +197,41 @@ impl Container { } pub fn optimize(&mut self) -> bool { - match self.store { - Store::Array(..) | Store::Bitmap(..) 
=> { + match &mut self.store { + Store::Bitmap(_) => { let num_runs = self.store.count_runs(); - if num_runs <= RUN_MAX_SIZE && num_runs <= self.len() / 2 { - // convert to run container - self.store = self.store.to_run(); - true - } else { - self.ensure_correct_store() + let size_as_run = IntervalStore::serialized_byte_size(num_runs); + if BITMAP_BYTES <= size_as_run { + return false; + } + self.store = self.store.to_run(); + true + } + Store::Array(array) => { + let size_as_array = array.byte_size(); + let num_runs = self.store.count_runs(); + let size_as_run = IntervalStore::serialized_byte_size(num_runs); + if size_as_array <= size_as_run { + return false; + } + self.store = self.store.to_run(); + true + } + Store::Run(runs) => { + let size_as_run = runs.byte_size(); + let card = runs.len(); + let size_as_array = ArrayStore::serialized_byte_size(card); + let min_size_non_run = size_as_array.min(BITMAP_BYTES); + if size_as_run <= min_size_non_run { + return false; + } + if card <= ARRAY_LIMIT { + self.store = Store::Array(runs.to_array()); + return true; } + self.store = Store::Bitmap(runs.to_bitmap()); + true } - Store::Run(..) 
=> false, } } } diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 4c0601119..267d0f476 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -1,6 +1,7 @@ use crate::bitmap::container::{Container, ARRAY_LIMIT}; use crate::bitmap::store::{ - ArrayStore, BitmapStore, Interval, Store, BITMAP_LENGTH, RUN_ELEMENT_BYTES, RUN_NUM_BYTES, + ArrayStore, BitmapStore, Interval, Store, BITMAP_BYTES, BITMAP_LENGTH, RUN_ELEMENT_BYTES, + RUN_NUM_BYTES, }; use crate::RoaringBitmap; use bytemuck::cast_slice_mut; @@ -21,10 +22,6 @@ pub(crate) const SIZE_BYTES: usize = 4; pub(crate) const DESCRIPTION_BYTES: usize = 4; pub(crate) const OFFSET_BYTES: usize = 4; -// Sizes of container structures -pub(crate) const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; -pub(crate) const ARRAY_ELEMENT_BYTES: usize = 2; - impl RoaringBitmap { /// Return the size in bytes of the serialized output. /// This is compatible with the official C/C++, Java and Go implementations. @@ -48,7 +45,7 @@ impl RoaringBitmap { .containers .iter() .map(|container| match container.store { - Store::Array(ref values) => values.len() as usize * ARRAY_ELEMENT_BYTES, + Store::Array(ref values) => values.byte_size(), Store::Bitmap(..) 
=> BITMAP_BYTES, Store::Run(ref intervals) => { has_run_containers = true; diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 0d41f4d62..c888572de 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -17,6 +17,8 @@ use alloc::boxed::Box; use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; +pub(crate) const ARRAY_ELEMENT_BYTES: usize = 2; + #[derive(Clone, Eq, PartialEq)] pub(crate) struct ArrayStore { vec: Vec, @@ -27,6 +29,14 @@ impl ArrayStore { ArrayStore { vec: vec![] } } + pub fn serialized_byte_size(cardinality: u64) -> usize { + cardinality as usize * ARRAY_ELEMENT_BYTES + } + + pub fn byte_size(&self) -> usize { + Self::serialized_byte_size(self.len()) + } + #[cfg(feature = "std")] pub fn with_capacity(capacity: usize) -> ArrayStore { ArrayStore { vec: Vec::with_capacity(capacity) } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index e84b70cd2..9a24d4567 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -12,6 +12,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; pub const BITMAP_LENGTH: usize = 1024; +pub const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; #[derive(Clone, Eq, PartialEq)] pub struct BitmapStore { diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 58bc0adbd..316f94987 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -29,7 +29,11 @@ impl IntervalStore { } pub fn byte_size(&self) -> usize { - RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * self.run_amount() as usize) + Self::serialized_byte_size(self.run_amount()) + } + + pub fn serialized_byte_size(run_amount: u64) -> usize { + RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * run_amount as usize) } #[cfg(feature = "std")] @@ -458,6 +462,14 @@ impl IntervalStore { bits } + pub fn 
to_array(&self) -> ArrayStore { + let mut array = ArrayStore::new(); + for iv in self.0.iter() { + array.insert_range(iv.start..=iv.end); + } + array + } + pub(crate) fn iter(&self) -> RunIterBorrowed { self.into_iter() } @@ -467,6 +479,18 @@ impl IntervalStore { } } +impl From for BitmapStore { + fn from(value: IntervalStore) -> Self { + value.to_bitmap() + } +} + +impl From for ArrayStore { + fn from(value: IntervalStore) -> Self { + value.to_array() + } +} + impl BitOrAssign for IntervalStore { fn bitor_assign(&mut self, mut rhs: Self) { let (add_intervals, take_intervals, self_is_add) = diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 42979f212..31bcc9f98 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -9,7 +9,7 @@ use core::ops::{ }; use core::slice; -pub use self::bitmap_store::BITMAP_LENGTH; +pub use self::bitmap_store::{BITMAP_BYTES, BITMAP_LENGTH}; use self::Store::{Array, Bitmap, Run}; pub(crate) use self::array_store::ArrayStore; From a24ff696b6e3cacd13479ab4358bba6f4ad02cdf Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Mon, 5 May 2025 19:46:37 +0200 Subject: [PATCH 69/83] fix: interval store iterator not stopping --- roaring/src/bitmap/store/interval_store.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 316f94987..88e347a16 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -629,6 +629,7 @@ impl> RunIter { if let Some(value) = self.forward_offset.checked_add(1) { self.forward_offset = value; } else { + self.intervals.next(); return; } if Some(self.forward_offset as u64) @@ -643,6 +644,7 @@ impl> RunIter { if let Some(value) = self.backward_offset.checked_add(1) { self.backward_offset = value; } else { + self.intervals.next_back(); return; } if Some(self.backward_offset as u64) From 
869e7802c6c493cb06dce29ef7025be54c540823 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Mon, 5 May 2025 21:01:36 +0200 Subject: [PATCH 70/83] test: align optimize in croaring to current optimize --- fuzz/fuzz_targets/arbitrary_ops/mod.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs index 405216a71..0b14a18b5 100644 --- a/fuzz/fuzz_targets/arbitrary_ops/mod.rs +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -270,9 +270,13 @@ impl MutableBitmapOperation { y.clear(); } MutableBitmapOperation::Optimize => { - let expected_changed = x.run_optimize(); - let actual_changed = y.optimize(); - assert_eq!(expected_changed, actual_changed); + x.run_optimize(); + y.optimize(); + let stat_x = x.statistics(); + let stat_y = y.statistics(); + assert_eq!(stat_x.n_run_containers, stat_y.n_run_containers); + assert_eq!(stat_x.n_bitset_containers, stat_y.n_bitset_containers); + assert_eq!(stat_x.n_array_containers, stat_y.n_array_containers); } MutableBitmapOperation::Extend(ref items) => { // Safety - Num is repr(transparent) over u32 From 08f3ed132246b9755d2f1151071f144207293fcc Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Mon, 5 May 2025 21:03:51 +0200 Subject: [PATCH 71/83] fix: interval select truncating much needed run_len bits --- roaring/src/bitmap/store/interval_store.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 88e347a16..eb4aafa12 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -440,9 +440,10 @@ impl IntervalStore { pub fn select(&self, mut n: u16) -> Option { for iv in self.0.iter() { - let run_len = iv.run_len() as u16; - if run_len <= n { - n -= iv.run_len() as u16; + let run_len = iv.run_len(); + if run_len <= n.into() { + n -= iv.run_len() as u16; // this conversion 
never overflows since run_len is + // smaller then a u16 } else { return Some(iv.start + n); } From 439a22ca1ee158da967a8f4290c6bb44fef1c33e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 5 May 2025 17:42:24 -0700 Subject: [PATCH 72/83] Include license files in published crate --- roaring/LICENSE-APACHE | 1 + roaring/LICENSE-MIT | 1 + 2 files changed, 2 insertions(+) create mode 120000 roaring/LICENSE-APACHE create mode 120000 roaring/LICENSE-MIT diff --git a/roaring/LICENSE-APACHE b/roaring/LICENSE-APACHE new file mode 120000 index 000000000..965b606f3 --- /dev/null +++ b/roaring/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/roaring/LICENSE-MIT b/roaring/LICENSE-MIT new file mode 120000 index 000000000..76219eb72 --- /dev/null +++ b/roaring/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file From eff381a7fe833e821c93aa7cfa8e75a7d39495b4 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 11 May 2025 13:17:49 +0200 Subject: [PATCH 73/83] feat: run optimized insert range --- roaring/src/bitmap/container.rs | 46 +++++++++++++++---- roaring/src/bitmap/inherent.rs | 49 ++++++++++++++++----- roaring/src/bitmap/store/array_store/mod.rs | 10 +++++ roaring/src/bitmap/store/bitmap_store.rs | 3 ++ roaring/src/bitmap/store/interval_store.rs | 9 ++++ 5 files changed, 97 insertions(+), 20 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index bcd4124fa..f8ea24ea9 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -3,7 +3,7 @@ use core::ops::{ BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, }; -use super::store::{self, ArrayStore, IntervalStore, Store, BITMAP_BYTES}; +use super::store::{self, ArrayStore, Interval, IntervalStore, Store, BITMAP_BYTES}; use super::util; pub const ARRAY_LIMIT: u64 = 4096; @@ -30,6 +30,16 @@ impl Container { Container { key, store: Store::new() } } + pub 
fn new_with_range(key: u16, range: RangeInclusive) -> Container { + if range.len() <= 2 { + let mut array = ArrayStore::new(); + array.insert_range(range); + Self { key, store: Store::Array(array) } + } else { + Self { key, store: Store::Run(IntervalStore::new_with_range(range)) } + } + } + pub fn full(key: u16) -> Container { Container { key, store: Store::full() } } @@ -59,15 +69,35 @@ impl Container { } pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { - // If inserting the range will make this a bitmap by itself, do it now - if range.len() as u64 > ARRAY_LIMIT { - if let Store::Array(arr) = &self.store { - self.store = Store::Bitmap(arr.to_bitmap_store()); + match &self.store { + Store::Bitmap(bitmap) => { + let added_amount = range.len() as u64 + - bitmap + .intersection_len_interval(&Interval::new(*range.start(), *range.end())); + let union_cardinality = bitmap.len() + added_amount; + if union_cardinality == 1 << 16 { + self.store = Store::Run(IntervalStore::full()); + added_amount + } else { + self.store.insert_range(range) + } + } + Store::Array(array) => { + let added_amount = range.len() as u64 + - array.intersection_len_interval(&Interval::new(*range.start(), *range.end())); + let union_cardinality = array.len() + added_amount; + if union_cardinality == 1 << 16 { + self.store = Store::Run(IntervalStore::full()); + added_amount + } else if union_cardinality <= ARRAY_LIMIT { + self.store.insert_range(range) + } else { + self.store = self.store.to_bitmap(); + self.store.insert_range(range) + } } + Store::Run(_) => self.store.insert_range(range), } - let inserted = self.store.insert_range(range); - self.ensure_correct_store(); - inserted } /// Pushes `index` at the end of the container only if `index` is the new max. 
diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index f24bc6d57..bee898e8a 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -1,6 +1,6 @@ use core::cmp::Ordering; use core::mem::size_of; -use core::ops::RangeBounds; +use core::ops::{RangeBounds, RangeInclusive}; use crate::bitmap::store::BITMAP_LENGTH; use crate::RoaringBitmap; @@ -211,6 +211,31 @@ impl RoaringBitmap { } } + /// Searches and then modifies a specific container with `M` by the given key. + /// Creates a new container using `B` if it doesn't exist. + /// + /// Returns `R` based on `M` or `B`. + #[inline] + pub(crate) fn mod_or_build_container_by_key< + R, + M: FnMut(&mut Container) -> R, + B: FnMut(u16) -> (Container, R), + >( + &mut self, + key: u16, + mut modifier: M, + mut builder: B, + ) -> R { + match self.containers.binary_search_by_key(&key, |c| c.key) { + Ok(loc) => modifier(&mut self.containers[loc]), + Err(loc) => { + let build_value = builder(key); + self.containers.insert(loc, build_value.0); + build_value.1 + } + } + } + /// Inserts a range of values. /// Returns the number of inserted values. /// @@ -237,14 +262,19 @@ impl RoaringBitmap { let (start_container_key, start_index) = util::split(start); let (end_container_key, end_index) = util::split(end); - - // Find the container index for start_container_key - let first_index = self.find_container_by_key(start_container_key); + let modify_container_range = + |bitmap: &mut Self, container_key: u16, range: RangeInclusive| { + bitmap.mod_or_build_container_by_key( + container_key, + |container| container.insert_range(range.clone()), + |key| (Container::new_with_range(key, range.clone()), range.len() as u64), + ) + }; // If the end range value is in the same container, just call into // the one container. 
if start_container_key == end_container_key { - return self.containers[first_index].insert_range(start_index..=end_index); + return modify_container_range(self, start_container_key, start_index..=end_index); } // For the first container, insert start_index..=u16::MAX, with @@ -256,19 +286,14 @@ impl RoaringBitmap { let mut inserted = 0; for i in start_container_key..end_container_key { - let index = self.find_container_by_key(i); - - // Insert the range subset for this container - inserted += self.containers[index].insert_range(low..=u16::MAX); + inserted += modify_container_range(self, i, low..=u16::MAX); // After the first container, always fill the containers. low = 0; } // Handle the last container - let last_index = self.find_container_by_key(end_container_key); - - inserted += self.containers[last_index].insert_range(0..=end_index); + inserted += modify_container_range(self, end_container_key, 0..=end_index); inserted } diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index c888572de..1b1add2d4 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -16,6 +16,7 @@ use alloc::vec::Vec; use alloc::boxed::Box; use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; +use super::Interval; pub(crate) const ARRAY_ELEMENT_BYTES: usize = 2; @@ -231,6 +232,15 @@ impl ArrayStore { visitor.into_inner() } + pub fn intersection_len_interval(&self, interval: &Interval) -> u64 { + if interval.is_full() { + return self.len(); + } + let start_id = self.vec.partition_point(|&f| f < interval.start); + let end_id = self.vec.partition_point(|&f| f <= interval.end); + (end_id.saturating_sub(start_id)) as u64 + } + pub fn to_bitmap_store(&self) -> BitmapStore { let mut bits = Box::new([0; BITMAP_LENGTH]); let len = self.len(); diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index 9a24d4567..ce55fb233 100644 --- 
a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -334,6 +334,9 @@ impl BitmapStore { } pub(crate) fn intersection_len_interval(&self, interval: &Interval) -> u64 { + if interval.is_full() { + return self.len(); + } let (start_id, start_bit) = (key(interval.start), bit(interval.start)); let (end_id, end_bit) = (key(interval.end), bit(interval.end)); let mut amount: u64 = 0; diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index eb4aafa12..c1fd4ebb8 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -24,6 +24,11 @@ impl IntervalStore { Self(Default::default()) } + pub fn new_with_range(range: RangeInclusive) -> Self { + debug_assert!(!range.is_empty()); + Self(alloc::vec![Interval::new(*range.start(), *range.end())]) + } + pub fn full() -> Self { Self(alloc::vec![Interval::new(0, u16::MAX)]) } @@ -841,6 +846,10 @@ impl Interval { pub fn run_len(&self) -> u64 { u64::from(self.end - self.start) + 1 } + + pub fn is_full(&self) -> bool { + self.start == 0 && self.end == u16::MAX + } } #[cfg(test)] From c3ebe863e377b58a0732f0ba27da13dc8a1b987f Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 11 May 2025 18:01:42 +0200 Subject: [PATCH 74/83] feat: remove_run_compression --- fuzz/fuzz_targets/arbitrary_ops/mod.rs | 50 ++++++++++++++------------ roaring/src/bitmap/container.rs | 15 ++++++++ roaring/src/bitmap/inherent.rs | 28 +++++++++++++-- roaring/tests/lib.rs | 7 ++++ 4 files changed, 74 insertions(+), 26 deletions(-) diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs index 0b14a18b5..684f45a92 100644 --- a/fuzz/fuzz_targets/arbitrary_ops/mod.rs +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -53,8 +53,7 @@ pub enum MutableBitmapOperation { Extend(Vec), SwapSerialization, Optimize, - // TODO: not implemented in roaring-rs yet - // RemoveRunCompression, + 
RemoveRunCompression, // Probably turn it into a bitmap MakeBitmap { key: u16 }, // Probably turn it into a Range @@ -154,13 +153,14 @@ impl ReadBitmapOperation { ReadBitmapOperation::Statistics(ranges) => { match ranges { RangeOperations::Optimized => { - x.run_optimize(); - y.optimize(); - + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); } RangeOperations::Removed => { - // TODO: Not implemented in roaring-rs yet - return; + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); } } let expected = x.statistics(); @@ -198,13 +198,14 @@ impl ReadBitmapOperation { ReadBitmapOperation::SerializedSize(ranges) => { match ranges { RangeOperations::Optimized => { - x.run_optimize(); - y.optimize(); - + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); } RangeOperations::Removed => { - // TODO: Not implemented in roaring-rs yet - return; + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); } } let expected = x.get_serialized_size_in_bytes::(); @@ -214,13 +215,14 @@ impl ReadBitmapOperation { ReadBitmapOperation::Serialize(ranges) => { match ranges { RangeOperations::Optimized => { - x.run_optimize(); - y.optimize(); - + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); } RangeOperations::Removed => { - // TODO: Not implemented in roaring-rs yet - return; + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); } } let expected = x.serialize::(); @@ -270,13 +272,14 @@ impl MutableBitmapOperation { y.clear(); } MutableBitmapOperation::Optimize => { + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); + } + MutableBitmapOperation::RemoveRunCompression => { x.run_optimize(); y.optimize(); - let stat_x = 
x.statistics(); - let stat_y = y.statistics(); - assert_eq!(stat_x.n_run_containers, stat_y.n_run_containers); - assert_eq!(stat_x.n_bitset_containers, stat_y.n_bitset_containers); - assert_eq!(stat_x.n_array_containers, stat_y.n_array_containers); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); } MutableBitmapOperation::Extend(ref items) => { // Safety - Num is repr(transparent) over u32 @@ -289,7 +292,8 @@ impl MutableBitmapOperation { let mut y_serialized = Vec::new(); y.serialize_into(&mut y_serialized).unwrap(); - let new_x = croaring::Bitmap::try_deserialize::(&y_serialized).unwrap(); + let new_x = + croaring::Bitmap::try_deserialize::(&y_serialized).unwrap(); let new_y = roaring::RoaringBitmap::deserialize_from(&x_serialized[..]).unwrap(); assert_eq!(new_x, *x); assert_eq!(new_y, *y); diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index f8ea24ea9..d1f1cb7cc 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -264,6 +264,21 @@ impl Container { } } } + + pub fn remove_run_compression(&mut self) -> bool { + match &mut self.store { + Store::Bitmap(_) | Store::Array(_) => false, + Store::Run(runs) => { + let card = runs.len(); + if card <= ARRAY_LIMIT { + self.store = Store::Array(runs.to_array()); + } else { + self.store = Store::Bitmap(runs.to_bitmap()); + } + true + } + } + } } impl BitOr<&Container> for &Container { diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index bee898e8a..f5981a792 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -832,15 +832,16 @@ impl RoaringBitmap { } } - // TODO(jpg) actually come up with example that illustrates creation of run containers /// Optimizes the container storage for this bitmap. /// Returns true if the container storage was modified, false if not. 
/// /// # Examples + /// + /// ``` /// use roaring::RoaringBitmap; /// - /// let mut rb = RoaringBitmap::from_iter(1000..100000) - /// rb.optimize() + /// let mut rb = RoaringBitmap::from_iter(1000..100000); + /// rb.optimize(); /// ``` pub fn optimize(&mut self) -> bool { let mut changed = false; @@ -849,6 +850,27 @@ impl RoaringBitmap { } changed } + + /// Removes run-length encoding even when it is more space efficient. + /// + /// Returns true if the container storage was modified, false if not. + /// + /// # Examples + /// + /// ``` + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::from_iter(0..=10000); + /// rb.optimize(); + /// assert!(rb.remove_run_compression()); + /// ``` + pub fn remove_run_compression(&mut self) -> bool { + let mut changed = false; + for container in &mut self.containers { + changed |= container.remove_run_compression() + } + changed + } } impl Default for RoaringBitmap { diff --git a/roaring/tests/lib.rs b/roaring/tests/lib.rs index 468d23fd8..568de01d7 100644 --- a/roaring/tests/lib.rs +++ b/roaring/tests/lib.rs @@ -136,6 +136,13 @@ fn optimize_bitmap() { assert!(bitmap.optimize()); } +#[test] +fn remove_run_compression() { + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); + assert!(bitmap.remove_run_compression()); +} + #[test] fn optimize_run() { let mut bitmap = RoaringBitmap::from_iter(0..1000); From a9614bb449d62917dcebc978756580216013033d Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 31 May 2025 10:28:11 +0200 Subject: [PATCH 75/83] fix: fuzzing against croaring failure by optimize Fixes a fuzz failure by ensuring no run containers are present in both implementations before adding run containers and then removing them again to check if both remove operations had the same effect. 
--- fuzz/fuzz_targets/arbitrary_ops/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs index 684f45a92..03a00321d 100644 --- a/fuzz/fuzz_targets/arbitrary_ops/mod.rs +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -158,6 +158,8 @@ impl ReadBitmapOperation { assert_eq!(x.run_optimize(), y.optimize()); } RangeOperations::Removed => { + x.remove_run_compression(); + y.remove_run_compression(); x.run_optimize(); y.optimize(); assert_eq!(x.remove_run_compression(), y.remove_run_compression()); @@ -203,6 +205,8 @@ impl ReadBitmapOperation { assert_eq!(x.run_optimize(), y.optimize()); } RangeOperations::Removed => { + x.remove_run_compression(); + y.remove_run_compression(); x.run_optimize(); y.optimize(); assert_eq!(x.remove_run_compression(), y.remove_run_compression()); @@ -220,6 +224,8 @@ impl ReadBitmapOperation { assert_eq!(x.run_optimize(), y.optimize()); } RangeOperations::Removed => { + x.remove_run_compression(); + y.remove_run_compression(); x.run_optimize(); y.optimize(); assert_eq!(x.remove_run_compression(), y.remove_run_compression()); @@ -277,6 +283,8 @@ impl MutableBitmapOperation { assert_eq!(x.run_optimize(), y.optimize()); } MutableBitmapOperation::RemoveRunCompression => { + x.remove_run_compression(); + y.remove_run_compression(); x.run_optimize(); y.optimize(); assert_eq!(x.remove_run_compression(), y.remove_run_compression()); From 69fe5e6fb9f59ded67646d89d7adcacff12a86b2 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sat, 31 May 2025 10:44:23 +0200 Subject: [PATCH 76/83] fix: enforce `Interval` invariants --- roaring/src/bitmap/container.rs | 22 +- roaring/src/bitmap/serialization.rs | 6 +- roaring/src/bitmap/store/array_store/mod.rs | 4 +- roaring/src/bitmap/store/bitmap_store.rs | 4 +- roaring/src/bitmap/store/interval_store.rs | 514 +++++++++++--------- roaring/src/bitmap/store/mod.rs | 19 +- 6 files changed, 322 insertions(+), 247 deletions(-) 
diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index d1f1cb7cc..4ed11cdd0 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -36,7 +36,13 @@ impl Container { array.insert_range(range); Self { key, store: Store::Array(array) } } else { - Self { key, store: Store::Run(IntervalStore::new_with_range(range)) } + Self { + key, + store: Store::Run(IntervalStore::new_with_range( + // This is ok, since range must be non empty + Interval::new_unchecked(*range.start(), *range.end()), + )), + } } } @@ -69,11 +75,16 @@ impl Container { } pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } match &self.store { Store::Bitmap(bitmap) => { let added_amount = range.len() as u64 - - bitmap - .intersection_len_interval(&Interval::new(*range.start(), *range.end())); + - bitmap.intersection_len_interval(&Interval::new_unchecked( + *range.start(), + *range.end(), + )); let union_cardinality = bitmap.len() + added_amount; if union_cardinality == 1 << 16 { self.store = Store::Run(IntervalStore::full()); @@ -84,7 +95,10 @@ impl Container { } Store::Array(array) => { let added_amount = range.len() as u64 - - array.intersection_len_interval(&Interval::new(*range.start(), *range.end())); + - array.intersection_len_interval(&Interval::new_unchecked( + *range.start(), + *range.end(), + )); let union_cardinality = array.len() + added_amount; if union_cardinality == 1 << 16 { self.store = Store::Run(IntervalStore::full()); diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index 267d0f476..e848e2c61 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -141,8 +141,8 @@ impl RoaringBitmap { Store::Run(ref intervals) => { writer.write_u16::(intervals.run_amount() as u16)?; for iv in intervals.iter_intervals() { - writer.write_u16::(iv.start)?; - writer.write_u16::(iv.end - iv.start)?; + 
writer.write_u16::(iv.start())?; + writer.write_u16::(iv.end() - iv.start())?; } } } @@ -298,7 +298,7 @@ impl RoaringBitmap { return Err(io::ErrorKind::InvalidData); } } - Ok(Interval::new(s, end)) + Ok(Interval::new_unchecked(s, end)) }) .collect::>()?, ); diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 1b1add2d4..17a9e79f0 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -236,8 +236,8 @@ impl ArrayStore { if interval.is_full() { return self.len(); } - let start_id = self.vec.partition_point(|&f| f < interval.start); - let end_id = self.vec.partition_point(|&f| f <= interval.end); + let start_id = self.vec.partition_point(|&f| f < interval.start()); + let end_id = self.vec.partition_point(|&f| f <= interval.end()); (end_id.saturating_sub(start_id)) as u64 } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index ce55fb233..26a3265c3 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -337,8 +337,8 @@ impl BitmapStore { if interval.is_full() { return self.len(); } - let (start_id, start_bit) = (key(interval.start), bit(interval.start)); - let (end_id, end_bit) = (key(interval.end), bit(interval.end)); + let (start_id, start_bit) = (key(interval.start()), bit(interval.start())); + let (end_id, end_bit) = (key(interval.end()), bit(interval.end())); let mut amount: u64 = 0; for (i, mut cur_bit) in self.bits[start_id..=end_id].iter().copied().enumerate() { if i == 0 { diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index c1fd4ebb8..2c13dac1d 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -24,13 +24,12 @@ impl IntervalStore { Self(Default::default()) } - pub fn new_with_range(range: RangeInclusive) -> Self { - 
debug_assert!(!range.is_empty()); - Self(alloc::vec![Interval::new(*range.start(), *range.end())]) + pub fn new_with_range(range: Interval) -> Self { + Self(alloc::vec![range]) } pub fn full() -> Self { - Self(alloc::vec![Interval::new(0, u16::MAX)]) + Self(alloc::vec![Interval::new_unchecked(0, u16::MAX)]) } pub fn byte_size(&self) -> usize { @@ -99,7 +98,7 @@ impl IntervalStore { return true; } } - self.0.insert(idx, Interval::new(index, index)); + self.0.insert(idx, Interval::new_unchecked(index, index)); true } @@ -157,13 +156,13 @@ impl IntervalStore { last_interval.end = index; true } else if last_interval.end < index { - self.0.push(Interval::new(index, index)); + self.0.push(Interval::new_unchecked(index, index)); true } else { false } } else { - self.0.push(Interval::new(index, index)); + self.0.push(Interval::new_unchecked(index, index)); true } } @@ -185,7 +184,7 @@ impl IntervalStore { } else { // Value lies inside the interval, we need to split it // First construct a new interval with the right part - let new_interval = Interval::new(index + 1, self.0[loc].end); + let new_interval = Interval::new_unchecked(index + 1, self.0[loc].end); // Then shrink the current interval self.0[loc].end = index - 1; // Then insert the new interval leaving gap where value was removed @@ -200,7 +199,7 @@ impl IntervalStore { return 0; } - let mut interval = Interval::new(*range.start(), *range.end()); + let mut interval = Interval::new_unchecked(*range.start(), *range.end()); // All intervals in `start_idx..end_idx` are fully contained in our interval. 
let start_idx = self.0.partition_point(|iv| iv.start < interval.start); let end_idx = self.0[start_idx..].partition_point(|iv| iv.end <= interval.end) + start_idx; @@ -212,7 +211,7 @@ impl IntervalStore { if prev.end >= interval.start { // We need to remove from the previous interval removed_count += - Interval::new(interval.start, prev.end.min(interval.end)).run_len(); + Interval::new_unchecked(interval.start, prev.end.min(interval.end)).run_len(); let new_end = interval.start - 1; add_needed = prev.end > interval.end; if add_needed { @@ -227,7 +226,7 @@ impl IntervalStore { if next.start <= interval.end { // We need to remove everything til interval.end removed_count += - Interval::new(next.start.max(interval.start), interval.end).run_len(); + Interval::new_unchecked(next.start.max(interval.start), interval.end).run_len(); next.start = interval.end + 1; } } @@ -299,7 +298,7 @@ impl IntervalStore { } pub fn contains_range(&self, range: RangeInclusive) -> bool { - let interval = Interval::new(*range.start(), *range.end()); + let interval = Interval::new_unchecked(*range.start(), *range.end()); let start = self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); let end = self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); match (start, end) { @@ -435,7 +434,7 @@ impl IntervalStore { if iv.end <= value { rank += iv.run_len(); } else if iv.start <= value { - rank += Interval::new(iv.start, value).run_len(); + rank += Interval::new_unchecked(iv.start, value).run_len(); } else { break; } @@ -788,13 +787,13 @@ impl> ExactSizeIterator for RunIter {} /// This interval is inclusive to end. 
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] pub(crate) struct Interval { - pub start: u16, - pub end: u16, + start: u16, + end: u16, } impl From> for Interval { fn from(value: RangeInclusive) -> Self { - Interval::new(*value.start(), *value.end()) + Interval::new_unchecked(*value.start(), *value.end()) } } @@ -827,17 +826,26 @@ pub(crate) fn cmp_index_interval(index: u16, iv: Interval) -> Ordering { } impl Interval { - pub fn new(start: u16, end: u16) -> Interval { - Interval { start, end } + pub fn new_unchecked(start: u16, end: u16) -> Self { + debug_assert!(start <= end); + Self { start, end } } - pub fn overlaps(&self, interval: &Interval) -> bool { + pub fn start(&self) -> u16 { + self.start + } + + pub fn end(&self) -> u16 { + self.end + } + + pub fn overlaps(&self, interval: &Self) -> bool { interval.start <= self.end && self.start <= interval.end } - pub fn overlapping_interval(&self, other: &Interval) -> Option { + pub fn overlapping_interval(&self, other: &Self) -> Option { if self.overlaps(other) { - Some(Interval::new(self.start.max(other.start), self.end.min(other.end))) + Some(Self::new_unchecked(self.start.max(other.start), self.end.min(other.end))) } else { None } @@ -947,21 +955,21 @@ mod tests { #[test] fn insert_range_empty() { let mut interval_store = IntervalStore(alloc::vec![]); - assert_eq!(interval_store.insert_range(1..=2), Interval::new(1, 2).run_len()); + assert_eq!(interval_store.insert_range(1..=2), Interval::new_unchecked(1, 2).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 2 },])); } #[test] fn insert_range_overlap_begin() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 20 }]); - assert_eq!(interval_store.insert_range(5..=50), Interval::new(21, 50).run_len()); + assert_eq!(interval_store.insert_range(5..=50), Interval::new_unchecked(21, 50).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 50 },])); } 
#[test] fn insert_range_overlap_end() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 10, end: 20 }]); - assert_eq!(interval_store.insert_range(5..=15), Interval::new(5, 9).run_len()); + assert_eq!(interval_store.insert_range(5..=15), Interval::new_unchecked(5, 9).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 5, end: 20 },])); } @@ -971,14 +979,14 @@ mod tests { Interval { start: 10, end: 20 }, Interval { start: 40, end: 60 }, ]); - assert_eq!(interval_store.insert_range(15..=50), Interval::new(21, 39).run_len()); + assert_eq!(interval_store.insert_range(15..=50), Interval::new_unchecked(21, 39).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 60 },])); } #[test] fn insert_range_concescutive_begin() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 10, end: 20 },]); - assert_eq!(interval_store.insert_range(21..=50), Interval::new(21, 50).run_len()); + assert_eq!(interval_store.insert_range(21..=50), Interval::new_unchecked(21, 50).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 50 },])); } @@ -988,14 +996,14 @@ mod tests { Interval { start: 10, end: 20 }, Interval { start: 40, end: 60 }, ]); - assert_eq!(interval_store.insert_range(21..=50), Interval::new(21, 39).run_len()); + assert_eq!(interval_store.insert_range(21..=50), Interval::new_unchecked(21, 39).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 60 },])); } #[test] fn insert_range_concescutive_end() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); - assert_eq!(interval_store.insert_range(21..=49), Interval::new(21, 49).run_len()); + assert_eq!(interval_store.insert_range(21..=49), Interval::new_unchecked(21, 49).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 21, end: 70 },])); } @@ -1005,7 +1013,7 @@ mod tests { Interval { 
start: 10, end: 20 }, Interval { start: 50, end: 70 }, ]); - assert_eq!(interval_store.insert_range(21..=49), Interval::new(21, 49).run_len()); + assert_eq!(interval_store.insert_range(21..=49), Interval::new_unchecked(21, 49).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 70 },])); } @@ -1015,7 +1023,7 @@ mod tests { Interval { start: 10, end: 20 }, Interval { start: 50, end: 70 }, ]); - assert_eq!(interval_store.insert_range(25..=30), Interval::new(25, 30).run_len()); + assert_eq!(interval_store.insert_range(25..=30), Interval::new_unchecked(25, 30).run_len()); assert_eq!( interval_store, IntervalStore(alloc::vec![ @@ -1034,7 +1042,7 @@ mod tests { ]); assert_eq!( interval_store.insert_range(90..=u16::MAX), - Interval::new(90, u16::MAX).run_len() + Interval::new_unchecked(90, u16::MAX).run_len() ); assert_eq!( interval_store, @@ -1054,7 +1062,7 @@ mod tests { ]); assert_eq!( interval_store.insert_range(70..=u16::MAX), - Interval::new(71, u16::MAX).run_len() + Interval::new_unchecked(71, u16::MAX).run_len() ); assert_eq!( interval_store, @@ -1073,9 +1081,9 @@ mod tests { ]); assert_eq!( interval_store.insert_range(0..=u16::MAX), - Interval::new(0, u16::MAX).run_len() - - Interval::new(10, 20).run_len() - - Interval::new(50, 70).run_len() + Interval::new_unchecked(0, u16::MAX).run_len() + - Interval::new_unchecked(10, 20).run_len() + - Interval::new_unchecked(50, 70).run_len() ); assert_eq!( interval_store, @@ -1092,9 +1100,9 @@ mod tests { ]); assert_eq!( interval_store.insert_range(0..=100), - Interval::new(0, 100).run_len() - - Interval::new(10, 20).run_len() - - Interval::new(50, 70).run_len() + Interval::new_unchecked(0, 100).run_len() + - Interval::new_unchecked(10, 20).run_len() + - Interval::new_unchecked(50, 70).run_len() ); assert_eq!( interval_store, @@ -1107,17 +1115,22 @@ mod tests { #[test] fn insert_range_begin_overlap_concescutive_end() { - let mut interval_store = - 
IntervalStore(alloc::vec![Interval::new(2, 10), Interval::new(12, 700),]); + let mut interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(2, 10), + Interval::new_unchecked(12, 700), + ]); assert_eq!(interval_store.insert_range(2..=11), 1); - assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(2, 700)])); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new_unchecked(2, 700)])); } #[test] fn insert_range_pin_1() { - let mut interval_store = IntervalStore(alloc::vec![Interval::new(65079, 65079)]); + let mut interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(65079, 65079)]); assert_eq!(interval_store.insert_range(65080..=65080), 1); - assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(65079, 65080)])); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new_unchecked(65079, 65080)]) + ); } #[test] @@ -1254,7 +1267,10 @@ mod tests { assert_eq!(interval_store.remove_range(40..=70), 21); assert_eq!( interval_store, - IntervalStore(alloc::vec![Interval::new(400, 600), Interval::new(4000, 6000),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(400, 600), + Interval::new_unchecked(4000, 6000), + ]) ); } @@ -1267,9 +1283,9 @@ mod tests { ]); assert_eq!( interval_store.remove_range(40..=200), - Interval::new(40, 60).run_len() - + Interval::new(80, 90).run_len() - + Interval::new(100, 200).run_len() + Interval::new_unchecked(40, 60).run_len() + + Interval::new_unchecked(80, 90).run_len() + + Interval::new_unchecked(100, 200).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![])); } @@ -1282,7 +1298,7 @@ mod tests { ]); assert_eq!( interval_store.remove_range(40..=80), - Interval::new(40, 60).run_len() + Interval::new(70, 80).run_len() + Interval::new_unchecked(40, 60).run_len() + Interval::new_unchecked(70, 80).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 81, end: 90 },])); } @@ -1295,7 +1311,7 @@ mod tests { ]); 
assert_eq!( interval_store.remove_range(50..=90), - Interval::new(70, 90).run_len() + Interval::new(50, 60).run_len() + Interval::new_unchecked(70, 90).run_len() + Interval::new_unchecked(50, 60).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); } @@ -1308,7 +1324,7 @@ mod tests { ]); assert_eq!( interval_store.remove_range(30..=90), - Interval::new(70, 90).run_len() + Interval::new(40, 60).run_len() + Interval::new_unchecked(70, 90).run_len() + Interval::new_unchecked(40, 60).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![])); } @@ -1322,9 +1338,9 @@ mod tests { ]); assert_eq!( interval_store.remove_range(30..=90), - Interval::new(70, 90).run_len() + Interval::new(40, 60).run_len() + Interval::new_unchecked(70, 90).run_len() + Interval::new_unchecked(40, 60).run_len() ); - assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(700, 900),])); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new_unchecked(700, 900),])); } #[test] @@ -1335,7 +1351,7 @@ mod tests { ]); assert_eq!( interval_store.remove_range(50..=80), - Interval::new(70, 80).run_len() + Interval::new(50, 60).run_len() + Interval::new_unchecked(70, 80).run_len() + Interval::new_unchecked(50, 60).run_len() ); assert_eq!( interval_store, @@ -1349,7 +1365,10 @@ mod tests { #[test] fn remove_range_begin_overlap() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); - assert_eq!(interval_store.remove_range(50..=100), Interval::new(50, 60).run_len()); + assert_eq!( + interval_store.remove_range(50..=100), + Interval::new_unchecked(50, 60).run_len() + ); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); } @@ -1362,9 +1381,9 @@ mod tests { ]); assert_eq!( interval_store.remove_range(50..=1000), - Interval::new(50, 60).run_len() - + Interval::new(80, 100).run_len() - + Interval::new(200, 500).run_len() + Interval::new_unchecked(50, 
60).run_len() + + Interval::new_unchecked(80, 100).run_len() + + Interval::new_unchecked(200, 500).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); } @@ -1372,7 +1391,7 @@ mod tests { #[test] fn remove_range_end_overlap() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); - assert_eq!(interval_store.remove_range(20..=50), Interval::new(40, 50).run_len()); + assert_eq!(interval_store.remove_range(20..=50), Interval::new_unchecked(40, 50).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 51, end: 60 },])); } @@ -1385,9 +1404,9 @@ mod tests { ]); assert_eq!( interval_store.remove_range(20..=850), - Interval::new(40, 60).run_len() - + Interval::new(100, 500).run_len() - + Interval::new(800, 850).run_len() + Interval::new_unchecked(40, 60).run_len() + + Interval::new_unchecked(100, 500).run_len() + + Interval::new_unchecked(800, 850).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 851, end: 900 },])); } @@ -1395,7 +1414,7 @@ mod tests { #[test] fn remove_range_no_overlap() { let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); - assert_eq!(interval_store.remove_range(20..=80), Interval::new(40, 60).run_len()); + assert_eq!(interval_store.remove_range(20..=80), Interval::new_unchecked(40, 60).run_len()); assert_eq!(interval_store, IntervalStore(alloc::vec![])); } @@ -1408,9 +1427,9 @@ mod tests { ]); assert_eq!( interval_store.remove_range(20..=60000), - Interval::new(40, 60).run_len() - + Interval::new(400, 600).run_len() - + Interval::new(4000, 6000).run_len() + Interval::new_unchecked(40, 60).run_len() + + Interval::new_unchecked(400, 600).run_len() + + Interval::new_unchecked(4000, 6000).run_len() ); assert_eq!(interval_store, IntervalStore(alloc::vec![])); } @@ -1418,10 +1437,16 @@ mod tests { #[test] fn remove_range_complete_overlap() { let mut interval_store = 
IntervalStore(alloc::vec![Interval { start: 51, end: 6000 },]); - assert_eq!(interval_store.remove_range(500..=600), Interval::new(500, 600).run_len()); + assert_eq!( + interval_store.remove_range(500..=600), + Interval::new_unchecked(500, 600).run_len() + ); assert_eq!( interval_store, - IntervalStore(alloc::vec![Interval::new(51, 499), Interval::new(601, 6000),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(51, 499), + Interval::new_unchecked(601, 6000), + ]) ); } @@ -1434,12 +1459,17 @@ mod tests { #[test] fn remove_range_with_extra() { - let mut interval_store = - IntervalStore(alloc::vec![Interval::new(38161, 38162), Interval::new(40562, 40562),]); + let mut interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(38161, 38162), + Interval::new_unchecked(40562, 40562), + ]); assert_eq!(interval_store.remove_range(38162..=38163), 1); assert_eq!( interval_store, - IntervalStore(alloc::vec![Interval::new(38161, 38161), Interval::new(40562, 40562),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(38161, 38161), + Interval::new_unchecked(40562, 40562), + ]) ); } @@ -1460,7 +1490,10 @@ mod tests { interval_store.remove_smallest(200); assert_eq!( interval_store, - IntervalStore(alloc::vec![Interval::new(500, 600), Interval::new(4000, 6000),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(500, 600), + Interval::new_unchecked(4000, 6000), + ]) ); } @@ -1472,7 +1505,10 @@ mod tests { Interval { start: 4000, end: 6000 }, ]); interval_store.remove_smallest(500); - assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(4200, 6000),])); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new_unchecked(4200, 6000),]) + ); } #[test] @@ -1492,7 +1528,10 @@ mod tests { interval_store.remove_biggest(200); assert_eq!( interval_store, - IntervalStore(alloc::vec![Interval::new(0, 99), Interval::new(400, 500),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 99), + Interval::new_unchecked(400, 500), + 
]) ); } @@ -1504,7 +1543,7 @@ mod tests { Interval { start: 9901, end: 10000 }, ]); interval_store.remove_biggest(500); - assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new(1, 5800),])); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new_unchecked(1, 5800),])); } #[test] @@ -1667,24 +1706,30 @@ mod tests { #[test] fn overlapping_interval_1() { - let interval1 = Interval::new(0, 100); - let interval2 = Interval::new(50, 300); + let interval1 = Interval::new_unchecked(0, 100); + let interval2 = Interval::new_unchecked(50, 300); - assert_eq!(interval1.overlapping_interval(&interval2), Some(Interval::new(50, 100))) + assert_eq!( + interval1.overlapping_interval(&interval2), + Some(Interval::new_unchecked(50, 100)) + ) } #[test] fn overlapping_interval_2() { - let interval1 = Interval::new(50, 300); - let interval2 = Interval::new(0, 100); + let interval1 = Interval::new_unchecked(50, 300); + let interval2 = Interval::new_unchecked(0, 100); - assert_eq!(interval1.overlapping_interval(&interval2), Some(Interval::new(50, 100))) + assert_eq!( + interval1.overlapping_interval(&interval2), + Some(Interval::new_unchecked(50, 100)) + ) } #[test] fn overlapping_interval_3() { - let interval1 = Interval::new(0, 100); - let interval2 = Interval::new(500, 700); + let interval1 = Interval::new_unchecked(0, 100); + let interval2 = Interval::new_unchecked(500, 700); assert_eq!(interval1.overlapping_interval(&interval2), None) } @@ -1703,9 +1748,9 @@ mod tests { ]); assert_eq!( interval_store_1.intersection_len(&interval_store_2), - Interval::new(11, 20).run_len() - + Interval::new(51, 80).run_len() - + Interval::new(111, 120).run_len() + Interval::new_unchecked(11, 20).run_len() + + Interval::new_unchecked(51, 80).run_len() + + Interval::new_unchecked(111, 120).run_len() ) } @@ -1720,9 +1765,9 @@ mod tests { Interval { start: 1, end: 80 }, Interval { start: 101, end: 120 }, ]); - let intersect_len = Interval::new(11, 20).run_len() - + 
Interval::new(51, 80).run_len() - + Interval::new(111, 120).run_len(); + let intersect_len = Interval::new_unchecked(11, 20).run_len() + + Interval::new_unchecked(51, 80).run_len() + + Interval::new_unchecked(111, 120).run_len(); assert_eq!(interval_store_1.intersection_len(&interval_store_2), intersect_len); assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); } @@ -1731,7 +1776,7 @@ mod tests { fn intersection_len_3() { let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1, end: 2000 },]); let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 1001, end: 3000 },]); - let intersect_len = Interval::new(1001, 2000).run_len(); + let intersect_len = Interval::new_unchecked(1001, 2000).run_len(); assert_eq!(interval_store_1.intersection_len(&interval_store_2), intersect_len); assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); } @@ -1754,7 +1799,7 @@ mod tests { bitmap_store.insert(to_set); } let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); - let intersect_len = Interval::new(20, 200).run_len(); + let intersect_len = Interval::new_unchecked(20, 200).run_len(); assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1768,8 +1813,8 @@ mod tests { Interval { start: 20, end: 6000 }, Interval { start: 5000, end: 33333 }, ]); - let intersect_len = - Interval::new(20, 6000).run_len() + Interval::new(5000, 20000).run_len(); + let intersect_len = Interval::new_unchecked(20, 6000).run_len() + + Interval::new_unchecked(5000, 20000).run_len(); assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1783,8 +1828,8 @@ mod tests { Interval { start: 64, end: 6400 }, Interval { start: 7680, end: 64000 }, ]); - let intersect_len = - Interval::new(64, 6400).run_len() + Interval::new(7680, 20000).run_len(); + let intersect_len = Interval::new_unchecked(64, 6400).run_len() + + 
Interval::new_unchecked(7680, 20000).run_len(); assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1798,8 +1843,8 @@ mod tests { Interval { start: 64, end: 6400 }, Interval { start: 7680, end: 64000 }, ]); - let intersect_len = - Interval::new(64, 6400).run_len() + Interval::new(7680, 20005).run_len(); + let intersect_len = Interval::new_unchecked(64, 6400).run_len() + + Interval::new_unchecked(7680, 20005).run_len(); assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1810,7 +1855,7 @@ mod tests { bitmap_store.insert(to_set); } let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 64, end: 64 },]); - let intersect_len = Interval::new(64, 64).run_len(); + let intersect_len = Interval::new_unchecked(64, 64).run_len(); assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); } @@ -1838,7 +1883,8 @@ mod tests { ]); assert_eq!( interval_store_1.len(), - Interval::new(20, 600).run_len() + Interval::new(5000, 8000).run_len() + Interval::new_unchecked(20, 600).run_len() + + Interval::new_unchecked(5000, 8000).run_len() ); } @@ -1855,7 +1901,7 @@ mod tests { #[test] fn min_0() { - let interval_store = IntervalStore(alloc::vec![Interval::new(20, u16::MAX)]); + let interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(20, u16::MAX)]); assert_eq!(interval_store.min(), Some(20)); } @@ -1867,7 +1913,7 @@ mod tests { #[test] fn max_0() { - let interval_store = IntervalStore(alloc::vec![Interval::new(20, u16::MAX)]); + let interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(20, u16::MAX)]); assert_eq!(interval_store.max(), Some(u16::MAX)); } @@ -1880,13 +1926,14 @@ mod tests { #[test] fn rank() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 200), - Interval::new(5000, 7000), - Interval::new(8000, 10000), + Interval::new_unchecked(0, 200), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), 
]); assert_eq!( interval_store.rank(5020), - Interval::new(0, 200).run_len() + Interval::new(5000, 5020).run_len() + Interval::new_unchecked(0, 200).run_len() + + Interval::new_unchecked(5000, 5020).run_len() ); assert_eq!(interval_store.rank(u16::MAX), interval_store.len()); } @@ -1894,10 +1941,10 @@ mod tests { #[test] fn select() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 11), - Interval::new(5000, 7000), - Interval::new(8000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), ]); assert_eq!(interval_store.select(0), Some(0)); assert_eq!(interval_store.select(1), Some(2)); @@ -1910,25 +1957,25 @@ mod tests { #[test] fn union_1() { let mut interval_store_1 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 11), - Interval::new(5000, 7000), - Interval::new(8000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), ]); let interval_store_2 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 10), - Interval::new(12, 7000), - Interval::new(65000, 65050), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 10), + Interval::new_unchecked(12, 7000), + Interval::new_unchecked(65000, 65050), ]); interval_store_1 |= interval_store_2; assert_eq!( interval_store_1, IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 7000), - Interval::new(8000, 10000), - Interval::new(65000, 65050), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 7000), + Interval::new_unchecked(8000, 10000), + Interval::new_unchecked(65000, 65050), ]) ) } @@ -1939,20 +1986,20 @@ mod tests { values.sort(); let array = ArrayStore::from_vec_unchecked(values); let mut interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 11), - Interval::new(5000, 
7000), - Interval::new(8000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), ]); interval_store |= &array; assert_eq!( interval_store, IntervalStore(alloc::vec![ - Interval::new(0, 11), - Interval::new(2000, 2000), - Interval::new(5000, 7000), - Interval::new(8000, 10000), - Interval::new(u16::MAX, u16::MAX), + Interval::new_unchecked(0, 11), + Interval::new_unchecked(2000, 2000), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + Interval::new_unchecked(u16::MAX, u16::MAX), ]) ) } @@ -1960,23 +2007,23 @@ mod tests { #[test] fn intersection() { let interval_store_1 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 11), - Interval::new(5000, 7000), - Interval::new(8000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), ]); let interval_store_2 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(5, 50), - Interval::new(4000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 50), + Interval::new_unchecked(4000, 10000), ]); assert_eq!( &interval_store_1 & &interval_store_2, IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(5, 11), - Interval::new(5000, 7000), - Interval::new(8000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), ]) ); assert_eq!(&interval_store_1 & &interval_store_1, interval_store_1); @@ -1985,74 +2032,80 @@ mod tests { #[test] fn difference() { let mut interval_store_1 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 11), - Interval::new(5000, 7000), - Interval::new(8000, 11000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 
11000), ]); let interval_store_2 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(5, 50), - Interval::new(4000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 50), + Interval::new_unchecked(4000, 10000), ]); interval_store_1 -= &interval_store_2; assert_eq!( interval_store_1, - IntervalStore(alloc::vec![Interval::new(2, 4), Interval::new(10001, 11000),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(2, 4), + Interval::new_unchecked(10001, 11000), + ]) ) } #[test] fn symmetric_difference_0() { let interval_store_1 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(2, 11), - Interval::new(5000, 7000), - Interval::new(8000, 11000), - Interval::new(40000, 50000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 11000), + Interval::new_unchecked(40000, 50000), ]); let interval_store_2 = IntervalStore(alloc::vec![ - Interval::new(0, 0), - Interval::new(5, 50), - Interval::new(4000, 10000), + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 50), + Interval::new_unchecked(4000, 10000), ]); assert_eq!( &interval_store_1 ^ &interval_store_2, IntervalStore(alloc::vec![ - Interval::new(2, 4), - Interval::new(12, 50), - Interval::new(4000, 4999), - Interval::new(7001, 7999), - Interval::new(10001, 11000), - Interval::new(40000, 50000), + Interval::new_unchecked(2, 4), + Interval::new_unchecked(12, 50), + Interval::new_unchecked(4000, 4999), + Interval::new_unchecked(7001, 7999), + Interval::new_unchecked(10001, 11000), + Interval::new_unchecked(40000, 50000), ]) ); } #[test] fn symmetric_difference_1() { - let interval_store_1 = IntervalStore(alloc::vec![Interval::new(0, 50),]); - let interval_store_2 = IntervalStore(alloc::vec![Interval::new(100, 200),]); + let interval_store_1 = IntervalStore(alloc::vec![Interval::new_unchecked(0, 50),]); + let interval_store_2 = 
IntervalStore(alloc::vec![Interval::new_unchecked(100, 200),]); assert_eq!( &interval_store_1 ^ &interval_store_2, - IntervalStore(alloc::vec![Interval::new(0, 50), Interval::new(100, 200),]) + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(100, 200), + ]) ); } #[test] fn symmetric_difference_2() { let interval_store_1 = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); - let interval_store_2 = IntervalStore(alloc::vec![Interval::new(0, 6000),]); + let interval_store_2 = IntervalStore(alloc::vec![Interval::new_unchecked(0, 6000),]); assert_eq!( &interval_store_1 ^ &interval_store_2, IntervalStore(alloc::vec![ - Interval::new(51, 499), - Interval::new(601, 799), - Interval::new(1001, 6000), + Interval::new_unchecked(51, 499), + Interval::new_unchecked(601, 799), + Interval::new_unchecked(1001, 6000), ]) ); } @@ -2060,15 +2113,15 @@ mod tests { #[test] fn iter_next() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); let mut iter = interval_store.into_iter(); - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, 1000).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; @@ -2078,14 +2131,14 @@ mod tests { if i >= 51 { break; } - let size = (Interval::new(i as u16, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, 1000).run_len()) as usize; + let size = (Interval::new_unchecked(i as u16, 50).run_len() + + 
Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } - let size = - (Interval::new(500, 600).run_len() + Interval::new(800, 1000).run_len()) as usize; + let size = (Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; @@ -2095,12 +2148,12 @@ mod tests { if i >= 101 { break; } - let size = (Interval::new((i + 500) as u16, 600).run_len() - + Interval::new(800, 1000).run_len()) as usize; + let size = (Interval::new_unchecked((i + 500) as u16, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } - let size = Interval::new(800, 1000).run_len() as usize; + let size = Interval::new_unchecked(800, 1000).run_len() as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; @@ -2113,7 +2166,7 @@ mod tests { if i >= 201 { break; } - let size = (Interval::new((i + 800) as u16, 1000).run_len()) as usize; + let size = (Interval::new_unchecked((i + 800) as u16, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } assert_eq!(iter.size_hint(), (0, Some(0))); @@ -2125,15 +2178,15 @@ mod tests { #[test] fn iter_next_back() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); let mut iter = interval_store.into_iter(); - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, 1000).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; 
@@ -2143,9 +2196,10 @@ mod tests { if i >= 201 { break; } - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, (1000 - i) as u16).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, (1000 - i) as u16).run_len()) + as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } @@ -2156,8 +2210,9 @@ mod tests { if i >= 101 { break; } - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, (600 - i) as u16).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, (600 - i) as u16).run_len()) + as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } @@ -2168,7 +2223,7 @@ mod tests { if i >= 51 { break; } - let size = (Interval::new(0, (50 - i) as u16).run_len()) as usize; + let size = (Interval::new_unchecked(0, (50 - i) as u16).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } assert_eq!(iter.size_hint(), (0, Some(0))); @@ -2179,15 +2234,15 @@ mod tests { #[test] fn iter_next_and_next_back() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); let mut iter = interval_store.into_iter(); - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, 1000).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; @@ -2197,13 +2252,15 @@ mod tests { if i >= 201 { break; } - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, (1000 - i) as u16).run_len()) as 
usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, (1000 - i) as u16).run_len()) + as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } - let size = (Interval::new(0, 50).run_len() + Interval::new(500, 600).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; @@ -2213,12 +2270,13 @@ mod tests { if i >= 101 { break; } - let size = (Interval::new(0, 50).run_len() - + Interval::new(500, (600 - i) as u16).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, (600 - i) as u16).run_len()) + as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } - let size = (Interval::new(0, 50).run_len()) as usize; + let size = (Interval::new_unchecked(0, 50).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); let mut i = 0; @@ -2228,7 +2286,7 @@ mod tests { if i >= 51 { break; } - let size = (Interval::new(i as u16, 50).run_len()) as usize; + let size = (Interval::new_unchecked(i as u16, 50).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } assert_eq!(iter.size_hint(), (0, Some(0))); @@ -2238,7 +2296,7 @@ mod tests { #[test] fn iter_u16_max() { - let interval_store = IntervalStore(alloc::vec![Interval::new(0, u16::MAX),]); + let interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(0, u16::MAX),]); let mut iter = interval_store.iter(); let mut i = 0; @@ -2248,7 +2306,7 @@ mod tests { if i >= u16::MAX as usize { break; } - let size = (Interval::new(i as u16, u16::MAX).run_len()) as usize; + let size = (Interval::new_unchecked(i as u16, u16::MAX).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } @@ -2261,7 +2319,7 @@ mod tests { if i >= u16::MAX as usize { break; } - let size = (Interval::new(0, u16::MAX - 
i as u16).run_len()) as usize; + let size = (Interval::new_unchecked(0, u16::MAX - i as u16).run_len()) as usize; assert_eq!(iter.size_hint(), (size, Some(size))); } let mut iter = interval_store.iter(); @@ -2271,9 +2329,9 @@ mod tests { #[test] fn iter_nth() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); let mut iter = interval_store.iter(); assert_eq!(iter.nth(50), Some(50)); @@ -2290,9 +2348,9 @@ mod tests { let mut iter = interval_store.iter(); assert_eq!( iter.nth( - (Interval::new(0, 50).run_len() - + Interval::new(500, 600).run_len() - + Interval::new(800, 1000).run_len() + (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len() - 1) as usize ), Some(1000) @@ -2311,9 +2369,9 @@ mod tests { #[test] fn iter_advance_to() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); let mut iter = interval_store.iter(); iter.advance_to(20); @@ -2347,9 +2405,9 @@ mod tests { #[test] fn iter_advance_back_to() { let interval_store = IntervalStore(alloc::vec![ - Interval::new(0, 50), - Interval::new(500, 600), - Interval::new(800, 1000), + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), ]); let mut iter = interval_store.iter(); iter.advance_back_to(u16::MAX); diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 31bcc9f98..c930bec28 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -335,14 +335,17 @@ impl Store { let mut intervals = IntervalStore::new(); if let Some(mut start) = 
vec.as_slice().first().copied() { for (idx, &v) in vec.as_slice()[1..].iter().enumerate() { - // TODO: why are we subtracting the same number here? + // subtract current and previous values, then check if the gap is too large + // for a run if v - vec.as_slice()[idx] > 1 { - intervals - .push_interval_unchecked(Interval::new(start, vec.as_slice()[idx])); + intervals.push_interval_unchecked(Interval::new_unchecked( + start, + vec.as_slice()[idx], + )); start = v } } - intervals.push_interval_unchecked(Interval::new( + intervals.push_interval_unchecked(Interval::new_unchecked( start, *vec.as_slice().last().unwrap(), )); @@ -381,13 +384,13 @@ impl Store { // Run continues until end of this container if current == u64::MAX { - intervals.push_interval_unchecked(Interval::new(start, u16::MAX)); + intervals.push_interval_unchecked(Interval::new_unchecked(start, u16::MAX)); break; } let current_last = (!current).trailing_zeros() as u16; last = 64 * i + current_last; - intervals.push_interval_unchecked(Interval::new(start, last - 1)); + intervals.push_interval_unchecked(Interval::new_unchecked(start, last - 1)); // pad LSBs with 0s current &= current + 1; @@ -680,7 +683,7 @@ impl SubAssign<&Store> for Store { } (Array(array), Run(runs)) => { runs.iter_intervals().for_each(|iv| { - array.remove_range(iv.start..=iv.end); + array.remove_range(iv.start()..=iv.end()); }); } (this @ Run(..), Bitmap(bitmap)) => { @@ -833,7 +836,7 @@ impl PartialEq for Store { } (Run(run), Bitmap(bitmap)) | (Bitmap(bitmap), Run(run)) => { run.len() == bitmap.len() - && run.iter_intervals().all(|&iv| bitmap.contains_range(iv.start..=iv.end)) + && run.iter_intervals().all(|&iv| bitmap.contains_range(iv.start()..=iv.end())) } _ => false, } From 5b9372a25d8b42e65b160b51eaa6f879be7c6d65 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 1 Jun 2025 19:58:37 +0200 Subject: [PATCH 77/83] chore: update croaring to 2.3.1 for fuzzing --- fuzz/Cargo.lock | 32 ++++++++++++++++---------------- 1 file 
changed, 16 insertions(+), 16 deletions(-) diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 3afac7aca..a3e195558 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -13,15 +13,15 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "bytemuck" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" [[package]] name = "byteorder" @@ -31,9 +31,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cc" -version = "1.2.19" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362" +checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" dependencies = [ "jobserver", "libc", @@ -48,18 +48,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "croaring" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1583a0c6ed2e2fe1a948e23d62ca42e0f2f3b45c59276c884a947c0dab47a20d" +checksum = "0a7378e8f3ede464bd5d6dbdb1b6f2ed907c0dd27dcbe465a7991c4bb78b5ddd" dependencies = [ "croaring-sys", ] [[package]] name = "croaring-sys" -version = "4.3.1" +version = "4.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3124cf04e54f50ecc5f53874e1b1e3a803e35523221bd2864851977b48ba7d00" +checksum = "5008a00afde0b8493eae0f33975f1d0af95f2e654a7c9938c27e654c09119dcd" dependencies = [ "cc", ] @@ -77,9 +77,9 
@@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", @@ -115,9 +115,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -162,9 +162,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "syn" -version = "2.0.100" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", From 542789777efa9b13682cedf28c9e13c184f904a9 Mon Sep 17 00:00:00 2001 From: Lucas Van Laer Date: Sun, 1 Jun 2025 19:59:35 +0200 Subject: [PATCH 78/83] test: remove `dbg!` statement --- roaring/tests/serialization.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roaring/tests/serialization.rs b/roaring/tests/serialization.rs index ac862dcc5..f4adc8d7c 100644 --- a/roaring/tests/serialization.rs +++ b/roaring/tests/serialization.rs @@ -72,7 +72,7 @@ fn test_one() { fn test_array() { let original = (1000..3000).collect::(); let new = serialize_and_deserialize(&original); - assert_eq!(dbg!(original), dbg!(new)); + assert_eq!(original, new); } #[test] From 9e5cee4a4ce7cd1d530fa070ba76d26e486148b7 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Jun 2025 18:14:06 +0200 Subject: [PATCH 79/83] Bump version to 0.11.0 --- 
roaring/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roaring/Cargo.toml b/roaring/Cargo.toml index 4c16b8b6a..41843b64e 100644 --- a/roaring/Cargo.toml +++ b/roaring/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "roaring" -version = "0.10.12" +version = "0.11.0" rust-version = "1.65.0" authors = ["Wim Looman ", "Kerollmops "] description = "A better compressed bitset - pure Rust implementation" From 06230d10f087e1f445c5986bfac708acb26f51a8 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Jun 2025 18:17:37 +0200 Subject: [PATCH 80/83] Make cargo nightly happy --- roaring/src/bitmap/iter.rs | 4 ++-- roaring/src/bitmap/store/array_store/mod.rs | 2 +- roaring/src/bitmap/store/interval_store.rs | 4 ++-- roaring/src/treemap/iter.rs | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 537acc3a5..1c4b09c39 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -152,7 +152,7 @@ fn advance_back_to_impl<'a, It>( } impl Iter<'_> { - fn new(containers: &[Container]) -> Iter { + fn new(containers: &'_ [Container]) -> Iter<'_> { Iter { front: None, containers: containers.iter(), back: None } } @@ -558,7 +558,7 @@ impl RoaringBitmap { /// assert_eq!(iter.next(), Some(2)); /// assert_eq!(iter.next(), None); /// ``` - pub fn iter(&self) -> Iter { + pub fn iter(&'_ self) -> Iter<'_> { Iter::new(&self.containers) } diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 17a9e79f0..e91842597 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -279,7 +279,7 @@ impl ArrayStore { self.vec.get(n as usize).cloned() } - pub fn iter(&self) -> core::slice::Iter { + pub fn iter(&'_ self) -> core::slice::Iter<'_, u16> { self.vec.iter() } diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index 
2c13dac1d..74a902498 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -475,11 +475,11 @@ impl IntervalStore { array } - pub(crate) fn iter(&self) -> RunIterBorrowed { + pub(crate) fn iter(&'_ self) -> RunIterBorrowed<'_> { self.into_iter() } - pub(crate) fn iter_intervals(&self) -> core::slice::Iter { + pub(crate) fn iter_intervals(&'_ self) -> core::slice::Iter<'_, Interval> { self.0.iter() } } diff --git a/roaring/src/treemap/iter.rs b/roaring/src/treemap/iter.rs index 57b39b900..b11f1c3e9 100644 --- a/roaring/src/treemap/iter.rs +++ b/roaring/src/treemap/iter.rs @@ -121,7 +121,7 @@ pub struct IntoIter { } impl Iter<'_> { - fn new(map: &BTreeMap) -> Iter { + fn new(map: &'_ BTreeMap) -> Iter<'_> { let outer = BitmapIter::new(map); Iter { outer, front: None, back: None } } @@ -346,7 +346,7 @@ impl RoaringTreemap { /// assert_eq!(iter.next(), Some(2)); /// assert_eq!(iter.next(), None); /// ``` - pub fn iter(&self) -> Iter { + pub fn iter(&'_ self) -> Iter<'_> { Iter::new(&self.map) } @@ -365,7 +365,7 @@ impl RoaringTreemap { /// assert_eq!(bitmaps.next(), Some((0, &(0..6000).collect::()))); /// assert_eq!(bitmaps.next(), None); /// ``` - pub fn bitmaps(&self) -> BitmapIter { + pub fn bitmaps(&'_ self) -> BitmapIter<'_> { BitmapIter::new(&self.map) } From dec5564a7fe2cc24dde131a466096233a4fc9357 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Jun 2025 17:31:35 +0200 Subject: [PATCH 81/83] Change the API of the push method to return a Result --- roaring/src/bitmap/inherent.rs | 17 +++++++++++------ roaring/src/lib.rs | 10 ++++++++++ roaring/src/treemap/inherent.rs | 3 ++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index f5981a792..baaf834ce 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -3,7 +3,7 @@ use core::mem::size_of; use core::ops::{RangeBounds, RangeInclusive}; use 
crate::bitmap::store::BITMAP_LENGTH; -use crate::RoaringBitmap; +use crate::{IntegerTooSmall, RoaringBitmap}; use super::container::Container; use super::util; @@ -316,22 +316,27 @@ impl RoaringBitmap { /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); /// ``` #[inline] - pub fn push(&mut self, value: u32) -> bool { + pub fn push(&mut self, value: u32) -> Result<(), IntegerTooSmall> { let (key, index) = util::split(value); match self.containers.last_mut() { - Some(container) if container.key == key => container.push(index), - Some(container) if container.key > key => false, + Some(container) if container.key == key => { + if container.push(index) { + Ok(()) + } else { + Err(IntegerTooSmall) + } + } + Some(container) if container.key > key => Err(IntegerTooSmall), _otherwise => { let mut container = Container::new(key); container.push(index); self.containers.push(container); - true + Ok(()) } } } - /// /// Pushes `value` at the end of the bitmap. /// It is up to the caller to have validated index > self.max() /// diff --git a/roaring/src/lib.rs b/roaring/src/lib.rs index 1a78f8901..32ee31add 100644 --- a/roaring/src/lib.rs +++ b/roaring/src/lib.rs @@ -33,6 +33,16 @@ pub mod treemap; pub use bitmap::RoaringBitmap; pub use treemap::RoaringTreemap; +/// An error type that is returned when a push in a bitmap did not succeed. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct IntegerTooSmall; + +impl fmt::Display for IntegerTooSmall { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("inserted integer is smaller than the largest integer") + } +} + /// An error type that is returned when an iterator isn't sorted. 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct NonSortedIntegers { diff --git a/roaring/src/treemap/inherent.rs b/roaring/src/treemap/inherent.rs index 63e2cb3bb..c49196476 100644 --- a/roaring/src/treemap/inherent.rs +++ b/roaring/src/treemap/inherent.rs @@ -2,6 +2,7 @@ use alloc::collections::btree_map::{BTreeMap, Entry}; use core::iter; use core::ops::RangeBounds; +use crate::IntegerTooSmall; use crate::RoaringBitmap; use crate::RoaringTreemap; @@ -123,7 +124,7 @@ impl RoaringTreemap { /// /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); /// ``` - pub fn push(&mut self, value: u64) -> bool { + pub fn push(&mut self, value: u64) -> Result<(), IntegerTooSmall> { let (hi, lo) = util::split(value); self.map.entry(hi).or_default().push(lo) } From 86a78219fcd8dad48395d07f82549994dcb57879 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Jun 2025 17:45:37 +0200 Subject: [PATCH 82/83] Introduce a new try_push method --- roaring/src/bitmap/inherent.rs | 25 ++++++++++++++++++++++++- roaring/src/treemap/inherent.rs | 27 +++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index baaf834ce..e34f2b84b 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -316,7 +316,30 @@ impl RoaringBitmap { /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); /// ``` #[inline] - pub fn push(&mut self, value: u32) -> Result<(), IntegerTooSmall> { + #[deprecated(since = "0.11.0", note = "use `try_push` instead")] + pub fn push(&mut self, value: u32) -> bool { + self.try_push(value).is_ok() + } + + /// Pushes `value` in the bitmap only if it is greater than the current maximum value. + /// + /// Returns an error if the value is not greater than the current maximum value. 
+ /// + /// # Examples + /// + /// ```rust + /// use roaring::{RoaringBitmap, IntegerTooSmall}; + /// + /// let mut rb = RoaringBitmap::new(); + /// assert!(rb.try_push(1).is_ok()); + /// assert!(rb.try_push(3).is_ok()); + /// assert_eq!(rb.try_push(3), Err(IntegerTooSmall)); + /// assert!(rb.try_push(5).is_ok()); + /// + /// assert_eq!(rb.iter().collect::<Vec<u32>>(), vec![1, 3, 5]); + /// ``` + #[inline] + pub fn try_push(&mut self, value: u32) -> Result<(), IntegerTooSmall> { let (key, index) = util::split(value); match self.containers.last_mut() { diff --git a/roaring/src/treemap/inherent.rs b/roaring/src/treemap/inherent.rs index c49196476..8f27f9f55 100644 --- a/roaring/src/treemap/inherent.rs +++ b/roaring/src/treemap/inherent.rs @@ -124,9 +124,32 @@ impl RoaringTreemap { /// /// assert_eq!(rb.iter().collect::<Vec<u64>>(), vec![1, 3, 5]); /// ``` - pub fn push(&mut self, value: u64) -> Result<(), IntegerTooSmall> { + #[deprecated(since = "0.11.0", note = "use `try_push` instead")] + pub fn push(&mut self, value: u64) -> bool { let (hi, lo) = util::split(value); - self.map.entry(hi).or_default().push(lo) + self.map.entry(hi).or_default().try_push(lo).is_ok() + } + + /// Pushes `value` in the treemap only if it is greater than the current maximum value. + /// + /// Returns an error if the value is not greater than the current maximum value. + /// + /// # Examples + /// + /// ```rust + /// use roaring::{RoaringTreemap, IntegerTooSmall}; + /// + /// let mut rb = RoaringTreemap::new(); + /// assert!(rb.try_push(1).is_ok()); + /// assert!(rb.try_push(3).is_ok()); + /// assert_eq!(rb.try_push(3), Err(IntegerTooSmall)); + /// assert!(rb.try_push(5).is_ok()); + /// + /// assert_eq!(rb.iter().collect::<Vec<u64>>(), vec![1, 3, 5]); + /// ``` + pub fn try_push(&mut self, value: u64) -> Result<(), IntegerTooSmall> { + let (hi, lo) = util::split(value); + self.map.entry(hi).or_default().try_push(lo) } /// Pushes `value` in the treemap only if it is greater than the current maximum value.
From 5e2d93efc858524ee773f0727f7d3482fb78d841 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 17 Jun 2025 18:50:08 +0200 Subject: [PATCH 83/83] Fix documentation --- roaring/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roaring/src/lib.rs b/roaring/src/lib.rs index 32ee31add..f6c4453b1 100644 --- a/roaring/src/lib.rs +++ b/roaring/src/lib.rs @@ -33,7 +33,7 @@ pub mod treemap; pub use bitmap::RoaringBitmap; pub use treemap::RoaringTreemap; -/// An error type that is returned when a push in a bitmap did not succeed. +/// An error type that is returned when a `try_push` in a bitmap did not succeed. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct IntegerTooSmall;