diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 077dcd32b..7313a5cbe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,6 +33,9 @@ jobs: toolchain: ${{ matrix.rust }} components: rustfmt, clippy + - name: Caching + uses: Swatinem/rust-cache@v2 + - name: Check # clippy will also do a build check # so we don't need to run `cargo check` or `cargo build` @@ -68,11 +71,21 @@ jobs: features: simd env: RUSTFLAGS: "-C target-cpu=native -C opt-level=3" + ROARINGRS_BENCH_OFFLINE: "true" steps: - name: Checkout roaring-rs uses: actions/checkout@v4 + - name: Checkout benchmark datasets + uses: actions/checkout@v4 + with: + repository: "RoaringBitmap/real-roaring-datasets" + path: "benchmarks/real-roaring-datasets" + + - name: Caching + uses: Swatinem/rust-cache@v2 + - name: Initialize rust toolchain uses: dtolnay/rust-toolchain@master with: @@ -82,6 +95,10 @@ jobs: if: matrix.features == 'default' run: cargo test -p roaring --features serde + - name: Test Benches + if: matrix.rust != '1.71.1' && matrix.features == 'default' + run: cargo test -p benchmarks --benches + - name: Test no default features if: matrix.features == 'no-std' run: cargo test -p roaring --no-default-features @@ -107,43 +124,43 @@ jobs: toolchain: nightly components: miri + - name: Caching + uses: Swatinem/rust-cache@v2 + - name: Setup miri run: cargo miri setup - name: Test bit endian run: cargo miri test --target s390x-unknown-linux-gnu -p roaring --lib -- bitmap::serialization::test::test_from_lsb0_bytes - bench: + fuzz: runs-on: ubuntu-latest needs: build - strategy: - matrix: - rust: - - stable - - nightly - features: - - default - include: - - rust: nightly - features: simd env: RUSTFLAGS: "-C target-cpu=native -C opt-level=3" - ROARINGRS_BENCH_OFFLINE: "true" steps: - name: Checkout roaring-rs uses: actions/checkout@v4 - - name: Checkout benchmark datasets - uses: actions/checkout@v4 - with: - repository: "RoaringBitmap/real-roaring-datasets" - 
path: "benchmarks/real-roaring-datasets" - - name: Initialize rust toolchain uses: dtolnay/rust-toolchain@master with: - toolchain: ${{ matrix.rust }} + toolchain: nightly + + - name: Caching + uses: Swatinem/rust-cache@v2 + + - name: Install cargo fuzz + run: cargo install cargo-fuzz + + - name: Setup Cache for corpus and artifacts + uses: actions/cache@v4 + with: + key: always + path: | + fuzz/artifacts + fuzz/corpus - - name: Bench - run: cargo bench --features "${{ matrix.features }}" + - name: Run Fuzzer vs croaring for 30 minutes + run: cargo fuzz run against_croaring -s none -- -timeout=5 -max_total_time=1800 diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 000000000..1a45eee77 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock new file mode 100644 index 000000000..a3e195558 --- /dev/null +++ b/fuzz/Cargo.lock @@ -0,0 +1,196 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bytemuck" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "croaring" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7378e8f3ede464bd5d6dbdb1b6f2ed907c0dd27dcbe465a7991c4bb78b5ddd" +dependencies = [ + "croaring-sys", +] + +[[package]] +name = "croaring-sys" +version = "4.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5008a00afde0b8493eae0f33975f1d0af95f2e654a7c9938c27e654c09119dcd" +dependencies = [ + "cc", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + +[[package]] +name = "jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "libc" +version = "0.2.172" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + +[[package]] +name = "roaring" +version = "0.10.12" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "roaring-fuzz" +version = "0.0.0" +dependencies = [ + "croaring", + "libfuzzer-sys", + "roaring", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 000000000..8d9b34655 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "roaring-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = { version = "0.4.9", features = ["arbitrary-derive"] } +roaring = { path = "../roaring" } +croaring = "2.0" + +[[bin]] +name = "against_croaring" +path = "fuzz_targets/against_croaring.rs" +test = false +doc = false +bench = false + +[workspace] diff --git a/fuzz/fuzz_targets/against_croaring.rs b/fuzz/fuzz_targets/against_croaring.rs new file mode 100644 index 000000000..bdaa58524 --- /dev/null +++ b/fuzz/fuzz_targets/against_croaring.rs @@ -0,0 +1,38 @@ +#![no_main] + +mod arbitrary_ops; + +use libfuzzer_sys::arbitrary::{self, Arbitrary}; +use libfuzzer_sys::fuzz_target; + +use 
crate::arbitrary_ops::{check_equal, Operation}; + +#[derive(Arbitrary, Debug)] +struct FuzzInput<'a> { + ops: Vec, + initial_input: &'a [u8], +} + +fuzz_target!(|input: FuzzInput| { + let lhs_c = croaring::Bitmap::try_deserialize::(input.initial_input); + let lhs_r = roaring::RoaringBitmap::deserialize_from(input.initial_input).ok(); + + let (mut lhs_c, mut lhs_r) = match (lhs_c, lhs_r) { + (Some(lhs_c), Some(lhs_r)) => { + check_equal(&lhs_c, &lhs_r); + (lhs_c, lhs_r) + } + (None, None) => Default::default(), + (Some(_), None) => panic!("croaring deserialized, but roaring failed"), + (None, Some(_)) => panic!("roaring deserialized, but croaring failed"), + }; + + let mut rhs_c = croaring::Bitmap::new(); + let mut rhs_r = roaring::RoaringBitmap::new(); + + for op in input.ops { + op.apply(&mut lhs_c, &mut rhs_c, &mut lhs_r, &mut rhs_r); + } + check_equal(&lhs_c, &lhs_r); + check_equal(&rhs_c, &rhs_r); +}); diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs new file mode 100644 index 000000000..03a00321d --- /dev/null +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -0,0 +1,404 @@ +use libfuzzer_sys::arbitrary::{self, Arbitrary, Unstructured}; +use std::mem; +use std::ops::RangeInclusive; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Num(pub u32); + +pub const MAX_NUM: u32 = 0x1_0000 * 4; + +impl<'a> Arbitrary<'a> for Num { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + Ok(Self(u.int_in_range(0..=(MAX_NUM - 1))?)) + } +} + +#[derive(Arbitrary, Debug)] +pub enum Operation { + Binary(BitmapBinaryOperation), + MutateLhs(MutableBitmapOperation), + Read(ReadBitmapOperation), + SwapSides, +} + +impl Operation { + pub fn apply( + &self, + lhs_c: &mut croaring::Bitmap, + rhs_c: &mut croaring::Bitmap, + lhs_r: &mut roaring::RoaringBitmap, + rhs_r: &mut roaring::RoaringBitmap, + ) { + match self { + Operation::Binary(op) => op.apply(lhs_c, rhs_c, lhs_r, rhs_r), + 
Operation::MutateLhs(op) => op.apply(lhs_c, lhs_r), + Operation::Read(op) => op.apply(lhs_c, lhs_r), + Operation::SwapSides => { + mem::swap(lhs_c, rhs_c); + mem::swap(lhs_r, rhs_r); + } + } + } +} + +#[derive(Arbitrary, Debug)] +pub enum MutableBitmapOperation { + Insert(Num), + InsertRange(RangeInclusive), + Push(Num), + Remove(Num), + RemoveRange(RangeInclusive), + Clear, + Extend(Vec), + SwapSerialization, + Optimize, + RemoveRunCompression, + // Probably turn it into a bitmap + MakeBitmap { key: u16 }, + // Probably turn it into a Range + MakeRange { key: u16 }, +} + +#[derive(Arbitrary, Debug, Copy, Clone)] +pub enum RangeOperations { + Optimized, + Removed, +} + +#[derive(Arbitrary, Debug)] +pub enum ReadBitmapOperation { + ContainsRange(RangeInclusive), + Contains(Num), + RangeCardinality(RangeInclusive), + Cardinality, + IsEmpty, + IsFull, + Minimum, + Maximum, + Rank(Num), + Select(Num), + Statistics(RangeOperations), + Clone, + Debug, + SerializedSize(RangeOperations), + Serialize(RangeOperations), +} + +#[derive(Arbitrary, Debug)] +pub enum BitmapBinaryOperation { + Eq, + IsSubset, + And, + Or, + Xor, + AndNot, +} + +impl ReadBitmapOperation { + pub fn apply(&self, x: &mut croaring::Bitmap, y: &mut roaring::RoaringBitmap) { + match *self { + ReadBitmapOperation::ContainsRange(ref range) => { + let range = range.start().0..=range.end().0; + let expected = x.contains_range(range.clone()); + let actual = y.contains_range(range); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Contains(Num(n)) => { + let expected = x.contains(n); + let actual = y.contains(n); + assert_eq!(expected, actual); + } + ReadBitmapOperation::RangeCardinality(ref range) => { + let range = range.start().0..=range.end().0; + let expected = x.range_cardinality(range.clone()); + let actual = y.range_cardinality(range); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Cardinality => { + let expected = x.cardinality(); + let actual = y.len(); + assert_eq!(expected, 
actual); + } + ReadBitmapOperation::IsEmpty => { + let expected = x.is_empty(); + let actual = y.is_empty(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::IsFull => { + let expected = x.contains_range(..); + let actual = y.is_full(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Minimum => { + let expected = x.minimum(); + let actual = y.min(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Maximum => { + let expected = x.maximum(); + let actual = y.max(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Rank(Num(n)) => { + let expected = x.rank(n); + let actual = y.rank(n); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Select(Num(n)) => { + let expected = x.select(n); + let actual = y.select(n); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Statistics(ranges) => { + match ranges { + RangeOperations::Optimized => { + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); + } + RangeOperations::Removed => { + x.remove_run_compression(); + y.remove_run_compression(); + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); + } + } + let expected = x.statistics(); + let actual = y.statistics(); + // Convert to the same statistics struct + let expected = { + let mut v = actual; + v.n_containers = expected.n_containers; + v.n_array_containers = expected.n_array_containers; + v.n_run_containers = expected.n_run_containers; + v.n_bitset_containers = expected.n_bitset_containers; + v.n_values_array_containers = expected.n_values_array_containers; + v.n_values_run_containers = expected.n_values_run_containers; + v.n_values_bitset_containers = expected.n_values_bitset_containers.into(); + // The n_bytes_* fields are not directly comparable: + // they are based on the number of bytes of _capacity_ of the + // containers, which depends on the allocation strategy. 
+ // v.n_bytes_array_containers = expected.n_bytes_array_containers.into(); + // v.n_bytes_run_containers = expected.n_bytes_run_containers.into(); + // v.n_bytes_bitset_containers = expected.n_bytes_bitset_containers.into(); + v.max_value = x.maximum(); + v.min_value = x.minimum(); + v.cardinality = x.cardinality(); + v + }; + assert_eq!(expected, actual); + } + ReadBitmapOperation::Clone => { + assert_eq!(*y, y.clone()); + } + ReadBitmapOperation::Debug => { + use std::io::Write; + write!(std::io::sink(), "{:?}", y).unwrap(); + } + ReadBitmapOperation::SerializedSize(ranges) => { + match ranges { + RangeOperations::Optimized => { + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); + } + RangeOperations::Removed => { + x.remove_run_compression(); + y.remove_run_compression(); + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); + } + } + let expected = x.get_serialized_size_in_bytes::(); + let actual = y.serialized_size(); + assert_eq!(expected, actual); + } + ReadBitmapOperation::Serialize(ranges) => { + match ranges { + RangeOperations::Optimized => { + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); + } + RangeOperations::Removed => { + x.remove_run_compression(); + y.remove_run_compression(); + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); + } + } + let expected = x.serialize::(); + let mut actual = Vec::new(); + y.serialize_into(&mut actual).unwrap(); + assert_eq!(expected, actual); + } + } + } +} + +impl MutableBitmapOperation { + pub fn apply(&self, x: &mut croaring::Bitmap, y: &mut roaring::RoaringBitmap) { + match *self { + MutableBitmapOperation::Insert(Num(n)) => { + let expected = x.add_checked(n); + let actual = y.insert(n); + assert_eq!(expected, actual); + } + MutableBitmapOperation::InsertRange(ref range) => { + let range = 
range.start().0..=range.end().0; + let expected_added = u64::try_from(range.clone().count()).unwrap() + - x.range_cardinality(range.clone()); + x.add_range(range.clone()); + assert_eq!(expected_added, y.insert_range(range)); + } + MutableBitmapOperation::Push(Num(n)) => { + let should_push = y.max().is_none_or(|max| n > max); + if should_push { + x.add(n); + } + assert_eq!(should_push, y.push(n)); + } + MutableBitmapOperation::Remove(Num(n)) => { + let expected = x.remove_checked(n); + let actual = y.remove(n); + assert_eq!(expected, actual); + } + MutableBitmapOperation::RemoveRange(ref range) => { + let range = range.start().0..=range.end().0; + let expected_removed = x.range_cardinality(range.clone()); + x.remove_range(range.clone()); + assert_eq!(expected_removed, y.remove_range(range)); + } + MutableBitmapOperation::Clear => { + x.clear(); + y.clear(); + } + MutableBitmapOperation::Optimize => { + x.remove_run_compression(); + y.remove_run_compression(); + assert_eq!(x.run_optimize(), y.optimize()); + } + MutableBitmapOperation::RemoveRunCompression => { + x.remove_run_compression(); + y.remove_run_compression(); + x.run_optimize(); + y.optimize(); + assert_eq!(x.remove_run_compression(), y.remove_run_compression()); + } + MutableBitmapOperation::Extend(ref items) => { + // Safety - Num is repr(transparent) over u32 + let items: &[u32] = unsafe { mem::transmute(&items[..]) }; + x.add_many(items); + y.extend(items); + } + MutableBitmapOperation::SwapSerialization => { + let x_serialized = x.serialize::(); + let mut y_serialized = Vec::new(); + y.serialize_into(&mut y_serialized).unwrap(); + + let new_x = + croaring::Bitmap::try_deserialize::(&y_serialized).unwrap(); + let new_y = roaring::RoaringBitmap::deserialize_from(&x_serialized[..]).unwrap(); + assert_eq!(new_x, *x); + assert_eq!(new_y, *y); + *x = new_x; + *y = new_y; + } + MutableBitmapOperation::MakeBitmap { key } => { + let key = u32::from(key); + let start = key * 0x1_0000; + let end = start + 9 * 
1024; + for i in (start..end).step_by(2) { + x.add(i); + y.insert(i); + } + } + MutableBitmapOperation::MakeRange { key } => { + let key = u32::from(key); + let start = key * 0x1_0000; + let end = start + 9 * 1024; + x.add_range(start..=end); + y.insert_range(start..=end); + } + } + } +} + +impl BitmapBinaryOperation { + pub fn apply( + &self, + lhs_c: &mut croaring::Bitmap, + rhs_c: &croaring::Bitmap, + lhs_r: &mut roaring::RoaringBitmap, + rhs_r: &roaring::RoaringBitmap, + ) { + match *self { + BitmapBinaryOperation::Eq => { + let expected = lhs_c == rhs_c; + let actual = lhs_r == rhs_r; + assert_eq!(expected, actual); + } + BitmapBinaryOperation::IsSubset => { + let expected = lhs_c.is_subset(rhs_c); + let actual = lhs_r.is_subset(rhs_r); + assert_eq!(expected, actual); + } + BitmapBinaryOperation::And => { + let expected_len = lhs_r.intersection_len(rhs_r); + let actual_len = lhs_c.and_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r &= rhs_r; + *lhs_c &= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + BitmapBinaryOperation::Or => { + let expected_len = lhs_r.union_len(rhs_r); + let actual_len = lhs_c.or_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r |= rhs_r; + *lhs_c |= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + BitmapBinaryOperation::Xor => { + let expected_len = lhs_r.symmetric_difference_len(rhs_r); + let actual_len = lhs_c.xor_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r ^= rhs_r; + *lhs_c ^= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + BitmapBinaryOperation::AndNot => { + let expected_len = lhs_r.difference_len(rhs_r); + let actual_len = lhs_c.andnot_cardinality(rhs_c); + assert_eq!(expected_len, actual_len); + + *lhs_r -= rhs_r; + *lhs_c -= rhs_c; + assert_eq!(lhs_r.len(), expected_len); + } + } + } +} + +pub(crate) fn check_equal(c: &croaring::Bitmap, r: &roaring::RoaringBitmap) { + let mut lhs = c.iter(); + let mut rhs = r.iter(); + + loop { + match 
(lhs.next(), rhs.next()) { + (Some(l), Some(r)) => { + assert_eq!(l, r); + } + (None, None) => break, + (Some(n), None) => panic!("croaring has more elements: {n}"), + (None, Some(n)) => panic!("roaring has more elements: {n}"), + } + } +} diff --git a/fuzz/rust-toolchain.toml b/fuzz/rust-toolchain.toml new file mode 100644 index 000000000..5d56faf9a --- /dev/null +++ b/fuzz/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/roaring/Cargo.toml b/roaring/Cargo.toml index 4c16b8b6a..41843b64e 100644 --- a/roaring/Cargo.toml +++ b/roaring/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "roaring" -version = "0.10.12" +version = "0.11.0" rust-version = "1.65.0" authors = ["Wim Looman ", "Kerollmops "] description = "A better compressed bitset - pure Rust implementation" diff --git a/roaring/LICENSE-APACHE b/roaring/LICENSE-APACHE new file mode 120000 index 000000000..965b606f3 --- /dev/null +++ b/roaring/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/roaring/LICENSE-MIT b/roaring/LICENSE-MIT new file mode 120000 index 000000000..76219eb72 --- /dev/null +++ b/roaring/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/roaring/src/bitmap/arbitrary.rs b/roaring/src/bitmap/arbitrary.rs index 11de91ff6..88eb7d520 100644 --- a/roaring/src/bitmap/arbitrary.rs +++ b/roaring/src/bitmap/arbitrary.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod test { - use crate::bitmap::container::Container; - use crate::bitmap::store::{ArrayStore, BitmapStore, Store}; + use crate::bitmap::container::{Container, RUN_MAX_SIZE}; + use crate::bitmap::store::{ArrayStore, BitmapStore, IntervalStore, Store}; use crate::RoaringBitmap; use core::fmt::{Debug, Formatter}; use proptest::bits::{BitSetLike, SampledBitSetStrategy}; @@ -68,6 +68,47 @@ mod test { } } + impl BitSetLike for IntervalStore { + fn new_bitset(max: usize) -> Self { + assert!(max <= IntervalStore::MAX + 1); + IntervalStore::new() + } + + fn 
len(&self) -> usize { + IntervalStore::MAX + 1 + } + + fn test(&self, bit: usize) -> bool { + assert!(bit <= IntervalStore::MAX); + self.contains(bit as u16) + } + + fn set(&mut self, bit: usize) { + assert!(bit <= IntervalStore::MAX); + self.insert(bit as u16); + } + + fn clear(&mut self, bit: usize) { + assert!(bit <= IntervalStore::MAX); + self.remove(bit as u16); + } + + fn count(&self) -> usize { + self.len() as usize + } + } + + impl IntervalStore { + const MAX: usize = u16::MAX as usize; + + pub fn sampled( + size: impl Into, + bits: impl Into, + ) -> SampledBitSetStrategy { + SampledBitSetStrategy::new(size.into(), bits.into()) + } + } + impl Debug for ArrayStore { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { if self.len() < 16 { @@ -130,6 +171,7 @@ mod test { match self { Store::Array(a) => write!(f, "Store({a:?})"), Store::Bitmap(b) => write!(f, "Store({b:?})"), + Store::Run(c) => write!(f, "Store({c:?})"), } } } @@ -140,6 +182,8 @@ mod test { ArrayStore::sampled(1..=4096, ..=u16::MAX as usize).prop_map(Store::Array), BitmapStore::sampled(4097..u16::MAX as usize, ..=u16::MAX as usize) .prop_map(Store::Bitmap), + IntervalStore::sampled(1..=RUN_MAX_SIZE as usize, ..=u16::MAX as usize) + .prop_map(Store::Run), ] } } diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index a02a85551..4ed11cdd0 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -3,10 +3,12 @@ use core::ops::{ BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, }; -use super::store::{self, Store}; +use super::store::{self, ArrayStore, Interval, IntervalStore, Store, BITMAP_BYTES}; use super::util; pub const ARRAY_LIMIT: u64 = 4096; +#[cfg(test)] +pub const RUN_MAX_SIZE: u64 = 2048; #[cfg(not(feature = "std"))] use alloc::vec::Vec; @@ -28,6 +30,22 @@ impl Container { Container { key, store: Store::new() } } + pub fn new_with_range(key: u16, range: RangeInclusive) -> Container 
{ + if range.len() <= 2 { + let mut array = ArrayStore::new(); + array.insert_range(range); + Self { key, store: Store::Array(array) } + } else { + Self { + key, + store: Store::Run(IntervalStore::new_with_range( + // This is ok, since range must be non empty + Interval::new_unchecked(*range.start(), *range.end()), + )), + } + } + } + pub fn full(key: u16) -> Container { Container { key, store: Store::full() } } @@ -57,15 +75,43 @@ impl Container { } pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { - // If inserting the range will make this a bitmap by itself, do it now - if range.len() as u64 > ARRAY_LIMIT { - if let Store::Array(arr) = &self.store { - self.store = Store::Bitmap(arr.to_bitmap_store()); + if range.is_empty() { + return 0; + } + match &self.store { + Store::Bitmap(bitmap) => { + let added_amount = range.len() as u64 + - bitmap.intersection_len_interval(&Interval::new_unchecked( + *range.start(), + *range.end(), + )); + let union_cardinality = bitmap.len() + added_amount; + if union_cardinality == 1 << 16 { + self.store = Store::Run(IntervalStore::full()); + added_amount + } else { + self.store.insert_range(range) + } } + Store::Array(array) => { + let added_amount = range.len() as u64 + - array.intersection_len_interval(&Interval::new_unchecked( + *range.start(), + *range.end(), + )); + let union_cardinality = array.len() + added_amount; + if union_cardinality == 1 << 16 { + self.store = Store::Run(IntervalStore::full()); + added_amount + } else if union_cardinality <= ARRAY_LIMIT { + self.store.insert_range(range) + } else { + self.store = self.store.to_bitmap(); + self.store.insert_range(range) + } + } + Store::Run(_) => self.store.insert_range(range), } - let inserted = self.store.insert_range(range); - self.ensure_correct_store(); - inserted } /// Pushes `index` at the end of the container only if `index` is the new max. 
@@ -119,6 +165,7 @@ impl Container { } } Store::Array(_) => self.store.remove_smallest(n), + Store::Run(_) => self.store.remove_smallest(n), }; } @@ -134,6 +181,7 @@ impl Container { } } Store::Array(_) => self.store.remove_biggest(n), + Store::Run(_) => self.store.remove_biggest(n), }; } @@ -174,19 +222,76 @@ impl Container { self.store.rank(index) } - pub(crate) fn ensure_correct_store(&mut self) { - match &self.store { - Store::Bitmap(ref bits) => { - if bits.len() <= ARRAY_LIMIT { - self.store = Store::Array(bits.to_array_store()) + pub(crate) fn ensure_correct_store(&mut self) -> bool { + let new_store = match &self.store { + Store::Bitmap(ref bits) if bits.len() <= ARRAY_LIMIT => { + Store::Array(bits.to_array_store()).into() + } + Store::Array(ref vec) if vec.len() > ARRAY_LIMIT => { + Store::Bitmap(vec.to_bitmap_store()).into() + } + _ => None, + }; + if let Some(new_store) = new_store { + self.store = new_store; + true + } else { + false + } + } + + pub fn optimize(&mut self) -> bool { + match &mut self.store { + Store::Bitmap(_) => { + let num_runs = self.store.count_runs(); + let size_as_run = IntervalStore::serialized_byte_size(num_runs); + if BITMAP_BYTES <= size_as_run { + return false; } + self.store = self.store.to_run(); + true } - Store::Array(ref vec) => { - if vec.len() > ARRAY_LIMIT { - self.store = Store::Bitmap(vec.to_bitmap_store()) + Store::Array(array) => { + let size_as_array = array.byte_size(); + let num_runs = self.store.count_runs(); + let size_as_run = IntervalStore::serialized_byte_size(num_runs); + if size_as_array <= size_as_run { + return false; } + self.store = self.store.to_run(); + true } - }; + Store::Run(runs) => { + let size_as_run = runs.byte_size(); + let card = runs.len(); + let size_as_array = ArrayStore::serialized_byte_size(card); + let min_size_non_run = size_as_array.min(BITMAP_BYTES); + if size_as_run <= min_size_non_run { + return false; + } + if card <= ARRAY_LIMIT { + self.store = Store::Array(runs.to_array()); 
+ return true; + } + self.store = Store::Bitmap(runs.to_bitmap()); + true + } + } + } + + pub fn remove_run_compression(&mut self) -> bool { + match &mut self.store { + Store::Bitmap(_) | Store::Array(_) => false, + Store::Run(runs) => { + let card = runs.len(); + if card <= ARRAY_LIMIT { + self.store = Store::Array(runs.to_array()); + } else { + self.store = Store::Bitmap(runs.to_bitmap()); + } + true + } + } } } diff --git a/roaring/src/bitmap/fmt.rs b/roaring/src/bitmap/fmt.rs index 702ea998f..af767bd67 100644 --- a/roaring/src/bitmap/fmt.rs +++ b/roaring/src/bitmap/fmt.rs @@ -12,10 +12,11 @@ impl fmt::Debug for RoaringBitmap { } else { write!( f, - "RoaringBitmap<{:?} values between {:?} and {:?}>", + "RoaringBitmap<{:?} values between {:?} and {:?} in {:?} containers>", self.len(), self.min().unwrap(), - self.max().unwrap() + self.max().unwrap(), + self.containers.len(), ) } } diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index 470bf7f97..e34f2b84b 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -1,9 +1,9 @@ use core::cmp::Ordering; use core::mem::size_of; -use core::ops::RangeBounds; +use core::ops::{RangeBounds, RangeInclusive}; use crate::bitmap::store::BITMAP_LENGTH; -use crate::RoaringBitmap; +use crate::{IntegerTooSmall, RoaringBitmap}; use super::container::Container; use super::util; @@ -211,6 +211,31 @@ impl RoaringBitmap { } } + /// Searches and then modifies a specific container with `M` by the given key. + /// Creates a new container using `B` if it doesn't exist. + /// + /// Returns `R` based on `M` or `B`. 
+ #[inline] + pub(crate) fn mod_or_build_container_by_key< + R, + M: FnMut(&mut Container) -> R, + B: FnMut(u16) -> (Container, R), + >( + &mut self, + key: u16, + mut modifier: M, + mut builder: B, + ) -> R { + match self.containers.binary_search_by_key(&key, |c| c.key) { + Ok(loc) => modifier(&mut self.containers[loc]), + Err(loc) => { + let build_value = builder(key); + self.containers.insert(loc, build_value.0); + build_value.1 + } + } + } + /// Inserts a range of values. /// Returns the number of inserted values. /// @@ -237,14 +262,19 @@ impl RoaringBitmap { let (start_container_key, start_index) = util::split(start); let (end_container_key, end_index) = util::split(end); - - // Find the container index for start_container_key - let first_index = self.find_container_by_key(start_container_key); + let modify_container_range = + |bitmap: &mut Self, container_key: u16, range: RangeInclusive| { + bitmap.mod_or_build_container_by_key( + container_key, + |container| container.insert_range(range.clone()), + |key| (Container::new_with_range(key, range.clone()), range.len() as u64), + ) + }; // If the end range value is in the same container, just call into // the one container. if start_container_key == end_container_key { - return self.containers[first_index].insert_range(start_index..=end_index); + return modify_container_range(self, start_container_key, start_index..=end_index); } // For the first container, insert start_index..=u16::MAX, with @@ -256,19 +286,14 @@ impl RoaringBitmap { let mut inserted = 0; for i in start_container_key..end_container_key { - let index = self.find_container_by_key(i); - - // Insert the range subset for this container - inserted += self.containers[index].insert_range(low..=u16::MAX); + inserted += modify_container_range(self, i, low..=u16::MAX); // After the first container, always fill the containers. 
low = 0; } // Handle the last container - let last_index = self.find_container_by_key(end_container_key); - - inserted += self.containers[last_index].insert_range(0..=end_index); + inserted += modify_container_range(self, end_container_key, 0..=end_index); inserted } @@ -291,22 +316,50 @@ impl RoaringBitmap { /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); /// ``` #[inline] + #[deprecated(since = "0.11.0", note = "use `try_push` instead")] pub fn push(&mut self, value: u32) -> bool { + self.try_push(value).is_ok() + } + + /// Pushes `value` in the bitmap only if it is greater than the current maximum value. + /// + /// Returns an error if the value is not greater than the current maximum value. + /// + /// # Examples + /// + /// ```rust + /// use roaring::{RoaringBitmap, IntegerTooSmall}; + /// + /// let mut rb = RoaringBitmap::new(); + /// assert!(rb.try_push(1).is_ok()); + /// assert!(rb.try_push(3).is_ok()); + /// assert_eq!(rb.try_push(3), Err(IntegerTooSmall)); + /// assert!(rb.try_push(5).is_ok()); + /// + /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); + /// ``` + #[inline] + pub fn try_push(&mut self, value: u32) -> Result<(), IntegerTooSmall> { let (key, index) = util::split(value); match self.containers.last_mut() { - Some(container) if container.key == key => container.push(index), - Some(container) if container.key > key => false, + Some(container) if container.key == key => { + if container.push(index) { + Ok(()) + } else { + Err(IntegerTooSmall) + } + } + Some(container) if container.key > key => Err(IntegerTooSmall), _otherwise => { let mut container = Container::new(key); container.push(index); self.containers.push(container); - true + Ok(()) } } } - /// /// Pushes `value` at the end of the bitmap. /// It is up to the caller to have validated index > self.max() /// @@ -806,6 +859,46 @@ impl RoaringBitmap { self.containers.clear(); } } + + /// Optimizes the container storage for this bitmap. 
+ /// Returns true if the container storage was modified, false if not. + /// + /// # Examples + /// + /// ``` + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::from_iter(1000..100000); + /// rb.optimize(); + /// ``` + pub fn optimize(&mut self) -> bool { + let mut changed = false; + for container in &mut self.containers { + changed |= container.optimize() + } + changed + } + + /// Removes run-length encoding even when it is more space efficient. + /// + /// Returns true if the container storage was modified, false if not. + /// + /// # Examples + /// + /// ``` + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::from_iter(0..=10000); + /// rb.optimize(); + /// assert!(rb.remove_run_compression()); + /// ``` + pub fn remove_run_compression(&mut self) -> bool { + let mut changed = false; + for container in &mut self.containers { + changed |= container.remove_run_compression() + } + changed + } } impl Default for RoaringBitmap { diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 537acc3a5..1c4b09c39 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -152,7 +152,7 @@ fn advance_back_to_impl<'a, It>( } impl Iter<'_> { - fn new(containers: &[Container]) -> Iter { + fn new(containers: &'_ [Container]) -> Iter<'_> { Iter { front: None, containers: containers.iter(), back: None } } @@ -558,7 +558,7 @@ impl RoaringBitmap { /// assert_eq!(iter.next(), Some(2)); /// assert_eq!(iter.next(), None); /// ``` - pub fn iter(&self) -> Iter { + pub fn iter(&'_ self) -> Iter<'_> { Iter::new(&self.containers) } diff --git a/roaring/src/bitmap/multiops.rs b/roaring/src/bitmap/multiops.rs index f6bdec92e..4ba588aa6 100644 --- a/roaring/src/bitmap/multiops.rs +++ b/roaring/src/bitmap/multiops.rs @@ -418,6 +418,15 @@ fn merge_container_ref<'a>( // If it was borrowed it will clone-on-write op(&mut lhs.to_mut().store, &rhs.store); } + (Store::Run(..), Store::Run(..)) => { + op(&mut 
lhs.to_mut().store, &rhs.store); + } + (Store::Run(..), _) => { + op(&mut lhs.to_mut().store, &rhs.store); + } + (Store::Array(..), Store::Run(..)) => { + op(&mut lhs.to_mut().store, &rhs.store); + } }; } } diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs index fa90cd131..e848e2c61 100644 --- a/roaring/src/bitmap/serialization.rs +++ b/roaring/src/bitmap/serialization.rs @@ -1,18 +1,24 @@ use crate::bitmap::container::{Container, ARRAY_LIMIT}; -use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH}; +use crate::bitmap::store::{ + ArrayStore, BitmapStore, Interval, Store, BITMAP_BYTES, BITMAP_LENGTH, RUN_ELEMENT_BYTES, + RUN_NUM_BYTES, +}; use crate::RoaringBitmap; use bytemuck::cast_slice_mut; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use core::convert::Infallible; -use core::ops::RangeInclusive; use std::error::Error; use std::io; +use super::store::IntervalStore; + pub(crate) const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; pub(crate) const SERIAL_COOKIE: u16 = 12347; pub(crate) const NO_OFFSET_THRESHOLD: usize = 4; // Sizes of header structures +pub(crate) const COOKIE_BYTES: usize = 4; +pub(crate) const SIZE_BYTES: usize = 4; pub(crate) const DESCRIPTION_BYTES: usize = 4; pub(crate) const OFFSET_BYTES: usize = 4; @@ -33,17 +39,23 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialized_size(&self) -> usize { + let mut has_run_containers = false; + let size = self.containers.len(); let container_sizes: usize = self .containers .iter() .map(|container| match container.store { - Store::Array(ref values) => 8 + values.len() as usize * 2, - Store::Bitmap(..) => 8 + 8 * 1024, + Store::Array(ref values) => values.byte_size(), + Store::Bitmap(..) 
=> BITMAP_BYTES, + Store::Run(ref intervals) => { + has_run_containers = true; + intervals.byte_size() + } }) .sum(); // header + container sizes - 8 + container_sizes + header_size(size, has_run_containers) + container_sizes } /// Serialize this bitmap into [the standard Roaring on-disk format][format]. @@ -64,23 +76,52 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialize_into(&self, mut writer: W) -> io::Result<()> { - writer.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; - writer.write_u32::(self.containers.len() as u32)?; + let has_run_containers = self.containers.iter().any(|c| matches!(c.store, Store::Run(_))); + let size = self.containers.len(); + + // Depending on if run containers are present or not write the appropriate header + if has_run_containers { + // The new format stores the container count in the most significant bits of the header + let cookie = SERIAL_COOKIE as u32 | ((size as u32 - 1) << 16); + writer.write_u32::(cookie)?; + // It is then followed by a bitset indicating which containers are run containers + let run_container_bitmap_size = (size + 7) / 8; + let mut run_container_bitmap = vec![0; run_container_bitmap_size]; + for (i, container) in self.containers.iter().enumerate() { + if let Store::Run(_) = container.store { + run_container_bitmap[i / 8] |= 1 << (i % 8); + } + } + writer.write_all(&run_container_bitmap)?; + } else { + // Write old format, cookie followed by container count + writer.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; + writer.write_u32::(size as u32)?; + } + // Write the container descriptions for container in &self.containers { writer.write_u16::(container.key)?; writer.write_u16::((container.len() - 1) as u16)?; } - let mut offset = 8 + 8 * self.containers.len() as u32; - for container in &self.containers { - writer.write_u32::(offset)?; - match container.store { - Store::Array(ref values) => { - offset += values.len() as u32 * 2; - } - Store::Bitmap(..) 
=> { - offset += 8 * 1024; + let mut offset = header_size(size, has_run_containers) as u32; + let has_offsets = if has_run_containers { size >= OFFSET_BYTES } else { true }; + if has_offsets { + for container in &self.containers { + writer.write_u32::(offset)?; + match container.store { + Store::Array(ref values) => { + offset += values.len() as u32 * 2; + } + Store::Bitmap(..) => { + offset += 8 * 1024; + } + Store::Run(ref intervals) => { + offset += (RUN_NUM_BYTES + + (intervals.run_amount() as usize * RUN_ELEMENT_BYTES)) + as u32; + } } } } @@ -97,6 +138,13 @@ impl RoaringBitmap { writer.write_u64::(value)?; } } + Store::Run(ref intervals) => { + writer.write_u16::(intervals.run_amount() as u16)?; + for iv in intervals.iter_intervals() { + writer.write_u16::(iv.start())?; + writer.write_u16::(iv.end() - iv.start())?; + } + } } } @@ -205,9 +253,18 @@ impl RoaringBitmap { let mut containers = Vec::with_capacity(size); + let mut last_key = None::; // Read each container for i in 0..size { let key = description_bytes.read_u16::()?; + if let Some(last_key) = last_key.replace(key) { + if key <= last_key { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "container keys are not sorted", + )); + } + } let cardinality = u64::from(description_bytes.read_u16::()?) 
+ 1; // If the run container bitmap is present, check if this container is a run container @@ -216,6 +273,12 @@ impl RoaringBitmap { let store = if is_run_container { let runs = reader.read_u16::()?; + if runs == 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "run container with zero runs", + )); + } let mut intervals = vec![[0, 0]; runs as usize]; reader.read_exact(cast_slice_mut(&mut intervals))?; intervals.iter_mut().for_each(|[s, len]| { @@ -223,14 +286,23 @@ impl RoaringBitmap { *len = u16::from_le(*len); }); - let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum(); - let mut store = Store::with_capacity(cardinality); - intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> { - let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; - store.insert_range(RangeInclusive::new(s, end)); - Ok(()) - })?; - store + let mut last_end = None::; + let store = IntervalStore::from_vec_unchecked( + intervals + .into_iter() + .map(|[s, len]| -> Result { + let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?; + if let Some(last_end) = last_end.replace(end) { + if s <= last_end.saturating_add(1) { + // Range overlaps or would be contiguous with the previous range + return Err(io::ErrorKind::InvalidData); + } + } + Ok(Interval::new_unchecked(s, end)) + }) + .collect::>()?, + ); + Store::Run(store) } else if cardinality <= ARRAY_LIMIT { let mut values = vec![0; cardinality as usize]; reader.read_exact(cast_slice_mut(&mut values))?; @@ -253,6 +325,24 @@ impl RoaringBitmap { } } +fn header_size(size: usize, has_run_containers: bool) -> usize { + if has_run_containers { + // New format encodes the size (number of containers) into the 4 byte cookie + // Additionally a bitmap is included marking which containers are run containers + let run_container_bitmap_size = (size + 7) / 8; + // New format conditionally includes offsets if there are 4 or more containers + if size >= NO_OFFSET_THRESHOLD { + COOKIE_BYTES + 
((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + run_container_bitmap_size + } else { + COOKIE_BYTES + (DESCRIPTION_BYTES * size) + run_container_bitmap_size + } + } else { + // Old format encodes cookie followed by container count + // It also always includes the offsets + COOKIE_BYTES + SIZE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + } +} + #[cfg(test)] mod test { use crate::{bitmap::store::BITMAP_LENGTH, RoaringBitmap}; diff --git a/roaring/src/bitmap/statistics.rs b/roaring/src/bitmap/statistics.rs index d3aea3827..2869c42fd 100644 --- a/roaring/src/bitmap/statistics.rs +++ b/roaring/src/bitmap/statistics.rs @@ -64,10 +64,13 @@ impl RoaringBitmap { let mut n_containers = 0; let mut n_array_containers = 0; let mut n_bitset_containers = 0; + let mut n_run_containers = 0; let mut n_values_array_containers = 0; let mut n_values_bitset_containers = 0; + let mut n_values_run_containers = 0; let mut n_bytes_array_containers = 0; let mut n_bytes_bitset_containers = 0; + let mut n_bytes_run_containers = 0; let mut cardinality = 0; for Container { key: _, store } in &self.containers { @@ -84,6 +87,12 @@ impl RoaringBitmap { n_bytes_bitset_containers += bitmap.capacity() as u64; n_bitset_containers += 1; } + Store::Run(runs) => { + cardinality += runs.len(); + n_values_run_containers += runs.len() as u32; + n_bytes_run_containers += runs.byte_size() as u64; + n_run_containers += 1; + } } n_containers += 1; } @@ -91,13 +100,13 @@ impl RoaringBitmap { Statistics { n_containers, n_array_containers, - n_run_containers: 0, + n_run_containers, n_bitset_containers, n_values_array_containers, - n_values_run_containers: 0, + n_values_run_containers, n_values_bitset_containers, n_bytes_array_containers, - n_bytes_run_containers: 0, + n_bytes_run_containers, n_bytes_bitset_containers, max_value: self.max(), min_value: self.min(), diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 26c3a7e5a..e91842597 100644 --- 
a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -16,6 +16,9 @@ use alloc::vec::Vec; use alloc::boxed::Box; use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; +use super::Interval; + +pub(crate) const ARRAY_ELEMENT_BYTES: usize = 2; #[derive(Clone, Eq, PartialEq)] pub(crate) struct ArrayStore { @@ -27,6 +30,14 @@ impl ArrayStore { ArrayStore { vec: vec![] } } + pub fn serialized_byte_size(cardinality: u64) -> usize { + cardinality as usize * ARRAY_ELEMENT_BYTES + } + + pub fn byte_size(&self) -> usize { + Self::serialized_byte_size(self.len()) + } + #[cfg(feature = "std")] pub fn with_capacity(capacity: usize) -> ArrayStore { ArrayStore { vec: Vec::with_capacity(capacity) } @@ -221,6 +232,15 @@ impl ArrayStore { visitor.into_inner() } + pub fn intersection_len_interval(&self, interval: &Interval) -> u64 { + if interval.is_full() { + return self.len(); + } + let start_id = self.vec.partition_point(|&f| f < interval.start()); + let end_id = self.vec.partition_point(|&f| f <= interval.end()); + (end_id.saturating_sub(start_id)) as u64 + } + pub fn to_bitmap_store(&self) -> BitmapStore { let mut bits = Box::new([0; BITMAP_LENGTH]); let len = self.len(); @@ -259,7 +279,7 @@ impl ArrayStore { self.vec.get(n as usize).cloned() } - pub fn iter(&self) -> core::slice::Iter { + pub fn iter(&'_ self) -> core::slice::Iter<'_, u16> { self.vec.iter() } @@ -460,6 +480,7 @@ mod tests { match s { Store::Array(vec) => vec.vec, Store::Bitmap(bits) => bits.to_array_store().vec, + Store::Run(runs) => runs.iter().collect(), } } @@ -467,6 +488,7 @@ mod tests { match s { Store::Array(vec) => Store::Bitmap(vec.to_bitmap_store()), Store::Bitmap(..) 
=> s, + Store::Run(runs) => Store::Bitmap(runs.to_bitmap()), } } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index a7a954b40..26a3265c3 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -4,7 +4,7 @@ use core::fmt::{Display, Formatter}; use core::mem::size_of; use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign}; -use super::ArrayStore; +use super::{ArrayStore, Interval}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; @@ -12,6 +12,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; pub const BITMAP_LENGTH: usize = 1024; +pub const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; #[derive(Clone, Eq, PartialEq)] pub struct BitmapStore { @@ -24,10 +25,6 @@ impl BitmapStore { BitmapStore { len: 0, bits: Box::new([0; BITMAP_LENGTH]) } } - pub fn full() -> BitmapStore { - BitmapStore { len: (BITMAP_LENGTH as u64) * 64, bits: Box::new([u64::MAX; BITMAP_LENGTH]) } - } - pub fn capacity(&self) -> usize { BITMAP_LENGTH * u64::BITS as usize } @@ -336,6 +333,25 @@ impl BitmapStore { self.bits.iter().zip(other.bits.iter()).map(|(&a, &b)| (a & b).count_ones() as u64).sum() } + pub(crate) fn intersection_len_interval(&self, interval: &Interval) -> u64 { + if interval.is_full() { + return self.len(); + } + let (start_id, start_bit) = (key(interval.start()), bit(interval.start())); + let (end_id, end_bit) = (key(interval.end()), bit(interval.end())); + let mut amount: u64 = 0; + for (i, mut cur_bit) in self.bits[start_id..=end_id].iter().copied().enumerate() { + if i == 0 { + cur_bit &= u64::MAX << start_bit; + } + if i == end_id - start_id { + cur_bit &= u64::MAX >> (64 - end_bit - 1); + } + amount += u64::from(cur_bit.count_ones()); + } + amount + } + pub(crate) fn intersection_len_array(&self, other: &ArrayStore) -> u64 { other .iter() @@ -356,7 +372,6 @@ impl BitmapStore { BitmapIter::new(self.bits) } - #[cfg(feature = "std")] pub fn as_array(&self) 
-> &[u64; BITMAP_LENGTH] { &self.bits } diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs new file mode 100644 index 000000000..74a902498 --- /dev/null +++ b/roaring/src/bitmap/store/interval_store.rs @@ -0,0 +1,2439 @@ +use alloc::vec::Vec; +use core::ops::{ + BitAnd, BitAndAssign, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, SubAssign, +}; +use core::slice::Iter; +use core::{cmp::Ordering, ops::ControlFlow}; + +use super::{ArrayStore, BitmapStore}; + +#[derive(PartialEq, Eq, Clone, Debug)] +pub(crate) struct IntervalStore(Vec); + +pub(crate) const RUN_NUM_BYTES: usize = 2; +pub(crate) const RUN_ELEMENT_BYTES: usize = 4; + +impl Default for IntervalStore { + fn default() -> Self { + Self::new() + } +} + +impl IntervalStore { + pub fn new() -> Self { + Self(Default::default()) + } + + pub fn new_with_range(range: Interval) -> Self { + Self(alloc::vec![range]) + } + + pub fn full() -> Self { + Self(alloc::vec![Interval::new_unchecked(0, u16::MAX)]) + } + + pub fn byte_size(&self) -> usize { + Self::serialized_byte_size(self.run_amount()) + } + + pub fn serialized_byte_size(run_amount: u64) -> usize { + RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * run_amount as usize) + } + + #[cfg(feature = "std")] + pub fn from_vec_unchecked(vec: Vec) -> Self { + #[cfg(debug_assertions)] + { + for win in vec.windows(2) { + let [cur_interval, next] = [win[0], win[1]]; + assert!(cur_interval.end + 1 < next.start); + assert!(cur_interval.start <= cur_interval.end); + } + } + Self(vec) + } + + pub(crate) fn push_interval_unchecked(&mut self, interval: Interval) { + debug_assert!(self.0.last().map(|f| f.end < interval.start).unwrap_or(true)); + debug_assert!(interval.start <= interval.end); + self.0.push(interval) + } + + #[inline] + pub fn insert(&mut self, index: u16) -> bool { + // All intervals before idx are _fully_ before our index (iv.end < index) + let idx = self.0.partition_point(|iv| iv.end < index); + let (before, maybe_after) 
= self.0.split_at_mut(idx); + if let Some(next) = maybe_after.first_mut() { + // Check if the next interval actually already contains our index + // Because of partition_point, we know already know end >= index + if next.start <= index { + // index is already in the interval + return false; + } + // `next` is instead the first interval _after_ our index, + // check if we should grow that interval down by one + // Because we know from above that next.start > index, adding 1 is safe + if next.start == index + 1 { + next.start -= 1; + + // Check if the previous interval will now be continuous with this interval + if let Some(prev) = before.last_mut() { + // From the partition point: prev.end < index, subtracting 1 is safe + if prev.end == index - 1 { + prev.end = next.end; + self.0.remove(idx); + } + } + return true; + } + } + if let Some(prev) = before.last_mut() { + // Because we know from the partition point that prev.end < index, adding 1 is safe + if prev.end + 1 == index { + // Merge with previous interval + prev.end += 1; + // If we had needed to merge with the next interval, we would have handled that in + // the previous if statement, so we're done here + return true; + } + } + self.0.insert(idx, Interval::new_unchecked(index, index)); + true + } + + #[inline] + pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } + let mut interval = Interval { start: *range.start(), end: *range.end() }; + // All intervals in `start_idx..end_idx` are fully contained in our interval. 
+ let mut start_idx = self.0.partition_point(|iv| iv.start < interval.start); + let mut end_idx = + self.0[start_idx..].partition_point(|iv| iv.end <= interval.end) + start_idx; + + if let Some(prev) = self.0[..start_idx].last() { + // If the previous interval contains our start, or would be contiguous with us, expand + // to include it + // from partition point, we know prev.start < interval.start + if prev.end >= interval.start - 1 { + // We need to merge with the previous interval + interval.start = prev.start; + interval.end = interval.end.max(prev.end); + start_idx -= 1; + } + } + if let Some(next) = self.0.get(end_idx) { + // from partition point, we know next.end > interval.end + if next.start <= interval.end + 1 { + // We need to merge with the next interval + interval.end = next.end; + interval.start = interval.start.min(next.start); + end_idx += 1; + } + } + + let mut added_count = interval.run_len(); + // Replace the first interval to be replaced with an interval covering the new range + // and remove the rest + // Otherwise, just insert a new interval + if let [first, rest @ ..] 
= &mut self.0[start_idx..end_idx] { + added_count -= first.run_len(); + added_count -= rest.iter().map(|iv| iv.run_len()).sum::(); + *first = interval; + self.0.drain(start_idx + 1..end_idx); + } else { + // No intervals to merge with, we can just insert + self.0.insert(start_idx, interval); + } + added_count + } + + pub fn push(&mut self, index: u16) -> bool { + if let Some(last_interval) = self.0.last_mut() { + if last_interval.end.checked_add(1).map(|f| f == index).unwrap_or(false) { + last_interval.end = index; + true + } else if last_interval.end < index { + self.0.push(Interval::new_unchecked(index, index)); + true + } else { + false + } + } else { + self.0.push(Interval::new_unchecked(index, index)); + true + } + } + + pub fn remove(&mut self, index: u16) -> bool { + self.0 + .binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()) + .map(|loc| { + // loc always points to an interval + if index == self.0[loc].start && index == self.0[loc].end { + // Remove entire run if it only contains this value + self.0.remove(loc); + } else if index == self.0[loc].end { + // Value is last in this interval + self.0[loc].end = index - 1; + } else if index == self.0[loc].start { + // Value is first in this interval + self.0[loc].start = index + 1; + } else { + // Value lies inside the interval, we need to split it + // First construct a new interval with the right part + let new_interval = Interval::new_unchecked(index + 1, self.0[loc].end); + // Then shrink the current interval + self.0[loc].end = index - 1; + // Then insert the new interval leaving gap where value was removed + self.0.insert(loc + 1, new_interval); + } + }) + .is_ok() + } + + pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } + + let mut interval = Interval::new_unchecked(*range.start(), *range.end()); + // All intervals in `start_idx..end_idx` are fully contained in our interval. 
+ let start_idx = self.0.partition_point(|iv| iv.start < interval.start); + let end_idx = self.0[start_idx..].partition_point(|iv| iv.end <= interval.end) + start_idx; + let mut removed_count = 0; + let mut add_needed = false; + if let Some(prev) = self.0[..start_idx].last_mut() { + // If the previous interval contains our start, remove it + // from partition point, we know prev.start < interval.start + if prev.end >= interval.start { + // We need to remove from the previous interval + removed_count += + Interval::new_unchecked(interval.start, prev.end.min(interval.end)).run_len(); + let new_end = interval.start - 1; + add_needed = prev.end > interval.end; + if add_needed { + interval.start = interval.end + 1; + interval.end = prev.end; + } + prev.end = new_end; + } + } + if let Some(next) = self.0.get_mut(end_idx) { + // from partition point, we know next.end > interval.end + if next.start <= interval.end { + // We need to remove everything til interval.end + removed_count += + Interval::new_unchecked(next.start.max(interval.start), interval.end).run_len(); + next.start = interval.end + 1; + } + } + + // Replace the first interval to be replaced with an interval covering the new range + // and remove the rest + // Otherwise, just insert a new interval + if let [first, rest @ ..] 
= &mut self.0[start_idx..end_idx] { + removed_count += first.run_len(); + removed_count += rest.iter().map(|iv| iv.run_len()).sum::(); + self.0.drain(start_idx..end_idx); + } else if add_needed { + // We are removing a range contained in a single interval + // As such we must add a new interval + self.0.insert(start_idx, interval); + } + removed_count + } + + pub fn remove_smallest(&mut self, mut amount: u64) { + let mut remove_to = 0; + let mut last_interval = None; + for (i, interval) in self.0.iter_mut().enumerate() { + let too_much = interval.run_len() < amount; + if too_much { + amount -= interval.run_len(); + } + remove_to = i; + last_interval = Some(interval); + if !too_much { + break; + } + } + if let Some(last_interval) = last_interval { + if last_interval.run_len() < amount { + remove_to += 1; + } else { + last_interval.start += amount as u16; + } + } + self.0.drain(..remove_to); + } + + pub fn remove_biggest(&mut self, mut amount: u64) { + let mut remove_to = 0; + let mut last_interval = None; + for (i, interval) in self.0.iter_mut().enumerate().rev() { + let too_much = interval.run_len() < amount; + if too_much { + amount -= interval.run_len(); + } + remove_to = i; + last_interval = Some(interval); + if !too_much { + break; + } + } + if let Some(last_interval) = last_interval { + if last_interval.run_len() >= amount { + remove_to += 1; + last_interval.end -= amount as u16; + } + } + self.0.drain(remove_to..); + } + + pub fn contains(&self, index: u16) -> bool { + self.0.binary_search_by(|iv| cmp_index_interval(index, *iv).reverse()).is_ok() + } + + pub fn contains_range(&self, range: RangeInclusive) -> bool { + let interval = Interval::new_unchecked(*range.start(), *range.end()); + let start = self.0.binary_search_by(|iv| cmp_index_interval(interval.start, *iv).reverse()); + let end = self.0.binary_search_by(|iv| cmp_index_interval(interval.end, *iv).reverse()); + match (start, end) { + // both start and end are inside an interval, + // check if this 
interval is that same interval. + // If this is not the case then this range is not contained in this store + (Ok(start_id), Ok(end_id)) => start_id == end_id, + _ => false, + } + } + + fn step_walk< + 'a, + R, + C: FnMut(Interval, Interval, R) -> ControlFlow, + E: FnMut( + (Option, Option), + (Iter<'a, Interval>, Iter<'a, Interval>), + R, + ) -> R, + >( + &'a self, + other: &'a Self, + mut calc: C, + mut else_op: E, + mut buffer: R, + ) -> R { + let (mut i1, mut i2) = (self.0.iter(), other.0.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + match (iv1, iv2) { + (Some(v1), Some(v2)) => { + match calc(*v1, *v2, buffer) { + ControlFlow::Continue(new_buffer) => buffer = new_buffer, + ControlFlow::Break(end) => return end, + } + + // We increase the iterator based on which one is furthest behind. + // Or both if they are equal to each other. + match v1.end.cmp(&v2.end) { + Ordering::Less => iv1 = i1.next(), + Ordering::Greater => iv2 = i2.next(), + Ordering::Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + } + } + } + (value1, value2) => { + return else_op((value1.copied(), value2.copied()), (i1, i2), buffer) + } + } + } + } + + pub fn is_disjoint(&self, other: &Self) -> bool { + self.step_walk( + other, + |interval1, interval2, _| { + if interval1.overlaps(&interval2) { + ControlFlow::Break(false) + } else { + ControlFlow::Continue(true) + } + }, + |_, _, _| true, + false, + ) + } + + pub(crate) fn is_disjoint_array(&self, array: &ArrayStore) -> bool { + array.iter().all(|&i| !self.contains(i)) + } + + pub(crate) fn is_disjoint_bitmap(&self, array: &BitmapStore) -> bool { + // TODO: make this better + array.iter().all(|i| !self.contains(i)) + } + + pub fn is_subset(&self, other: &Self) -> bool { + self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) + } + + pub(crate) fn is_subset_array(&self, other: &ArrayStore) -> bool { + self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) + } + + 
pub(crate) fn is_subset_bitmap(&self, other: &BitmapStore) -> bool { + self.0.iter().all(|interval| other.contains_range(interval.start..=interval.end)) + } + + pub fn intersection_len(&self, other: &Self) -> u64 { + self.step_walk( + other, + |interval1, interval2, buffer| { + ControlFlow::Continue( + interval1.overlapping_interval(&interval2).map(|f| f.run_len()).unwrap_or(0) + + buffer, + ) + }, + |_, _, buffer| buffer, + 0, + ) + } + + pub(crate) fn intersection_len_bitmap(&self, other: &BitmapStore) -> u64 { + self.0.iter().map(|f| other.intersection_len_interval(f)).sum() + } + + pub(crate) fn intersection_len_array(&self, other: &ArrayStore) -> u64 { + other.iter().map(|&f| self.contains(f) as u64).sum() + } + + pub fn len(&self) -> u64 { + self.0.iter().map(|iv| iv.run_len()).sum() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn min(&self) -> Option { + self.0.first().map(|f| f.start) + } + + pub fn max(&self) -> Option { + self.0.last().map(|f| f.end) + } + + pub fn rank(&self, value: u16) -> u64 { + let mut rank = 0; + for iv in self.0.iter() { + if iv.end <= value { + rank += iv.run_len(); + } else if iv.start <= value { + rank += Interval::new_unchecked(iv.start, value).run_len(); + } else { + break; + } + } + rank + } + + pub fn select(&self, mut n: u16) -> Option { + for iv in self.0.iter() { + let run_len = iv.run_len(); + if run_len <= n.into() { + n -= iv.run_len() as u16; // this conversion never overflows since run_len is + // smaller then a u16 + } else { + return Some(iv.start + n); + } + } + None + } + + pub fn run_amount(&self) -> u64 { + self.0.len() as u64 + } + + pub fn to_bitmap(&self) -> BitmapStore { + let mut bits = BitmapStore::new(); + for iv in self.0.iter() { + bits.insert_range(iv.start..=iv.end); + } + bits + } + + pub fn to_array(&self) -> ArrayStore { + let mut array = ArrayStore::new(); + for iv in self.0.iter() { + array.insert_range(iv.start..=iv.end); + } + array + } + + pub(crate) fn iter(&'_ 
self) -> RunIterBorrowed<'_> { + self.into_iter() + } + + pub(crate) fn iter_intervals(&'_ self) -> core::slice::Iter<'_, Interval> { + self.0.iter() + } +} + +impl From for BitmapStore { + fn from(value: IntervalStore) -> Self { + value.to_bitmap() + } +} + +impl From for ArrayStore { + fn from(value: IntervalStore) -> Self { + value.to_array() + } +} + +impl BitOrAssign for IntervalStore { + fn bitor_assign(&mut self, mut rhs: Self) { + let (add_intervals, take_intervals, self_is_add) = + if self.len() > rhs.len() { (self, &mut rhs, true) } else { (&mut rhs, self, false) }; + for iv in take_intervals.iter_intervals() { + add_intervals.insert_range(iv.start..=iv.end); + } + if !self_is_add { + core::mem::swap(add_intervals, take_intervals); + } + } +} + +impl BitOrAssign<&ArrayStore> for IntervalStore { + fn bitor_assign(&mut self, rhs: &ArrayStore) { + for &i in rhs.iter() { + self.insert(i); + } + } +} + +impl BitOrAssign<&Self> for IntervalStore { + fn bitor_assign(&mut self, rhs: &Self) { + for iv in rhs.iter_intervals() { + self.insert_range(iv.start..=iv.end); + } + } +} + +impl BitAnd for &IntervalStore { + type Output = IntervalStore; + + fn bitand(self, rhs: Self) -> Self::Output { + self.step_walk( + rhs, + |iv1, iv2, mut buf: IntervalStore| { + if let Some(new_iv) = iv1.overlapping_interval(&iv2) { + buf.insert_range(new_iv.start..=new_iv.end); + } + ControlFlow::Continue(buf) + }, + |_, _, buf| buf, + IntervalStore::new(), + ) + } +} + +impl BitAndAssign<&IntervalStore> for ArrayStore { + fn bitand_assign(&mut self, rhs: &IntervalStore) { + self.retain(|f| rhs.contains(f)); + } +} + +impl SubAssign<&Self> for IntervalStore { + fn sub_assign(&mut self, rhs: &Self) { + for iv in rhs.iter_intervals() { + self.remove_range(iv.start..=iv.end); + } + } +} + +impl BitXor for &IntervalStore { + type Output = IntervalStore; + + fn bitxor(self, rhs: Self) -> Self::Output { + let mut union = self.clone(); + union |= rhs; + let intersection = self & rhs; + union 
-= &intersection; + union + } +} + +impl BitXorAssign<&ArrayStore> for IntervalStore { + fn bitxor_assign(&mut self, rhs: &ArrayStore) { + rhs.iter().for_each(|&f| { + if self.contains(f) { + self.remove(f); + } else { + self.insert(f); + } + }) + } +} + +pub(crate) type RunIterOwned = RunIter>; +pub(crate) type RunIterBorrowed<'a> = RunIter>; + +impl IntoIterator for IntervalStore { + type Item = u16; + type IntoIter = RunIter>; + + fn into_iter(self) -> Self::IntoIter { + RunIter::new(self.0.into_iter()) + } +} + +impl<'a> IntoIterator for &'a IntervalStore { + type Item = u16; + type IntoIter = RunIter>; + + fn into_iter(self) -> Self::IntoIter { + RunIter::new(self.0.iter()) + } +} + +pub(crate) trait SliceIterator: Iterator + DoubleEndedIterator { + fn as_slice(&self) -> &[I]; +} + +impl SliceIterator for alloc::vec::IntoIter { + fn as_slice(&self) -> &[I] { + alloc::vec::IntoIter::as_slice(self) + } +} + +impl<'a, I> SliceIterator for core::slice::Iter<'a, I> { + fn as_slice(&self) -> &'a [I] { + core::slice::Iter::as_slice(self) + } +} + +#[derive(Clone)] +pub(crate) struct RunIter> { + forward_offset: u16, + backward_offset: u16, + intervals: I, +} + +impl> RunIter { + fn new(intervals: I) -> Self { + Self { forward_offset: 0, backward_offset: 0, intervals } + } + + fn move_next(&mut self) { + if let Some(value) = self.forward_offset.checked_add(1) { + self.forward_offset = value; + } else { + self.intervals.next(); + return; + } + if Some(self.forward_offset as u64) + >= self.intervals.as_slice().first().map(|f| f.run_len()) + { + self.intervals.next(); + self.forward_offset = 0; + } + } + + fn move_next_back(&mut self) { + if let Some(value) = self.backward_offset.checked_add(1) { + self.backward_offset = value; + } else { + self.intervals.next_back(); + return; + } + if Some(self.backward_offset as u64) + >= self.intervals.as_slice().last().map(|f| f.run_len()) + { + self.intervals.next_back(); + self.backward_offset = 0; + } + } + + fn 
remaining_size(&self) -> usize { + (self.intervals.as_slice().iter().map(|f| f.run_len()).sum::() + - self.forward_offset as u64 + - self.backward_offset as u64) as usize + } + + /// Advance the iterator to the first value greater than or equal to `n`. + pub(crate) fn advance_to(&mut self, n: u16) { + if n == 0 { + return; + } + if self + .intervals + .as_slice() + .first() + .map(|f| f.start + self.forward_offset > n) + .unwrap_or(true) + { + return; + } + match self.intervals.as_slice().binary_search_by(|iv| cmp_index_interval(n, *iv).reverse()) + { + Ok(index) => { + if let Some(value) = index.checked_sub(1) { + self.intervals.nth(value); + } + self.forward_offset = n - self.intervals.as_slice().first().unwrap().start; + } + Err(index) => { + if index == self.intervals.as_slice().len() { + return; + } + if let Some(value) = index.checked_sub(1) { + self.intervals.nth(value); + self.forward_offset = 0; + } + } + } + } + + /// Advance the back of iterator to the first value less than or equal to `n`. 
///
/// Does nothing if the iterator is empty or its last value is already less than `n`.
/// If no remaining value is less than or equal to `n`, the iterator is exhausted.
pub(crate) fn advance_back_to(&mut self, n: u16) {
    if n == u16::MAX {
        return;
    }
    if self
        .intervals
        .as_slice()
        .last()
        .map(|f| f.end - self.backward_offset < n)
        .unwrap_or(true)
    {
        return;
    }
    let len = self.intervals.as_slice().len();
    match self.intervals.as_slice().binary_search_by(|iv| cmp_index_interval(n, *iv).reverse()) {
        Ok(index) => {
            // `n` lies inside the interval at `index`: drop everything after it.
            if let Some(value) = (len - index - 1).checked_sub(1) {
                self.intervals.nth_back(value);
            }
            self.backward_offset = self.intervals.as_slice().last().unwrap().end - n;
            if self.front_met_back() {
                // `n` was already consumed from the front: nothing is left.
                self.drop_remaining();
            }
        }
        Err(0) => {
            // Every remaining value is greater than `n`.
            self.drop_remaining();
        }
        Err(index) => {
            // `n` lies in the gap after the interval at `index - 1`.
            if let Some(value) = (len - index).checked_sub(1) {
                self.intervals.nth_back(value);
                self.backward_offset = 0;
            }
        }
    }
}

/// Returns `true` when a single interval remains and the values taken from its front and
/// from its back together cover all of it.
fn front_met_back(&self) -> bool {
    match self.intervals.as_slice() {
        [only] => {
            u64::from(self.forward_offset) + u64::from(self.backward_offset) >= only.run_len()
        }
        _ => false,
    }
}

/// Drops every remaining interval and resets both offsets.
fn drop_remaining(&mut self) {
    self.intervals.by_ref().for_each(drop);
    self.forward_offset = 0;
    self.backward_offset = 0;
}
} // closes `impl RunIter`

impl<I: SliceIterator<Interval>> Iterator for RunIter<I> {
    type Item = u16;

    fn next(&mut self) -> Option<u16> {
        let front = self.intervals.as_slice().first()?;
        let result = front.start + self.forward_offset;
        // Never walk over values that were already yielded from the back.
        if self.front_met_back() {
            return None;
        }
        self.move_next();
        Some(result)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining_size = self.remaining_size();
        (remaining_size, Some(remaining_size))
    }

    fn count(self) -> usize {
        self.remaining_size()
    }

    /// Skips `n` values and returns the one after them.
    fn nth(&mut self, n: usize) -> Option<u16> {
        let mut to_skip = n as u64;
        while to_skip > 0 {
            let slice = self.intervals.as_slice();
            let front = slice.first()?;
            let only = slice.len() == 1;
            // Values already yielded from the back of a shared interval are not available.
            let reserved = if only { u64::from(self.backward_offset) } else { 0 };
            let available =
                front.run_len().saturating_sub(u64::from(self.forward_offset) + reserved);
            if to_skip < available {
                // `forward_offset + to_skip < run_len <= 65536`, so this cannot overflow.
                self.forward_offset += to_skip as u16;
                break;
            }
            // The rest of this interval is skipped entirely.
            to_skip -= available;
            self.intervals.next();
            self.forward_offset = 0;
            if only {
                self.backward_offset = 0;
            }
        }
        self.next()
    }
}

impl<I: SliceIterator<Interval>> DoubleEndedIterator for RunIter<I> {
    fn next_back(&mut self) -> Option<u16> {
        let back = self.intervals.as_slice().last()?;
        let result = back.end - self.backward_offset;
        // Never walk over values that were already yielded from the front.
        if self.front_met_back() {
            return None;
        }
        self.move_next_back();
        Some(result)
    }
}

impl<I: SliceIterator<Interval>> ExactSizeIterator for RunIter<I> {}

/// This interval is inclusive to end.
///
/// Both `start` and `end` are part of the interval, so it always holds at least one value
/// and `start <= end` is expected (checked with a `debug_assert!` in `new_unchecked`).
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)]
pub(crate) struct Interval {
    start: u16,
    end: u16,
}

impl From<RangeInclusive<u16>> for Interval {
    fn from(value: RangeInclusive<u16>) -> Self {
        Interval::new_unchecked(*value.start(), *value.end())
    }
}

impl IntoIterator for Interval {
    type Item = u16;
    type IntoIter = RangeInclusive<u16>;

    fn into_iter(self) -> Self::IntoIter {
        self.start..=self.end
    }
}

impl IntoIterator for &'_ Interval {
    type Item = u16;
    type IntoIter = RangeInclusive<u16>;

    fn into_iter(self) -> Self::IntoIter {
        self.start..=self.end
    }
}

/// Compares `index` with the interval `iv`.
///
/// Returns `Less` if `index` is below the interval, `Greater` if it is above it and
/// `Equal` if it lies inside.
pub(crate) fn cmp_index_interval(index: u16, iv: Interval) -> Ordering {
    if index < iv.start {
        Ordering::Less
    } else if index > iv.end {
        Ordering::Greater
    } else {
        Ordering::Equal
    }
}

impl Interval {
    /// Creates the interval `start..=end`; `start <= end` is only checked in debug builds.
    pub fn new_unchecked(start: u16, end: u16) -> Self {
        debug_assert!(start <= end);
        Self { start, end }
    }

    /// First value of the interval.
    pub fn start(&self) -> u16 {
        self.start
    }

    /// Last value of the interval (inclusive).
    pub fn end(&self) -> u16 {
        self.end
    }

    /// Returns `true` if the two intervals share at least one value.
    pub fn overlaps(&self, interval: &Self) -> bool {
        interval.start <= self.end && self.start <= interval.end
    }

    /// Returns the values shared by both intervals, or `None` if they do not overlap.
    pub fn overlapping_interval(&self, other: &Self) -> Option<Self> {
        if self.overlaps(other) {
            Some(Self::new_unchecked(self.start.max(other.start), self.end.min(other.end)))
        } else {
            None
        }
    }

    /// Number of values in the interval; a `u64` because a full interval holds 65536 values.
    pub fn run_len(&self) -> u64 {
        u64::from(self.end - self.start) + 1
    }

    /// Returns `true` if the interval covers the whole `u16` range.
    pub fn is_full(&self) -> bool {
        self.start == 0 && self.end == u16::MAX
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn insert_empty() {
        let mut interval_store = IntervalStore(alloc::vec![]);
        assert!(interval_store.insert(1));
        assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 1 }]))
    }

    #[test]
    fn insert_consecutive_begin() {
        let mut interval_store = IntervalStore(alloc::vec![Interval { start: 0, end: 0 },]);
        assert!(interval_store.insert(1));
        assert_eq!(interval_store, IntervalStore(alloc::vec![Interval {
start: 0, end: 1 }])) + } + + #[test] + fn insert_consecutive_end() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 1 },]); + assert!(interval_store.insert(0)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 1 }])) + } + + #[test] + fn insert_consecutive_begin_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 0 }, + Interval { start: 2, end: 2 }, + ]); + interval_store.insert(1); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 0, end: 2 }])) + } + + #[test] + fn insert_arbitrary() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 3 }, + Interval { start: 9, end: 10 }, + ]); + interval_store.insert(5); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 0, end: 3 }, + Interval { start: 5, end: 5 }, + Interval { start: 9, end: 10 }, + ]) + ) + } + + #[test] + fn insert_u16_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 0, end: 3 },]); + interval_store.insert(u16::MAX); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 0, end: 3 }, + Interval { start: u16::MAX, end: u16::MAX }, + ]) + ) + } + + #[test] + fn insert_u16_max_consecutive() { + let mut interval_store = + IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX - 1 },]); + interval_store.insert(u16::MAX); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX },]) + ) + } + + #[test] + fn insert_consecutive_end_with_extra() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 65079, end: 65079 }, + Interval { start: 65179, end: 65179 }, + ]); + assert!(interval_store.insert(65080)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 65079, end: 65080 }, + Interval { start: 65179, end: 65179 }, + ]) + ) + } + + #[test] + fn insert_range_empty() { + let mut 
interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.insert_range(1..=2), Interval::new_unchecked(1, 2).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 2 },])); + } + + #[test] + fn insert_range_overlap_begin() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 20 }]); + assert_eq!(interval_store.insert_range(5..=50), Interval::new_unchecked(21, 50).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 1, end: 50 },])); + } + + #[test] + fn insert_range_overlap_end() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 10, end: 20 }]); + assert_eq!(interval_store.insert_range(5..=15), Interval::new_unchecked(5, 9).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 5, end: 20 },])); + } + + #[test] + fn insert_range_overlap_begin_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 40, end: 60 }, + ]); + assert_eq!(interval_store.insert_range(15..=50), Interval::new_unchecked(21, 39).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 60 },])); + } + + #[test] + fn insert_range_concescutive_begin() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 10, end: 20 },]); + assert_eq!(interval_store.insert_range(21..=50), Interval::new_unchecked(21, 50).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 50 },])); + } + + #[test] + fn insert_range_concescutive_begin_overlap_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 40, end: 60 }, + ]); + assert_eq!(interval_store.insert_range(21..=50), Interval::new_unchecked(21, 39).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 60 },])); + } + + #[test] + fn 
insert_range_concescutive_end() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert_eq!(interval_store.insert_range(21..=49), Interval::new_unchecked(21, 49).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 21, end: 70 },])); + } + + #[test] + fn insert_range_concescutive_begin_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!(interval_store.insert_range(21..=49), Interval::new_unchecked(21, 49).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 10, end: 70 },])); + } + + #[test] + fn insert_range_no_overlap() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!(interval_store.insert_range(25..=30), Interval::new_unchecked(25, 30).run_len()); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 25, end: 30 }, + Interval { start: 50, end: 70 }, + ]) + ); + } + + #[test] + fn insert_range_u16_max_no_overlap() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(90..=u16::MAX), + Interval::new_unchecked(90, u16::MAX).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + Interval { start: 90, end: u16::MAX }, + ]) + ); + } + + #[test] + fn insert_range_u16_max_overlap_begin() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(70..=u16::MAX), + Interval::new_unchecked(71, u16::MAX).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval 
{ start: 10, end: 20 }, + Interval { start: 50, end: u16::MAX }, + ]) + ); + } + + #[test] + fn insert_range_u16_max_overlap_all() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + ]); + assert_eq!( + interval_store.insert_range(0..=u16::MAX), + Interval::new_unchecked(0, u16::MAX).run_len() + - Interval::new_unchecked(10, 20).run_len() + - Interval::new_unchecked(50, 70).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 0, end: u16::MAX },]) + ); + } + + #[test] + fn insert_range_overlap_some() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 10, end: 20 }, + Interval { start: 50, end: 70 }, + Interval { start: 500, end: 700 }, + ]); + assert_eq!( + interval_store.insert_range(0..=100), + Interval::new_unchecked(0, 100).run_len() + - Interval::new_unchecked(10, 20).run_len() + - Interval::new_unchecked(50, 70).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 0, end: 100 }, + Interval { start: 500, end: 700 }, + ]) + ); + } + + #[test] + fn insert_range_begin_overlap_concescutive_end() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(2, 10), + Interval::new_unchecked(12, 700), + ]); + assert_eq!(interval_store.insert_range(2..=11), 1); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new_unchecked(2, 700)])); + } + + #[test] + fn insert_range_pin_1() { + let mut interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(65079, 65079)]); + assert_eq!(interval_store.insert_range(65080..=65080), 1); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new_unchecked(65079, 65080)]) + ); + } + + #[test] + fn push_empty() { + let mut interval_store = IntervalStore(alloc::vec![]); + assert!(interval_store.push(80)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 80, end: 80 },])); + 
} + + #[test] + fn push_new_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(interval_store.push(80)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 50, end: 70 }, + Interval { start: 80, end: 80 }, + ]) + ); + } + + #[test] + fn push_new_max_consecutive() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(interval_store.push(71)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 50, end: 71 },])); + } + + #[test] + fn push_existing() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(!interval_store.push(60)); + assert_eq!(interval_store, interval_store); + } + + #[test] + fn push_non_existing_non_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 70 },]); + assert!(!interval_store.push(10)); + assert_eq!(interval_store, interval_store); + } + + #[test] + fn push_existing_u16_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]); + assert!(!interval_store.push(u16::MAX)); + assert_eq!(interval_store, interval_store); + } + + #[test] + fn push_new_u16_max() { + let mut interval_store = + IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX - 1 },]); + assert!(interval_store.push(u16::MAX)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]) + ); + } + + #[test] + fn remove_end_of_interval() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 60 },]); + assert!(interval_store.remove(60)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 50, end: 59 },])); + } + + #[test] + fn remove_begin_of_interval() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 60 },]); + assert!(interval_store.remove(50)); + assert_eq!(interval_store, 
IntervalStore(alloc::vec![Interval { start: 51, end: 60 },])); + } + + #[test] + fn remove_middle() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 3 },]); + assert!(interval_store.remove(2)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 1, end: 1 }, + Interval { start: 3, end: 3 }, + ]) + ); + } + + #[test] + fn remove_nothing() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 60 },]); + assert!(!interval_store.remove(90)); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 50, end: 60 },])); + } + + #[test] + fn remove_u16_max() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX },]); + assert!(interval_store.remove(u16::MAX)); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval { start: 50, end: u16::MAX - 1 },]) + ); + } + + #[test] + fn remove_interval() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 50, end: 50 },]); + assert!(interval_store.remove(50)); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_exact_one() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(40..=60), 21); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_one_with_extra_1() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(40..=70), 21); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_one_with_extra_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 400, end: 600 }, + Interval { start: 4000, end: 6000 }, + ]); + assert_eq!(interval_store.remove_range(40..=70), 21); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + 
Interval::new_unchecked(400, 600), + Interval::new_unchecked(4000, 6000), + ]) + ); + } + + #[test] + fn remove_range_exact_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 80, end: 90 }, + Interval { start: 100, end: 200 }, + ]); + assert_eq!( + interval_store.remove_range(40..=200), + Interval::new_unchecked(40, 60).run_len() + + Interval::new_unchecked(80, 90).run_len() + + Interval::new_unchecked(100, 200).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_begin_exact_overlap_end_one() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(40..=80), + Interval::new_unchecked(40, 60).run_len() + Interval::new_unchecked(70, 80).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 81, end: 90 },])); + } + + #[test] + fn remove_range_begin_overlap_end_exact_one() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(50..=90), + Interval::new_unchecked(70, 90).run_len() + Interval::new_unchecked(50, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); + } + + #[test] + fn remove_range_begin_no_overlap_end_exact_one_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(30..=90), + Interval::new_unchecked(70, 90).run_len() + Interval::new_unchecked(40, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_begin_no_overlap_end_exact_one_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + 
Interval { start: 70, end: 90 }, + Interval { start: 700, end: 900 }, + ]); + assert_eq!( + interval_store.remove_range(30..=90), + Interval::new_unchecked(70, 90).run_len() + Interval::new_unchecked(40, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new_unchecked(700, 900),])); + } + + #[test] + fn remove_range_both_overlap() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 70, end: 90 }, + ]); + assert_eq!( + interval_store.remove_range(50..=80), + Interval::new_unchecked(70, 80).run_len() + Interval::new_unchecked(50, 60).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval { start: 40, end: 49 }, + Interval { start: 81, end: 90 }, + ]) + ); + } + + #[test] + fn remove_range_begin_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!( + interval_store.remove_range(50..=100), + Interval::new_unchecked(50, 60).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); + } + + #[test] + fn remove_range_begin_overlap_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 80, end: 100 }, + Interval { start: 200, end: 500 }, + ]); + assert_eq!( + interval_store.remove_range(50..=1000), + Interval::new_unchecked(50, 60).run_len() + + Interval::new_unchecked(80, 100).run_len() + + Interval::new_unchecked(200, 500).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 40, end: 49 },])); + } + + #[test] + fn remove_range_end_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(20..=50), Interval::new_unchecked(40, 50).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 51, end: 60 },])); + } + + #[test] + fn 
remove_range_end_overlap_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 100, end: 500 }, + Interval { start: 800, end: 900 }, + ]); + assert_eq!( + interval_store.remove_range(20..=850), + Interval::new_unchecked(40, 60).run_len() + + Interval::new_unchecked(100, 500).run_len() + + Interval::new_unchecked(800, 850).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval { start: 851, end: 900 },])); + } + + #[test] + fn remove_range_no_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + assert_eq!(interval_store.remove_range(20..=80), Interval::new_unchecked(40, 60).run_len()); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_no_overlap_many() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 40, end: 60 }, + Interval { start: 400, end: 600 }, + Interval { start: 4000, end: 6000 }, + ]); + assert_eq!( + interval_store.remove_range(20..=60000), + Interval::new_unchecked(40, 60).run_len() + + Interval::new_unchecked(400, 600).run_len() + + Interval::new_unchecked(4000, 6000).run_len() + ); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_complete_overlap() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 51, end: 6000 },]); + assert_eq!( + interval_store.remove_range(500..=600), + Interval::new_unchecked(500, 600).run_len() + ); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval::new_unchecked(51, 499), + Interval::new_unchecked(601, 6000), + ]) + ); + } + + #[test] + fn remove_range_nothing() { + let mut interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.remove_range(50000..=60000), 0); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_range_with_extra() { + let mut interval_store = 
IntervalStore(alloc::vec![ + Interval::new_unchecked(38161, 38162), + Interval::new_unchecked(40562, 40562), + ]); + assert_eq!(interval_store.remove_range(38162..=38163), 1); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval::new_unchecked(38161, 38161), + Interval::new_unchecked(40562, 40562), + ]) + ); + } + + #[test] + fn remove_smallest_one() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + interval_store.remove_smallest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_smallest_many_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 99 }, + Interval { start: 400, end: 600 }, + Interval { start: 4000, end: 6000 }, + ]); + interval_store.remove_smallest(200); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval::new_unchecked(500, 600), + Interval::new_unchecked(4000, 6000), + ]) + ); + } + + #[test] + fn remove_smallest_many_2() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 99 }, + Interval { start: 400, end: 599 }, + Interval { start: 4000, end: 6000 }, + ]); + interval_store.remove_smallest(500); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![Interval::new_unchecked(4200, 6000),]) + ); + } + + #[test] + fn remove_biggest_one() { + let mut interval_store = IntervalStore(alloc::vec![Interval { start: 40, end: 60 },]); + interval_store.remove_biggest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![])); + } + + #[test] + fn remove_biggest_many_1() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 0, end: 99 }, + Interval { start: 400, end: 600 }, + Interval { start: 5901, end: 6000 }, + ]); + interval_store.remove_biggest(200); + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 99), + Interval::new_unchecked(400, 500), + ]) + ); + } + + #[test] + fn remove_biggest_many_2() 
{ + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 6000 }, + Interval { start: 8401, end: 8600 }, + Interval { start: 9901, end: 10000 }, + ]); + interval_store.remove_biggest(500); + assert_eq!(interval_store, IntervalStore(alloc::vec![Interval::new_unchecked(1, 5800),])); + } + + #[test] + fn contains_index_1() { + let interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store.contains(5)); + assert!(interval_store.contains(16000)); + } + + #[test] + fn contains_index_2() { + let interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(!interval_store.contains(0)); + } + + #[test] + fn contains_range_1() { + let interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store.contains_range(1..=500)); + } + + #[test] + fn contains_range_2() { + let interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(!interval_store.contains_range(1..=1500)); + } + + #[test] + fn contains_range_3() { + let interval_store = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store.contains_range(1..=1)); + } + + #[test] + fn is_disjoint_1() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 601, end: 1200 },]); + 
assert!(!interval_store_1.is_disjoint(&interval_store_1)); + assert!(!interval_store_2.is_disjoint(&interval_store_2)); + assert!(interval_store_1.is_disjoint(&interval_store_2)); + assert!(interval_store_2.is_disjoint(&interval_store_1)); + } + + #[test] + fn is_disjoint_2() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 600, end: 1200 },]); + assert!(!interval_store_1.is_disjoint(&interval_store_1)); + assert!(!interval_store_2.is_disjoint(&interval_store_2)); + assert!(!interval_store_1.is_disjoint(&interval_store_2)); + assert!(!interval_store_2.is_disjoint(&interval_store_1)); + } + + #[test] + fn is_disjoint_3() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 15800, end: 15905 },]); + assert!(!interval_store_1.is_disjoint(&interval_store_1)); + assert!(!interval_store_2.is_disjoint(&interval_store_2)); + assert!(!interval_store_1.is_disjoint(&interval_store_2)); + assert!(!interval_store_2.is_disjoint(&interval_store_1)); + } + + #[test] + fn is_disjoint_array_store_1() { + let array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); + let interval_store = IntervalStore(alloc::vec![Interval { start: 70, end: 199 },]); + assert!(interval_store.is_disjoint_array(&array_store)); + } + + #[test] + fn is_disjoint_array_store_2() { + let array_store = ArrayStore::from_vec_unchecked(alloc::vec![0, 60, 200, 500,]); + let interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); + assert!(!interval_store.is_disjoint_array(&array_store)); + } + + #[test] + fn is_disjoint_bitmap_store_1() { + let mut bitmap_store = BitmapStore::new(); + for 
to_set in [500, 5001, 20, 40] { + bitmap_store.insert(to_set); + } + let interval_store = IntervalStore(alloc::vec![ + Interval { start: 1000, end: 4000 }, + Interval { start: 8000, end: 10000 }, + ]); + assert!(interval_store.is_disjoint_bitmap(&bitmap_store)); + } + + #[test] + fn is_disjoint_bitmap_store_2() { + let mut bitmap_store = BitmapStore::new(); + for to_set in [500, 5001, 20, 40] { + bitmap_store.insert(to_set); + } + let interval_store = IntervalStore(alloc::vec![Interval { start: 1, end: 400 },]); + assert!(!interval_store.is_disjoint_bitmap(&bitmap_store)); + } + + #[test] + fn is_subset_1() { + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1500, end: 1600 },]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store_1.is_subset(&interval_store_1)); + assert!(interval_store_2.is_subset(&interval_store_2)); + assert!(interval_store_1.is_subset(&interval_store_2)); + assert!(!interval_store_2.is_subset(&interval_store_1)); + } + + #[test] + fn is_subset_2() { + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 50, end: 700 },]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 600 }, + Interval { start: 1401, end: 1600 }, + Interval { start: 15901, end: 16000 }, + ]); + assert!(interval_store_1.is_subset(&interval_store_1)); + assert!(interval_store_2.is_subset(&interval_store_2)); + assert!(!interval_store_1.is_subset(&interval_store_2)); + assert!(!interval_store_2.is_subset(&interval_store_1)); + } + + #[test] + fn overlapping_interval_1() { + let interval1 = Interval::new_unchecked(0, 100); + let interval2 = Interval::new_unchecked(50, 300); + + assert_eq!( + interval1.overlapping_interval(&interval2), + Some(Interval::new_unchecked(50, 100)) + ) + } + + #[test] + fn overlapping_interval_2() { + let interval1 = Interval::new_unchecked(50, 
300); + let interval2 = Interval::new_unchecked(0, 100); + + assert_eq!( + interval1.overlapping_interval(&interval2), + Some(Interval::new_unchecked(50, 100)) + ) + } + + #[test] + fn overlapping_interval_3() { + let interval1 = Interval::new_unchecked(0, 100); + let interval2 = Interval::new_unchecked(500, 700); + + assert_eq!(interval1.overlapping_interval(&interval2), None) + } + + #[test] + fn intersection_len_1() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 11, end: 20 }, + Interval { start: 51, end: 80 }, + Interval { start: 111, end: 120 }, + ]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 20 }, + Interval { start: 41, end: 80 }, + Interval { start: 101, end: 120 }, + ]); + assert_eq!( + interval_store_1.intersection_len(&interval_store_2), + Interval::new_unchecked(11, 20).run_len() + + Interval::new_unchecked(51, 80).run_len() + + Interval::new_unchecked(111, 120).run_len() + ) + } + + #[test] + fn intersection_len_2() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 11, end: 20 }, + Interval { start: 51, end: 80 }, + Interval { start: 111, end: 120 }, + ]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval { start: 1, end: 80 }, + Interval { start: 101, end: 120 }, + ]); + let intersect_len = Interval::new_unchecked(11, 20).run_len() + + Interval::new_unchecked(51, 80).run_len() + + Interval::new_unchecked(111, 120).run_len(); + assert_eq!(interval_store_1.intersection_len(&interval_store_2), intersect_len); + assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); + } + + #[test] + fn intersection_len_3() { + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 1, end: 2000 },]); + let interval_store_2 = IntervalStore(alloc::vec![Interval { start: 1001, end: 3000 },]); + let intersect_len = Interval::new_unchecked(1001, 2000).run_len(); + assert_eq!(interval_store_1.intersection_len(&interval_store_2), 
intersect_len); + assert_eq!(interval_store_2.intersection_len(&interval_store_1), intersect_len); + } + + #[test] + fn intersection_len_bitmap_1() { + let mut bitmap_store = BitmapStore::new(); + for to_set in [500, 5001, 20, 40, 60] { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 4; + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_2() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=200 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = Interval::new_unchecked(20, 200).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_3() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20000 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 20, end: 6000 }, + Interval { start: 5000, end: 33333 }, + ]); + let intersect_len = Interval::new_unchecked(20, 6000).run_len() + + Interval::new_unchecked(5000, 20000).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_4() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20000 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 64, end: 6400 }, + Interval { start: 7680, end: 64000 }, + ]); + let intersect_len = Interval::new_unchecked(64, 6400).run_len() + + Interval::new_unchecked(7680, 20000).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_5() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20005 { + 
bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 64, end: 6400 }, + Interval { start: 7680, end: 64000 }, + ]); + let intersect_len = Interval::new_unchecked(64, 6400).run_len() + + Interval::new_unchecked(7680, 20005).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_bitmap_6() { + let mut bitmap_store = BitmapStore::new(); + for to_set in 0..=20005 { + bitmap_store.insert(to_set); + } + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 64, end: 64 },]); + let intersect_len = Interval::new_unchecked(64, 64).run_len(); + assert_eq!(interval_store_1.intersection_len_bitmap(&bitmap_store), intersect_len); + } + + #[test] + fn intersection_len_array_1() { + let array_store = ArrayStore::from_vec_unchecked(alloc::vec![20, 40, 60, 500, 5001]); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 4; + assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); + } + + #[test] + fn intersection_len_array_2() { + let array_store = ArrayStore::from_vec_unchecked(Vec::from_iter(0..200)); + let interval_store_1 = IntervalStore(alloc::vec![Interval { start: 20, end: 600 },]); + let intersect_len = 200 - 20; + assert_eq!(interval_store_1.intersection_len_array(&array_store), intersect_len); + } + + #[test] + fn len_1() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval { start: 20, end: 600 }, + Interval { start: 5000, end: 8000 }, + ]); + assert_eq!( + interval_store_1.len(), + Interval::new_unchecked(20, 600).run_len() + + Interval::new_unchecked(5000, 8000).run_len() + ); + } + + #[test] + fn is_empty() { + let mut interval_store = IntervalStore(alloc::vec![ + Interval { start: 20, end: 600 }, + Interval { start: 5000, end: 8000 }, + ]); + assert!(!interval_store.is_empty()); + interval_store.remove_range(0..=u16::MAX); + 
assert!(interval_store.is_empty()); + } + + #[test] + fn min_0() { + let interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(20, u16::MAX)]); + assert_eq!(interval_store.min(), Some(20)); + } + + #[test] + fn min_1() { + let interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.min(), None); + } + + #[test] + fn max_0() { + let interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(20, u16::MAX)]); + assert_eq!(interval_store.max(), Some(u16::MAX)); + } + + #[test] + fn max_1() { + let interval_store = IntervalStore(alloc::vec![]); + assert_eq!(interval_store.max(), None); + } + + #[test] + fn rank() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 200), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + ]); + assert_eq!( + interval_store.rank(5020), + Interval::new_unchecked(0, 200).run_len() + + Interval::new_unchecked(5000, 5020).run_len() + ); + assert_eq!(interval_store.rank(u16::MAX), interval_store.len()); + } + + #[test] + fn select() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + ]); + assert_eq!(interval_store.select(0), Some(0)); + assert_eq!(interval_store.select(1), Some(2)); + assert_eq!(interval_store.select(10), Some(11)); + assert_eq!(interval_store.select(11), Some(5000)); + assert_eq!(interval_store.select(11 + 3), Some(5003)); + assert_eq!(interval_store.select(11 + 2001), Some(8000)); + } + + #[test] + fn union_1() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + ]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 10), + Interval::new_unchecked(12, 7000), + 
Interval::new_unchecked(65000, 65050), + ]); + interval_store_1 |= interval_store_2; + assert_eq!( + interval_store_1, + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 7000), + Interval::new_unchecked(8000, 10000), + Interval::new_unchecked(65000, 65050), + ]) + ) + } + + #[test] + fn union_array() { + let mut values = alloc::vec![0, 1, 2, 3, 4, 2000, 5000, u16::MAX]; + values.sort(); + let array = ArrayStore::from_vec_unchecked(values); + let mut interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + ]); + interval_store |= &array; + assert_eq!( + interval_store, + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 11), + Interval::new_unchecked(2000, 2000), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + Interval::new_unchecked(u16::MAX, u16::MAX), + ]) + ) + } + + #[test] + fn intersection() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + ]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 50), + Interval::new_unchecked(4000, 10000), + ]); + assert_eq!( + &interval_store_1 & &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 10000), + ]) + ); + assert_eq!(&interval_store_1 & &interval_store_1, interval_store_1); + } + + #[test] + fn difference() { + let mut interval_store_1 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 11000), + ]); + let interval_store_2 = 
IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 50), + Interval::new_unchecked(4000, 10000), + ]); + interval_store_1 -= &interval_store_2; + assert_eq!( + interval_store_1, + IntervalStore(alloc::vec![ + Interval::new_unchecked(2, 4), + Interval::new_unchecked(10001, 11000), + ]) + ) + } + + #[test] + fn symmetric_difference_0() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(2, 11), + Interval::new_unchecked(5000, 7000), + Interval::new_unchecked(8000, 11000), + Interval::new_unchecked(40000, 50000), + ]); + let interval_store_2 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 0), + Interval::new_unchecked(5, 50), + Interval::new_unchecked(4000, 10000), + ]); + assert_eq!( + &interval_store_1 ^ &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new_unchecked(2, 4), + Interval::new_unchecked(12, 50), + Interval::new_unchecked(4000, 4999), + Interval::new_unchecked(7001, 7999), + Interval::new_unchecked(10001, 11000), + Interval::new_unchecked(40000, 50000), + ]) + ); + } + + #[test] + fn symmetric_difference_1() { + let interval_store_1 = IntervalStore(alloc::vec![Interval::new_unchecked(0, 50),]); + let interval_store_2 = IntervalStore(alloc::vec![Interval::new_unchecked(100, 200),]); + assert_eq!( + &interval_store_1 ^ &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(100, 200), + ]) + ); + } + + #[test] + fn symmetric_difference_2() { + let interval_store_1 = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let interval_store_2 = IntervalStore(alloc::vec![Interval::new_unchecked(0, 6000),]); + assert_eq!( + &interval_store_1 ^ &interval_store_2, + IntervalStore(alloc::vec![ + Interval::new_unchecked(51, 499), + Interval::new_unchecked(601, 799), + Interval::new_unchecked(1001, 6000), + ]) 
+ ); + } + + #[test] + fn iter_next() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let mut iter = interval_store.into_iter(); + + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i, value as usize); + i += 1; + if i >= 51 { + break; + } + let size = (Interval::new_unchecked(i as u16, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = (Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i + 500, value as usize); + i += 1; + if i >= 101 { + break; + } + let size = (Interval::new_unchecked((i + 500) as u16, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = Interval::new_unchecked(800, 1000).run_len() as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + if i >= 201 { + break; + } + assert_eq!(i + 800, value as usize); + i += 1; + if i >= 201 { + break; + } + let size = (Interval::new_unchecked((i + 800) as u16, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn iter_next_back() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + 
Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let mut iter = interval_store.into_iter(); + + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(1000 - i, value as usize); + i += 1; + if i >= 201 { + break; + } + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, (1000 - i) as u16).run_len()) + as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(600 - i, value as usize); + i += 1; + if i >= 101 { + break; + } + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, (600 - i) as u16).run_len()) + as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(50 - i, value as usize); + i += 1; + if i >= 51 { + break; + } + let size = (Interval::new_unchecked(0, (50 - i) as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn iter_next_and_next_back() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let mut iter = interval_store.into_iter(); + + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(1000 - 
i, value as usize); + i += 1; + if i >= 201 { + break; + } + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, (1000 - i) as u16).run_len()) + as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(600 - i, value as usize); + i += 1; + if i >= 101 { + break; + } + let size = (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, (600 - i) as u16).run_len()) + as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let size = (Interval::new_unchecked(0, 50).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i, value as usize); + i += 1; + if i >= 51 { + break; + } + let size = (Interval::new_unchecked(i as u16, 50).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn iter_u16_max() { + let interval_store = IntervalStore(alloc::vec![Interval::new_unchecked(0, u16::MAX),]); + let mut iter = interval_store.iter(); + + let mut i = 0; + while let Some(value) = iter.next() { + assert_eq!(i, value as usize); + i += 1; + if i >= u16::MAX as usize { + break; + } + let size = (Interval::new_unchecked(i as u16, u16::MAX).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + + let mut iter = interval_store.iter(); + + let mut i = 0; + while let Some(value) = iter.next_back() { + assert_eq!(u16::MAX as usize - i, value as usize); + i += 1; + if i >= u16::MAX as usize { + break; + } + let size = (Interval::new_unchecked(0, u16::MAX - i 
as u16).run_len()) as usize; + assert_eq!(iter.size_hint(), (size, Some(size))); + } + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(u16::MAX as usize), Some(u16::MAX)); + } + + #[test] + fn iter_nth() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(50), Some(50)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(51), Some(500)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(100), Some(549)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(152), Some(800)); + + let mut iter = interval_store.iter(); + assert_eq!( + iter.nth( + (Interval::new_unchecked(0, 50).run_len() + + Interval::new_unchecked(500, 600).run_len() + + Interval::new_unchecked(800, 1000).run_len() + - 1) as usize + ), + Some(1000) + ); + + let mut iter = interval_store.iter(); + iter.next(); + iter.next(); + iter.next(); + assert_eq!(iter.nth(152), Some(803)); + + let mut iter = interval_store.iter(); + assert_eq!(iter.nth(u16::MAX as usize), None); + } + + #[test] + fn iter_advance_to() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let mut iter = interval_store.iter(); + iter.advance_to(20); + assert_eq!(iter.next(), Some(20)); + iter.advance_to(800); + assert_eq!(iter.next(), Some(800)); + iter.advance_to(u16::MAX); + assert_eq!(iter.next(), Some(801)); + + let mut iter = interval_store.iter(); + iter.advance_to(100); + assert_eq!(iter.next(), Some(500)); + iter.advance_to(800); + assert_eq!(iter.next(), Some(800)); + iter.advance_to(900); + assert_eq!(iter.next(), Some(900)); + iter.advance_to(800); + assert_eq!(iter.next(), Some(901)); + let mut iter = interval_store.iter(); + iter.next(); + iter.next(); + iter.next(); + 
iter.advance_to(499); + assert_eq!(iter.next(), Some(500)); + + let mut iter = interval_store.iter(); + iter.advance_to(100); + assert_eq!(iter.next(), Some(500)); + } + + #[test] + fn iter_advance_back_to() { + let interval_store = IntervalStore(alloc::vec![ + Interval::new_unchecked(0, 50), + Interval::new_unchecked(500, 600), + Interval::new_unchecked(800, 1000), + ]); + let mut iter = interval_store.iter(); + iter.advance_back_to(u16::MAX); + assert_eq!(iter.next_back(), Some(1000)); + iter.advance_back_to(800); + assert_eq!(iter.next_back(), Some(800)); + iter.advance_back_to(20); + assert_eq!(iter.next_back(), Some(20)); + + let mut iter = interval_store.iter(); + iter.advance_back_to(800); + assert_eq!(iter.next_back(), Some(800)); + iter.advance_back_to(900); + assert_eq!(iter.next_back(), Some(600)); + iter.advance_back_to(550); + assert_eq!(iter.next_back(), Some(550)); + iter.advance_back_to(20); + assert_eq!(iter.next_back(), Some(20)); + let mut iter = interval_store.iter(); + iter.next_back(); + iter.next_back(); + iter.next_back(); + iter.advance_back_to(700); + assert_eq!(iter.next_back(), Some(600)); + let mut iter = interval_store.iter(); + iter.advance_back_to(400); + assert_eq!(iter.next_back(), Some(50)); + } +} diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 384b7fe81..c930bec28 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -1,5 +1,6 @@ mod array_store; mod bitmap_store; +mod interval_store; use alloc::vec; use core::mem; @@ -8,11 +9,15 @@ use core::ops::{ }; use core::slice; -pub use self::bitmap_store::BITMAP_LENGTH; -use self::Store::{Array, Bitmap}; +pub use self::bitmap_store::{BITMAP_BYTES, BITMAP_LENGTH}; +use self::Store::{Array, Bitmap, Run}; pub(crate) use self::array_store::ArrayStore; pub use self::bitmap_store::{BitmapIter, BitmapStore}; +pub(crate) use self::interval_store::Interval; +pub(crate) use interval_store::{IntervalStore, RunIterBorrowed, 
RunIterOwned}; +#[cfg(feature = "std")] +pub(crate) use interval_store::{RUN_ELEMENT_BYTES, RUN_NUM_BYTES}; use crate::bitmap::container::ARRAY_LIMIT; @@ -23,6 +28,7 @@ use alloc::boxed::Box; pub(crate) enum Store { Array(ArrayStore), Bitmap(BitmapStore), + Run(IntervalStore), } #[derive(Clone)] @@ -31,6 +37,8 @@ pub(crate) enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), + RunBorrowed(RunIterBorrowed<'a>), + RunOwned(RunIterOwned), } impl Store { @@ -48,7 +56,7 @@ impl Store { } pub fn full() -> Store { - Store::Bitmap(BitmapStore::full()) + Store::Run(IntervalStore::full()) } pub fn from_lsb0_bytes(bytes: &[u8], byte_offset: usize) -> Option { @@ -85,6 +93,7 @@ impl Store { match self { Array(vec) => vec.insert(index), Bitmap(bits) => bits.insert(index), + Run(runs) => runs.insert(index), } } @@ -97,6 +106,7 @@ impl Store { match self { Array(vec) => vec.insert_range(range), Bitmap(bits) => bits.insert_range(range), + Run(runs) => runs.insert_range(range), } } @@ -107,6 +117,7 @@ impl Store { match self { Array(vec) => vec.push(index), Bitmap(bits) => bits.push(index), + Run(runs) => runs.push(index), } } @@ -121,6 +132,12 @@ impl Store { match self { Array(vec) => vec.push_unchecked(index), Bitmap(bits) => bits.push_unchecked(index), + Run(runs) => { + // push unchecked for intervals doesn't make sense since we have to check anyways to + // intervals and such when the index is consecutive + debug_assert!(runs.max().map(|f| f < index).unwrap_or(true)); + runs.push(index); + } } } @@ -128,6 +145,7 @@ impl Store { match self { Array(vec) => vec.remove(index), Bitmap(bits) => bits.remove(index), + Run(runs) => runs.remove(index), } } @@ -139,6 +157,7 @@ impl Store { match self { Array(vec) => vec.remove_range(range), Bitmap(bits) => bits.remove_range(range), + Run(runs) => runs.remove_range(range), } } @@ -146,6 +165,7 @@ impl Store { match self { Array(vec) => vec.remove_smallest(index), 
Bitmap(bits) => bits.remove_smallest(index), + Run(runs) => runs.remove_smallest(index), } } @@ -153,6 +173,7 @@ impl Store { match self { Array(vec) => vec.remove_biggest(index), Bitmap(bits) => bits.remove_biggest(index), + Run(runs) => runs.remove_biggest(index), } } @@ -160,6 +181,7 @@ impl Store { match self { Array(vec) => vec.contains(index), Bitmap(bits) => bits.contains(index), + Run(intervals) => intervals.contains(index), } } @@ -167,6 +189,7 @@ impl Store { match self { Array(vec) => vec.contains_range(range), Bitmap(bits) => bits.contains_range(range), + Run(runs) => runs.contains_range(range), } } @@ -181,6 +204,11 @@ impl Store { (Array(vec), Bitmap(bits)) | (Bitmap(bits), Array(vec)) => { vec.iter().all(|&i| !bits.contains(i)) } + (Run(intervals1), Run(intervals2)) => intervals1.is_disjoint(intervals2), + (Run(runs), Array(vec)) | (Array(vec), Run(runs)) => runs.is_disjoint_array(vec), + (Run(intervals), Bitmap(bitmap)) | (Bitmap(bitmap), Run(intervals)) => { + intervals.is_disjoint_bitmap(bitmap) + } } } @@ -190,6 +218,11 @@ impl Store { (Bitmap(bits1), Bitmap(bits2)) => bits1.is_subset(bits2), (Array(vec), Bitmap(bits)) => vec.iter().all(|&i| bits.contains(i)), (Bitmap(..), &Array(..)) => false, + (Array(vec), Run(runs)) => vec.iter().all(|&i| runs.contains(i)), + (Bitmap(bitmap), Run(runs)) => bitmap.iter().all(|i| runs.contains(i)), + (Run(intervals1), Run(intervals2)) => intervals1.is_subset(intervals2), + (Run(intervals), Array(vec)) => intervals.is_subset_array(vec), + (Run(intervals), Bitmap(bitmap)) => intervals.is_subset_bitmap(bitmap), } } @@ -199,6 +232,11 @@ impl Store { (Bitmap(bits1), Bitmap(bits2)) => bits1.intersection_len_bitmap(bits2), (Array(vec), Bitmap(bits)) => bits.intersection_len_array(vec), (Bitmap(bits), Array(vec)) => bits.intersection_len_array(vec), + (Run(runs), Array(vec)) | (Array(vec), Run(runs)) => runs.intersection_len_array(vec), + (Run(runs), Bitmap(bitmap)) | (Bitmap(bitmap), Run(runs)) => { + 
runs.intersection_len_bitmap(bitmap) + } + (Run(runs1), Run(runs2)) => runs1.intersection_len(runs2), } } @@ -206,6 +244,7 @@ impl Store { match self { Array(vec) => vec.len(), Bitmap(bits) => bits.len(), + Run(intervals) => intervals.len(), } } @@ -213,6 +252,7 @@ impl Store { match self { Array(vec) => vec.is_empty(), Bitmap(bits) => bits.is_empty(), + Run(runs) => runs.is_empty(), } } @@ -220,6 +260,7 @@ impl Store { match self { Array(vec) => vec.min(), Bitmap(bits) => bits.min(), + Run(runs) => runs.min(), } } @@ -228,6 +269,7 @@ impl Store { match self { Array(vec) => vec.max(), Bitmap(bits) => bits.max(), + Run(runs) => runs.max(), } } @@ -235,6 +277,7 @@ impl Store { match self { Array(vec) => vec.rank(index), Bitmap(bits) => bits.rank(index), + Run(runs) => runs.rank(index), } } @@ -242,6 +285,39 @@ impl Store { match self { Array(vec) => vec.select(n), Bitmap(bits) => bits.select(n), + Run(runs) => runs.select(n), + } + } + + pub fn count_runs(&self) -> u64 { + match self { + Array(vec) => { + vec.iter() + .fold((-2, 0u64), |(prev, runs), &v| { + let new = v as i32; + if prev + 1 != new { + (new, runs + 1) + } else { + (new, runs) + } + }) + .1 + } + Bitmap(bits) => { + let mut num_runs = 0u64; + + for i in 0..BITMAP_LENGTH - 1 { + let word = bits.as_array()[i]; + let next_word = bits.as_array()[i + 1]; + num_runs += + ((word << 1) & !word).count_ones() as u64 + ((word >> 63) & !next_word); + } + + let last = bits.as_array()[BITMAP_LENGTH - 1]; + num_runs += ((last << 1) & !last).count_ones() as u64 + (last >> 63); + num_runs + } + Run(intervals) => intervals.run_amount(), } } @@ -249,6 +325,79 @@ impl Store { match self { Array(arr) => Bitmap(arr.to_bitmap_store()), Bitmap(_) => self.clone(), + Run(intervals) => Bitmap(intervals.to_bitmap()), + } + } + + pub(crate) fn to_run(&self) -> Self { + match self { + Array(vec) => { + let mut intervals = IntervalStore::new(); + if let Some(mut start) = vec.as_slice().first().copied() { + for (idx, &v) in 
vec.as_slice()[1..].iter().enumerate() { + // subtract current and previous values, then check if the gap is too large + // for a run + if v - vec.as_slice()[idx] > 1 { + intervals.push_interval_unchecked(Interval::new_unchecked( + start, + vec.as_slice()[idx], + )); + start = v + } + } + intervals.push_interval_unchecked(Interval::new_unchecked( + start, + *vec.as_slice().last().unwrap(), + )); + } + Run(intervals) + } + Bitmap(bits) => { + let mut current = bits.as_array()[0]; + let mut i = 0u16; + let mut start; + let mut last; + + let mut intervals = IntervalStore::new(); + + loop { + // Skip over empty words + while current == 0 && i < BITMAP_LENGTH as u16 - 1 { + i += 1; + current = bits.as_array()[i as usize]; + } + // Reached end of the bitmap without finding anymore bits set + if current == 0 { + break; + } + let current_start = current.trailing_zeros() as u16; + start = 64 * i + current_start; + + // Pad LSBs with 1s + current |= current - 1; + + // Find next 0 + while current == u64::MAX && i < BITMAP_LENGTH as u16 - 1 { + i += 1; + current = bits.as_array()[i as usize]; + } + + // Run continues until end of this container + if current == u64::MAX { + intervals.push_interval_unchecked(Interval::new_unchecked(start, u16::MAX)); + break; + } + + let current_last = (!current).trailing_zeros() as u16; + last = 64 * i + current_last; + intervals.push_interval_unchecked(Interval::new_unchecked(start, last - 1)); + + // pad LSBs with 0s + current &= current + 1; + } + Run(intervals) + } + Run(intervals) => Run(intervals.clone()), } } } @@ -280,23 +429,58 @@ impl BitOr<&Store> for &Store { BitOrAssign::bitor_assign(&mut rhs, self); rhs } + (Run(left), Run(right)) => { + let (smallest, biggest) = if left.run_amount() > right.run_amount() { + (right, left) + } else { + (left, right) + }; + let mut res = biggest.clone(); + BitOrAssign::bitor_assign(&mut res, smallest); + Run(res) + } + (Run(runs), Array(array)) | (Array(array), Run(runs)) => { + let mut ret = 
runs.clone(); + BitOrAssign::bitor_assign(&mut ret, array); + Run(ret) + } + (Run(runs), Bitmap(bitmap)) | (Bitmap(bitmap), Run(runs)) => { + let mut ret = runs.to_bitmap(); + BitOrAssign::bitor_assign(&mut ret, bitmap); + Bitmap(ret) + } } } } impl BitOrAssign for Store { - fn bitor_assign(&mut self, mut rhs: Store) { - match (self, &mut rhs) { - (&mut Array(ref mut vec1), &mut Array(ref vec2)) => { + fn bitor_assign(&mut self, rhs: Store) { + match (self, rhs) { + (&mut Array(ref mut vec1), Array(ref vec2)) => { *vec1 = BitOr::bitor(&*vec1, vec2); } - (&mut Bitmap(ref mut bits1), &mut Array(ref vec2)) => { + (&mut Bitmap(ref mut bits1), Array(ref vec2)) => { BitOrAssign::bitor_assign(bits1, vec2); } - (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { + (&mut Bitmap(ref mut bits1), Bitmap(ref bits2)) => { BitOrAssign::bitor_assign(bits1, bits2); } - (this @ &mut Array(..), &mut Bitmap(..)) => { + (this @ &mut Bitmap(..), rhs @ Run(..)) => { + let other = rhs.to_bitmap(); + BitOrAssign::bitor_assign(this, other); + } + (Run(intervals1), Run(intervals2)) => BitOrAssign::bitor_assign(intervals1, intervals2), + (Run(intervals1), Array(ref vec)) => BitOrAssign::bitor_assign(intervals1, vec), + (this @ Array(..), Run(mut intervals)) => { + let Array(vec) = &this else { unreachable!() }; + BitOrAssign::bitor_assign(&mut intervals, vec); + *this = Run(intervals); + } + (this @ Run(..), rhs @ Bitmap(..)) => { + *this = this.to_bitmap(); + BitOrAssign::bitor_assign(this, rhs); + } + (this @ &mut Array(..), mut rhs @ Bitmap(..)) => { mem::swap(this, &mut rhs); BitOrAssign::bitor_assign(this, rhs); } @@ -322,6 +506,27 @@ impl BitOrAssign<&Store> for Store { BitOrAssign::bitor_assign(&mut lhs, &*this); *this = lhs; } + (Run(runs1), Run(runs2)) => { + BitOrAssign::bitor_assign(runs1, runs2); + } + (Run(runs), Array(array)) => { + BitOrAssign::bitor_assign(runs, array); + } + (this @ Array(..), Run(runs)) => { + let mut runs = runs.clone(); + let Array(array) = &this 
else { unreachable!() }; + BitOrAssign::bitor_assign(&mut runs, array); + *this = Run(runs); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new = runs.to_bitmap(); + BitOrAssign::bitor_assign(&mut new, bitmap); + *this = Bitmap(new); + } + (Bitmap(bitmap), Run(runs)) => { + BitOrAssign::bitor_assign(bitmap, &runs.to_bitmap()); + } } } } @@ -362,6 +567,26 @@ impl BitAndAssign for Store { (&mut Array(ref mut vec1), &mut Bitmap(ref bits2)) => { BitAndAssign::bitand_assign(vec1, bits2); } + (Run(intervals1), Run(intervals2)) => { + *intervals1 = BitAnd::bitand(&*intervals1, &*intervals2); + } + (this @ &mut Run(..), Array(array)) => { + let Run(runs) = &this else { unreachable!() }; + BitAndAssign::bitand_assign(array, runs); + *this = rhs; + } + (Array(array), Run(runs)) => { + BitAndAssign::bitand_assign(array, &*runs); + } + (this @ &mut Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + BitAndAssign::bitand_assign(&mut new_bitmap, &*bitmap); + *this = Bitmap(new_bitmap); + } + (Bitmap(bitmap), Run(runs)) => { + BitAndAssign::bitand_assign(bitmap, &runs.to_bitmap()); + } (this @ &mut Bitmap(..), &mut Array(..)) => { mem::swap(this, &mut rhs); BitAndAssign::bitand_assign(this, rhs); @@ -395,6 +620,25 @@ impl BitAndAssign<&Store> for Store { BitAndAssign::bitand_assign(&mut new, &*this); *this = new; } + (Run(runs1), Run(runs2)) => { + *runs1 = BitAnd::bitand(&*runs1, runs2); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + BitAndAssign::bitand_assign(&mut new_bitmap, bitmap); + *this = Bitmap(new_bitmap); + } + (Bitmap(bitmap), Run(runs)) => { + BitAndAssign::bitand_assign(bitmap, &runs.to_bitmap()); + } + (this @ Run(..), Array(array)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_array = array.clone(); + new_array.retain(|f| 
runs.contains(f)); + *this = Array(new_array); + } + (Array(array), Run(runs)) => array.retain(|f| runs.contains(f)), } } } @@ -429,6 +673,29 @@ impl SubAssign<&Store> for Store { (&mut Array(ref mut vec1), Bitmap(bits2)) => { SubAssign::sub_assign(vec1, bits2); } + (Run(runs1), Run(runs2)) => { + SubAssign::sub_assign(runs1, runs2); + } + (Run(runs), Array(array)) => { + array.iter().for_each(|&f| { + runs.remove(f); + }); + } + (Array(array), Run(runs)) => { + runs.iter_intervals().for_each(|iv| { + array.remove_range(iv.start()..=iv.end()); + }); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + SubAssign::sub_assign(&mut new_bitmap, bitmap); + *this = Bitmap(new_bitmap); + } + (Bitmap(bitmap), Run(runs)) => { + let new_bitmap = runs.to_bitmap(); + SubAssign::sub_assign(bitmap, &new_bitmap); + } } } } @@ -469,6 +736,23 @@ impl BitXorAssign for Store { mem::swap(this, &mut rhs); BitXorAssign::bitxor_assign(this, rhs); } + (Run(runs1), Run(runs2)) => { + *runs1 = BitXor::bitxor(&*runs1, &*runs2); + } + (Run(runs1), Array(array)) => BitXorAssign::bitxor_assign(runs1, array), + (this @ Array(..), Run(runs1)) => { + let Array(array) = &this else { unreachable!() }; + BitXorAssign::bitxor_assign(runs1, array); + *this = rhs; + } + (Bitmap(bitmap), Run(runs)) => { + BitXorAssign::bitxor_assign(bitmap, &runs.to_bitmap()); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + BitXorAssign::bitxor_assign(bitmap, &runs.to_bitmap()); + *this = rhs; + } } } } @@ -480,9 +764,6 @@ impl BitXorAssign<&Store> for Store { let this = mem::take(vec1); *vec1 = BitXor::bitxor(&this, vec2); } - (&mut Bitmap(ref mut bits1), Array(vec2)) => { - BitXorAssign::bitxor_assign(bits1, vec2); - } (&mut Bitmap(ref mut bits1), Bitmap(bits2)) => { BitXorAssign::bitxor_assign(bits1, bits2); } @@ -491,6 +772,28 @@ impl BitXorAssign<&Store> for Store { 
BitXorAssign::bitxor_assign(&mut lhs, &*this); *this = lhs; } + (&mut Bitmap(ref mut bits1), Array(vec2)) => { + BitXorAssign::bitxor_assign(bits1, vec2); + } + (Run(runs1), Run(runs2)) => { + *runs1 = BitXor::bitxor(&*runs1, runs2); + } + (Run(runs1), Array(array)) => BitXorAssign::bitxor_assign(runs1, array), + (this @ Array(..), Run(runs1)) => { + let Array(array) = &this else { unreachable!() }; + let mut runs1 = runs1.clone(); + BitXorAssign::bitxor_assign(&mut runs1, array); + *this = Run(runs1); + } + (Bitmap(bitmap), Run(runs)) => { + BitXorAssign::bitxor_assign(bitmap, &runs.to_bitmap()); + } + (this @ Run(..), Bitmap(bitmap)) => { + let Run(runs) = &this else { unreachable!() }; + let mut new_bitmap = runs.to_bitmap(); + BitXorAssign::bitxor_assign(&mut new_bitmap, bitmap); + *this = Bitmap(new_bitmap); + } } } } @@ -502,6 +805,7 @@ impl<'a> IntoIterator for &'a Store { match self { Array(vec) => Iter::Array(vec.iter()), Bitmap(bits) => Iter::BitmapBorrowed(bits.iter()), + Run(intervals) => Iter::RunBorrowed(intervals.iter()), } } } @@ -513,6 +817,7 @@ impl IntoIterator for Store { match self { Array(vec) => Iter::Vec(vec.into_iter()), Bitmap(bits) => Iter::BitmapOwned(bits.into_iter()), + Run(intervals) => Iter::RunOwned(intervals.into_iter()), } } } @@ -525,6 +830,14 @@ impl PartialEq for Store { bits1.len() == bits2.len() && bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) } + (Run(intervals1), Run(intervals2)) => intervals1 == intervals2, + (Run(run), Array(array)) | (Array(array), Run(run)) => { + run.len() == array.len() && array.iter().all(|&i| run.contains(i)) + } + (Run(run), Bitmap(bitmap)) | (Bitmap(bitmap), Run(run)) => { + run.len() == bitmap.len() + && run.iter_intervals().all(|&iv| bitmap.contains_range(iv.start()..=iv.end())) + } _ => false, } } @@ -548,6 +861,8 @@ impl Iter<'_> { } Iter::BitmapBorrowed(inner) => inner.advance_to(n), Iter::BitmapOwned(inner) => inner.advance_to(n), + Iter::RunOwned(inner) => inner.advance_to(n), + 
Iter::RunBorrowed(inner) => inner.advance_to(n), } } @@ -571,6 +886,8 @@ impl Iter<'_> { } Iter::BitmapBorrowed(inner) => inner.advance_back_to(n), Iter::BitmapOwned(inner) => inner.advance_back_to(n), + Iter::RunOwned(inner) => inner.advance_back_to(n), + Iter::RunBorrowed(inner) => inner.advance_back_to(n), } } } @@ -584,6 +901,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.next(), Iter::BitmapBorrowed(inner) => inner.next(), Iter::BitmapOwned(inner) => inner.next(), + Iter::RunOwned(inner) => inner.next(), + Iter::RunBorrowed(inner) => inner.next(), } } @@ -593,6 +912,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.size_hint(), Iter::BitmapBorrowed(inner) => inner.size_hint(), Iter::BitmapOwned(inner) => inner.size_hint(), + Iter::RunOwned(inner) => inner.size_hint(), + Iter::RunBorrowed(inner) => inner.size_hint(), } } @@ -605,6 +926,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.count(), Iter::BitmapBorrowed(inner) => inner.count(), Iter::BitmapOwned(inner) => inner.count(), + Iter::RunOwned(inner) => inner.count(), + Iter::RunBorrowed(inner) => inner.count(), } } @@ -614,6 +937,8 @@ impl Iterator for Iter<'_> { Iter::Vec(inner) => inner.nth(n), Iter::BitmapBorrowed(inner) => inner.nth(n), Iter::BitmapOwned(inner) => inner.nth(n), + Iter::RunOwned(inner) => inner.nth(n), + Iter::RunBorrowed(inner) => inner.nth(n), } } } @@ -625,6 +950,8 @@ impl DoubleEndedIterator for Iter<'_> { Iter::Vec(inner) => inner.next_back(), Iter::BitmapBorrowed(inner) => inner.next_back(), Iter::BitmapOwned(inner) => inner.next_back(), + Iter::RunOwned(inner) => inner.next_back(), + Iter::RunBorrowed(inner) => inner.next_back(), } } } diff --git a/roaring/src/lib.rs b/roaring/src/lib.rs index 1a78f8901..f6c4453b1 100644 --- a/roaring/src/lib.rs +++ b/roaring/src/lib.rs @@ -33,6 +33,16 @@ pub mod treemap; pub use bitmap::RoaringBitmap; pub use treemap::RoaringTreemap; +/// An error type that is returned when a `try_push` in a bitmap did not 
succeed. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct IntegerTooSmall; + +impl fmt::Display for IntegerTooSmall { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("inserted integer is smaller than the largest integer") + } +} + /// An error type that is returned when an iterator isn't sorted. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct NonSortedIntegers { diff --git a/roaring/src/treemap/inherent.rs b/roaring/src/treemap/inherent.rs index 63e2cb3bb..8f27f9f55 100644 --- a/roaring/src/treemap/inherent.rs +++ b/roaring/src/treemap/inherent.rs @@ -2,6 +2,7 @@ use alloc::collections::btree_map::{BTreeMap, Entry}; use core::iter; use core::ops::RangeBounds; +use crate::IntegerTooSmall; use crate::RoaringBitmap; use crate::RoaringTreemap; @@ -123,9 +124,32 @@ impl RoaringTreemap { /// /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); /// ``` + #[deprecated(since = "0.11.0", note = "use `try_push` instead")] pub fn push(&mut self, value: u64) -> bool { let (hi, lo) = util::split(value); - self.map.entry(hi).or_default().push(lo) + self.map.entry(hi).or_default().try_push(lo).is_ok() + } + + /// Pushes `value` in the treemap only if it is greater than the current maximum value. + /// + /// Returns an error if the value is not greater than the current maximum value. + /// + /// # Examples + /// + /// ```rust + /// use roaring::{RoaringTreemap, IntegerTooSmall}; + /// + /// let mut rb = RoaringTreemap::new(); + /// assert!(rb.try_push(1).is_ok()); + /// assert!(rb.try_push(3).is_ok()); + /// assert_eq!(rb.try_push(3), Err(IntegerTooSmall)); + /// assert!(rb.try_push(5).is_ok()); + /// + /// assert_eq!(rb.iter().collect::>(), vec![1, 3, 5]); + /// ``` + pub fn try_push(&mut self, value: u64) -> Result<(), IntegerTooSmall> { + let (hi, lo) = util::split(value); + self.map.entry(hi).or_default().try_push(lo) } /// Pushes `value` in the treemap only if it is greater than the current maximum value. 
diff --git a/roaring/src/treemap/iter.rs b/roaring/src/treemap/iter.rs index 57b39b900..b11f1c3e9 100644 --- a/roaring/src/treemap/iter.rs +++ b/roaring/src/treemap/iter.rs @@ -121,7 +121,7 @@ pub struct IntoIter { } impl Iter<'_> { - fn new(map: &BTreeMap) -> Iter { + fn new(map: &'_ BTreeMap) -> Iter<'_> { let outer = BitmapIter::new(map); Iter { outer, front: None, back: None } } @@ -346,7 +346,7 @@ impl RoaringTreemap { /// assert_eq!(iter.next(), Some(2)); /// assert_eq!(iter.next(), None); /// ``` - pub fn iter(&self) -> Iter { + pub fn iter(&'_ self) -> Iter<'_> { Iter::new(&self.map) } @@ -365,7 +365,7 @@ impl RoaringTreemap { /// assert_eq!(bitmaps.next(), Some((0, &(0..6000).collect::()))); /// assert_eq!(bitmaps.next(), None); /// ``` - pub fn bitmaps(&self) -> BitmapIter { + pub fn bitmaps(&'_ self) -> BitmapIter<'_> { BitmapIter::new(&self.map) } diff --git a/roaring/tests/clone.rs b/roaring/tests/clone.rs index 9c485b44a..c5e9c15f4 100644 --- a/roaring/tests/clone.rs +++ b/roaring/tests/clone.rs @@ -42,3 +42,14 @@ fn bitmaps() { assert_eq!(clone, original); } + +#[test] +#[allow(clippy::redundant_clone)] +fn runs() { + let mut original = + RoaringBitmap::from_iter((0..6000).chain(1_000_000..1_012_000).chain(2_000_000..2_010_000)); + original.optimize(); + let clone = original.clone(); + + assert_eq!(clone, original); +} diff --git a/roaring/tests/lib.rs b/roaring/tests/lib.rs index 5edbcc19b..568de01d7 100644 --- a/roaring/tests/lib.rs +++ b/roaring/tests/lib.rs @@ -121,3 +121,32 @@ fn to_array() { assert!(!bitmap.contains(i)); } } + +#[test] +fn optimize_array() { + let mut bitmap = RoaringBitmap::from_iter(0..1000); + assert!(bitmap.optimize()); + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); +} + +#[test] +fn optimize_bitmap() { + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); +} + +#[test] +fn remove_run_compression() { + let mut bitmap = RoaringBitmap::from_iter(0..5000); + 
assert!(bitmap.optimize()); + assert!(bitmap.remove_run_compression()); +} + +#[test] +fn optimize_run() { + let mut bitmap = RoaringBitmap::from_iter(0..1000); + assert!(bitmap.optimize()); + // Calling optimize a second time should return false as no changes will be made + assert!(!bitmap.optimize()); +} diff --git a/roaring/tests/serialization.rs b/roaring/tests/serialization.rs index 78325017c..f4adc8d7c 100644 --- a/roaring/tests/serialization.rs +++ b/roaring/tests/serialization.rs @@ -30,10 +30,10 @@ fn test_deserialize_without_runs_from_provided_data() { #[test] fn test_deserialize_with_runs_from_provided_data() { - assert_eq!( - RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), - test_data_bitmap() - ); + let mut expected = test_data_bitmap(); + // Call optimize to create run containers + expected.optimize(); + assert_eq!(RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), expected); } #[test] @@ -44,6 +44,16 @@ fn test_serialize_into_provided_data() { assert!(BITMAP_WITHOUT_RUNS == &buffer[..]); } +#[test] +fn test_serialize_with_runs_into_provided_data() { + let mut bitmap = test_data_bitmap(); + // Call optimize to create run containers + bitmap.optimize(); + let mut buffer = vec![]; + bitmap.serialize_into(&mut buffer).unwrap(); + assert!(BITMAP_WITH_RUNS == &buffer[..]); +} + #[test] fn test_empty() { let original = RoaringBitmap::new(); @@ -541,3 +551,198 @@ fn test_strange() { let new = serialize_and_deserialize(&original); assert_eq!(original, new); } + +#[test] +fn test_runs() { + let mut original = RoaringBitmap::from_iter((1000..3000).chain(70000..77000)); + original.optimize(); + let new = serialize_and_deserialize(&original); + assert_eq!(original.len(), new.len()); + assert_eq!(original.min(), new.min()); + assert_eq!(original.max(), new.max()); +} + +fn assert_invalid_serialization(serialized: &[u8], msg: &str) { + let result = RoaringBitmap::deserialize_from(serialized); + if let Ok(res) = result { + 
panic!("Expected error: {msg}. Got: {res:?}"); + } +} + +#[test] +fn deserialize_negative_container_count() { + let data = [ + 0x3A, 0x30, 0, 0, // Serial cookie, no run containers + 0x00, 0x00, 0x00, 0x80, // Container count (NEGATIVE) + ]; + assert_invalid_serialization(&data, "Negative container count"); +} + +#[test] +fn deserialize_huge_container_count() { + const MAX_CONTAINERS: usize = 0xFFFF; + let data = [ + 0x3A, 0x30, 0, 0, // Serial cookie, no run containers + 0x01, 0x00, 0x01, 0x00, // Container count (MAX_CONTAINERS + 1) + ]; + assert_invalid_serialization(&data, "Huge container count"); + + // For each container, 32 bits for container offset, 16 bits for a key, cardinality - 1, and a + // single array value + let full_size = data.len() + (MAX_CONTAINERS + 1) * (4 + 3 * 2); + let mut full_data = vec![0; full_size]; + full_data[..data.len()].copy_from_slice(&data); + assert_invalid_serialization(&full_data, "Huge container count"); +} + +#[test] +fn deserialize_empty_run_container() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 0, 0, // Cardinality - 1 of the first container + 0, 0, // First Container - Number of runs + ]; + assert_invalid_serialization(&data, "Empty run container"); +} + +#[test] +fn deserialize_run_container_contiguous_ranges() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 1, 0, // Cardinality - 1 of the first container + 2, 0, // First Container - Number of runs + 0, 0, // First run start + 0, 0, // First run length - 1 + 1, 0, // Second run start (STARTS AT THE END OF THE FIRST) + 0, 0, // Second run length - 1 + ]; + + assert_invalid_serialization(&data, "Contiguous ranges in run container"); +} + +#[test] +fn deserialize_run_container_overlap() { 
+ let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 4, 0, // Cardinality - 1 of the first container + 2, 0, // First Container - Number of runs + 0, 0, // First run start + 4, 0, // First run length - 1 + 1, 0, // Second run start (STARTS INSIDE THE FIRST) + 0, 0, // Second run length - 1 + ]; + + assert_invalid_serialization(&data, "Overlapping ranges in run container"); +} + +#[test] +fn deserialize_run_container_overflow() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x00, 0x00, // Container count - 1 + 0x01, // Run Flag Bitset (single container is a run) + 0, 0, // Upper 16 bits of the first container + 4, 0, // Cardinality - 1 of the first container + 1, 0, // First Container - Number of runs + 0xFE, 0xFF, // First run start + 4, 0, // First run length - 1 (OVERFLOW) + ]; + + assert_invalid_serialization(&data, "Overflow in run container"); +} + +#[test] +fn deserialize_duplicate_keys() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0x01, 0x00, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 0, 0, // Cardinality - 1 of the first container + 0, 0, // Upper 16 bits of the second container - DUPLICATE + 0, 0, // Cardinality - 1 of the second container + 0, 0, // Only value of first container + 0, 0, // Only value of second container + ]; + + assert_invalid_serialization(&data, "Duplicate keys in containers"); +} + +#[test] +fn deserialize_unsorted_keys() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 1, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 1, 0, // Upper 16 bits of the first container + 0, 0, // Cardinality - 1 of the first container + 0, 0, // Upper 16 bits of the second container (LESS THAN FIRST) + 0, 0, // Cardinality - 1 of the second container + 0, 0, // Only value of first container + 0, 0, // Only value of second container + ]; 
+ + assert_invalid_serialization(&data, "Unsorted keys in containers"); +} + +#[test] +fn deserialize_array_duplicate_value() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 1, 0, // Cardinality - 1 of the first container + 0, 0, // first value of first container + 0, 0, // second value of first container (DUPLICATE) + ]; + + assert_invalid_serialization(&data, "Duplicate values in array container"); +} + +#[test] +fn deserialize_array_unsorted_values() { + let data = [ + 0x3B, 0x30, // Serial Cookie + 0, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 1, 0, // Cardinality - 1 of the first container + 1, 0, // first value of first container + 0, 0, // second value of first container (LESS THAN FIRST) + ]; + + assert_invalid_serialization(&data, "Unsorted values in array container"); +} + +#[test] +fn deserialize_bitset_incorrect_cardinality() { + let data_start = [ + 0x3B, 0x30, // Serial Cookie + 0, 0, // Container count - 1 + 0, // Run Flag Bitset (no runs) + 0, 0, // Upper 16 bits of the first container + 0xFF, + 0xFF, // Cardinality - 1 of the first container. 
+ + // First container is a bitset, should be followed by 1 << 16 bits + ]; + let mut data = vec![0xFF; data_start.len() + (1 << 16) / 8]; + data[..data_start.len()].copy_from_slice(&data_start); + // Bitset filled with 0xFF will have the correct cardinality + let result = RoaringBitmap::deserialize_from(&data[..]).unwrap(); + assert_eq!(result.len(), 0x1_0000); + + // Bitset will no longer have the correct cardinality + data[data_start.len()] = 0x0; + assert_invalid_serialization(&data, "Bitset incorrect cardinality"); +} diff --git a/roaring/tests/size_hint.rs b/roaring/tests/size_hint.rs index 00159be73..14dc5d142 100644 --- a/roaring/tests/size_hint.rs +++ b/roaring/tests/size_hint.rs @@ -23,6 +23,18 @@ fn bitmap() { assert_eq!((0, Some(0)), iter.size_hint()); } +#[test] +fn run() { + let mut bitmap = RoaringBitmap::from_iter(0..6000); + bitmap.optimize(); + let mut iter = bitmap.iter(); + assert_eq!((6000, Some(6000)), iter.size_hint()); + iter.by_ref().take(3000).for_each(drop); + assert_eq!((3000, Some(3000)), iter.size_hint()); + iter.by_ref().for_each(drop); + assert_eq!((0, Some(0)), iter.size_hint()); +} + #[test] fn arrays() { let bitmap = (0..2000) @@ -54,3 +66,16 @@ fn bitmaps() { iter.by_ref().for_each(drop); assert_eq!((0, Some(0)), iter.size_hint()); } + +#[test] +fn runs() { + let mut bitmap = + RoaringBitmap::from_iter((0..2000).chain(1_000_000..1_002_000).chain(2_000_000..2_001_000)); + bitmap.optimize(); + let mut iter = bitmap.iter(); + assert_eq!((5000, Some(5000)), iter.size_hint()); + iter.by_ref().take(3000).for_each(drop); + assert_eq!((2000, Some(2000)), iter.size_hint()); + iter.by_ref().for_each(drop); + assert_eq!((0, Some(0)), iter.size_hint()); +}