Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
55e32d2
identify a few spots
paleolimbot Oct 1, 2025
9a88f7e
add a dyn accumulator
paleolimbot Oct 1, 2025
6a34eaf
document and set
paleolimbot Oct 1, 2025
d59c644
maybe pipe into the encoder
paleolimbot Oct 1, 2025
2f268f3
try
paleolimbot Oct 1, 2025
5c9547e
remove some previous work
paleolimbot Oct 1, 2025
6516544
plausibly working
paleolimbot Oct 1, 2025
629afc7
test
paleolimbot Oct 1, 2025
7ae4f17
failing test but a bit better
paleolimbot Oct 2, 2025
523575d
passing test
paleolimbot Oct 2, 2025
203ea9c
test!
paleolimbot Oct 2, 2025
5305e4e
ensure size stats are written for geometry/geography from generic enc…
paleolimbot Oct 6, 2025
3089b69
remove tests that will start failing when new thrift footer merges
paleolimbot Oct 6, 2025
9e12b57
more flexible testers
paleolimbot Oct 6, 2025
a3b729b
more tests
paleolimbot Oct 7, 2025
f8b58c6
add roundtrip tests, fix accumulator for the all empty case
paleolimbot Oct 7, 2025
7472ba6
more test files
paleolimbot Oct 7, 2025
be7b522
use code-compatible stats accessor
paleolimbot Oct 7, 2025
e60cd98
fix test for new accessor
paleolimbot Oct 7, 2025
a34e5c4
add documentation for new trait member
paleolimbot Oct 7, 2025
15bbe3c
explicit schema test
paleolimbot Oct 7, 2025
0fca11f
document the accumulator
paleolimbot Oct 7, 2025
cd0f609
tests
paleolimbot Oct 7, 2025
92d0d73
rename
paleolimbot Oct 7, 2025
182776e
once lock thinger
paleolimbot Oct 7, 2025
4c7c52a
remove unneeded todo
paleolimbot Oct 7, 2025
0e600b4
remove copied comment
paleolimbot Oct 7, 2025
59a00ed
add better docstring
paleolimbot Oct 7, 2025
a024793
Apply suggestions from code review
paleolimbot Oct 8, 2025
3798609
more monospace
paleolimbot Oct 8, 2025
85ebb72
more monospace
paleolimbot Oct 8, 2025
2bc7bbe
more compact updater
paleolimbot Oct 8, 2025
431da25
try_new_geo_stats_accumulator()
paleolimbot Oct 8, 2025
6156112
fix link
paleolimbot Oct 8, 2025
ed85f90
Merge branch 'main' into spatial-stats-write
paleolimbot Oct 8, 2025
9d6d6c3
fix build
paleolimbot Oct 8, 2025
392d949
remove duplicate test
paleolimbot Oct 8, 2025
f9112f7
maybe merge tests better
paleolimbot Oct 8, 2025
ec31096
Apply suggestions from code review
paleolimbot Oct 9, 2025
eac356e
document feature flag
paleolimbot Oct 9, 2025
1bb2cd8
verify stats/null count
paleolimbot Oct 9, 2025
d5ba2f2
test column index
paleolimbot Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
plausibly working
  • Loading branch information
paleolimbot committed Oct 1, 2025
commit 65165449aacd6fa81dff1a3add4aae0776453c4d
29 changes: 24 additions & 5 deletions parquet/src/arrow/arrow_writer/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@

use crate::basic::{Encoding, LogicalType};
use crate::bloom_filter::Sbbf;
use crate::column::writer::encoder::{
update_geo_stats_accumulator, ColumnValueEncoder, DataPageValues, DictionaryPage,
};
use crate::column::writer::encoder::{ColumnValueEncoder, DataPageValues, DictionaryPage};
use crate::data_type::{AsBytes, ByteArray, Int32Type};
use crate::encodings::encoding::{DeltaBitPackEncoder, Encoder};
use crate::encodings::rle::RleEncoder;
Expand Down Expand Up @@ -572,9 +570,9 @@ where
T::Item: Copy + Ord + AsRef<[u8]>,
{
if encoder.statistics_enabled != EnabledStatistics::None {
// TODO Converted interval types no stats?
// TODO ensure Converted interval types have no stats written for them?
if let Some(accumulator) = encoder.geo_stats_accumulator.as_mut() {
update_geo_stats_accumulator(accumulator.as_mut(), [0x01].iter());
update_geo_stats_accumulator(accumulator.as_mut(), values, indices.iter().cloned());
} else if let Some((min, max)) = compute_min_max(values, indices.iter().cloned()) {
if encoder.min_value.as_ref().is_none_or(|m| m > &min) {
encoder.min_value = Some(min);
Expand Down Expand Up @@ -623,3 +621,24 @@ where
}
Some((min.as_ref().to_vec().into(), max.as_ref().to_vec().into()))
}

/// Feeds the selected elements of `array` into a `GeoStatsAccumulator`
///
/// Every index yielded by `valid` is looked up with `array.value(idx)` and the
/// resulting bytes are passed to `bounder.update_wkb()`. If `bounder.is_valid()`
/// is `false` on entry, no element is read and `valid` is left unconsumed.
///
/// NOTE(review): presumably each value holds one WKB-encoded geometry and
/// `valid` yields only non-null, in-bounds indices; confirm against the caller.
///
/// This is a free function so it can be used with `downcast_op!`
fn update_geo_stats_accumulator<T>(
    bounder: &mut dyn GeoStatsAccumulator,
    array: T,
    valid: impl Iterator<Item = usize>,
) where
    T: ArrayAccessor,
    T::Item: Copy + Ord + AsRef<[u8]>,
{
    // Validity is checked once, before the loop; it is not re-checked
    // between individual updates.
    if bounder.is_valid() {
        for row in valid {
            let wkb = array.value(row);
            bounder.update_wkb(wkb.as_ref());
        }
    }
}
2 changes: 1 addition & 1 deletion parquet/src/column/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ fn replace_zero<T: ParquetValueType>(val: &T, descr: &ColumnDescriptor, replace:
}
}

pub fn update_geo_stats_accumulator<'a, T, I>(bounder: &mut dyn GeoStatsAccumulator, iter: I)
fn update_geo_stats_accumulator<'a, T, I>(bounder: &mut dyn GeoStatsAccumulator, iter: I)
where
T: ParquetValueType + 'a,
I: Iterator<Item = &'a T>,
Expand Down
Loading