Skip to content

Commit ee56940

Browse files
Fischer0522 and alamb authored
fix panic in ParquetMetadata::memory_size: check has_min_max_set before invoking min()/max() (apache#6092)
* fix: check has_min_max_set before invoking min()/max()
* chore: add unit test for statistics heap size
* Fixup test

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 16915b5 commit ee56940

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed

parquet/src/file/metadata/memory.rs

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,14 @@ impl<T: ParquetValueType> HeapSize for PageIndex<T> {
173173

174174
impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
175175
fn heap_size(&self) -> usize {
176-
self.min().heap_size() + self.max().heap_size()
176+
if self.has_min_max_set() {
177+
return self.min().heap_size() + self.max().heap_size();
178+
} else if self.min_is_exact() {
179+
return self.min().heap_size();
180+
} else if self.max_is_exact() {
181+
return self.max().heap_size();
182+
}
183+
0
177184
}
178185
}
179186
impl HeapSize for bool {

parquet/src/file/metadata/mod.rs

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1291,7 +1291,11 @@ mod tests {
12911291
let columns = schema_descr
12921292
.columns()
12931293
.iter()
1294-
.map(|column_descr| ColumnChunkMetaData::builder(column_descr.clone()).build())
1294+
.map(|column_descr| {
1295+
ColumnChunkMetaData::builder(column_descr.clone())
1296+
.set_statistics(Statistics::new::<i32>(None, None, None, 0, false))
1297+
.build()
1298+
})
12951299
.collect::<Result<Vec<_>>>()
12961300
.unwrap();
12971301
let row_group_meta = RowGroupMetaData::builder(schema_descr.clone())
@@ -1317,11 +1321,31 @@ mod tests {
13171321
num_rows,
13181322
created_by,
13191323
key_value_metadata,
1320-
schema_descr,
1324+
schema_descr.clone(),
13211325
column_orders,
13221326
);
1323-
let parquet_meta = ParquetMetaData::new(file_metadata.clone(), row_group_meta.clone());
1324-
let base_expected_size = 1320;
1327+
1328+
// Now, add in Exact Statistics
1329+
let columns_with_stats = schema_descr
1330+
.columns()
1331+
.iter()
1332+
.map(|column_descr| {
1333+
ColumnChunkMetaData::builder(column_descr.clone())
1334+
.set_statistics(Statistics::new::<i32>(Some(0), Some(100), None, 0, false))
1335+
.build()
1336+
})
1337+
.collect::<Result<Vec<_>>>()
1338+
.unwrap();
1339+
1340+
let row_group_meta_with_stats = RowGroupMetaData::builder(schema_descr)
1341+
.set_num_rows(1000)
1342+
.set_column_metadata(columns_with_stats)
1343+
.build()
1344+
.unwrap();
1345+
let row_group_meta_with_stats = vec![row_group_meta_with_stats];
1346+
1347+
let parquet_meta = ParquetMetaData::new(file_metadata.clone(), row_group_meta_with_stats);
1348+
let base_expected_size = 2024;
13251349
assert_eq!(parquet_meta.memory_size(), base_expected_size);
13261350

13271351
let mut column_index = ColumnIndexBuilder::new();

0 commit comments

Comments
 (0)