Skip to content

Commit bd9860e

Browse files
committed
Merge remote-tracking branch 'apache/master' into fix-metadata-write-nulls
2 parents d60d918 + 1336973 commit bd9860e

File tree

2 files changed

+35
-4
lines changed

2 files changed

+35
-4
lines changed

object_store/src/client/backoff.rs

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,12 @@
1818
use rand::prelude::*;
1919
use std::time::Duration;
2020

21-
/// Exponential backoff with jitter
21+
/// Exponential backoff using the decorrelated jitter algorithm
22+
///
23+
/// The first backoff will always be `init_backoff`.
24+
///
25+
/// Subsequent backoffs will pick a random value between `init_backoff` and
26+
/// `base * previous`, where `previous` is the duration of the previous backoff.
2227
///
2328
/// See <https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/>
2429
#[allow(missing_copy_implementations)]
@@ -28,7 +33,7 @@ pub struct BackoffConfig {
2833
pub init_backoff: Duration,
2934
/// The maximum backoff duration
3035
pub max_backoff: Duration,
31-
/// The base of the exponential to use
36+
/// The multiplier to use for the next backoff duration
3237
pub base: f64,
3338
}
3439

parquet/src/column/writer/mod.rs

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -756,8 +756,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
756756
if null_page && self.column_index_builder.valid() {
757757
self.column_index_builder.append(
758758
null_page,
759-
vec![0; 1],
760-
vec![0; 1],
759+
vec![],
760+
vec![],
761761
self.page_metrics.num_page_nulls as i64,
762762
);
763763
} else if self.column_index_builder.valid() {
@@ -2668,6 +2668,32 @@ mod tests {
26682668
),);
26692669
}
26702670

2671+
#[test]
2672+
fn test_column_index_with_null_pages() {
2673+
// write a single page of all nulls
2674+
let page_writer = get_test_page_writer();
2675+
let props = Default::default();
2676+
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 0, props);
2677+
writer.write_batch(&[], Some(&[0, 0, 0, 0]), None).unwrap();
2678+
2679+
let r = writer.close().unwrap();
2680+
assert!(r.column_index.is_some());
2681+
let col_idx = r.column_index.unwrap();
2682+
// null_pages should be true for page 0
2683+
assert!(col_idx.null_pages[0]);
2684+
// min and max should be empty byte arrays
2685+
assert_eq!(col_idx.min_values[0].len(), 0);
2686+
assert_eq!(col_idx.max_values[0].len(), 0);
2687+
// null_counts should be defined and be 4 for page 0
2688+
assert!(col_idx.null_counts.is_some());
2689+
assert_eq!(col_idx.null_counts.as_ref().unwrap()[0], 4);
2690+
// there is no repetition, so repetition_level_histograms should be absent
2691+
assert!(col_idx.repetition_level_histograms.is_none());
2692+
// definition_level_histograms should be present: 4 values at level 0, 0 values at level 1
2693+
assert!(col_idx.definition_level_histograms.is_some());
2694+
assert_eq!(col_idx.definition_level_histograms.unwrap(), &[4, 0]);
2695+
}
2696+
26712697
#[test]
26722698
fn test_column_offset_index_metadata() {
26732699
// write data

0 commit comments

Comments
 (0)