Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 94 additions & 5 deletions parquet/src/record/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,6 @@ impl TreeBuilder {
path.push(String::from(key_value_type.name()));

let key_type = &key_value_type.get_fields()[0];
assert!(
key_type.is_primitive(),
"Map key type is expected to be a primitive type, but found {key_type:?}"
);
let key_reader = self.reader_tree(
key_type.clone(),
path,
Expand Down Expand Up @@ -840,7 +836,7 @@ impl Iterator for ReaderIter {
mod tests {
use super::*;

use crate::data_type::Int64Type;
use crate::data_type::{ByteArray, Decimal, Int64Type};
use crate::file::reader::SerializedFileReader;
use crate::file::writer::SerializedFileWriter;
use crate::record::api::RowAccessor;
Expand Down Expand Up @@ -1259,6 +1255,99 @@ mod tests {
assert_eq!(rows, expected_rows);
}

/// Round-trips `databricks.parquet` through the record reader and checks the
/// decoded rows field-by-field against a literal expectation.
///
/// The fixture was generated by Databricks and contains one column per
/// supported logical type. The column this test exists for is `map_nested`,
/// whose map key is a *compound* (list) type — previously rejected by an
/// assertion that required map keys to be primitive.
///
/// NOTE(review): reviewers suggested a more descriptive name (e.g.
/// `test_compound_map_key`) and a slimmer fixture containing only the
/// compound-key column — TODO confirm before merge.
#[test]
fn test_file_reader_rows_nullable1() {
    let rows = test_file_reader_rows("databricks.parquet", None).unwrap();
    let expected_rows = vec![row![
        ("bigint".to_string(), Field::Long(42)),
        ("binary".to_string(), Field::Bytes(ByteArray::from("Spark"))),
        ("boolean".to_string(), Field::Bool(true)),
        ("boolean_null".to_string(), Field::Null),
        // Days relative to the Unix epoch; -719530 is the minimum (year 0) sentinel
        // the writer emitted for a "zero" date — TODO confirm against the fixture.
        ("date_zero".to_string(), Field::Date(-719530)),
        ("date".to_string(), Field::Date(20236)),
        (
            "decimal".to_string(),
            // Unscaled big-endian value 5, precision 10 => stored as INT64.
            Field::Decimal(Decimal::Int64 {
                value: [0, 0, 0, 0, 0, 0, 0, 5],
                precision: 10,
                scale: 0,
            })
        ),
        (
            "decimal3".to_string(),
            Field::Decimal(Decimal::Int32 {
                value: [0, 0, 0, 5],
                precision: 3,
                scale: 0,
            })
        ),
        (
            "decimal32".to_string(),
            // Unscaled value 0x0217 = 535, scale 2 => 5.35.
            Field::Decimal(Decimal::Int32 {
                value: [0, 0, 2, 23],
                precision: 3,
                scale: 2,
            })
        ),
        ("double".to_string(), Field::Double(4.2)),
        ("float".to_string(), Field::Float(4.2)),
        ("int".to_string(), Field::Int(42)),
        ("smallint".to_string(), Field::Short(1)),
        ("string".to_string(), Field::Str("Spark".to_string())),
        (
            "timestamp".to_string(),
            Field::TimestampMillis(1748390400000)
        ),
        (
            "timestamp_tz".to_string(),
            Field::TimestampMillis(1625118208000)
        ),
        (
            "timestamp_ntz".to_string(),
            Field::TimestampMicros(1748434348000000)
        ),
        // Nanosecond source data is read back at microsecond precision.
        (
            "timestamp_ntz_nanos".to_string(),
            Field::TimestampMicros(1748434348123456)
        ),
        ("tinyint".to_string(), Field::Byte(1)),
        (
            "array".to_string(),
            list![Field::Int(1), Field::Int(2), Field::Int(3)]
        ),
        (
            "nested_array".to_string(),
            list![
                list![Field::Long(1), Field::Long(2)],
                list![Field::Long(3), Field::Long(4)]
            ]
        ),
        (
            "struct".to_string(),
            group![
                ("col1".to_string(), Field::Str("Spark".to_string())),
                ("col2".to_string(), Field::Int(5))
            ]
        ),
        (
            "map".to_string(),
            map![
                (Field::Str("red".to_string()), Field::Int(1)),
                (Field::Str("green".to_string()), Field::Int(2))
            ]
        ),
        // The key column of this map is itself a list — the compound map key
        // this PR enables (the primitive-key assertion was removed upstream).
        (
            "map_nested".to_string(),
            map![(
                list![Field::Int(1), Field::Int(2)],
                map![(Field::Str("green".to_string()), Field::Int(5))]
            )]
        ),
        ("interval".to_string(), Field::Long(720830300000))
    ]];
    assert_eq!(rows, expected_rows);
}

#[test]
fn test_file_reader_rows_projection() {
let schema = "
Expand Down
Loading