Skip to content
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add more column support
  • Loading branch information
zhuqi-lucas committed Jun 5, 2025
commit a380b634046e92a332ce6d195415fb441b25953f
13 changes: 9 additions & 4 deletions parquet/src/file/metadata/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ impl ParquetMetaDataReader {
/// let metadata = reader.finish().unwrap();
/// ```
pub fn try_parse_sized<R: ChunkReader>(&mut self, reader: &R, file_size: u64) -> Result<()> {
// Thin wrapper: forwards to `try_parse_sized_with_columns` with `column_ids = None`.
// NOTE(review): `None` presumably means "no column restriction" (all columns) — confirm
// against the column-aware page index parsing.
self.try_parse_sized_with_columns(reader, file_size, None)
}

pub fn try_parse_sized_with_columns<R: ChunkReader>(&mut self, reader: &R, file_size: u64, column_ids: Option<&[usize]>) -> Result<()> {
self.metadata = match self.parse_metadata(reader) {
Ok(metadata) => Some(metadata),
Err(ParquetError::NeedMoreData(needed)) => {
Expand All @@ -311,7 +315,7 @@ impl ParquetMetaDataReader {
return Ok(());
}

self.read_page_indexes_sized(reader, file_size)
self.read_page_indexes_sized(reader, file_size, column_ids)
}

/// Read the page index structures when a [`ParquetMetaData`] has already been obtained.
Expand All @@ -329,6 +333,7 @@ impl ParquetMetaDataReader {
&mut self,
reader: &R,
file_size: u64,
column_ids: Option<&[usize]>,
) -> Result<()> {
if self.metadata.is_none() {
return Err(general_err!(
Expand Down Expand Up @@ -385,8 +390,8 @@ impl ParquetMetaDataReader {
let bytes = reader.get_bytes(range.start - file_range.start, bytes_needed)?;
let offset = range.start;

self.parse_column_index(&bytes, offset)?;
self.parse_offset_index(&bytes, offset)?;
self.parse_column_index(&bytes, offset, column_ids)?;
self.parse_offset_index(&bytes, offset, column_ids)?;

Ok(())
}
Expand Down Expand Up @@ -437,7 +442,7 @@ impl ParquetMetaDataReader {
return Ok(());
}

self.load_page_index_with_remainder(fetch, remainder).await
self.load_page_index_with_remainder(fetch, remainder, None).await
}

/// Attempts to (asynchronously) parse the footer metadata (and optionally page indexes)
Expand Down