From 6587d64fc327c5fb142061e415d712e89ea9a1c5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Apr 2025 09:58:58 -0400 Subject: [PATCH 1/4] Improve documentation for AsyncFileReader::get_metadata --- parquet/src/arrow/async_reader/mod.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 5d5a7036eefb..2e01d1166943 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -98,10 +98,22 @@ pub trait AsyncFileReader: Send { .boxed() } - /// Provides asynchronous access to the [`ParquetMetaData`] of a parquet file, - /// allowing fine-grained control over how metadata is sourced, in particular allowing - /// for caching, pre-fetching, catalog metadata, etc... - /// ArrowReaderOptions may be provided to supply decryption parameters + /// Return a future which results in the [`ParquetMetaData`] for this Parquet file. + /// + /// This is an asynchronous operation as it may involve reading the file + /// footer and potentially other metadata from disk or a remote source. + /// + /// Reading data from Parquet, requires the metadata to understand the + /// schema, row groups, and location of pages within the file. This metadata + /// is stored in the footer of the Parquet file, and can be read using + /// [`ParquetMetaDataReader`]. + /// + /// However, implementations can significantly speed up reading Parquet by + /// supplying cached metadata or pre-fetched metadata via this API. + /// + /// # Parameters + /// * `options`: Optional [`ArrowReaderOptions`] that may contain decryption + /// and pther options that affect how the metadata is read. fn get_metadata<'a>( &'a mut self, options: Option<&'a ArrowReaderOptions>, From f85c89beb36247460c41ea5b4867adfce005d648 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Apr 2025 11:15:50 -0400 Subject: [PATCH 2/4] Update parquet/src/arrow/async_reader/mod.rs Co-authored-by: Ed Seidl --- parquet/src/arrow/async_reader/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 2e01d1166943..5ad1fb12215a 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -103,7 +103,7 @@ pub trait AsyncFileReader: Send { /// This is an asynchronous operation as it may involve reading the file /// footer and potentially other metadata from disk or a remote source. /// - /// Reading data from Parquet, requires the metadata to understand the + /// Reading data from Parquet requires the metadata to understand the /// schema, row groups, and location of pages within the file. This metadata /// is stored in the footer of the Parquet file, and can be read using /// [`ParquetMetaDataReader`]. From f54505023e75d259c8966fc15b4aeb8ee9bee851 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Apr 2025 11:16:01 -0400 Subject: [PATCH 3/4] Update parquet/src/arrow/async_reader/mod.rs Co-authored-by: Ed Seidl --- parquet/src/arrow/async_reader/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 5ad1fb12215a..d4ec19bbff6b 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -105,7 +105,7 @@ pub trait AsyncFileReader: Send { /// /// Reading data from Parquet requires the metadata to understand the /// schema, row groups, and location of pages within the file. This metadata - /// is stored in the footer of the Parquet file, and can be read using + /// is stored primarily in the footer of the Parquet file, and can be read using /// [`ParquetMetaDataReader`]. /// /// However, implementations can significantly speed up reading Parquet by From 87e46d2244eb56be9c1adea25072cf20d69d396e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 3 Apr 2025 16:42:28 -0400 Subject: [PATCH 4/4] Update parquet/src/arrow/async_reader/mod.rs Co-authored-by: Ed Seidl --- parquet/src/arrow/async_reader/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index d4ec19bbff6b..413b2d68e6bc 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -113,7 +113,7 @@ pub trait AsyncFileReader: Send { /// /// # Parameters /// * `options`: Optional [`ArrowReaderOptions`] that may contain decryption - /// and pther options that affect how the metadata is read. + /// and other options that affect how the metadata is read. fn get_metadata<'a>( &'a mut self, options: Option<&'a ArrowReaderOptions>,