Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add a knob to force string view in benchmark
  • Loading branch information
XiangpengHao committed Jul 17, 2024
commit 205e9f2928daf95bbea5c0b20f0eaf9d060100a3
4 changes: 3 additions & 1 deletion benchmarks/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ impl RunOpt {
None => queries.min_query_id()..=queries.max_query_id(),
};

let config = self.common.config();
let mut config = self.common.config();
config.options_mut().execution.schema_force_string_view = self.common.string_view;

let ctx = SessionContext::new_with_config(config);
self.register_hits(&ctx).await?;

Expand Down
1 change: 1 addition & 0 deletions benchmarks/src/tpch/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ impl RunOpt {
.config()
.with_collect_statistics(!self.disable_statistics);
config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join;
config.options_mut().execution.schema_force_string_view = self.common.string_view;
let ctx = SessionContext::new_with_config(config);

// register tables
Expand Down
4 changes: 4 additions & 0 deletions benchmarks/src/util/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ pub struct CommonOpt {
/// Activate debug mode to see more details
#[structopt(short, long)]
pub debug: bool,

/// If true, will use StringView/BinaryViewArray instead of String/BinaryArray.
#[structopt(long)]
pub string_view: bool,
}

impl CommonOpt {
Expand Down
4 changes: 4 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,10 @@ config_namespace! {

/// Should DataFusion keep the columns used for partition_by in the output RecordBatches
pub keep_partition_by_columns: bool, default = false

/// If true, the parquet reader will replace `Utf8/LargeUtf8` with `Utf8View`,
/// and `Binary/LargeBinary` with `BinaryView`.
pub schema_force_string_view: bool, default = false
}
}

Expand Down
27 changes: 26 additions & 1 deletion datafusion/core/src/datasource/listing/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,32 @@ impl ListingOptions {
.try_collect()
.await?;

self.format.infer_schema(state, &store, &files).await
let mut schema = self.format.infer_schema(state, &store, &files).await?;

if state.config_options().execution.schema_force_string_view {
let transformed_fields: Vec<Arc<Field>> = schema
.fields
.iter()
.map(|field| match field.data_type() {
DataType::Utf8 | DataType::LargeUtf8 => Arc::new(Field::new(
field.name(),
DataType::Utf8View,
field.is_nullable(),
)),
DataType::Binary | DataType::LargeBinary => Arc::new(Field::new(
field.name(),
DataType::BinaryView,
field.is_nullable(),
)),
_ => field.clone(),
})
.collect();
schema = Arc::new(Schema::new_with_metadata(
transformed_fields,
schema.metadata.clone(),
));
}
Ok(schema)
}

/// Infers the partition columns stored in `LOCATION` and compares
Expand Down