update tests
XiangpengHao committed Aug 9, 2024
commit c83fee9351990feb03094ded03d14a9da706d06f
17 changes: 15 additions & 2 deletions Cargo.toml
@@ -16,7 +16,7 @@
# under the License.

[workspace]
exclude = ["datafusion-cli", "dev/depcheck"]
exclude = ["datafusion-cli", "dev/depcheck", "datafusion-examples"]
members = [
"datafusion/common",
"datafusion/common-runtime",
@@ -40,7 +40,6 @@ members = [
"datafusion/sqllogictest",
"datafusion/substrait",
"datafusion/wasmtest",
"datafusion-examples",
"docs",
"test-utils",
"benchmarks",
@@ -158,3 +157,17 @@ large_futures = "warn"
[workspace.lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] }
unused_imports = "deny"

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git" }
parquet = { git = "https://github.com/apache/arrow-rs.git" }
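
The entries above track the tip of arrow-rs `master`, so builds move whenever that branch does. A sketch of the same patch pinned to a fixed revision for reproducibility (the `rev` value is a placeholder, not a real arrow-rs commit, and the remaining arrow-* crates would be pinned the same way):

[patch.crates-io]
# Hypothetical pin: replace <commit-sha> with the arrow-rs commit under test.
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "<commit-sha>" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "<commit-sha>" }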
14 changes: 14 additions & 0 deletions benchmarks/Cargo.toml
@@ -53,3 +53,17 @@ tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { workspace = true }

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git" }
parquet = { git = "https://github.com/apache/arrow-rs.git" }
2 changes: 1 addition & 1 deletion datafusion/common/Cargo.toml
@@ -60,7 +60,7 @@ libc = "0.2.140"
num_cpus = { workspace = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true, optional = true, default-features = true }
pyo3 = { version = "0.21.0", optional = true }
pyo3 = { version = "0.22.0", optional = true }
sqlparser = { workspace = true }

[target.'cfg(target_family = "wasm")'.dependencies]
2 changes: 1 addition & 1 deletion datafusion/common/src/config.rs
@@ -481,7 +481,7 @@ config_namespace! {

/// (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`,
/// and `Binary/BinaryLarge` with `BinaryView`.
pub schema_force_string_view: bool, default = false
pub schema_force_string_view: bool, default = true
}
}

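With `schema_force_string_view` now defaulting to `true`, parquet reads produce `Utf8View`/`BinaryView` columns unless the option is turned off explicitly. A minimal sketch of opting back out, assuming the option is surfaced under the usual `datafusion.execution.parquet` prefix:

-- Assumed config key; restores the previous Utf8/Binary reading behavior.
SET datafusion.execution.parquet.schema_force_string_view = false;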
6 changes: 3 additions & 3 deletions datafusion/common/src/scalar/mod.rs
@@ -4329,7 +4329,7 @@ mod tests {
.strip_backtrace();
assert_eq!(
err,
"Arrow error: Compute error: Overflow happened on: 2147483647 - -2147483648"
"Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
)
}

@@ -4350,7 +4350,7 @@
.sub_checked(&int_value_2)
.unwrap_err()
.strip_backtrace();
assert_eq!(err, "Arrow error: Compute error: Overflow happened on: 9223372036854775807 - -9223372036854775808")
assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
}

#[test]
@@ -5866,7 +5866,7 @@ mod tests {
let root_err = err.find_root();
match root_err{
DataFusionError::ArrowError(
ArrowError::ComputeError(_),
ArrowError::ArithmeticOverflow(_),
_,
) => {}
_ => return Err(err),
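The new arrow-rs revision classifies checked-arithmetic failures as `ArithmeticOverflow` rather than the generic `ComputeError`, so only the expected message text changes. As a plain-Rust illustration of the overflow itself (standard library only, not the DataFusion `ScalarValue` API):

fn main() {
    // 2147483647 - (-2147483648) = 4294967295, which cannot be represented in i32.
    assert_eq!(i32::MAX.checked_sub(i32::MIN), None); // checked subtraction signals overflow
    // Widened to i64, the same subtraction fits without overflow.
    assert_eq!(i32::MAX as i64 - i32::MIN as i64, 4_294_967_295);
}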
10 changes: 5 additions & 5 deletions datafusion/core/src/datasource/file_format/parquet.rs
@@ -1248,7 +1248,7 @@ mod tests {
use arrow_schema::{DataType, Field};
use async_trait::async_trait;
use datafusion_common::cast::{
as_binary_array, as_boolean_array, as_float32_array, as_float64_array,
as_binary_view_array, as_boolean_array, as_float32_array, as_float64_array,
as_int32_array, as_timestamp_nanosecond_array,
};
use datafusion_common::config::ParquetOptions;
@@ -1799,8 +1799,8 @@
bigint_col: Int64\n\
float_col: Float32\n\
double_col: Float64\n\
date_string_col: Binary\n\
string_col: Binary\n\
date_string_col: BinaryView\n\
string_col: BinaryView\n\
timestamp_col: Timestamp(Nanosecond, None)",
y
);
@@ -1956,7 +1956,7 @@
assert_eq!(1, batches[0].num_columns());
assert_eq!(8, batches[0].num_rows());

let array = as_binary_array(batches[0].column(0))?;
let array = as_binary_view_array(batches[0].column(0))?;
let mut values: Vec<&str> = vec![];
for i in 0..batches[0].num_rows() {
values.push(std::str::from_utf8(array.value(i)).unwrap());
@@ -2070,7 +2070,7 @@
let int_col_offset = offset_index.get(4).unwrap();

// 325 pages in int_col
assert_eq!(int_col_offset.len(), 325);
assert_eq!(int_col_offset.page_locations().len(), 325);
match int_col_index {
Index::INT32(index) => {
assert_eq!(index.indexes.len(), 325);
@@ -406,7 +406,7 @@ impl<'a> PagesPruningStatistics<'a> {
converter,
column_index,
offset_index,
page_offsets,
page_offsets: &page_offsets.page_locations,
})
}

2 changes: 1 addition & 1 deletion datafusion/functions/src/regex/regexpreplace.rs
@@ -402,7 +402,7 @@ fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
let string_view_array = as_string_view_array(&args[0])?;

let mut builder = StringViewBuilder::with_capacity(string_view_array.len())
.with_block_size(1024 * 1024 * 2);
.with_fixed_block_size(1024 * 1024 * 2);

for val in string_view_array.iter() {
if let Some(val) = val {
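`with_block_size` becomes `with_fixed_block_size` in the arrow-rs revision this branch tracks; it is a rename of the buffer-sizing hint rather than a behavior change in this function (the same rename appears in the binary-view map and coalesce-batches diffs below). A small sketch of the builder as used here, assuming arrow's `StringViewBuilder` API:

use arrow_array::builder::StringViewBuilder;
use arrow_array::StringViewArray;

fn build_example() -> StringViewArray {
    // Reserve space for three views and allocate variadic data buffers in fixed 2 MiB blocks.
    let mut builder =
        StringViewBuilder::with_capacity(3).with_fixed_block_size(2 * 1024 * 1024);
    builder.append_value("short"); // <= 12 bytes, stored inline in the view
    builder.append_value("a value longer than twelve bytes"); // spilled to a data buffer
    builder.append_null();
    builder.finish()
}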
2 changes: 1 addition & 1 deletion datafusion/physical-expr-common/src/binary_view_map.rs
@@ -149,7 +149,7 @@
output_type,
map: hashbrown::raw::RawTable::with_capacity(INITIAL_MAP_CAPACITY),
map_size: 0,
builder: GenericByteViewBuilder::new().with_block_size(2 * 1024 * 1024),
builder: GenericByteViewBuilder::new().with_fixed_block_size(2 * 1024 * 1024),
random_state: RandomState::new(),
hashes_buffer: vec![],
null: None,
5 changes: 3 additions & 2 deletions datafusion/physical-plan/src/coalesce_batches.rs
@@ -494,7 +494,7 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
// See https://github.com/apache/arrow-rs/issues/6094 for more details.
let mut builder = StringViewBuilder::with_capacity(s.len());
if ideal_buffer_size > 0 {
builder = builder.with_block_size(ideal_buffer_size as u32);
builder = builder.with_fixed_block_size(ideal_buffer_size as u32);
}

for v in s.iter() {
@@ -804,7 +804,8 @@ mod tests {
impl StringViewTest {
/// Create a `StringViewArray` with the parameters specified in this struct
fn build(self) -> StringViewArray {
let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192);
let mut builder =
StringViewBuilder::with_capacity(100).with_fixed_block_size(8192);
loop {
for &v in self.strings.iter() {
builder.append_option(v);
18 changes: 9 additions & 9 deletions datafusion/sql/src/unparser/expr.rs
@@ -2077,49 +2077,49 @@ mod tests {
"1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND",
),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#,
r#"INTERVAL '13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#,
),
(
interval_month_day_nano_lit("1.5 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#,
r#"INTERVAL '1 MONS 15 DAYS'"#,
),
(
interval_month_day_nano_lit("-3 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#,
r#"INTERVAL '-3 MONS'"#,
),
(
interval_month_day_nano_lit("1 MONTH")
.add(interval_month_day_nano_lit("1 DAY")),
IntervalStyle::PostgresVerbose,
r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#,
r#"(INTERVAL '1 MONS' + INTERVAL '1 DAYS')"#,
),
(
interval_month_day_nano_lit("1 MONTH")
.sub(interval_month_day_nano_lit("1 DAY")),
IntervalStyle::PostgresVerbose,
r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#,
r#"(INTERVAL '1 MONS' - INTERVAL '1 DAYS')"#,
),
(
interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#,
r#"INTERVAL '10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#,
),
(
interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#,
r#"INTERVAL '10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#,
),
(
interval_year_month_lit("1 YEAR 1 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#,
r#"INTERVAL '1 YEARS 1 MONS'"#,
),
(
interval_year_month_lit("1.5 YEAR 1 MONTH"),
IntervalStyle::PostgresVerbose,
r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#,
r#"INTERVAL '1 YEARS 7 MONS'"#,
),
(
interval_year_month_lit("1 YEAR 1 MONTH"),
@@ -267,7 +267,9 @@ pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
| DataType::Float64
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _) => DFColumnType::Float,
DataType::Utf8 | DataType::LargeUtf8 => DFColumnType::Text,
Contributor comment on this hunk: pulled into #12033
DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => {
DFColumnType::Text
}
DataType::Date32
| DataType::Date64
| DataType::Time32(_)
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/arrow_typeof.slt
@@ -424,7 +424,7 @@ select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)');
[1, 2, 3]

# Tests for Utf8View
query ?T
query TT
select arrow_cast('MyAwesomeString', 'Utf8View'), arrow_typeof(arrow_cast('MyAwesomeString', 'Utf8View'))
----
MyAwesomeString Utf8View
22 changes: 11 additions & 11 deletions datafusion/sqllogictest/test_files/math.slt
@@ -252,19 +252,19 @@ select abs(c1), abs(c2), abs(c3), abs(c4) from test_nullable_integer where datas
NULL NULL NULL NULL

# abs: Int8 overlow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int8Array overflow on abs\(-128\)
statement error DataFusion error: Arrow error: Compute error: Int8Array overflow on abs\(-128\)
select abs(c1) from test_nullable_integer where dataset = 'mins'

# abs: Int16 overlow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int16Array overflow on abs\(-32768\)
statement error DataFusion error: Arrow error: Compute error: Int16Array overflow on abs\(-32768\)
select abs(c2) from test_nullable_integer where dataset = 'mins'

# abs: Int32 overlow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int32Array overflow on abs\(-2147483648\)
statement error DataFusion error: Arrow error: Compute error: Int32Array overflow on abs\(-2147483648\)
select abs(c3) from test_nullable_integer where dataset = 'mins'

# abs: Int64 overlow
statement error DataFusion error: Arrow error: Arithmetic overflow: Int64Array overflow on abs\(-9223372036854775808\)
statement error DataFusion error: Arrow error: Compute error: Int64Array overflow on abs\(-9223372036854775808\)
select abs(c4) from test_nullable_integer where dataset = 'mins'

statement ok
@@ -620,15 +620,15 @@ select gcd(a, b), gcd(c*d + 1, abs(e)) + f from signed_integers;
NULL NULL

# gcd(i64::MIN, i64::MIN)
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(\-9223372036854775808, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(\-9223372036854775808, \-9223372036854775808\)
select gcd(-9223372036854775808, -9223372036854775808);

# gcd(i64::MIN, 0)
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(\-9223372036854775808, 0\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(\-9223372036854775808, 0\)
select gcd(-9223372036854775808, 0);

# gcd(0, i64::MIN)
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(0, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(0, \-9223372036854775808\)
select gcd(0, -9223372036854775808);


@@ -662,22 +662,22 @@ select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
NULL NULL NULL

# Result cannot fit in i64
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
select lcm(-9223372036854775808, -9223372036854775808);

query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(1, \-9223372036854775808\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(1, \-9223372036854775808\)
select lcm(1, -9223372036854775808);

# Overflow on multiplication
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(2, 9223372036854775803\)
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(2, 9223372036854775803\)
select lcm(2, 9223372036854775803);


query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 2107754225 \^ 1221660777
select power(2107754225, 1221660777);

# factorial overflow
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on FACTORIAL\(350943270\)
query error DataFusion error: Arrow error: Compute error: Overflow happened on FACTORIAL\(350943270\)
select FACTORIAL(350943270);

statement ok