Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
987e33b
Pin to pre-release version of arrow 52.2.0
alamb Jul 16, 2024
2c808fb
Update for deprecated method
alamb Jul 16, 2024
8d8732c
Add a config to force using string view in benchmark (#11514)
XiangpengHao Jul 19, 2024
8e0ca1a
Add String view helper functions (#11517)
XiangpengHao Jul 19, 2024
db65772
Add ArrowBytesViewMap and ArrowBytesViewSet (#11515)
XiangpengHao Jul 19, 2024
efcf5c6
Enable `GroupValueBytesView` for aggregation with StringView types (#…
XiangpengHao Jul 20, 2024
34d42bc
Initial support for regex_replace on `StringViewArray` (#11556)
XiangpengHao Jul 22, 2024
bb780b3
Add support for Utf8View for date/temporal codepaths (#11518)
a10y Jul 22, 2024
2b58fd5
GC `StringViewArray` in `CoalesceBatchesStream` (#11587)
XiangpengHao Jul 25, 2024
2b2b8ab
Merge remote-tracking branch 'apache/main' into string-view2
alamb Jul 26, 2024
ea11a9d
Merge remote-tracking branch 'apache/main' into string-view2
alamb Jul 26, 2024
f13bb82
[Bug] fix bug in return type inference of `utf8_to_int_type` (#11662)
XiangpengHao Jul 26, 2024
fb79638
Merge remote-tracking branch 'apache/main' into string-view2
alamb Jul 26, 2024
281fbed
Fix clippy
alamb Jul 26, 2024
5690712
Increase ByteViewMap block size to 2MB (#11674)
XiangpengHao Jul 27, 2024
322c3d2
Change `--string-view` to only apply to parquet formats (#11663)
XiangpengHao Jul 27, 2024
ab8005d
Implement native StringView support for character length (#11676)
XiangpengHao Jul 27, 2024
561aee8
Merge remote-tracking branch 'apache/main' into string-view2
alamb Jul 29, 2024
2e9c8a0
Remove unneeded patches
alamb Jul 29, 2024
f1f22fa
cargo fmt
alamb Jul 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add ArrowBytesViewMap and ArrowBytesViewSet (#11515)
* Update `string-view` branch to arrow-rs main (#10966)

* Pin to arrow main

* Fix clippy with latest arrow

* Uncomment test that needs new arrow-rs to work

* Update datafusion-cli Cargo.lock

* Update Cargo.lock

* taplo

* merge

* update cast

* consistent dep

* fix ci

* add more tests

* make doc happy

* update new implementation

* fix bug

* avoid unused dep

* update dep

* update

* fix cargo check

* update doc

* pick up the comments change again

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
XiangpengHao and alamb authored Jul 19, 2024
commit db65772cd0d73f132402fb6dbad951e0db291dce
12 changes: 5 additions & 7 deletions datafusion/common/src/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ use arrow_buffer::IntervalDayTime;
use arrow_buffer::IntervalMonthDayNano;

use crate::cast::{
as_boolean_array, as_fixed_size_list_array, as_generic_binary_array,
as_large_list_array, as_list_array, as_primitive_array, as_string_array,
as_struct_array,
as_binary_view_array, as_boolean_array, as_fixed_size_list_array,
as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array,
as_string_array, as_string_view_array, as_struct_array,
};
use crate::error::{Result, _internal_err};

Expand Down Expand Up @@ -360,8 +360,6 @@ pub fn create_hashes<'a>(
random_state: &RandomState,
hashes_buffer: &'a mut Vec<u64>,
) -> Result<&'a mut Vec<u64>> {
use crate::cast::{as_binary_view_array, as_string_view_array};

for (i, col) in arrays.iter().enumerate() {
let array = col.as_ref();
// combine hashes with `combine_hashes` for all columns besides the first
Expand All @@ -371,11 +369,11 @@ pub fn create_hashes<'a>(
DataType::Null => hash_null(random_state, hashes_buffer, rehash),
DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash),
DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash),
DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash),
DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
DataType::Binary => hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, rehash),
DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash),
DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
DataType::FixedSizeBinary(_) => {
let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap();
hash_array(array, random_state, hashes_buffer, rehash)
Expand Down
Loading