-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Specialized GroupValues for primitive and large_primitive
#16136
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e3477f1
93045f2
6ce4857
1b5cde9
8c05f69
cf053cb
8e09c83
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -98,6 +98,8 @@ pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> { | |
| values: Vec<T::Native>, | ||
| /// The random state used to generate hashes | ||
| random_state: RandomState, | ||
|
|
||
| append_row_indices: Vec<u32>, | ||
| } | ||
|
|
||
| impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> { | ||
|
|
@@ -109,6 +111,7 @@ impl<T: ArrowPrimitiveType> GroupValuesPrimitive<T> { | |
| values: Vec::with_capacity(128), | ||
| null_group: None, | ||
| random_state: Default::default(), | ||
| append_row_indices: Vec::new(), | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -119,13 +122,18 @@ where | |
| { | ||
| fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> { | ||
| assert_eq!(cols.len(), 1); | ||
| let col = cols[0].as_primitive::<T>(); | ||
|
|
||
| groups.clear(); | ||
| self.append_row_indices.clear(); | ||
|
|
||
| for v in cols[0].as_primitive::<T>() { | ||
| let mut num_total_groups = self.values.len(); | ||
| for (row_index, v) in col.iter().enumerate() { | ||
| let group_id = match v { | ||
| None => *self.null_group.get_or_insert_with(|| { | ||
| let group_id = self.values.len(); | ||
| self.values.push(Default::default()); | ||
| let group_id = num_total_groups; | ||
| self.append_row_indices.push(row_index as u32); | ||
| num_total_groups += 1; | ||
| group_id | ||
| }), | ||
| Some(key) => { | ||
|
|
@@ -140,16 +148,28 @@ where | |
| match insert { | ||
| hashbrown::hash_table::Entry::Occupied(o) => o.get().0, | ||
| hashbrown::hash_table::Entry::Vacant(v) => { | ||
| let g = self.values.len(); | ||
| let g = num_total_groups; | ||
| v.insert((g, key)); | ||
| self.values.push(key); | ||
| self.append_row_indices.push(row_index as u32); | ||
| num_total_groups += 1; | ||
| g | ||
| } | ||
| } | ||
| } | ||
| }; | ||
| groups.push(group_id) | ||
| } | ||
|
|
||
| // If all are new groups, we just extend it | ||
| if self.append_row_indices.len() == col.len() { | ||
| self.values.extend_from_slice(col.values()); | ||
| } else { | ||
| let col_values = col.values(); | ||
| for &row_index in self.append_row_indices.iter() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can be written as
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually... I found no obvious improvement when switching to |
||
| self.values.push(col_values[row_index as usize]); | ||
| } | ||
| } | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this can `extend` to `groups`