You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/30 09:13:51 UTC
[arrow-datafusion] 02/02: hash primitives
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch compare_hashes
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
commit 1e1d27b3ecf0e36f089cd2f51a7add522aac2a5d
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Fri Jun 30 11:13:38 2023 +0200
hash primitives
---
datafusion/physical-expr/src/hash_utils.rs | 40 +++++++++++++++++++++++++++---
1 file changed, 37 insertions(+), 3 deletions(-)
diff --git a/datafusion/physical-expr/src/hash_utils.rs b/datafusion/physical-expr/src/hash_utils.rs
index b751df928d..de3526992f 100644
--- a/datafusion/physical-expr/src/hash_utils.rs
+++ b/datafusion/physical-expr/src/hash_utils.rs
@@ -84,6 +84,40 @@ macro_rules! hash_float_value {
}
hash_float_value!((half::f16, u16), (f32, u32), (f64, u64));
+/// Hashes every non-null value of a primitive `array` into `hashes_buffer`.
+///
+/// Slot `i` of `hashes_buffer` corresponds to element `i` of `array`.
+///
+/// * `random_state` - hasher state handed to `HashValue::hash_one` for each value.
+/// * `hashes_buffer` - output slots, updated in place.
+/// * `multi_col` - when `true`, the value's hash is merged with the hash already
+///   stored in the slot via `combine_hashes`; when `false`, the slot is overwritten.
+///
+/// Slots whose element is null are left untouched.
+///
+/// The null check and the `multi_col` check are both decided once, outside the
+/// loops, so each of the four loops below has a fixed body (presumably to keep
+/// the per-element work minimal).
+///
+/// NOTE(review): the no-null loops stop at the shorter of the two sequences
+/// (`zip`), while the null-aware loops index `array` by buffer position.
+/// Presumably callers always pass a buffer with exactly `array.len()` slots;
+/// confirm against `create_hashes`.
+fn hash_array_primitve<T>(
+    array: &PrimitiveArray<T>,
+    random_state: &RandomState,
+    hashes_buffer: &mut [u64],
+    multi_col: bool,
+) where
+    T: ArrowPrimitiveType,
+    <T as arrow_array::ArrowPrimitiveType>::Native: HashValue,
+{
+    if array.null_count() == 0 {
+        // No nulls: iterate the raw values directly, skipping validity checks.
+        if multi_col {
+            for (hash, &val) in hashes_buffer.iter_mut().zip(array.values().iter()) {
+                *hash = combine_hashes(val.hash_one(random_state), *hash);
+            }
+        } else {
+            for (hash, &val) in hashes_buffer.iter_mut().zip(array.values().iter()) {
+                *hash = val.hash_one(random_state);
+            }
+        }
+    } else if multi_col {
+        // Nulls present: only valid elements contribute; null slots keep their hash.
+        for (i, hash) in hashes_buffer.iter_mut().enumerate() {
+            if !array.is_null(i) {
+                *hash = combine_hashes(array.value(i).hash_one(random_state), *hash);
+            }
+        }
+    } else {
+        for (i, hash) in hashes_buffer.iter_mut().enumerate() {
+            if !array.is_null(i) {
+                *hash = array.value(i).hash_one(random_state);
+            }
+        }
+    }
+}
+
fn hash_array<T>(
array: T,
random_state: &RandomState,
@@ -215,7 +249,7 @@ pub fn create_hashes<'a>(
for col in arrays {
let array = col.as_ref();
downcast_primitive_array! {
- array => hash_array(array, random_state, hashes_buffer, multi_col),
+ array => hash_array_primitve(array, random_state, hashes_buffer, multi_col),
DataType::Null => hash_null(random_state, hashes_buffer, multi_col),
DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, multi_col),
DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, multi_col),
@@ -228,11 +262,11 @@ pub fn create_hashes<'a>(
}
DataType::Decimal128(_, _) => {
let array = as_primitive_array::<Decimal128Type>(array)?;
- hash_array(array, random_state, hashes_buffer, multi_col)
+ hash_array_primitve(array, random_state, hashes_buffer, multi_col)
}
DataType::Decimal256(_, _) => {
let array = as_primitive_array::<Decimal256Type>(array)?;
- hash_array(array, random_state, hashes_buffer, multi_col)
+ hash_array_primitve(array, random_state, hashes_buffer, multi_col)
}
DataType::Dictionary(_, _) => downcast_dictionary_array! {
array => hash_dictionary(array, random_state, hashes_buffer, multi_col)?,