You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/30 09:13:49 UTC

[arrow-datafusion] branch compare_hashes created (now 1e1d27b3ec)

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a change to branch compare_hashes
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


      at 1e1d27b3ec hash primitves

This branch includes the following new commits:

     new 2f3aa72960 compare hash value
     new 1e1d27b3ec hash primitves

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-datafusion] 02/02: hash primitves

Posted by dh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch compare_hashes
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 1e1d27b3ecf0e36f089cd2f51a7add522aac2a5d
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Fri Jun 30 11:13:38 2023 +0200

    hash primitves
---
 datafusion/physical-expr/src/hash_utils.rs | 40 +++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-expr/src/hash_utils.rs b/datafusion/physical-expr/src/hash_utils.rs
index b751df928d..de3526992f 100644
--- a/datafusion/physical-expr/src/hash_utils.rs
+++ b/datafusion/physical-expr/src/hash_utils.rs
@@ -84,6 +84,40 @@ macro_rules! hash_float_value {
 }
 hash_float_value!((half::f16, u16), (f32, u32), (f64, u64));
 
+/// Hashes every non-null value of `array` into the matching `hashes_buffer` slot.
+/// With `multi_col` the value hash is merged into the slot via `combine_hashes`;
+/// otherwise it overwrites the slot. Null rows keep their slot untouched.
+fn hash_array_primitve<T: ArrowPrimitiveType>(
+    array: &PrimitiveArray<T>,
+    random_state: &RandomState,
+    hashes_buffer: &mut [u64],
+    multi_col: bool,
+) where
+    T::Native: HashValue,
+{
+    if array.null_count() == 0 {
+        if multi_col {
+            for (hash, &val) in hashes_buffer.iter_mut().zip(array.values().iter()) {
+                *hash = combine_hashes(val.hash_one(random_state), *hash);
+            }
+        } else {
+            for (hash, &val) in hashes_buffer.iter_mut().zip(array.values().iter()) {
+                *hash = val.hash_one(random_state);
+            }
+        }
+    } else {
+        for (hash, val) in hashes_buffer.iter_mut().zip(array.iter()) {
+            if let Some(val) = val {
+                *hash = if multi_col {
+                    combine_hashes(val.hash_one(random_state), *hash)
+                } else {
+                    val.hash_one(random_state)
+                };
+            }
+        }
+    }
+}
+
 fn hash_array<T>(
     array: T,
     random_state: &RandomState,
@@ -215,7 +249,7 @@ pub fn create_hashes<'a>(
     for col in arrays {
         let array = col.as_ref();
         downcast_primitive_array! {
-            array => hash_array(array, random_state, hashes_buffer, multi_col),
+            array => hash_array_primitve(array, random_state, hashes_buffer, multi_col),
             DataType::Null => hash_null(random_state, hashes_buffer, multi_col),
             DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, multi_col),
             DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, multi_col),
@@ -228,11 +262,11 @@ pub fn create_hashes<'a>(
             }
             DataType::Decimal128(_, _) => {
                 let array = as_primitive_array::<Decimal128Type>(array)?;
-                hash_array(array, random_state, hashes_buffer, multi_col)
+                hash_array_primitve(array, random_state, hashes_buffer, multi_col)
             }
             DataType::Decimal256(_, _) => {
                 let array = as_primitive_array::<Decimal256Type>(array)?;
-                hash_array(array, random_state, hashes_buffer, multi_col)
+                hash_array_primitve(array, random_state, hashes_buffer, multi_col)
             }
             DataType::Dictionary(_, _) => downcast_dictionary_array! {
                 array => hash_dictionary(array, random_state, hashes_buffer, multi_col)?,


[arrow-datafusion] 01/02: compare hash value

Posted by dh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch compare_hashes
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 2f3aa7296097f8a6c6f3b0d1ef81f1159646acb5
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Thu Jun 29 20:43:56 2023 +0200

    compare hash value
---
 datafusion/core/src/physical_plan/aggregates/row_hash.rs | 5 +++--
 testing                                                  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
index ba02bc096b..2cb343c03a 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
@@ -330,12 +330,13 @@ impl GroupedHashAggregateStream {
         } = &mut self.aggr_state;
 
         for (row, hash) in batch_hashes.into_iter().enumerate() {
-            let entry = map.get_mut(hash, |(_hash, group_idx)| {
+            let entry = map.get_mut(hash, |(hash2, group_idx)| {
                 // verify that a group that we are inserting with hash is
                 // actually the same key value as the group in
                 // existing_idx  (aka group_values @ row)
                 let group_state = &group_states[*group_idx];
-                group_rows.row(row) == group_state.group_by_values.row()
+
+                hash == *hash2 && group_rows.row(row) == group_state.group_by_values.row()
             });
 
             match entry {
diff --git a/testing b/testing
index e81d0c6de3..5bab2f264a 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit e81d0c6de35948b3be7984af8e00413b314cde6e
+Subproject commit 5bab2f264a23f5af68f69ea93d24ef1e8e77fc88