You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/11 20:26:04 UTC

[arrow-rs] branch master updated: Upgrade ahash to 0.8 (#2410)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 961cd2a65 Upgrade ahash to 0.8 (#2410)
961cd2a65 is described below

commit 961cd2a651eb9512d4f6994f91928b9e8fb269b0
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Thu Aug 11 22:26:00 2022 +0200

    Upgrade ahash to 0.8 (#2410)
    
    * Upgrade ahash to 0.8
    
    * Use hash_one
    
    * Use hash_one
    
    * Use hash_one
    
    * Use compile-time-rng for wasm
    
    * Use compile-time-rng for wasm
    
    * Use compile-time-rng for wasm
    
    * Clippy
    
    * Revert "Clippy"
    
    This reverts commit 4c693cb87f984fa57b871dbff3341622424c5558.
---
 arrow/Cargo.toml                                     |  7 ++++++-
 arrow/src/array/builder/string_dictionary_builder.rs | 15 ++++-----------
 parquet/Cargo.toml                                   |  2 +-
 parquet/src/util/interner.rs                         | 12 ++----------
 4 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 9e6bf532f..44a35a519 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -37,8 +37,13 @@ name = "arrow"
 path = "src/lib.rs"
 bench = false
 
+[target.'cfg(target_arch = "wasm32")'.dependencies]
+ahash = { version = "0.8", default-features = false, features=["compile-time-rng"] }
+
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
+ahash = { version = "0.8", default-features = false, features=["runtime-rng"] }
+
 [dependencies]
-ahash = { version = "0.7", default-features = false }
 serde = { version = "1.0", default-features = false }
 serde_derive = { version = "1.0", default-features = false }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow/src/array/builder/string_dictionary_builder.rs
index cfbda38c0..15a36a64c 100644
--- a/arrow/src/array/builder/string_dictionary_builder.rs
+++ b/arrow/src/array/builder/string_dictionary_builder.rs
@@ -137,7 +137,7 @@ where
         for (idx, maybe_value) in dictionary_values.iter().enumerate() {
             match maybe_value {
                 Some(value) => {
-                    let hash = compute_hash(&state, value.as_bytes());
+                    let hash = state.hash_one(value.as_bytes());
 
                     let key = K::Native::from_usize(idx)
                         .ok_or(ArrowError::DictionaryKeyOverflowError)?;
@@ -149,7 +149,7 @@ where
 
                     if let RawEntryMut::Vacant(v) = entry {
                         v.insert_with_hasher(hash, key, (), |key| {
-                            compute_hash(&state, get_bytes(&values_builder, key))
+                            state.hash_one(get_bytes(&values_builder, key))
                         });
                     }
 
@@ -217,7 +217,7 @@ where
 
         let state = &self.state;
         let storage = &mut self.values_builder;
-        let hash = compute_hash(state, value.as_bytes());
+        let hash = state.hash_one(value.as_bytes());
 
         let entry = self
             .dedup
@@ -234,7 +234,7 @@ where
 
                 *entry
                     .insert_with_hasher(hash, key, (), |key| {
-                        compute_hash(state, get_bytes(storage, key))
+                        state.hash_one(get_bytes(storage, key))
                     })
                     .0
             }
@@ -268,13 +268,6 @@ where
     }
 }
 
-fn compute_hash(hasher: &ahash::RandomState, value: &[u8]) -> u64 {
-    use std::hash::{BuildHasher, Hash, Hasher};
-    let mut state = hasher.build_hasher();
-    value.hash(&mut state);
-    state.finish()
-}
-
 fn get_bytes<'a, K: ArrowNativeType>(values: &'a StringBuilder, key: &K) -> &'a [u8] {
     let offsets = values.offsets_slice();
     let values = values.values_slice();
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index f92be0abc..5a8e4c485 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -30,7 +30,7 @@ edition = "2021"
 rust-version = "1.62"
 
 [dependencies]
-ahash = "0.7"
+ahash = "0.8"
 parquet-format = { version = "4.0.0", default-features = false }
 bytes = { version = "1.1", default-features = false, features = ["std"] }
 byteorder = { version = "1", default-features = false }
diff --git a/parquet/src/util/interner.rs b/parquet/src/util/interner.rs
index 319750dd1..e638237e0 100644
--- a/parquet/src/util/interner.rs
+++ b/parquet/src/util/interner.rs
@@ -18,7 +18,6 @@
 use crate::data_type::AsBytes;
 use hashbrown::hash_map::RawEntryMut;
 use hashbrown::HashMap;
-use std::hash::Hash;
 
 const DEFAULT_DEDUP_CAPACITY: usize = 4096;
 
@@ -62,7 +61,7 @@ impl<S: Storage> Interner<S> {
 
     /// Intern the value, returning the interned key, and if this was a new value
     pub fn intern(&mut self, value: &S::Value) -> S::Key {
-        let hash = compute_hash(&self.state, value);
+        let hash = self.state.hash_one(value.as_bytes());
 
         let entry = self
             .dedup
@@ -76,7 +75,7 @@ impl<S: Storage> Interner<S> {
 
                 *entry
                     .insert_with_hasher(hash, key, (), |key| {
-                        compute_hash(&self.state, self.storage.get(*key))
+                        self.state.hash_one(self.storage.get(*key).as_bytes())
                     })
                     .0
             }
@@ -93,10 +92,3 @@ impl<S: Storage> Interner<S> {
         self.storage
     }
 }
-
-fn compute_hash<T: AsBytes + ?Sized>(state: &ahash::RandomState, value: &T) -> u64 {
-    use std::hash::{BuildHasher, Hasher};
-    let mut hasher = state.build_hasher();
-    value.as_bytes().hash(&mut hasher);
-    hasher.finish()
-}