You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/11 20:26:04 UTC
[arrow-rs] branch master updated: Upgrade ahash to 0.8 (#2410)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 961cd2a65 Upgrade ahash to 0.8 (#2410)
961cd2a65 is described below
commit 961cd2a651eb9512d4f6994f91928b9e8fb269b0
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Thu Aug 11 22:26:00 2022 +0200
Upgrade ahash to 0.8 (#2410)
* Upgrade ahash to 0.8
* Use hash_one
* Use hash_one
* Use hash_one
* Use compile-time-rng for wasm
* Use compile-time-rng for wasm
* Use compile-time-rng for wasm
* Clippy
* Revert "Clippy"
This reverts commit 4c693cb87f984fa57b871dbff3341622424c5558.
---
arrow/Cargo.toml | 7 ++++++-
arrow/src/array/builder/string_dictionary_builder.rs | 15 ++++-----------
parquet/Cargo.toml | 2 +-
parquet/src/util/interner.rs | 12 ++----------
4 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 9e6bf532f..44a35a519 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -37,8 +37,13 @@ name = "arrow"
path = "src/lib.rs"
bench = false
+[target.'cfg(target_arch = "wasm32")'.dependencies]
+ahash = { version = "0.8", default-features = false, features=["compile-time-rng"] }
+
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
+ahash = { version = "0.8", default-features = false, features=["runtime-rng"] }
+
[dependencies]
-ahash = { version = "0.7", default-features = false }
serde = { version = "1.0", default-features = false }
serde_derive = { version = "1.0", default-features = false }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow/src/array/builder/string_dictionary_builder.rs
index cfbda38c0..15a36a64c 100644
--- a/arrow/src/array/builder/string_dictionary_builder.rs
+++ b/arrow/src/array/builder/string_dictionary_builder.rs
@@ -137,7 +137,7 @@ where
for (idx, maybe_value) in dictionary_values.iter().enumerate() {
match maybe_value {
Some(value) => {
- let hash = compute_hash(&state, value.as_bytes());
+ let hash = state.hash_one(value.as_bytes());
let key = K::Native::from_usize(idx)
.ok_or(ArrowError::DictionaryKeyOverflowError)?;
@@ -149,7 +149,7 @@ where
if let RawEntryMut::Vacant(v) = entry {
v.insert_with_hasher(hash, key, (), |key| {
- compute_hash(&state, get_bytes(&values_builder, key))
+ state.hash_one(get_bytes(&values_builder, key))
});
}
@@ -217,7 +217,7 @@ where
let state = &self.state;
let storage = &mut self.values_builder;
- let hash = compute_hash(state, value.as_bytes());
+ let hash = state.hash_one(value.as_bytes());
let entry = self
.dedup
@@ -234,7 +234,7 @@ where
*entry
.insert_with_hasher(hash, key, (), |key| {
- compute_hash(state, get_bytes(storage, key))
+ state.hash_one(get_bytes(storage, key))
})
.0
}
@@ -268,13 +268,6 @@ where
}
}
-fn compute_hash(hasher: &ahash::RandomState, value: &[u8]) -> u64 {
- use std::hash::{BuildHasher, Hash, Hasher};
- let mut state = hasher.build_hasher();
- value.hash(&mut state);
- state.finish()
-}
-
fn get_bytes<'a, K: ArrowNativeType>(values: &'a StringBuilder, key: &K) -> &'a [u8] {
let offsets = values.offsets_slice();
let values = values.values_slice();
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index f92be0abc..5a8e4c485 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -30,7 +30,7 @@ edition = "2021"
rust-version = "1.62"
[dependencies]
-ahash = "0.7"
+ahash = "0.8"
parquet-format = { version = "4.0.0", default-features = false }
bytes = { version = "1.1", default-features = false, features = ["std"] }
byteorder = { version = "1", default-features = false }
diff --git a/parquet/src/util/interner.rs b/parquet/src/util/interner.rs
index 319750dd1..e638237e0 100644
--- a/parquet/src/util/interner.rs
+++ b/parquet/src/util/interner.rs
@@ -18,7 +18,6 @@
use crate::data_type::AsBytes;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
-use std::hash::Hash;
const DEFAULT_DEDUP_CAPACITY: usize = 4096;
@@ -62,7 +61,7 @@ impl<S: Storage> Interner<S> {
/// Intern the value, returning the interned key, and if this was a new value
pub fn intern(&mut self, value: &S::Value) -> S::Key {
- let hash = compute_hash(&self.state, value);
+ let hash = self.state.hash_one(value.as_bytes());
let entry = self
.dedup
@@ -76,7 +75,7 @@ impl<S: Storage> Interner<S> {
*entry
.insert_with_hasher(hash, key, (), |key| {
- compute_hash(&self.state, self.storage.get(*key))
+ self.state.hash_one(self.storage.get(*key).as_bytes())
})
.0
}
@@ -93,10 +92,3 @@ impl<S: Storage> Interner<S> {
self.storage
}
}
-
-fn compute_hash<T: AsBytes + ?Sized>(state: &ahash::RandomState, value: &T) -> u64 {
- use std::hash::{BuildHasher, Hasher};
- let mut hasher = state.build_hasher();
- value.as_bytes().hash(&mut hasher);
- hasher.finish()
-}