You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/19 07:27:23 UTC

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #1906: Fix misaligned reference and logic error in crc32

tustvold commented on code in PR #1906:
URL: https://github.com/apache/arrow-rs/pull/1906#discussion_r901060762


##########
parquet/src/util/hash_util.rs:
##########
@@ -107,27 +107,18 @@ unsafe fn crc32_hash(bytes: &[u8], seed: u32) -> u32 {
     #[cfg(target_arch = "x86_64")]
     use std::arch::x86_64::*;
 
-    let u32_num_bytes = std::mem::size_of::<u32>();
-    let mut num_bytes = bytes.len();
-    let num_words = num_bytes / u32_num_bytes;
-    num_bytes %= u32_num_bytes;
-
-    let bytes_u32: &[u32] = std::slice::from_raw_parts(
-        &bytes[0..num_words * u32_num_bytes] as *const [u8] as *const u32,
-        num_words,
-    );
-
-    let mut offset = 0;
     let mut hash = seed;
-    while offset < num_words {
-        hash = _mm_crc32_u32(hash, bytes_u32[offset]);
-        offset += 1;
+    for chunk in bytes
+        .chunks_exact(4)
+        .map(|chunk| u32::from_le_bytes(chunk.try_into().unwrap()))
+    {
+        hash = _mm_crc32_u32(hash, chunk);
     }
 
-    offset = num_words * u32_num_bytes;
-    while offset < num_bytes {
-        hash = _mm_crc32_u8(hash, bytes[offset]);
-        offset += 1;
+    let remainder = bytes.len() % 4;

Review Comment:
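   You could use `ChunksExact::remainder` here to get the leftover bytes directly, instead of computing `bytes.len() % 4` by hand: https://doc.rust-lang.org/std/slice/struct.ChunksExact.html#method.remainder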



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org