You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2019/07/04 05:52:04 UTC

[arrow] 25/38: ARROW-5380: [C++] Fix memory alignment UBSan errors.

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 508e745755a81ec21a3f5d1451532bfffa229d73
Author: Micah Kornfield <em...@gmail.com>
AuthorDate: Tue Jul 2 11:32:48 2019 +0200

    ARROW-5380: [C++] Fix memory alignment UBSan errors.
    
    - Add utility methods for unaligned loads and use them where errors
      are discovered.
    - Upgrade version of flatbuffers to avoid issues with unaligned
      load in that library
    - Discover bug in spec that makes zero-copy well-defined behavior
      virtually impossible with flatbuffers (need to discuss on ML).  For now I'm
      not turning on ASAN and will file a follow-up JIRA to track this.
    
    Still needed:
     - [ ] Performance testing
     - [X] Discuss flatbuffers issues (I sent e-mail to ML)
    
    Author: Micah Kornfield <em...@gmail.com>
    Author: emkornfield <em...@gmail.com>
    
    Closes #4757 from emkornfield/ubsan_mem and squashes the following commits:
    
    5528584a7 <emkornfield> remove TODO
    db49fbbb4 <Micah Kornfield> Ubsan excluding flatbuffers
---
 cpp/src/arrow/util/bpacking.h    | 3409 ++++++++++++++++++++++----------------
 cpp/src/arrow/util/hashing.h     |    9 +-
 cpp/src/arrow/util/ubsan.h       |   16 +
 cpp/src/parquet/arrow/reader.cc  |   20 +-
 cpp/src/parquet/arrow/writer.h   |    5 +-
 cpp/src/parquet/column_reader.cc |    3 +-
 cpp/src/parquet/encoding.cc      |   11 +-
 cpp/src/parquet/file_reader.cc   |    3 +-
 cpp/src/plasma/common.cc         |    4 +-
 9 files changed, 2015 insertions(+), 1465 deletions(-)

diff --git a/cpp/src/arrow/util/bpacking.h b/cpp/src/arrow/util/bpacking.h
index 14258cf..98c2e7d 100644
--- a/cpp/src/arrow/util/bpacking.h
+++ b/cpp/src/arrow/util/bpacking.h
@@ -28,74 +28,76 @@
 #define ARROW_UTIL_BPACKING_H
 
 #include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 namespace internal {
 
 inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) & 1;
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) & 1;
   out++;
-  *out = ((*in) >> 1) & 1;
+  *out = (inl >> 1) & 1;
   out++;
-  *out = ((*in) >> 2) & 1;
+  *out = (inl >> 2) & 1;
   out++;
-  *out = ((*in) >> 3) & 1;
+  *out = (inl >> 3) & 1;
   out++;
-  *out = ((*in) >> 4) & 1;
+  *out = (inl >> 4) & 1;
   out++;
-  *out = ((*in) >> 5) & 1;
+  *out = (inl >> 5) & 1;
   out++;
-  *out = ((*in) >> 6) & 1;
+  *out = (inl >> 6) & 1;
   out++;
-  *out = ((*in) >> 7) & 1;
+  *out = (inl >> 7) & 1;
   out++;
-  *out = ((*in) >> 8) & 1;
+  *out = (inl >> 8) & 1;
   out++;
-  *out = ((*in) >> 9) & 1;
+  *out = (inl >> 9) & 1;
   out++;
-  *out = ((*in) >> 10) & 1;
+  *out = (inl >> 10) & 1;
   out++;
-  *out = ((*in) >> 11) & 1;
+  *out = (inl >> 11) & 1;
   out++;
-  *out = ((*in) >> 12) & 1;
+  *out = (inl >> 12) & 1;
   out++;
-  *out = ((*in) >> 13) & 1;
+  *out = (inl >> 13) & 1;
   out++;
-  *out = ((*in) >> 14) & 1;
+  *out = (inl >> 14) & 1;
   out++;
-  *out = ((*in) >> 15) & 1;
+  *out = (inl >> 15) & 1;
   out++;
-  *out = ((*in) >> 16) & 1;
+  *out = (inl >> 16) & 1;
   out++;
-  *out = ((*in) >> 17) & 1;
+  *out = (inl >> 17) & 1;
   out++;
-  *out = ((*in) >> 18) & 1;
+  *out = (inl >> 18) & 1;
   out++;
-  *out = ((*in) >> 19) & 1;
+  *out = (inl >> 19) & 1;
   out++;
-  *out = ((*in) >> 20) & 1;
+  *out = (inl >> 20) & 1;
   out++;
-  *out = ((*in) >> 21) & 1;
+  *out = (inl >> 21) & 1;
   out++;
-  *out = ((*in) >> 22) & 1;
+  *out = (inl >> 22) & 1;
   out++;
-  *out = ((*in) >> 23) & 1;
+  *out = (inl >> 23) & 1;
   out++;
-  *out = ((*in) >> 24) & 1;
+  *out = (inl >> 24) & 1;
   out++;
-  *out = ((*in) >> 25) & 1;
+  *out = (inl >> 25) & 1;
   out++;
-  *out = ((*in) >> 26) & 1;
+  *out = (inl >> 26) & 1;
   out++;
-  *out = ((*in) >> 27) & 1;
+  *out = (inl >> 27) & 1;
   out++;
-  *out = ((*in) >> 28) & 1;
+  *out = (inl >> 28) & 1;
   out++;
-  *out = ((*in) >> 29) & 1;
+  *out = (inl >> 29) & 1;
   out++;
-  *out = ((*in) >> 30) & 1;
+  *out = (inl >> 30) & 1;
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
   out++;
 
@@ -103,70 +105,72 @@ inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 2);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 2);
+  *out = (inl >> 2) % (1U << 2);
   out++;
-  *out = ((*in) >> 4) % (1U << 2);
+  *out = (inl >> 4) % (1U << 2);
   out++;
-  *out = ((*in) >> 6) % (1U << 2);
+  *out = (inl >> 6) % (1U << 2);
   out++;
-  *out = ((*in) >> 8) % (1U << 2);
+  *out = (inl >> 8) % (1U << 2);
   out++;
-  *out = ((*in) >> 10) % (1U << 2);
+  *out = (inl >> 10) % (1U << 2);
   out++;
-  *out = ((*in) >> 12) % (1U << 2);
+  *out = (inl >> 12) % (1U << 2);
   out++;
-  *out = ((*in) >> 14) % (1U << 2);
+  *out = (inl >> 14) % (1U << 2);
   out++;
-  *out = ((*in) >> 16) % (1U << 2);
+  *out = (inl >> 16) % (1U << 2);
   out++;
-  *out = ((*in) >> 18) % (1U << 2);
+  *out = (inl >> 18) % (1U << 2);
   out++;
-  *out = ((*in) >> 20) % (1U << 2);
+  *out = (inl >> 20) % (1U << 2);
   out++;
-  *out = ((*in) >> 22) % (1U << 2);
+  *out = (inl >> 22) % (1U << 2);
   out++;
-  *out = ((*in) >> 24) % (1U << 2);
+  *out = (inl >> 24) % (1U << 2);
   out++;
-  *out = ((*in) >> 26) % (1U << 2);
+  *out = (inl >> 26) % (1U << 2);
   out++;
-  *out = ((*in) >> 28) % (1U << 2);
+  *out = (inl >> 28) % (1U << 2);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 2);
+  *out = (inl >> 0) % (1U << 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 2);
+  *out = (inl >> 2) % (1U << 2);
   out++;
-  *out = ((*in) >> 4) % (1U << 2);
+  *out = (inl >> 4) % (1U << 2);
   out++;
-  *out = ((*in) >> 6) % (1U << 2);
+  *out = (inl >> 6) % (1U << 2);
   out++;
-  *out = ((*in) >> 8) % (1U << 2);
+  *out = (inl >> 8) % (1U << 2);
   out++;
-  *out = ((*in) >> 10) % (1U << 2);
+  *out = (inl >> 10) % (1U << 2);
   out++;
-  *out = ((*in) >> 12) % (1U << 2);
+  *out = (inl >> 12) % (1U << 2);
   out++;
-  *out = ((*in) >> 14) % (1U << 2);
+  *out = (inl >> 14) % (1U << 2);
   out++;
-  *out = ((*in) >> 16) % (1U << 2);
+  *out = (inl >> 16) % (1U << 2);
   out++;
-  *out = ((*in) >> 18) % (1U << 2);
+  *out = (inl >> 18) % (1U << 2);
   out++;
-  *out = ((*in) >> 20) % (1U << 2);
+  *out = (inl >> 20) % (1U << 2);
   out++;
-  *out = ((*in) >> 22) % (1U << 2);
+  *out = (inl >> 22) % (1U << 2);
   out++;
-  *out = ((*in) >> 24) % (1U << 2);
+  *out = (inl >> 24) % (1U << 2);
   out++;
-  *out = ((*in) >> 26) % (1U << 2);
+  *out = (inl >> 26) % (1U << 2);
   out++;
-  *out = ((*in) >> 28) % (1U << 2);
+  *out = (inl >> 28) % (1U << 2);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
   out++;
 
@@ -174,73 +178,76 @@ inline const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 3);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 3);
+  *out = (inl >> 3) % (1U << 3);
   out++;
-  *out = ((*in) >> 6) % (1U << 3);
+  *out = (inl >> 6) % (1U << 3);
   out++;
-  *out = ((*in) >> 9) % (1U << 3);
+  *out = (inl >> 9) % (1U << 3);
   out++;
-  *out = ((*in) >> 12) % (1U << 3);
+  *out = (inl >> 12) % (1U << 3);
   out++;
-  *out = ((*in) >> 15) % (1U << 3);
+  *out = (inl >> 15) % (1U << 3);
   out++;
-  *out = ((*in) >> 18) % (1U << 3);
+  *out = (inl >> 18) % (1U << 3);
   out++;
-  *out = ((*in) >> 21) % (1U << 3);
+  *out = (inl >> 21) % (1U << 3);
   out++;
-  *out = ((*in) >> 24) % (1U << 3);
+  *out = (inl >> 24) % (1U << 3);
   out++;
-  *out = ((*in) >> 27) % (1U << 3);
+  *out = (inl >> 27) % (1U << 3);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (3 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (3 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 3);
+  *out = (inl >> 1) % (1U << 3);
   out++;
-  *out = ((*in) >> 4) % (1U << 3);
+  *out = (inl >> 4) % (1U << 3);
   out++;
-  *out = ((*in) >> 7) % (1U << 3);
+  *out = (inl >> 7) % (1U << 3);
   out++;
-  *out = ((*in) >> 10) % (1U << 3);
+  *out = (inl >> 10) % (1U << 3);
   out++;
-  *out = ((*in) >> 13) % (1U << 3);
+  *out = (inl >> 13) % (1U << 3);
   out++;
-  *out = ((*in) >> 16) % (1U << 3);
+  *out = (inl >> 16) % (1U << 3);
   out++;
-  *out = ((*in) >> 19) % (1U << 3);
+  *out = (inl >> 19) % (1U << 3);
   out++;
-  *out = ((*in) >> 22) % (1U << 3);
+  *out = (inl >> 22) % (1U << 3);
   out++;
-  *out = ((*in) >> 25) % (1U << 3);
+  *out = (inl >> 25) % (1U << 3);
   out++;
-  *out = ((*in) >> 28) % (1U << 3);
+  *out = (inl >> 28) % (1U << 3);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (3 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (3 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 3);
+  *out = (inl >> 2) % (1U << 3);
   out++;
-  *out = ((*in) >> 5) % (1U << 3);
+  *out = (inl >> 5) % (1U << 3);
   out++;
-  *out = ((*in) >> 8) % (1U << 3);
+  *out = (inl >> 8) % (1U << 3);
   out++;
-  *out = ((*in) >> 11) % (1U << 3);
+  *out = (inl >> 11) % (1U << 3);
   out++;
-  *out = ((*in) >> 14) % (1U << 3);
+  *out = (inl >> 14) % (1U << 3);
   out++;
-  *out = ((*in) >> 17) % (1U << 3);
+  *out = (inl >> 17) % (1U << 3);
   out++;
-  *out = ((*in) >> 20) % (1U << 3);
+  *out = (inl >> 20) % (1U << 3);
   out++;
-  *out = ((*in) >> 23) % (1U << 3);
+  *out = (inl >> 23) % (1U << 3);
   out++;
-  *out = ((*in) >> 26) % (1U << 3);
+  *out = (inl >> 26) % (1U << 3);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
   out++;
 
@@ -248,72 +255,76 @@ inline const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 4);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 4);
+  *out = (inl >> 4) % (1U << 4);
   out++;
-  *out = ((*in) >> 8) % (1U << 4);
+  *out = (inl >> 8) % (1U << 4);
   out++;
-  *out = ((*in) >> 12) % (1U << 4);
+  *out = (inl >> 12) % (1U << 4);
   out++;
-  *out = ((*in) >> 16) % (1U << 4);
+  *out = (inl >> 16) % (1U << 4);
   out++;
-  *out = ((*in) >> 20) % (1U << 4);
+  *out = (inl >> 20) % (1U << 4);
   out++;
-  *out = ((*in) >> 24) % (1U << 4);
+  *out = (inl >> 24) % (1U << 4);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 4);
+  *out = (inl >> 0) % (1U << 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 4);
+  *out = (inl >> 4) % (1U << 4);
   out++;
-  *out = ((*in) >> 8) % (1U << 4);
+  *out = (inl >> 8) % (1U << 4);
   out++;
-  *out = ((*in) >> 12) % (1U << 4);
+  *out = (inl >> 12) % (1U << 4);
   out++;
-  *out = ((*in) >> 16) % (1U << 4);
+  *out = (inl >> 16) % (1U << 4);
   out++;
-  *out = ((*in) >> 20) % (1U << 4);
+  *out = (inl >> 20) % (1U << 4);
   out++;
-  *out = ((*in) >> 24) % (1U << 4);
+  *out = (inl >> 24) % (1U << 4);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 4);
+  *out = (inl >> 0) % (1U << 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 4);
+  *out = (inl >> 4) % (1U << 4);
   out++;
-  *out = ((*in) >> 8) % (1U << 4);
+  *out = (inl >> 8) % (1U << 4);
   out++;
-  *out = ((*in) >> 12) % (1U << 4);
+  *out = (inl >> 12) % (1U << 4);
   out++;
-  *out = ((*in) >> 16) % (1U << 4);
+  *out = (inl >> 16) % (1U << 4);
   out++;
-  *out = ((*in) >> 20) % (1U << 4);
+  *out = (inl >> 20) % (1U << 4);
   out++;
-  *out = ((*in) >> 24) % (1U << 4);
+  *out = (inl >> 24) % (1U << 4);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 4);
+  *out = (inl >> 0) % (1U << 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 4);
+  *out = (inl >> 4) % (1U << 4);
   out++;
-  *out = ((*in) >> 8) % (1U << 4);
+  *out = (inl >> 8) % (1U << 4);
   out++;
-  *out = ((*in) >> 12) % (1U << 4);
+  *out = (inl >> 12) % (1U << 4);
   out++;
-  *out = ((*in) >> 16) % (1U << 4);
+  *out = (inl >> 16) % (1U << 4);
   out++;
-  *out = ((*in) >> 20) % (1U << 4);
+  *out = (inl >> 20) % (1U << 4);
   out++;
-  *out = ((*in) >> 24) % (1U << 4);
+  *out = (inl >> 24) % (1U << 4);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
   out++;
 
@@ -321,77 +332,82 @@ inline const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 5);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 5);
+  *out = (inl >> 5) % (1U << 5);
   out++;
-  *out = ((*in) >> 10) % (1U << 5);
+  *out = (inl >> 10) % (1U << 5);
   out++;
-  *out = ((*in) >> 15) % (1U << 5);
+  *out = (inl >> 15) % (1U << 5);
   out++;
-  *out = ((*in) >> 20) % (1U << 5);
+  *out = (inl >> 20) % (1U << 5);
   out++;
-  *out = ((*in) >> 25) % (1U << 5);
+  *out = (inl >> 25) % (1U << 5);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (5 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (5 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 5);
+  *out = (inl >> 3) % (1U << 5);
   out++;
-  *out = ((*in) >> 8) % (1U << 5);
+  *out = (inl >> 8) % (1U << 5);
   out++;
-  *out = ((*in) >> 13) % (1U << 5);
+  *out = (inl >> 13) % (1U << 5);
   out++;
-  *out = ((*in) >> 18) % (1U << 5);
+  *out = (inl >> 18) % (1U << 5);
   out++;
-  *out = ((*in) >> 23) % (1U << 5);
+  *out = (inl >> 23) % (1U << 5);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (5 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (5 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 5);
+  *out = (inl >> 1) % (1U << 5);
   out++;
-  *out = ((*in) >> 6) % (1U << 5);
+  *out = (inl >> 6) % (1U << 5);
   out++;
-  *out = ((*in) >> 11) % (1U << 5);
+  *out = (inl >> 11) % (1U << 5);
   out++;
-  *out = ((*in) >> 16) % (1U << 5);
+  *out = (inl >> 16) % (1U << 5);
   out++;
-  *out = ((*in) >> 21) % (1U << 5);
+  *out = (inl >> 21) % (1U << 5);
   out++;
-  *out = ((*in) >> 26) % (1U << 5);
+  *out = (inl >> 26) % (1U << 5);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (5 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (5 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 5);
+  *out = (inl >> 4) % (1U << 5);
   out++;
-  *out = ((*in) >> 9) % (1U << 5);
+  *out = (inl >> 9) % (1U << 5);
   out++;
-  *out = ((*in) >> 14) % (1U << 5);
+  *out = (inl >> 14) % (1U << 5);
   out++;
-  *out = ((*in) >> 19) % (1U << 5);
+  *out = (inl >> 19) % (1U << 5);
   out++;
-  *out = ((*in) >> 24) % (1U << 5);
+  *out = (inl >> 24) % (1U << 5);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (5 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (5 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 5);
+  *out = (inl >> 2) % (1U << 5);
   out++;
-  *out = ((*in) >> 7) % (1U << 5);
+  *out = (inl >> 7) % (1U << 5);
   out++;
-  *out = ((*in) >> 12) % (1U << 5);
+  *out = (inl >> 12) % (1U << 5);
   out++;
-  *out = ((*in) >> 17) % (1U << 5);
+  *out = (inl >> 17) % (1U << 5);
   out++;
-  *out = ((*in) >> 22) % (1U << 5);
+  *out = (inl >> 22) % (1U << 5);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
   out++;
 
@@ -399,78 +415,84 @@ inline const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 6);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 6);
+  *out = (inl >> 6) % (1U << 6);
   out++;
-  *out = ((*in) >> 12) % (1U << 6);
+  *out = (inl >> 12) % (1U << 6);
   out++;
-  *out = ((*in) >> 18) % (1U << 6);
+  *out = (inl >> 18) % (1U << 6);
   out++;
-  *out = ((*in) >> 24) % (1U << 6);
+  *out = (inl >> 24) % (1U << 6);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (6 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (6 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 6);
+  *out = (inl >> 4) % (1U << 6);
   out++;
-  *out = ((*in) >> 10) % (1U << 6);
+  *out = (inl >> 10) % (1U << 6);
   out++;
-  *out = ((*in) >> 16) % (1U << 6);
+  *out = (inl >> 16) % (1U << 6);
   out++;
-  *out = ((*in) >> 22) % (1U << 6);
+  *out = (inl >> 22) % (1U << 6);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (6 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (6 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 6);
+  *out = (inl >> 2) % (1U << 6);
   out++;
-  *out = ((*in) >> 8) % (1U << 6);
+  *out = (inl >> 8) % (1U << 6);
   out++;
-  *out = ((*in) >> 14) % (1U << 6);
+  *out = (inl >> 14) % (1U << 6);
   out++;
-  *out = ((*in) >> 20) % (1U << 6);
+  *out = (inl >> 20) % (1U << 6);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 6);
+  *out = (inl >> 0) % (1U << 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 6);
+  *out = (inl >> 6) % (1U << 6);
   out++;
-  *out = ((*in) >> 12) % (1U << 6);
+  *out = (inl >> 12) % (1U << 6);
   out++;
-  *out = ((*in) >> 18) % (1U << 6);
+  *out = (inl >> 18) % (1U << 6);
   out++;
-  *out = ((*in) >> 24) % (1U << 6);
+  *out = (inl >> 24) % (1U << 6);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (6 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (6 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 6);
+  *out = (inl >> 4) % (1U << 6);
   out++;
-  *out = ((*in) >> 10) % (1U << 6);
+  *out = (inl >> 10) % (1U << 6);
   out++;
-  *out = ((*in) >> 16) % (1U << 6);
+  *out = (inl >> 16) % (1U << 6);
   out++;
-  *out = ((*in) >> 22) % (1U << 6);
+  *out = (inl >> 22) % (1U << 6);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (6 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (6 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 6);
+  *out = (inl >> 2) % (1U << 6);
   out++;
-  *out = ((*in) >> 8) % (1U << 6);
+  *out = (inl >> 8) % (1U << 6);
   out++;
-  *out = ((*in) >> 14) % (1U << 6);
+  *out = (inl >> 14) % (1U << 6);
   out++;
-  *out = ((*in) >> 20) % (1U << 6);
+  *out = (inl >> 20) % (1U << 6);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
   out++;
 
@@ -478,81 +500,88 @@ inline const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 7);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 7);
+  *out = (inl >> 7) % (1U << 7);
   out++;
-  *out = ((*in) >> 14) % (1U << 7);
+  *out = (inl >> 14) % (1U << 7);
   out++;
-  *out = ((*in) >> 21) % (1U << 7);
+  *out = (inl >> 21) % (1U << 7);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (7 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (7 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 7);
+  *out = (inl >> 3) % (1U << 7);
   out++;
-  *out = ((*in) >> 10) % (1U << 7);
+  *out = (inl >> 10) % (1U << 7);
   out++;
-  *out = ((*in) >> 17) % (1U << 7);
+  *out = (inl >> 17) % (1U << 7);
   out++;
-  *out = ((*in) >> 24) % (1U << 7);
+  *out = (inl >> 24) % (1U << 7);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (7 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (7 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 7);
+  *out = (inl >> 6) % (1U << 7);
   out++;
-  *out = ((*in) >> 13) % (1U << 7);
+  *out = (inl >> 13) % (1U << 7);
   out++;
-  *out = ((*in) >> 20) % (1U << 7);
+  *out = (inl >> 20) % (1U << 7);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (7 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (7 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 7);
+  *out = (inl >> 2) % (1U << 7);
   out++;
-  *out = ((*in) >> 9) % (1U << 7);
+  *out = (inl >> 9) % (1U << 7);
   out++;
-  *out = ((*in) >> 16) % (1U << 7);
+  *out = (inl >> 16) % (1U << 7);
   out++;
-  *out = ((*in) >> 23) % (1U << 7);
+  *out = (inl >> 23) % (1U << 7);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (7 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (7 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 7);
+  *out = (inl >> 5) % (1U << 7);
   out++;
-  *out = ((*in) >> 12) % (1U << 7);
+  *out = (inl >> 12) % (1U << 7);
   out++;
-  *out = ((*in) >> 19) % (1U << 7);
+  *out = (inl >> 19) % (1U << 7);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (7 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (7 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 7);
+  *out = (inl >> 1) % (1U << 7);
   out++;
-  *out = ((*in) >> 8) % (1U << 7);
+  *out = (inl >> 8) % (1U << 7);
   out++;
-  *out = ((*in) >> 15) % (1U << 7);
+  *out = (inl >> 15) % (1U << 7);
   out++;
-  *out = ((*in) >> 22) % (1U << 7);
+  *out = (inl >> 22) % (1U << 7);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (7 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (7 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 7);
+  *out = (inl >> 4) % (1U << 7);
   out++;
-  *out = ((*in) >> 11) % (1U << 7);
+  *out = (inl >> 11) % (1U << 7);
   out++;
-  *out = ((*in) >> 18) % (1U << 7);
+  *out = (inl >> 18) % (1U << 7);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
   out++;
 
@@ -560,76 +589,84 @@ inline const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 8);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 8);
+  *out = (inl >> 0) % (1U << 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 8);
+  *out = (inl >> 8) % (1U << 8);
   out++;
-  *out = ((*in) >> 16) % (1U << 8);
+  *out = (inl >> 16) % (1U << 8);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
   out++;
 
@@ -637,85 +674,94 @@ inline const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 9);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 9);
+  *out = (inl >> 9) % (1U << 9);
   out++;
-  *out = ((*in) >> 18) % (1U << 9);
+  *out = (inl >> 18) % (1U << 9);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (9 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (9 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 9);
+  *out = (inl >> 4) % (1U << 9);
   out++;
-  *out = ((*in) >> 13) % (1U << 9);
+  *out = (inl >> 13) % (1U << 9);
   out++;
-  *out = ((*in) >> 22) % (1U << 9);
+  *out = (inl >> 22) % (1U << 9);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (9 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (9 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 9);
+  *out = (inl >> 8) % (1U << 9);
   out++;
-  *out = ((*in) >> 17) % (1U << 9);
+  *out = (inl >> 17) % (1U << 9);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (9 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (9 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 9);
+  *out = (inl >> 3) % (1U << 9);
   out++;
-  *out = ((*in) >> 12) % (1U << 9);
+  *out = (inl >> 12) % (1U << 9);
   out++;
-  *out = ((*in) >> 21) % (1U << 9);
+  *out = (inl >> 21) % (1U << 9);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (9 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (9 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 9);
+  *out = (inl >> 7) % (1U << 9);
   out++;
-  *out = ((*in) >> 16) % (1U << 9);
+  *out = (inl >> 16) % (1U << 9);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (9 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (9 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 9);
+  *out = (inl >> 2) % (1U << 9);
   out++;
-  *out = ((*in) >> 11) % (1U << 9);
+  *out = (inl >> 11) % (1U << 9);
   out++;
-  *out = ((*in) >> 20) % (1U << 9);
+  *out = (inl >> 20) % (1U << 9);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (9 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (9 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 9);
+  *out = (inl >> 6) % (1U << 9);
   out++;
-  *out = ((*in) >> 15) % (1U << 9);
+  *out = (inl >> 15) % (1U << 9);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (9 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (9 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 9);
+  *out = (inl >> 1) % (1U << 9);
   out++;
-  *out = ((*in) >> 10) % (1U << 9);
+  *out = (inl >> 10) % (1U << 9);
   out++;
-  *out = ((*in) >> 19) % (1U << 9);
+  *out = (inl >> 19) % (1U << 9);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (9 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (9 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 9);
+  *out = (inl >> 5) % (1U << 9);
   out++;
-  *out = ((*in) >> 14) % (1U << 9);
+  *out = (inl >> 14) % (1U << 9);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
   out++;
 
@@ -723,86 +769,96 @@ inline const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 10);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 10);
+  *out = (inl >> 10) % (1U << 10);
   out++;
-  *out = ((*in) >> 20) % (1U << 10);
+  *out = (inl >> 20) % (1U << 10);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (10 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (10 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 10);
+  *out = (inl >> 8) % (1U << 10);
   out++;
-  *out = ((*in) >> 18) % (1U << 10);
+  *out = (inl >> 18) % (1U << 10);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (10 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (10 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 10);
+  *out = (inl >> 6) % (1U << 10);
   out++;
-  *out = ((*in) >> 16) % (1U << 10);
+  *out = (inl >> 16) % (1U << 10);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (10 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (10 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 10);
+  *out = (inl >> 4) % (1U << 10);
   out++;
-  *out = ((*in) >> 14) % (1U << 10);
+  *out = (inl >> 14) % (1U << 10);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (10 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (10 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 10);
+  *out = (inl >> 2) % (1U << 10);
   out++;
-  *out = ((*in) >> 12) % (1U << 10);
+  *out = (inl >> 12) % (1U << 10);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 10);
+  *out = (inl >> 0) % (1U << 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 10);
+  *out = (inl >> 10) % (1U << 10);
   out++;
-  *out = ((*in) >> 20) % (1U << 10);
+  *out = (inl >> 20) % (1U << 10);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (10 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (10 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 10);
+  *out = (inl >> 8) % (1U << 10);
   out++;
-  *out = ((*in) >> 18) % (1U << 10);
+  *out = (inl >> 18) % (1U << 10);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (10 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (10 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 10);
+  *out = (inl >> 6) % (1U << 10);
   out++;
-  *out = ((*in) >> 16) % (1U << 10);
+  *out = (inl >> 16) % (1U << 10);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (10 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (10 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 10);
+  *out = (inl >> 4) % (1U << 10);
   out++;
-  *out = ((*in) >> 14) % (1U << 10);
+  *out = (inl >> 14) % (1U << 10);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (10 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (10 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 10);
+  *out = (inl >> 2) % (1U << 10);
   out++;
-  *out = ((*in) >> 12) % (1U << 10);
+  *out = (inl >> 12) % (1U << 10);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
   out++;
 
@@ -810,89 +866,100 @@ inline const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 11);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 11);
   out++;
-  *out = ((*in) >> 11) % (1U << 11);
+  *out = (inl >> 11) % (1U << 11);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (11 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (11 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 11);
+  *out = (inl >> 1) % (1U << 11);
   out++;
-  *out = ((*in) >> 12) % (1U << 11);
+  *out = (inl >> 12) % (1U << 11);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (11 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (11 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 11);
+  *out = (inl >> 2) % (1U << 11);
   out++;
-  *out = ((*in) >> 13) % (1U << 11);
+  *out = (inl >> 13) % (1U << 11);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (11 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (11 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 11);
+  *out = (inl >> 3) % (1U << 11);
   out++;
-  *out = ((*in) >> 14) % (1U << 11);
+  *out = (inl >> 14) % (1U << 11);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (11 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (11 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 11);
+  *out = (inl >> 4) % (1U << 11);
   out++;
-  *out = ((*in) >> 15) % (1U << 11);
+  *out = (inl >> 15) % (1U << 11);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (11 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (11 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 11);
+  *out = (inl >> 5) % (1U << 11);
   out++;
-  *out = ((*in) >> 16) % (1U << 11);
+  *out = (inl >> 16) % (1U << 11);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (11 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (11 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 11);
+  *out = (inl >> 6) % (1U << 11);
   out++;
-  *out = ((*in) >> 17) % (1U << 11);
+  *out = (inl >> 17) % (1U << 11);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (11 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (11 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 11);
+  *out = (inl >> 7) % (1U << 11);
   out++;
-  *out = ((*in) >> 18) % (1U << 11);
+  *out = (inl >> 18) % (1U << 11);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (11 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (11 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 11);
+  *out = (inl >> 8) % (1U << 11);
   out++;
-  *out = ((*in) >> 19) % (1U << 11);
+  *out = (inl >> 19) % (1U << 11);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (11 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (11 - 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 11);
+  *out = (inl >> 9) % (1U << 11);
   out++;
-  *out = ((*in) >> 20) % (1U << 11);
+  *out = (inl >> 20) % (1U << 11);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (11 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (11 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 11);
+  *out = (inl >> 10) % (1U << 11);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
   out++;
 
@@ -900,88 +967,100 @@ inline const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 12);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 12);
+  *out = (inl >> 12) % (1U << 12);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (12 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (12 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 12);
+  *out = (inl >> 4) % (1U << 12);
   out++;
-  *out = ((*in) >> 16) % (1U << 12);
+  *out = (inl >> 16) % (1U << 12);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (12 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (12 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 12);
+  *out = (inl >> 8) % (1U << 12);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 12);
+  *out = (inl >> 0) % (1U << 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 12);
+  *out = (inl >> 12) % (1U << 12);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (12 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (12 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 12);
+  *out = (inl >> 4) % (1U << 12);
   out++;
-  *out = ((*in) >> 16) % (1U << 12);
+  *out = (inl >> 16) % (1U << 12);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (12 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (12 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 12);
+  *out = (inl >> 8) % (1U << 12);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 12);
+  *out = (inl >> 0) % (1U << 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 12);
+  *out = (inl >> 12) % (1U << 12);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (12 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (12 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 12);
+  *out = (inl >> 4) % (1U << 12);
   out++;
-  *out = ((*in) >> 16) % (1U << 12);
+  *out = (inl >> 16) % (1U << 12);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (12 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (12 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 12);
+  *out = (inl >> 8) % (1U << 12);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 12);
+  *out = (inl >> 0) % (1U << 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 12);
+  *out = (inl >> 12) % (1U << 12);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (12 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (12 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 12);
+  *out = (inl >> 4) % (1U << 12);
   out++;
-  *out = ((*in) >> 16) % (1U << 12);
+  *out = (inl >> 16) % (1U << 12);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (12 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (12 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 12);
+  *out = (inl >> 8) % (1U << 12);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
   out++;
 
@@ -989,93 +1068,106 @@ inline const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 13);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 13);
   out++;
-  *out = ((*in) >> 13) % (1U << 13);
+  *out = (inl >> 13) % (1U << 13);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (13 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (13 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 13);
+  *out = (inl >> 7) % (1U << 13);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (13 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (13 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 13);
+  *out = (inl >> 1) % (1U << 13);
   out++;
-  *out = ((*in) >> 14) % (1U << 13);
+  *out = (inl >> 14) % (1U << 13);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (13 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (13 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 13);
+  *out = (inl >> 8) % (1U << 13);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (13 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (13 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 13);
+  *out = (inl >> 2) % (1U << 13);
   out++;
-  *out = ((*in) >> 15) % (1U << 13);
+  *out = (inl >> 15) % (1U << 13);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (13 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (13 - 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 13);
+  *out = (inl >> 9) % (1U << 13);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (13 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (13 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 13);
+  *out = (inl >> 3) % (1U << 13);
   out++;
-  *out = ((*in) >> 16) % (1U << 13);
+  *out = (inl >> 16) % (1U << 13);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (13 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (13 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 13);
+  *out = (inl >> 10) % (1U << 13);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (13 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (13 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 13);
+  *out = (inl >> 4) % (1U << 13);
   out++;
-  *out = ((*in) >> 17) % (1U << 13);
+  *out = (inl >> 17) % (1U << 13);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (13 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (13 - 11);
   out++;
-  *out = ((*in) >> 11) % (1U << 13);
+  *out = (inl >> 11) % (1U << 13);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (13 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (13 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 13);
+  *out = (inl >> 5) % (1U << 13);
   out++;
-  *out = ((*in) >> 18) % (1U << 13);
+  *out = (inl >> 18) % (1U << 13);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (13 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (13 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 13);
+  *out = (inl >> 12) % (1U << 13);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (13 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (13 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 13);
+  *out = (inl >> 6) % (1U << 13);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
   out++;
 
@@ -1083,94 +1175,108 @@ inline const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 14);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 14);
   out++;
-  *out = ((*in) >> 14) % (1U << 14);
+  *out = (inl >> 14) % (1U << 14);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (14 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (14 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 14);
+  *out = (inl >> 10) % (1U << 14);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (14 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (14 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 14);
+  *out = (inl >> 6) % (1U << 14);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (14 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (14 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 14);
+  *out = (inl >> 2) % (1U << 14);
   out++;
-  *out = ((*in) >> 16) % (1U << 14);
+  *out = (inl >> 16) % (1U << 14);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (14 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (14 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 14);
+  *out = (inl >> 12) % (1U << 14);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (14 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (14 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 14);
+  *out = (inl >> 8) % (1U << 14);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (14 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (14 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 14);
+  *out = (inl >> 4) % (1U << 14);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 14);
+  *out = (inl >> 0) % (1U << 14);
   out++;
-  *out = ((*in) >> 14) % (1U << 14);
+  *out = (inl >> 14) % (1U << 14);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (14 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (14 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 14);
+  *out = (inl >> 10) % (1U << 14);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (14 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (14 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 14);
+  *out = (inl >> 6) % (1U << 14);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (14 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (14 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 14);
+  *out = (inl >> 2) % (1U << 14);
   out++;
-  *out = ((*in) >> 16) % (1U << 14);
+  *out = (inl >> 16) % (1U << 14);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (14 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (14 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 14);
+  *out = (inl >> 12) % (1U << 14);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (14 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (14 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 14);
+  *out = (inl >> 8) % (1U << 14);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (14 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (14 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 14);
+  *out = (inl >> 4) % (1U << 14);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
   out++;
 
@@ -1178,97 +1284,112 @@ inline const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 15);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 15);
   out++;
-  *out = ((*in) >> 15) % (1U << 15);
+  *out = (inl >> 15) % (1U << 15);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (15 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (15 - 13);
   out++;
-  *out = ((*in) >> 13) % (1U << 15);
+  *out = (inl >> 13) % (1U << 15);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (15 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (15 - 11);
   out++;
-  *out = ((*in) >> 11) % (1U << 15);
+  *out = (inl >> 11) % (1U << 15);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (15 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (15 - 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 15);
+  *out = (inl >> 9) % (1U << 15);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (15 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (15 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 15);
+  *out = (inl >> 7) % (1U << 15);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (15 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (15 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 15);
+  *out = (inl >> 5) % (1U << 15);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (15 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (15 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 15);
+  *out = (inl >> 3) % (1U << 15);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (15 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (15 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 15);
+  *out = (inl >> 1) % (1U << 15);
   out++;
-  *out = ((*in) >> 16) % (1U << 15);
+  *out = (inl >> 16) % (1U << 15);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (15 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (15 - 14);
   out++;
-  *out = ((*in) >> 14) % (1U << 15);
+  *out = (inl >> 14) % (1U << 15);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (15 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (15 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 15);
+  *out = (inl >> 12) % (1U << 15);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (15 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (15 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 15);
+  *out = (inl >> 10) % (1U << 15);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (15 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (15 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 15);
+  *out = (inl >> 8) % (1U << 15);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (15 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (15 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 15);
+  *out = (inl >> 6) % (1U << 15);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (15 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (15 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 15);
+  *out = (inl >> 4) % (1U << 15);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (15 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (15 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 15);
+  *out = (inl >> 2) % (1U << 15);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
   out++;
 
@@ -1276,84 +1397,100 @@ inline const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 16);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 16);
+  *out = (inl >> 0) % (1U << 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
   out++;
 
@@ -1361,101 +1498,118 @@ inline const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 17);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (17 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (17 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 17);
+  *out = (inl >> 2) % (1U << 17);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (17 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (17 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 17);
+  *out = (inl >> 4) % (1U << 17);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (17 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (17 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 17);
+  *out = (inl >> 6) % (1U << 17);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (17 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (17 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 17);
+  *out = (inl >> 8) % (1U << 17);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (17 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (17 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 17);
+  *out = (inl >> 10) % (1U << 17);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (17 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (17 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 17);
+  *out = (inl >> 12) % (1U << 17);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (17 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (17 - 14);
   out++;
-  *out = ((*in) >> 14) % (1U << 17);
+  *out = (inl >> 14) % (1U << 17);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (17 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (17 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (17 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (17 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 17);
+  *out = (inl >> 1) % (1U << 17);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (17 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (17 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 17);
+  *out = (inl >> 3) % (1U << 17);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (17 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (17 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 17);
+  *out = (inl >> 5) % (1U << 17);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (17 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (17 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 17);
+  *out = (inl >> 7) % (1U << 17);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (17 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (17 - 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 17);
+  *out = (inl >> 9) % (1U << 17);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (17 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (17 - 11);
   out++;
-  *out = ((*in) >> 11) % (1U << 17);
+  *out = (inl >> 11) % (1U << 17);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (17 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (17 - 13);
   out++;
-  *out = ((*in) >> 13) % (1U << 17);
+  *out = (inl >> 13) % (1U << 17);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (17 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (17 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
   out++;
 
@@ -1463,102 +1617,120 @@ inline const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 18);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (18 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (18 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 18);
+  *out = (inl >> 4) % (1U << 18);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (18 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (18 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 18);
+  *out = (inl >> 8) % (1U << 18);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (18 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (18 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 18);
+  *out = (inl >> 12) % (1U << 18);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (18 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (18 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (18 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (18 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 18);
+  *out = (inl >> 2) % (1U << 18);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (18 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (18 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 18);
+  *out = (inl >> 6) % (1U << 18);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (18 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (18 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 18);
+  *out = (inl >> 10) % (1U << 18);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (18 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (18 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 18);
+  *out = (inl >> 0) % (1U << 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (18 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (18 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 18);
+  *out = (inl >> 4) % (1U << 18);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (18 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (18 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 18);
+  *out = (inl >> 8) % (1U << 18);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (18 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (18 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 18);
+  *out = (inl >> 12) % (1U << 18);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (18 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (18 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (18 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (18 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 18);
+  *out = (inl >> 2) % (1U << 18);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (18 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (18 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 18);
+  *out = (inl >> 6) % (1U << 18);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (18 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (18 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 18);
+  *out = (inl >> 10) % (1U << 18);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (18 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (18 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
   out++;
 
@@ -1566,105 +1738,124 @@ inline const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 19);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (19 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (19 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 19);
+  *out = (inl >> 6) % (1U << 19);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (19 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (19 - 12);
   out++;
-  *out = ((*in) >> 12) % (1U << 19);
+  *out = (inl >> 12) % (1U << 19);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (19 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (19 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (19 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (19 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 19);
+  *out = (inl >> 5) % (1U << 19);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (19 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (19 - 11);
   out++;
-  *out = ((*in) >> 11) % (1U << 19);
+  *out = (inl >> 11) % (1U << 19);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (19 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (19 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (19 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (19 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 19);
+  *out = (inl >> 4) % (1U << 19);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (19 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (19 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 19);
+  *out = (inl >> 10) % (1U << 19);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (19 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (19 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (19 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (19 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 19);
+  *out = (inl >> 3) % (1U << 19);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (19 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (19 - 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 19);
+  *out = (inl >> 9) % (1U << 19);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (19 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (19 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (19 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (19 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 19);
+  *out = (inl >> 2) % (1U << 19);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (19 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (19 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 19);
+  *out = (inl >> 8) % (1U << 19);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (19 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (19 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (19 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (19 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 19);
+  *out = (inl >> 1) % (1U << 19);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (19 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (19 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 19);
+  *out = (inl >> 7) % (1U << 19);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (19 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (19 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
   out++;
 
@@ -1672,104 +1863,124 @@ inline const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 20);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (20 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (20 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 20);
+  *out = (inl >> 8) % (1U << 20);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (20 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (20 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (20 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (20 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 20);
+  *out = (inl >> 4) % (1U << 20);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (20 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (20 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 20);
+  *out = (inl >> 0) % (1U << 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (20 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (20 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 20);
+  *out = (inl >> 8) % (1U << 20);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (20 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (20 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (20 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (20 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 20);
+  *out = (inl >> 4) % (1U << 20);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (20 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (20 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 20);
+  *out = (inl >> 0) % (1U << 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (20 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (20 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 20);
+  *out = (inl >> 8) % (1U << 20);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (20 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (20 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (20 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (20 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 20);
+  *out = (inl >> 4) % (1U << 20);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (20 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (20 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 20);
+  *out = (inl >> 0) % (1U << 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (20 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (20 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 20);
+  *out = (inl >> 8) % (1U << 20);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (20 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (20 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (20 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (20 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 20);
+  *out = (inl >> 4) % (1U << 20);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (20 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (20 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
   out++;
 
@@ -1777,109 +1988,130 @@ inline const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 21);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 21);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (21 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (21 - 10);
   out++;
-  *out = ((*in) >> 10) % (1U << 21);
+  *out = (inl >> 10) % (1U << 21);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (21 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (21 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (21 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (21 - 9);
   out++;
-  *out = ((*in) >> 9) % (1U << 21);
+  *out = (inl >> 9) % (1U << 21);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 19)) << (21 - 19);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 19)) << (21 - 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (21 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (21 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 21);
+  *out = (inl >> 8) % (1U << 21);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (21 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (21 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (21 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (21 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 21);
+  *out = (inl >> 7) % (1U << 21);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (21 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (21 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (21 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (21 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 21);
+  *out = (inl >> 6) % (1U << 21);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (21 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (21 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (21 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (21 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 21);
+  *out = (inl >> 5) % (1U << 21);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (21 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (21 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (21 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (21 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 21);
+  *out = (inl >> 4) % (1U << 21);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (21 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (21 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (21 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (21 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 21);
+  *out = (inl >> 3) % (1U << 21);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (21 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (21 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (21 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (21 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 21);
+  *out = (inl >> 2) % (1U << 21);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (21 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (21 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (21 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (21 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 21);
+  *out = (inl >> 1) % (1U << 21);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (21 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (21 - 11);
   out++;
-  *out = ((*in) >> 11);
+  *out = (inl >> 11);
   ++in;
   out++;
 
@@ -1887,110 +2119,132 @@ inline const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 22);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (22 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (22 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (22 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (22 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 22);
+  *out = (inl >> 2) % (1U << 22);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (22 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (22 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (22 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (22 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 22);
+  *out = (inl >> 4) % (1U << 22);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (22 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (22 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (22 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (22 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 22);
+  *out = (inl >> 6) % (1U << 22);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (22 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (22 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (22 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (22 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 22);
+  *out = (inl >> 8) % (1U << 22);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (22 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (22 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (22 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (22 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 22);
+  *out = (inl >> 0) % (1U << 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (22 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (22 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (22 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (22 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 22);
+  *out = (inl >> 2) % (1U << 22);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (22 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (22 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (22 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (22 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 22);
+  *out = (inl >> 4) % (1U << 22);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (22 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (22 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (22 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (22 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 22);
+  *out = (inl >> 6) % (1U << 22);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (22 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (22 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (22 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (22 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 22);
+  *out = (inl >> 8) % (1U << 22);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (22 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (22 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (22 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (22 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
   out++;
 
@@ -1998,113 +2252,136 @@ inline const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 23);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 23);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (23 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (23 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (23 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (23 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 23);
+  *out = (inl >> 5) % (1U << 23);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 19)) << (23 - 19);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 19)) << (23 - 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (23 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (23 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (23 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (23 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 23);
+  *out = (inl >> 1) % (1U << 23);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (23 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (23 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (23 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (23 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 23);
+  *out = (inl >> 6) % (1U << 23);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (23 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (23 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (23 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (23 - 11);
   out++;
-  *out = ((*in) >> 11);
+  *out = (inl >> 11);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (23 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (23 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 23);
+  *out = (inl >> 2) % (1U << 23);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (23 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (23 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (23 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (23 - 7);
   out++;
-  *out = ((*in) >> 7) % (1U << 23);
+  *out = (inl >> 7) % (1U << 23);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 21)) << (23 - 21);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 21)) << (23 - 21);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (23 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (23 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (23 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (23 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 23);
+  *out = (inl >> 3) % (1U << 23);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (23 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (23 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (23 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (23 - 8);
   out++;
-  *out = ((*in) >> 8) % (1U << 23);
+  *out = (inl >> 8) % (1U << 23);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (23 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (23 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (23 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (23 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (23 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (23 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 23);
+  *out = (inl >> 4) % (1U << 23);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (23 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (23 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (23 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (23 - 9);
   out++;
-  *out = ((*in) >> 9);
+  *out = (inl >> 9);
   ++in;
   out++;
 
@@ -2112,108 +2389,132 @@ inline const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 24);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 24);
+  *out = (inl >> 0) % (1U << 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (24 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (24 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (24 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (24 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
   out++;
 
@@ -2221,117 +2522,142 @@ inline const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 25);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 25);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (25 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (25 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (25 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (25 - 11);
   out++;
-  *out = ((*in) >> 11);
+  *out = (inl >> 11);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (25 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (25 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 25);
+  *out = (inl >> 4) % (1U << 25);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (25 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (25 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (25 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (25 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (25 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (25 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (25 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (25 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 25);
+  *out = (inl >> 1) % (1U << 25);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 19)) << (25 - 19);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 19)) << (25 - 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (25 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (25 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (25 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (25 - 5);
   out++;
-  *out = ((*in) >> 5) % (1U << 25);
+  *out = (inl >> 5) % (1U << 25);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 23)) << (25 - 23);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 23)) << (25 - 23);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (25 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (25 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (25 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (25 - 9);
   out++;
-  *out = ((*in) >> 9);
+  *out = (inl >> 9);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (25 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (25 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 25);
+  *out = (inl >> 2) % (1U << 25);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (25 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (25 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (25 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (25 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (25 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (25 - 6);
   out++;
-  *out = ((*in) >> 6) % (1U << 25);
+  *out = (inl >> 6) % (1U << 25);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (25 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (25 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (25 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (25 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (25 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (25 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (25 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (25 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 25);
+  *out = (inl >> 3) % (1U << 25);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 21)) << (25 - 21);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 21)) << (25 - 21);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (25 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (25 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (25 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (25 - 7);
   out++;
-  *out = ((*in) >> 7);
+  *out = (inl >> 7);
   ++in;
   out++;
 
@@ -2339,118 +2665,144 @@ inline const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 26);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (26 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (26 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (26 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (26 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (26 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (26 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (26 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (26 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 26);
+  *out = (inl >> 2) % (1U << 26);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (26 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (26 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (26 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (26 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (26 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (26 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (26 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (26 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 26);
+  *out = (inl >> 4) % (1U << 26);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (26 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (26 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (26 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (26 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (26 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (26 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (26 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (26 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 26);
+  *out = (inl >> 0) % (1U << 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (26 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (26 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (26 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (26 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (26 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (26 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (26 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (26 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 26);
+  *out = (inl >> 2) % (1U << 26);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (26 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (26 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (26 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (26 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (26 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (26 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (26 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (26 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 26);
+  *out = (inl >> 4) % (1U << 26);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (26 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (26 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (26 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (26 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (26 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (26 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (26 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (26 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
   out++;
 
@@ -2458,121 +2810,148 @@ inline const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 27);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 27);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (27 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (27 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (27 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (27 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (27 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (27 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (27 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (27 - 7);
   out++;
-  *out = ((*in) >> 7);
+  *out = (inl >> 7);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (27 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (27 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 27);
+  *out = (inl >> 2) % (1U << 27);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (27 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (27 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 19)) << (27 - 19);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 19)) << (27 - 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (27 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (27 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (27 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (27 - 9);
   out++;
-  *out = ((*in) >> 9);
+  *out = (inl >> 9);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (27 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (27 - 4);
   out++;
-  *out = ((*in) >> 4) % (1U << 27);
+  *out = (inl >> 4) % (1U << 27);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 26)) << (27 - 26);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 26)) << (27 - 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 21)) << (27 - 21);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 21)) << (27 - 21);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (27 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (27 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (27 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (27 - 11);
   out++;
-  *out = ((*in) >> 11);
+  *out = (inl >> 11);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (27 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (27 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (27 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (27 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 27);
+  *out = (inl >> 1) % (1U << 27);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 23)) << (27 - 23);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 23)) << (27 - 23);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (27 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (27 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (27 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (27 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (27 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (27 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (27 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (27 - 3);
   out++;
-  *out = ((*in) >> 3) % (1U << 27);
+  *out = (inl >> 3) % (1U << 27);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 25)) << (27 - 25);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 25)) << (27 - 25);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (27 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (27 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (27 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (27 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (27 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (27 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (27 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (27 - 5);
   out++;
-  *out = ((*in) >> 5);
+  *out = (inl >> 5);
   ++in;
   out++;
 
@@ -2580,120 +2959,148 @@ inline const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 28);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (28 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (28 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (28 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (28 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (28 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (28 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (28 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (28 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (28 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (28 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (28 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (28 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 28);
+  *out = (inl >> 0) % (1U << 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (28 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (28 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (28 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (28 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (28 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (28 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (28 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (28 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (28 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (28 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (28 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (28 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 28);
+  *out = (inl >> 0) % (1U << 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (28 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (28 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (28 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (28 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (28 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (28 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (28 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (28 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (28 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (28 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (28 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (28 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 28);
+  *out = (inl >> 0) % (1U << 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (28 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (28 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (28 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (28 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (28 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (28 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (28 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (28 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (28 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (28 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (28 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (28 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
   out++;
 
@@ -2701,125 +3108,154 @@ inline const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 29);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 29);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 26)) << (29 - 26);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 26)) << (29 - 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 23)) << (29 - 23);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 23)) << (29 - 23);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (29 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (29 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (29 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (29 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (29 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (29 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (29 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (29 - 11);
   out++;
-  *out = ((*in) >> 11);
+  *out = (inl >> 11);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (29 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (29 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (29 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (29 - 5);
   out++;
-  *out = ((*in) >> 5);
+  *out = (inl >> 5);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (29 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (29 - 2);
   out++;
-  *out = ((*in) >> 2) % (1U << 29);
+  *out = (inl >> 2) % (1U << 29);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 28)) << (29 - 28);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 28)) << (29 - 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 25)) << (29 - 25);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 25)) << (29 - 25);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (29 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (29 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 19)) << (29 - 19);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 19)) << (29 - 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (29 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (29 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (29 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (29 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (29 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (29 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (29 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (29 - 7);
   out++;
-  *out = ((*in) >> 7);
+  *out = (inl >> 7);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (29 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (29 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (29 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (29 - 1);
   out++;
-  *out = ((*in) >> 1) % (1U << 29);
+  *out = (inl >> 1) % (1U << 29);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 27)) << (29 - 27);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 27)) << (29 - 27);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (29 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (29 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 21)) << (29 - 21);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 21)) << (29 - 21);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (29 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (29 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (29 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (29 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (29 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (29 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (29 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (29 - 9);
   out++;
-  *out = ((*in) >> 9);
+  *out = (inl >> 9);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (29 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (29 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (29 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (29 - 3);
   out++;
-  *out = ((*in) >> 3);
+  *out = (inl >> 3);
   ++in;
   out++;
 
@@ -2827,126 +3263,156 @@ inline const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 30);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 30);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 28)) << (30 - 28);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 28)) << (30 - 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 26)) << (30 - 26);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 26)) << (30 - 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (30 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (30 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (30 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (30 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (30 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (30 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (30 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (30 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (30 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (30 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (30 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (30 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (30 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (30 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (30 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (30 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (30 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (30 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (30 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (30 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (30 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (30 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (30 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (30 - 2);
   out++;
-  *out = ((*in) >> 2);
+  *out = (inl >> 2);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0) % (1U << 30);
+  *out = (inl >> 0) % (1U << 30);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 28)) << (30 - 28);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 28)) << (30 - 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 26)) << (30 - 26);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 26)) << (30 - 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (30 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (30 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (30 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (30 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (30 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (30 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (30 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (30 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (30 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (30 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (30 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (30 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (30 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (30 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (30 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (30 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (30 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (30 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (30 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (30 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (30 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (30 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (30 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (30 - 2);
   out++;
-  *out = ((*in) >> 2);
+  *out = (inl >> 2);
   ++in;
   out++;
 
@@ -2954,129 +3420,160 @@ inline const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0) % (1U << 31);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0) % (1U << 31);
   out++;
-  *out = ((*in) >> 31);
+  *out = (inl >> 31);
   ++in;
-  *out |= ((*in) % (1U << 30)) << (31 - 30);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 30)) << (31 - 30);
   out++;
-  *out = ((*in) >> 30);
+  *out = (inl >> 30);
   ++in;
-  *out |= ((*in) % (1U << 29)) << (31 - 29);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 29)) << (31 - 29);
   out++;
-  *out = ((*in) >> 29);
+  *out = (inl >> 29);
   ++in;
-  *out |= ((*in) % (1U << 28)) << (31 - 28);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 28)) << (31 - 28);
   out++;
-  *out = ((*in) >> 28);
+  *out = (inl >> 28);
   ++in;
-  *out |= ((*in) % (1U << 27)) << (31 - 27);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 27)) << (31 - 27);
   out++;
-  *out = ((*in) >> 27);
+  *out = (inl >> 27);
   ++in;
-  *out |= ((*in) % (1U << 26)) << (31 - 26);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 26)) << (31 - 26);
   out++;
-  *out = ((*in) >> 26);
+  *out = (inl >> 26);
   ++in;
-  *out |= ((*in) % (1U << 25)) << (31 - 25);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 25)) << (31 - 25);
   out++;
-  *out = ((*in) >> 25);
+  *out = (inl >> 25);
   ++in;
-  *out |= ((*in) % (1U << 24)) << (31 - 24);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 24)) << (31 - 24);
   out++;
-  *out = ((*in) >> 24);
+  *out = (inl >> 24);
   ++in;
-  *out |= ((*in) % (1U << 23)) << (31 - 23);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 23)) << (31 - 23);
   out++;
-  *out = ((*in) >> 23);
+  *out = (inl >> 23);
   ++in;
-  *out |= ((*in) % (1U << 22)) << (31 - 22);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 22)) << (31 - 22);
   out++;
-  *out = ((*in) >> 22);
+  *out = (inl >> 22);
   ++in;
-  *out |= ((*in) % (1U << 21)) << (31 - 21);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 21)) << (31 - 21);
   out++;
-  *out = ((*in) >> 21);
+  *out = (inl >> 21);
   ++in;
-  *out |= ((*in) % (1U << 20)) << (31 - 20);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 20)) << (31 - 20);
   out++;
-  *out = ((*in) >> 20);
+  *out = (inl >> 20);
   ++in;
-  *out |= ((*in) % (1U << 19)) << (31 - 19);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 19)) << (31 - 19);
   out++;
-  *out = ((*in) >> 19);
+  *out = (inl >> 19);
   ++in;
-  *out |= ((*in) % (1U << 18)) << (31 - 18);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 18)) << (31 - 18);
   out++;
-  *out = ((*in) >> 18);
+  *out = (inl >> 18);
   ++in;
-  *out |= ((*in) % (1U << 17)) << (31 - 17);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 17)) << (31 - 17);
   out++;
-  *out = ((*in) >> 17);
+  *out = (inl >> 17);
   ++in;
-  *out |= ((*in) % (1U << 16)) << (31 - 16);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 16)) << (31 - 16);
   out++;
-  *out = ((*in) >> 16);
+  *out = (inl >> 16);
   ++in;
-  *out |= ((*in) % (1U << 15)) << (31 - 15);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 15)) << (31 - 15);
   out++;
-  *out = ((*in) >> 15);
+  *out = (inl >> 15);
   ++in;
-  *out |= ((*in) % (1U << 14)) << (31 - 14);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 14)) << (31 - 14);
   out++;
-  *out = ((*in) >> 14);
+  *out = (inl >> 14);
   ++in;
-  *out |= ((*in) % (1U << 13)) << (31 - 13);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 13)) << (31 - 13);
   out++;
-  *out = ((*in) >> 13);
+  *out = (inl >> 13);
   ++in;
-  *out |= ((*in) % (1U << 12)) << (31 - 12);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 12)) << (31 - 12);
   out++;
-  *out = ((*in) >> 12);
+  *out = (inl >> 12);
   ++in;
-  *out |= ((*in) % (1U << 11)) << (31 - 11);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 11)) << (31 - 11);
   out++;
-  *out = ((*in) >> 11);
+  *out = (inl >> 11);
   ++in;
-  *out |= ((*in) % (1U << 10)) << (31 - 10);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 10)) << (31 - 10);
   out++;
-  *out = ((*in) >> 10);
+  *out = (inl >> 10);
   ++in;
-  *out |= ((*in) % (1U << 9)) << (31 - 9);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 9)) << (31 - 9);
   out++;
-  *out = ((*in) >> 9);
+  *out = (inl >> 9);
   ++in;
-  *out |= ((*in) % (1U << 8)) << (31 - 8);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 8)) << (31 - 8);
   out++;
-  *out = ((*in) >> 8);
+  *out = (inl >> 8);
   ++in;
-  *out |= ((*in) % (1U << 7)) << (31 - 7);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 7)) << (31 - 7);
   out++;
-  *out = ((*in) >> 7);
+  *out = (inl >> 7);
   ++in;
-  *out |= ((*in) % (1U << 6)) << (31 - 6);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 6)) << (31 - 6);
   out++;
-  *out = ((*in) >> 6);
+  *out = (inl >> 6);
   ++in;
-  *out |= ((*in) % (1U << 5)) << (31 - 5);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 5)) << (31 - 5);
   out++;
-  *out = ((*in) >> 5);
+  *out = (inl >> 5);
   ++in;
-  *out |= ((*in) % (1U << 4)) << (31 - 4);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 4)) << (31 - 4);
   out++;
-  *out = ((*in) >> 4);
+  *out = (inl >> 4);
   ++in;
-  *out |= ((*in) % (1U << 3)) << (31 - 3);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 3)) << (31 - 3);
   out++;
-  *out = ((*in) >> 3);
+  *out = (inl >> 3);
   ++in;
-  *out |= ((*in) % (1U << 2)) << (31 - 2);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 2)) << (31 - 2);
   out++;
-  *out = ((*in) >> 2);
+  *out = (inl >> 2);
   ++in;
-  *out |= ((*in) % (1U << 1)) << (31 - 1);
+  inl = util::SafeLoad(in);
+  *out |= (inl % (1U << 1)) << (31 - 1);
   out++;
-  *out = ((*in) >> 1);
+  *out = (inl >> 1);
   ++in;
   out++;
 
@@ -3084,100 +3581,132 @@ inline const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
 }
 
 inline const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
-  *out = ((*in) >> 0);
+  uint32_t inl = util::SafeLoad(in);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
+  inl = util::SafeLoad(in);
   out++;
-  *out = ((*in) >> 0);
+  *out = (inl >> 0);
   ++in;
   out++;
 
diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h
index 49641d8..c053da8 100644
--- a/cpp/src/arrow/util/hashing.h
+++ b/cpp/src/arrow/util/hashing.h
@@ -149,9 +149,8 @@ hash_t ComputeStringHash(const void* data, int64_t length) {
       // the results
       uint32_t x, y;
       hash_t hx, hy;
-      // XXX those are unaligned accesses.  Should we have a facility for that?
-      x = *reinterpret_cast<const uint32_t*>(p + n - 4);
-      y = *reinterpret_cast<const uint32_t*>(p);
+      x = util::SafeLoadAs<uint32_t>(p + n - 4);
+      y = util::SafeLoadAs<uint32_t>(p);
       hx = ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
       hy = ScalarHelper<uint32_t, AlgNum ^ 1>::ComputeHash(y);
       return n ^ hx ^ hy;
@@ -160,8 +159,8 @@ hash_t ComputeStringHash(const void* data, int64_t length) {
     // Apply the same principle as above
     uint64_t x, y;
     hash_t hx, hy;
-    x = *reinterpret_cast<const uint64_t*>(p + n - 8);
-    y = *reinterpret_cast<const uint64_t*>(p);
+    x = util::SafeLoadAs<uint64_t>(p + n - 8);
+    y = util::SafeLoadAs<uint64_t>(p);
     hx = ScalarHelper<uint64_t, AlgNum>::ComputeHash(x);
     hy = ScalarHelper<uint64_t, AlgNum ^ 1>::ComputeHash(y);
     return n ^ hx ^ hy;
diff --git a/cpp/src/arrow/util/ubsan.h b/cpp/src/arrow/util/ubsan.h
index f9fcfb5..758f542 100644
--- a/cpp/src/arrow/util/ubsan.h
+++ b/cpp/src/arrow/util/ubsan.h
@@ -49,5 +49,21 @@ inline T* MakeNonNull(T* maybe_null) {
   return reinterpret_cast<T*>(&internal::non_null_filler);
 }
 
+template <typename T>
+inline typename std::enable_if<std::is_integral<T>::value, T>::type SafeLoadAs(
+    const uint8_t* unaligned) {
+  typename std::remove_const<T>::type ret;
+  std::memcpy(&ret, unaligned, sizeof(T));
+  return ret;
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_integral<T>::value, T>::type SafeLoad(
+    const T* unaligned) {
+  typename std::remove_const<T>::type ret;
+  std::memcpy(&ret, unaligned, sizeof(T));
+  return ret;
+}
+
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 3fe37b0..f757b5f 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -83,6 +83,7 @@ namespace arrow {
 
 using ::arrow::BitUtil::FromBigEndian;
 using ::arrow::internal::SafeLeftShift;
+using ::arrow::util::SafeLoadAs;
 
 template <typename ArrowType>
 using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
@@ -1212,38 +1213,37 @@ static uint64_t BytesToInteger(const uint8_t* bytes, int32_t start, int32_t stop
     case 1:
       return bytes[start];
     case 2:
-      return FromBigEndian(*reinterpret_cast<const uint16_t*>(bytes + start));
+      return FromBigEndian(SafeLoadAs<uint16_t>(bytes + start));
     case 3: {
-      const uint64_t first_two_bytes =
-          FromBigEndian(*reinterpret_cast<const uint16_t*>(bytes + start));
+      const uint64_t first_two_bytes = FromBigEndian(SafeLoadAs<uint16_t>(bytes + start));
       const uint64_t last_byte = bytes[stop - 1];
       return first_two_bytes << 8 | last_byte;
     }
     case 4:
-      return FromBigEndian(*reinterpret_cast<const uint32_t*>(bytes + start));
+      return FromBigEndian(SafeLoadAs<uint32_t>(bytes + start));
     case 5: {
       const uint64_t first_four_bytes =
-          FromBigEndian(*reinterpret_cast<const uint32_t*>(bytes + start));
+          FromBigEndian(SafeLoadAs<uint32_t>(bytes + start));
       const uint64_t last_byte = bytes[stop - 1];
       return first_four_bytes << 8 | last_byte;
     }
     case 6: {
       const uint64_t first_four_bytes =
-          FromBigEndian(*reinterpret_cast<const uint32_t*>(bytes + start));
+          FromBigEndian(SafeLoadAs<uint32_t>(bytes + start));
       const uint64_t last_two_bytes =
-          FromBigEndian(*reinterpret_cast<const uint16_t*>(bytes + start + 4));
+          FromBigEndian(SafeLoadAs<uint16_t>(bytes + start + 4));
       return first_four_bytes << 16 | last_two_bytes;
     }
     case 7: {
       const uint64_t first_four_bytes =
-          FromBigEndian(*reinterpret_cast<const uint32_t*>(bytes + start));
+          FromBigEndian(SafeLoadAs<uint32_t>(bytes + start));
       const uint64_t second_two_bytes =
-          FromBigEndian(*reinterpret_cast<const uint16_t*>(bytes + start + 4));
+          FromBigEndian(SafeLoadAs<uint16_t>(bytes + start + 4));
       const uint64_t last_byte = bytes[stop - 1];
       return first_four_bytes << 24 | second_two_bytes << 8 | last_byte;
     }
     case 8:
-      return FromBigEndian(*reinterpret_cast<const uint64_t*>(bytes + start));
+      return FromBigEndian(SafeLoadAs<uint64_t>(bytes + start));
     default: {
       DCHECK(false);
       return UINT64_MAX;
diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h
index 8014e1a..5a72da6 100644
--- a/cpp/src/parquet/arrow/writer.h
+++ b/cpp/src/parquet/arrow/writer.h
@@ -211,8 +211,9 @@ inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_ti
   (*impala_timestamp).value[2] = (uint32_t)julian_days;
 
   int64_t last_day_units = time % UnitPerDay;
-  int64_t* impala_last_day_nanos = reinterpret_cast<int64_t*>(impala_timestamp);
-  *impala_last_day_nanos = last_day_units * NanosecondsPerUnit;
+  auto last_day_nanos = last_day_units * NanosecondsPerUnit;
+  // Storage might be unaligned, so use memcpy instead of reinterpret_cast
+  std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t));
 }
 
 constexpr int64_t kSecondsInNanos = INT64_C(1000000000);
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index f66224e..130b75a 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -27,6 +27,7 @@
 #include "arrow/util/compression.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/rle-encoding.h"
+#include "arrow/util/ubsan.h"
 
 #include "parquet/column_page.h"
 #include "parquet/encoding.h"
@@ -50,7 +51,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
   bit_width_ = BitUtil::Log2(max_level + 1);
   switch (encoding) {
     case Encoding::RLE: {
-      num_bytes = *reinterpret_cast<const int32_t*>(data);
+      num_bytes = arrow::util::SafeLoadAs<int32_t>(data);
       const uint8_t* decoder_data = data + sizeof(int32_t);
       if (!rle_decoder_) {
         rle_decoder_.reset(
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 77f86e3..304724b 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -29,6 +29,7 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/rle-encoding.h"
 #include "arrow/util/string_view.h"
+#include "arrow/util/ubsan.h"
 
 #include "parquet/exception.h"
 #include "parquet/platform.h"
@@ -609,7 +610,7 @@ inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int nu
   int bytes_decoded = 0;
   int increment;
   for (int i = 0; i < num_values; ++i) {
-    uint32_t len = out[i].len = *reinterpret_cast<const uint32_t*>(data);
+    uint32_t len = out[i].len = arrow::util::SafeLoadAs<uint32_t>(data);
     increment = static_cast<int>(sizeof(uint32_t) + len);
     if (data_size < increment) ParquetException::EofException();
     out[i].ptr = data + sizeof(uint32_t);
@@ -719,7 +720,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
     int bytes_decoded = 0;
     while (i < num_values) {
       if (bit_reader.IsSet()) {
-        uint32_t len = *reinterpret_cast<const uint32_t*>(data);
+        uint32_t len = arrow::util::SafeLoadAs<uint32_t>(data);
         increment = static_cast<int>(sizeof(uint32_t) + len);
         if (data_size < increment) {
           ParquetException::EofException();
@@ -752,7 +753,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
     int bytes_decoded = 0;
 
     while (i < num_values) {
-      uint32_t len = *reinterpret_cast<const uint32_t*>(data);
+      uint32_t len = arrow::util::SafeLoadAs<uint32_t>(data);
       int increment = static_cast<int>(sizeof(uint32_t) + len);
       if (data_size < increment) ParquetException::EofException();
       builder->Append(data + sizeof(uint32_t), len);
@@ -1103,7 +1104,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
   virtual void SetData(int num_values, const uint8_t* data, int len) {
     num_values_ = num_values;
     if (len == 0) return;
-    int total_lengths_len = *reinterpret_cast<const int*>(data);
+    int total_lengths_len = arrow::util::SafeLoadAs<int32_t>(data);
     data += 4;
     this->len_decoder_.SetData(num_values, data, total_lengths_len);
     data_ = data + total_lengths_len;
@@ -1145,7 +1146,7 @@ class DeltaByteArrayDecoder : public DecoderImpl,
   virtual void SetData(int num_values, const uint8_t* data, int len) {
     num_values_ = num_values;
     if (len == 0) return;
-    int prefix_len_length = *reinterpret_cast<const int*>(data);
+    int prefix_len_length = arrow::util::SafeLoadAs<int32_t>(data);
     data += 4;
     len -= 4;
     prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc
index 959ea0d..d0ca9ca 100644
--- a/cpp/src/parquet/file_reader.cc
+++ b/cpp/src/parquet/file_reader.cc
@@ -28,6 +28,7 @@
 #include "arrow/io/file.h"
 #include "arrow/status.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
 
 #include "parquet/column_reader.h"
 #include "parquet/column_scanner.h"
@@ -179,7 +180,7 @@ class SerializedFile : public ParquetFileReader::Contents {
       throw ParquetException("Invalid parquet file. Corrupt footer.");
     }
 
-    uint32_t metadata_len = *reinterpret_cast<const uint32_t*>(
+    uint32_t metadata_len = arrow::util::SafeLoadAs<uint32_t>(
         reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
         kFooterSize);
     int64_t metadata_start = file_size - kFooterSize - metadata_len;
diff --git a/cpp/src/plasma/common.cc b/cpp/src/plasma/common.cc
index 490aa15..0f1a0d1 100644
--- a/cpp/src/plasma/common.cc
+++ b/cpp/src/plasma/common.cc
@@ -19,6 +19,8 @@
 
 #include <limits>
 
+#include "arrow/util/ubsan.h"
+
 #include "plasma/plasma_generated.h"
 
 namespace fb = plasma::flatbuf;
@@ -64,7 +66,7 @@ uint64_t MurmurHash64A(const void* key, int len, unsigned int seed) {
   const uint64_t* end = data + (len / 8);
 
   while (data != end) {
-    uint64_t k = *data++;
+    uint64_t k = arrow::util::SafeLoad(data++);
 
     k *= m;
     k ^= k >> r;