You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2019/07/08 09:25:41 UTC
[impala] branch master updated: Revert "IMPALA-8710: Increase
allowed bit width to 64 for bit packing"
This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 1776c7a Revert "IMPALA-8710: Increase allowed bit width to 64 for bit packing"
1776c7a is described below
commit 1776c7a54adb929bbd41eea50aed7665282f46f0
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Fri Jul 5 15:15:57 2019 +0000
Revert "IMPALA-8710: Increase allowed bit width to 64 for bit packing"
This reverts commit b1cbf9e6b786132e86699cbb1e472ec98499bb11.
Change-Id: If18a083c72ed036f0b493506a43e20b2c2450a41
Reviewed-on: http://gerrit.cloudera.org:8080/13808
Reviewed-by: Attila Jeges <at...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/benchmarks/bit-packing-benchmark.cc | 198 ++++++++++++++---------------
be/src/util/CMakeLists.txt | 2 +-
be/src/util/bit-packing-test.cc | 26 ++--
be/src/util/bit-packing.h | 10 +-
be/src/util/bit-packing.inline.h | 103 +++++++--------
be/src/util/bit-stream-utils-test.cc | 147 ++-------------------
be/src/util/bit-stream-utils.h | 39 ++----
be/src/util/bit-stream-utils.inline.h | 87 +++----------
be/src/util/rle-encoding.h | 6 +-
be/src/util/rle-test.cc | 2 +-
10 files changed, 205 insertions(+), 415 deletions(-)
diff --git a/be/src/benchmarks/bit-packing-benchmark.cc b/be/src/benchmarks/bit-packing-benchmark.cc
index 6c895ad..79ac68b 100644
--- a/be/src/benchmarks/bit-packing-benchmark.cc
+++ b/be/src/benchmarks/bit-packing-benchmark.cc
@@ -24,237 +24,237 @@
// Unpack32Scalar internally.
//
//
-// Machine Info: Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz
+// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
// Unpack32Values bit_width 0:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.05e+04 1.06e+04 1.07e+04 1X 1X 1X
-// Unpack32Scalar 1.29e+05 1.32e+05 1.34e+05 12.3X 12.5X 12.5X
-// UnpackScalar 3.18e+05 3.23e+05 3.26e+05 30.3X 30.4X 30.5X
+// BitReader 1.57e+04 1.59e+04 1.6e+04 1X 1X 1X
+// Unpack32Scalar 1.34e+05 1.35e+05 1.36e+05 8.51X 8.49X 8.51X
+// UnpackScalar 2.08e+05 2.1e+05 2.12e+05 13.3X 13.2X 13.2X
//
// Unpack32Values bit_width 1:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.28e+04 1.29e+04 1.31e+04 1X 1X 1X
-// Unpack32Scalar 8.22e+04 8.42e+04 8.5e+04 6.4X 6.51X 6.5X
-// UnpackScalar 7.96e+04 8.08e+04 8.16e+04 6.2X 6.24X 6.24X
+// BitReader 1.19e+04 1.2e+04 1.2e+04 1X 1X 1X
+// Unpack32Scalar 8.89e+04 8.94e+04 9.04e+04 7.48X 7.46X 7.51X
+// UnpackScalar 9.72e+04 9.8e+04 9.86e+04 8.18X 8.18X 8.19X
//
// Unpack32Values bit_width 2:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
// BitReader 1.18e+04 1.19e+04 1.2e+04 1X 1X 1X
-// Unpack32Scalar 8.37e+04 8.41e+04 8.49e+04 7.06X 7.06X 7.06X
-// UnpackScalar 7.6e+04 7.66e+04 7.73e+04 6.42X 6.44X 6.43X
+// Unpack32Scalar 8.84e+04 8.91e+04 8.99e+04 7.49X 7.48X 7.5X
+// UnpackScalar 9.68e+04 9.76e+04 9.84e+04 8.2X 8.19X 8.21X
//
// Unpack32Values bit_width 3:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.26e+04 1.27e+04 1.28e+04 1X 1X 1X
-// Unpack32Scalar 8.31e+04 8.37e+04 8.44e+04 6.61X 6.6X 6.61X
-// UnpackScalar 9.42e+04 9.54e+04 9.61e+04 7.49X 7.52X 7.52X
+// BitReader 1.16e+04 1.17e+04 1.18e+04 1X 1X 1X
+// Unpack32Scalar 8.67e+04 8.72e+04 8.79e+04 7.45X 7.42X 7.43X
+// UnpackScalar 9.6e+04 9.66e+04 9.74e+04 8.25X 8.22X 8.24X
//
// Unpack32Values bit_width 4:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.26e+04 1.27e+04 1.28e+04 1X 1X 1X
-// Unpack32Scalar 8.4e+04 8.46e+04 8.52e+04 6.68X 6.68X 6.67X
-// UnpackScalar 7.46e+04 7.52e+04 7.59e+04 5.93X 5.93X 5.94X
+// BitReader 1.08e+04 1.09e+04 1.1e+04 1X 1X 1X
+// Unpack32Scalar 9.13e+04 9.19e+04 9.25e+04 8.44X 8.43X 8.42X
+// UnpackScalar 9.65e+04 9.69e+04 9.78e+04 8.91X 8.89X 8.9X
//
// Unpack32Values bit_width 5:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.23e+04 1.25e+04 1.25e+04 1X 1X 1X
-// Unpack32Scalar 8.25e+04 8.32e+04 8.39e+04 6.68X 6.67X 6.69X
-// UnpackScalar 9.37e+04 9.43e+04 9.52e+04 7.6X 7.56X 7.58X
+// BitReader 1.14e+04 1.15e+04 1.16e+04 1X 1X 1X
+// Unpack32Scalar 8.35e+04 8.42e+04 8.49e+04 7.3X 7.31X 7.31X
+// UnpackScalar 9.41e+04 9.48e+04 9.56e+04 8.22X 8.22X 8.24X
//
// Unpack32Values bit_width 6:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.23e+04 1.24e+04 1.25e+04 1X 1X 1X
-// Unpack32Scalar 8.23e+04 8.32e+04 8.39e+04 6.69X 6.7X 6.71X
-// UnpackScalar 9.28e+04 9.42e+04 9.52e+04 7.54X 7.59X 7.61X
+// BitReader 1.14e+04 1.15e+04 1.16e+04 1X 1X 1X
+// Unpack32Scalar 8.46e+04 8.53e+04 8.6e+04 7.4X 7.41X 7.41X
+// UnpackScalar 9.35e+04 9.41e+04 9.51e+04 8.18X 8.16X 8.2X
//
// Unpack32Values bit_width 7:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.2e+04 1.23e+04 1.24e+04 1X 1X 1X
-// Unpack32Scalar 8.19e+04 8.26e+04 8.32e+04 6.8X 6.73X 6.72X
-// UnpackScalar 9.26e+04 9.36e+04 9.46e+04 7.69X 7.62X 7.64X
+// BitReader 1.09e+04 1.1e+04 1.11e+04 1X 1X 1X
+// Unpack32Scalar 8.11e+04 8.16e+04 8.25e+04 7.44X 7.44X 7.45X
+// UnpackScalar 9.16e+04 9.21e+04 9.3e+04 8.4X 8.4X 8.39X
//
// Unpack32Values bit_width 8:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.15e+04 1.2e+04 1.23e+04 1X 1X 1X
-// Unpack32Scalar 6.33e+04 8.31e+04 8.51e+04 5.5X 6.9X 6.94X
-// UnpackScalar 5.64e+04 7e+04 7.29e+04 4.91X 5.82X 5.95X
+// BitReader 1.14e+04 1.15e+04 1.16e+04 1X 1X 1X
+// Unpack32Scalar 9.02e+04 9.07e+04 9.14e+04 7.9X 7.9X 7.91X
+// UnpackScalar 9.48e+04 9.55e+04 9.63e+04 8.31X 8.33X 8.33X
//
// Unpack32Values bit_width 9:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.07e+04 1.14e+04 1.16e+04 1X 1X 1X
-// Unpack32Scalar 6.33e+04 8.16e+04 8.27e+04 5.9X 7.12X 7.14X
-// UnpackScalar 6.95e+04 9.22e+04 9.32e+04 6.47X 8.05X 8.05X
+// BitReader 1.11e+04 1.12e+04 1.13e+04 1X 1X 1X
+// Unpack32Scalar 7.94e+04 7.97e+04 8.06e+04 7.14X 7.12X 7.14X
+// UnpackScalar 8.78e+04 8.83e+04 8.9e+04 7.89X 7.88X 7.89X
//
// Unpack32Values bit_width 10:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.18e+04 1.19e+04 1.2e+04 1X 1X 1X
-// Unpack32Scalar 8.13e+04 8.18e+04 8.23e+04 6.89X 6.85X 6.84X
-// UnpackScalar 9.17e+04 9.26e+04 9.34e+04 7.76X 7.76X 7.75X
+// BitReader 1.1e+04 1.11e+04 1.12e+04 1X 1X 1X
+// Unpack32Scalar 8.07e+04 8.14e+04 8.21e+04 7.31X 7.32X 7.34X
+// UnpackScalar 8.95e+04 9.02e+04 9.09e+04 8.11X 8.12X 8.12X
//
// Unpack32Values bit_width 11:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.17e+04 1.18e+04 1.19e+04 1X 1X 1X
-// Unpack32Scalar 8.07e+04 8.14e+04 8.22e+04 6.89X 6.9X 6.92X
-// UnpackScalar 9.13e+04 9.22e+04 9.29e+04 7.79X 7.82X 7.83X
+// BitReader 1.09e+04 1.1e+04 1.11e+04 1X 1X 1X
+// Unpack32Scalar 7.63e+04 7.69e+04 7.75e+04 6.99X 6.99X 6.99X
+// UnpackScalar 8.55e+04 8.61e+04 8.69e+04 7.83X 7.83X 7.84X
//
// Unpack32Values bit_width 12:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.17e+04 1.17e+04 1.19e+04 1X 1X 1X
-// Unpack32Scalar 8.07e+04 8.14e+04 8.2e+04 6.92X 6.93X 6.92X
-// UnpackScalar 8.94e+04 9.14e+04 9.23e+04 7.67X 7.78X 7.79X
+// BitReader 1.09e+04 1.1e+04 1.1e+04 1X 1X 1X
+// Unpack32Scalar 8.23e+04 8.29e+04 8.35e+04 7.55X 7.56X 7.57X
+// UnpackScalar 9.06e+04 9.12e+04 9.19e+04 8.31X 8.31X 8.33X
//
// Unpack32Values bit_width 13:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.13e+04 1.15e+04 1.16e+04 1X 1X 1X
-// Unpack32Scalar 7.85e+04 8.05e+04 8.14e+04 6.93X 6.98X 6.99X
-// UnpackScalar 8.77e+04 9.09e+04 9.17e+04 7.74X 7.88X 7.87X
+// BitReader 1.07e+04 1.08e+04 1.09e+04 1X 1X 1X
+// Unpack32Scalar 7.42e+04 7.47e+04 7.55e+04 6.92X 6.9X 6.92X
+// UnpackScalar 8.16e+04 8.23e+04 8.29e+04 7.6X 7.6X 7.61X
//
// Unpack32Values bit_width 14:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.14e+04 1.15e+04 1.16e+04 1X 1X 1X
-// Unpack32Scalar 7.81e+04 8e+04 8.06e+04 6.84X 6.95X 6.94X
-// UnpackScalar 8.87e+04 9.01e+04 9.1e+04 7.77X 7.83X 7.84X
+// BitReader 1.07e+04 1.08e+04 1.09e+04 1X 1X 1X
+// Unpack32Scalar 7.58e+04 7.62e+04 7.68e+04 7.08X 7.08X 7.08X
+// UnpackScalar 8.33e+04 8.38e+04 8.46e+04 7.78X 7.78X 7.79X
//
// Unpack32Values bit_width 15:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.12e+04 1.13e+04 1.14e+04 1X 1X 1X
-// Unpack32Scalar 7.66e+04 7.96e+04 8.06e+04 6.85X 7.01X 7.04X
-// UnpackScalar 8.81e+04 8.94e+04 9.04e+04 7.87X 7.88X 7.9X
+// BitReader 1.06e+04 1.06e+04 1.07e+04 1X 1X 1X
+// Unpack32Scalar 7.16e+04 7.22e+04 7.29e+04 6.78X 6.79X 6.79X
+// UnpackScalar 7.96e+04 8.05e+04 8.09e+04 7.54X 7.57X 7.54X
//
// Unpack32Values bit_width 16:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.13e+04 1.13e+04 1.14e+04 1X 1X 1X
-// Unpack32Scalar 8.11e+04 8.19e+04 8.27e+04 7.2X 7.22X 7.23X
-// UnpackScalar 8.82e+04 8.91e+04 8.97e+04 7.84X 7.85X 7.85X
+// BitReader 1.08e+04 1.08e+04 1.09e+04 1X 1X 1X
+// Unpack32Scalar 8.71e+04 8.76e+04 8.83e+04 8.09X 8.09X 8.08X
+// UnpackScalar 9.22e+04 9.3e+04 9.37e+04 8.56X 8.58X 8.57X
//
// Unpack32Values bit_width 17:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.11e+04 1.12e+04 1.13e+04 1X 1X 1X
-// Unpack32Scalar 7.75e+04 7.86e+04 7.91e+04 7X 7.04X 7.03X
-// UnpackScalar 8.75e+04 8.82e+04 8.89e+04 7.89X 7.91X 7.9X
+// BitReader 1.04e+04 1.04e+04 1.05e+04 1X 1X 1X
+// Unpack32Scalar 6.98e+04 7.04e+04 7.09e+04 6.73X 6.74X 6.74X
+// UnpackScalar 7.73e+04 7.78e+04 7.85e+04 7.45X 7.45X 7.47X
//
// Unpack32Values bit_width 18:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.09e+04 1.11e+04 1.12e+04 1X 1X 1X
-// Unpack32Scalar 7.71e+04 7.81e+04 7.88e+04 7.06X 7.02X 7.03X
-// UnpackScalar 8.68e+04 8.81e+04 8.89e+04 7.95X 7.92X 7.93X
+// BitReader 1.03e+04 1.04e+04 1.05e+04 1X 1X 1X
+// Unpack32Scalar 7.1e+04 7.17e+04 7.22e+04 6.86X 6.88X 6.87X
+// UnpackScalar 7.77e+04 7.82e+04 7.89e+04 7.51X 7.5X 7.51X
//
// Unpack32Values bit_width 19:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.09e+04 1.1e+04 1.11e+04 1X 1X 1X
-// Unpack32Scalar 7.69e+04 7.75e+04 7.83e+04 7.06X 7.07X 7.07X
-// UnpackScalar 8.62e+04 8.72e+04 8.78e+04 7.91X 7.96X 7.93X
+// BitReader 1.02e+04 1.03e+04 1.04e+04 1X 1X 1X
+// Unpack32Scalar 6.74e+04 6.8e+04 6.85e+04 6.59X 6.6X 6.61X
+// UnpackScalar 7.43e+04 7.49e+04 7.54e+04 7.26X 7.27X 7.28X
//
// Unpack32Values bit_width 20:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.07e+04 1.1e+04 1.11e+04 1X 1X 1X
-// Unpack32Scalar 7.58e+04 7.74e+04 7.82e+04 7.1X 7.06X 7.07X
-// UnpackScalar 8.43e+04 8.64e+04 8.74e+04 7.89X 7.89X 7.9X
+// BitReader 1.02e+04 1.03e+04 1.03e+04 1X 1X 1X
+// Unpack32Scalar 7.28e+04 7.34e+04 7.4e+04 7.15X 7.15X 7.15X
+// UnpackScalar 7.94e+04 8.02e+04 8.07e+04 7.8X 7.81X 7.8X
//
// Unpack32Values bit_width 21:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.07e+04 1.08e+04 1.09e+04 1X 1X 1X
-// Unpack32Scalar 7.58e+04 7.66e+04 7.76e+04 7.07X 7.09X 7.14X
-// UnpackScalar 8.51e+04 8.61e+04 8.69e+04 7.95X 7.97X 8X
+// BitReader 1.01e+04 1.01e+04 1.02e+04 1X 1X 1X
+// Unpack32Scalar 6.56e+04 6.62e+04 6.67e+04 6.53X 6.54X 6.54X
+// UnpackScalar 7.1e+04 7.15e+04 7.19e+04 7.06X 7.06X 7.06X
//
// Unpack32Values bit_width 22:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.05e+04 1.08e+04 1.09e+04 1X 1X 1X
-// Unpack32Scalar 7.49e+04 7.62e+04 7.71e+04 7.1X 7.08X 7.1X
-// UnpackScalar 8.39e+04 8.59e+04 8.65e+04 7.96X 7.98X 7.97X
+// BitReader 1e+04 1.01e+04 1.02e+04 1X 1X 1X
+// Unpack32Scalar 6.68e+04 6.73e+04 6.79e+04 6.68X 6.68X 6.68X
+// UnpackScalar 7.35e+04 7.41e+04 7.46e+04 7.34X 7.35X 7.35X
//
// Unpack32Values bit_width 23:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.06e+04 1.07e+04 1.08e+04 1X 1X 1X
-// Unpack32Scalar 7.44e+04 7.54e+04 7.62e+04 7.03X 7.07X 7.08X
-// UnpackScalar 8.44e+04 8.54e+04 8.61e+04 7.97X 8X 8X
+// BitReader 9.87e+03 9.95e+03 1e+04 1X 1X 1X
+// Unpack32Scalar 6.44e+04 6.48e+04 6.53e+04 6.52X 6.52X 6.51X
+// UnpackScalar 6.93e+04 6.97e+04 7.04e+04 7.03X 7.01X 7.02X
//
// Unpack32Values bit_width 24:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.05e+04 1.06e+04 1.07e+04 1X 1X 1X
-// Unpack32Scalar 7.54e+04 7.62e+04 7.69e+04 7.17X 7.18X 7.2X
-// UnpackScalar 8.42e+04 8.51e+04 8.62e+04 8.01X 8.01X 8.07X
+// BitReader 9.93e+03 1e+04 1.01e+04 1X 1X 1X
+// Unpack32Scalar 7.44e+04 7.49e+04 7.55e+04 7.49X 7.49X 7.49X
+// UnpackScalar 8.12e+04 8.17e+04 8.27e+04 8.18X 8.17X 8.2X
//
// Unpack32Values bit_width 25:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.04e+04 1.05e+04 1.05e+04 1X 1X 1X
-// Unpack32Scalar 7.42e+04 7.49e+04 7.57e+04 7.15X 7.17X 7.18X
-// UnpackScalar 8.32e+04 8.43e+04 8.51e+04 8.01X 8.07X 8.07X
+// BitReader 9.71e+03 9.79e+03 9.86e+03 1X 1X 1X
+// Unpack32Scalar 6.12e+04 6.16e+04 6.22e+04 6.31X 6.29X 6.31X
+// UnpackScalar 6.44e+04 6.48e+04 6.53e+04 6.64X 6.62X 6.62X
//
// Unpack32Values bit_width 26:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.02e+04 1.04e+04 1.05e+04 1X 1X 1X
-// Unpack32Scalar 7.24e+04 7.46e+04 7.55e+04 7.12X 7.16X 7.19X
-// UnpackScalar 8.15e+04 8.43e+04 8.51e+04 8.01X 8.09X 8.1X
+// BitReader 9.67e+03 9.74e+03 9.81e+03 1X 1X 1X
+// Unpack32Scalar 6.21e+04 6.26e+04 6.31e+04 6.42X 6.42X 6.43X
+// UnpackScalar 6.53e+04 6.59e+04 6.64e+04 6.75X 6.77X 6.76X
//
// Unpack32Values bit_width 27:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.01e+04 1.03e+04 1.04e+04 1X 1X 1X
-// Unpack32Scalar 7.26e+04 7.4e+04 7.47e+04 7.18X 7.18X 7.2X
-// UnpackScalar 8.13e+04 8.35e+04 8.43e+04 8.03X 8.11X 8.12X
+// BitReader 9.56e+03 9.62e+03 9.7e+03 1X 1X 1X
+// Unpack32Scalar 5.99e+04 6.03e+04 6.09e+04 6.27X 6.27X 6.28X
+// UnpackScalar 6.32e+04 6.35e+04 6.42e+04 6.61X 6.6X 6.62X
//
// Unpack32Values bit_width 28:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1e+04 1.03e+04 1.03e+04 1X 1X 1X
-// Unpack32Scalar 7.07e+04 7.41e+04 7.48e+04 7.05X 7.22X 7.23X
-// UnpackScalar 7.97e+04 8.34e+04 8.42e+04 7.96X 8.13X 8.14X
+// BitReader 9.53e+03 9.61e+03 9.66e+03 1X 1X 1X
+// Unpack32Scalar 6.37e+04 6.42e+04 6.47e+04 6.69X 6.68X 6.7X
+// UnpackScalar 6.68e+04 6.73e+04 6.77e+04 7.01X 7X 7.01X
//
// Unpack32Values bit_width 29:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 1.01e+04 1.01e+04 1.02e+04 1X 1X 1X
-// Unpack32Scalar 7.06e+04 7.27e+04 7.35e+04 7.02X 7.18X 7.19X
-// UnpackScalar 8e+04 8.23e+04 8.32e+04 7.96X 8.12X 8.14X
+// BitReader 9.41e+03 9.46e+03 9.55e+03 1X 1X 1X
+// Unpack32Scalar 5.79e+04 5.82e+04 5.87e+04 6.15X 6.15X 6.14X
+// UnpackScalar 6.08e+04 6.11e+04 6.16e+04 6.46X 6.46X 6.46X
//
// Unpack32Values bit_width 30:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 9.95e+03 1.01e+04 1.02e+04 1X 1X 1X
-// Unpack32Scalar 7e+04 7.23e+04 7.32e+04 7.04X 7.16X 7.18X
-// UnpackScalar 7.9e+04 8.18e+04 8.24e+04 7.94X 8.09X 8.08X
+// BitReader 9.37e+03 9.45e+03 9.52e+03 1X 1X 1X
+// Unpack32Scalar 5.87e+04 5.92e+04 5.96e+04 6.26X 6.27X 6.26X
+// UnpackScalar 6.16e+04 6.2e+04 6.26e+04 6.58X 6.56X 6.57X
//
// Unpack32Values bit_width 31:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 9.06e+03 9.33e+03 9.43e+03 1X 1X 1X
-// Unpack32Scalar 6.52e+04 7.17e+04 7.26e+04 7.19X 7.69X 7.7X
-// UnpackScalar 7.67e+04 8.1e+04 8.19e+04 8.46X 8.69X 8.69X
+// BitReader 9.26e+03 9.33e+03 9.41e+03 1X 1X 1X
+// Unpack32Scalar 5.59e+04 5.63e+04 5.67e+04 6.03X 6.03X 6.03X
+// UnpackScalar 5.85e+04 5.89e+04 5.94e+04 6.31X 6.31X 6.31X
//
// Unpack32Values bit_width 32:Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// BitReader 9.73e+03 9.96e+03 1e+04 1X 1X 1X
-// Unpack32Scalar 1.06e+05 1.1e+05 1.11e+05 10.9X 11X 11.1X
-// UnpackScalar 1.41e+05 1.49e+05 1.5e+05 14.5X 14.9X 15X
+// BitReader 9.89e+03 9.96e+03 1e+04 1X 1X 1X
+// Unpack32Scalar 9.83e+04 9.96e+04 1.01e+05 9.95X 10X 10X
+// UnpackScalar 8.24e+04 8.36e+04 8.44e+04 8.34X 8.4X 8.41X
#include <cmath>
#include <cstdio>
#include <cstdlib>
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 21942d3..b12ae2d 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -159,7 +159,7 @@ target_link_libraries(loggingsupport ${IMPALA_LINK_LIBS_DYNAMIC_TARGETS})
ADD_UNIFIED_BE_LSAN_TEST(benchmark-test "BenchmarkTest.*")
ADD_UNIFIED_BE_LSAN_TEST(bitmap-test "Bitmap.*")
ADD_UNIFIED_BE_LSAN_TEST(bit-packing-test "BitPackingTest.*")
-ADD_UNIFIED_BE_LSAN_TEST(bit-stream-utils-test "BitArray.*:VLQInt.*")
+ADD_UNIFIED_BE_LSAN_TEST(bit-stream-utils-test "BitArray.*")
ADD_UNIFIED_BE_LSAN_TEST(bit-util-test "BitUtil.*")
ADD_UNIFIED_BE_LSAN_TEST(blocking-queue-test "BlockingQueueTest.*")
ADD_UNIFIED_BE_LSAN_TEST(bloom-filter-test "BloomFilter.*:BloomFilterTest.*")
diff --git a/be/src/util/bit-packing-test.cc b/be/src/util/bit-packing-test.cc
index 8010fe3..2cf86ba 100644
--- a/be/src/util/bit-packing-test.cc
+++ b/be/src/util/bit-packing-test.cc
@@ -32,8 +32,8 @@ using std::mt19937;
namespace impala {
namespace {
-uint64_t ComputeMask(int bit_width) {
- return (bit_width < 64) ? ((1UL << bit_width) - 1) : ~0UL;
+uint32_t ComputeMask(int bit_width) {
+ return (bit_width < 32) ? ((1U << bit_width) - 1) : ~0U;
}
}
@@ -45,19 +45,19 @@ uint64_t ComputeMask(int bit_width) {
///
/// This is to test that we do not overrun either the input or output buffer for smaller
/// batch sizes.
-void UnpackSubset(const uint64_t* in, const uint8_t* packed, int num_in_values,
+void UnpackSubset(const uint32_t* in, const uint8_t* packed, int num_in_values,
int bit_width, bool aligned);
/// Test a packing/unpacking round-trip of the 'num_in_values' values in 'in',
/// packed with 'bit_width'. If 'aligned' is true, buffers for packed and unpacked data
/// are allocated at a 64-byte aligned address. Otherwise the buffers are misaligned
/// by 1 byte from a 64-byte aligned address.
-void PackUnpack(const uint64_t* in, int num_in_values, int bit_width, bool aligned) {
+void PackUnpack(const uint32_t* in, int num_in_values, int bit_width, bool aligned) {
LOG(INFO) << "num_in_values = " << num_in_values << " bit_width = " << bit_width
<< " aligned = " << aligned;
// Mask out higher bits so that the values to pack are in range.
- const uint64_t mask = ComputeMask(bit_width);
+ const uint32_t mask = ComputeMask(bit_width);
const int misalignment = aligned ? 0 : 1;
const int bytes_required = BitUtil::RoundUpNumBytes(bit_width * num_in_values);
@@ -79,8 +79,8 @@ void PackUnpack(const uint64_t* in, int num_in_values, int bit_width, bool align
for (const int num_to_unpack : {num_in_values, num_in_values + 1, num_in_values + 77}) {
LOG(INFO) << "Unpacking " << num_to_unpack;
// Size buffer exactly so that ASAN can detect reads/writes that overrun the buffer.
- AlignedAllocation out_storage(num_to_unpack * sizeof(uint64_t) + misalignment);
- uint64_t* out = reinterpret_cast<uint64_t*>(out_storage.data() + misalignment);
+ AlignedAllocation out_storage(num_to_unpack * sizeof(uint32_t) + misalignment);
+ uint32_t* out = reinterpret_cast<uint32_t*>(out_storage.data() + misalignment);
const auto result = BitPacking::UnpackValues(
bit_width, packed, writer.bytes_written(), num_to_unpack, out);
ASSERT_EQ(packed + writer.bytes_written(), result.first)
@@ -105,7 +105,7 @@ void PackUnpack(const uint64_t* in, int num_in_values, int bit_width, bool align
UnpackSubset(in, packed, num_in_values, bit_width, aligned);
}
-void UnpackSubset(const uint64_t* in, const uint8_t* packed, int num_in_values,
+void UnpackSubset(const uint32_t* in, const uint8_t* packed, int num_in_values,
int bit_width, bool aligned) {
const int misalignment = aligned ? 0 : 1;
for (int num_to_unpack : {1, 10, 77, num_in_values - 7}) {
@@ -116,8 +116,8 @@ void UnpackSubset(const uint64_t* in, const uint8_t* packed, int num_in_values,
AlignedAllocation packed_copy_storage(bytes_to_read + misalignment);
uint8_t* packed_copy = packed_copy_storage.data() + misalignment;
memcpy(packed_copy, packed, bytes_to_read);
- AlignedAllocation out_storage(num_to_unpack * sizeof(uint64_t) + misalignment);
- uint64_t* out = reinterpret_cast<uint64_t*>(out_storage.data() + misalignment);
+ AlignedAllocation out_storage(num_to_unpack * sizeof(uint32_t) + misalignment);
+ uint32_t* out = reinterpret_cast<uint32_t*>(out_storage.data() + misalignment);
const auto result = BitPacking::UnpackValues(
bit_width, packed_copy, bytes_to_read, num_to_unpack, out);
ASSERT_EQ(packed_copy + bytes_to_read, result.first) << "Read wrong # of bytes";
@@ -132,9 +132,9 @@ void UnpackSubset(const uint64_t* in, const uint8_t* packed, int num_in_values,
TEST(BitPackingTest, RandomUnpack) {
constexpr int NUM_IN_VALUES = 64 * 1024;
- uint64_t in[NUM_IN_VALUES];
+ uint32_t in[NUM_IN_VALUES];
mt19937 rng;
- uniform_int_distribution<uint64_t> dist;
+ uniform_int_distribution<uint32_t> dist;
std::generate(std::begin(in), std::end(in), [&rng, &dist] { return dist(rng); });
// Test various odd input lengths to exercise boundary cases for full and partial
@@ -145,7 +145,7 @@ TEST(BitPackingTest, RandomUnpack) {
lengths.push_back(i);
}
- for (int bit_width = 0; bit_width <= 64; ++bit_width) {
+ for (int bit_width = 0; bit_width <= 32; ++bit_width) {
for (const int length : lengths) {
// Test that unpacking to/from aligned and unaligned memory works.
for (const bool aligned : {true, false}) {
diff --git a/be/src/util/bit-packing.h b/be/src/util/bit-packing.h
index d449048..c70b55e 100644
--- a/be/src/util/bit-packing.h
+++ b/be/src/util/bit-packing.h
@@ -40,17 +40,15 @@ namespace impala {
/// in the two output bytes: [ 0 0 1 | 0 0 0 0 1 ] [ x x x x x x | 1 0 ]
/// lower bits of 17--^ 1 next value ^--upper bits of 17
///
-/// Bit widths from 0 to 64 are supported (0 bit width means that every value is 0).
+/// Bit widths from 0 to 32 are supported (0 bit width means that every value is 0).
/// The batched unpacking functions operate on batches of 32 values. This batch size
/// is convenient because for every supported bit width, the end of a 32 value batch
/// falls on a byte boundary. It is also large enough to amortise loop overheads.
class BitPacking {
public:
- static constexpr int MAX_BITWIDTH = sizeof(uint64_t) * 8;
-
/// Unpack bit-packed values with 'bit_width' from 'in' to 'out'. Keeps unpacking until
/// either all 'in_bytes' are read or 'num_values' values are unpacked. 'out' must have
- /// enough space for 'num_values'. 0 <= 'bit_width' <= 64 and 'bit_width' <= # of bits
+ /// enough space for 'num_values'. 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits
/// in OutType. 'in' must point to 'in_bytes' of addressable memory.
///
/// Returns a pointer to the byte after the last byte of 'in' that was read and also the
@@ -88,7 +86,7 @@ class BitPacking {
/// Unpack exactly 32 values of 'bit_width' from 'in' to 'out'. 'in' must point to
/// 'in_bytes' of addressable memory, and 'in_bytes' must be at least
/// (32 * bit_width / 8). 'out' must have space for 32 OutType values.
- /// 0 <= 'bit_width' <= 64 and 'bit_width' <= # of bits in OutType.
+ /// 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits in OutType.
template <typename OutType>
static const uint8_t* Unpack32Values(int bit_width, const uint8_t* __restrict__ in,
int64_t in_bytes, OutType* __restrict__ out);
@@ -115,7 +113,7 @@ class BitPacking {
/// 'num_values' must be at most 31. 'in' must point to 'in_bytes' of addressable
/// memory, and 'in_bytes' must be at least ceil(num_values * bit_width / 8).
/// 'out' must have space for 'num_values' OutType values.
- /// 0 <= 'bit_width' <= 64 and 'bit_width' <= # of bits in OutType.
+ /// 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits in OutType.
template <typename OutType, int BIT_WIDTH>
static const uint8_t* UnpackUpTo31Values(const uint8_t* __restrict__ in,
int64_t in_bytes, int num_values, OutType* __restrict__ out);
diff --git a/be/src/util/bit-packing.inline.h b/be/src/util/bit-packing.inline.h
index bbaa23b..8cebe40 100644
--- a/be/src/util/bit-packing.inline.h
+++ b/be/src/util/bit-packing.inline.h
@@ -58,8 +58,8 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackValues(int bit_width,
return UnpackValues<OutType, i>(in, in_bytes, num_values, out);
switch (bit_width) {
- // Expand cases from 0 to 64.
- BOOST_PP_REPEAT_FROM_TO(0, 65, UNPACK_VALUES_CASE, ignore);
+ // Expand cases from 0 to 32.
+ BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
default:
DCHECK(false);
return std::make_pair(nullptr, -1);
@@ -77,14 +77,12 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackValues(
const int64_t remainder_values = values_to_read % BATCH_SIZE;
const uint8_t* in_pos = in;
OutType* out_pos = out;
-
// First unpack as many full batches as possible.
for (int64_t i = 0; i < batches_to_read; ++i) {
in_pos = Unpack32Values<OutType, BIT_WIDTH>(in_pos, in_bytes, out_pos);
out_pos += BATCH_SIZE;
in_bytes -= (BATCH_SIZE * BIT_WIDTH) / CHAR_BIT;
}
-
// Then unpack the final partial batch.
if (remainder_values > 0) {
in_pos = UnpackUpTo31Values<OutType, BIT_WIDTH>(
@@ -105,14 +103,15 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackAndDecodeValues(int bit_wid
in, in_bytes, dict, dict_len, num_values, out, stride, decode_error);
switch (bit_width) {
- // Expand cases from 0 to 64.
- BOOST_PP_REPEAT_FROM_TO(0, 65, UNPACK_VALUES_CASE, ignore);
+ // Expand cases from 0 to 32.
+ BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
default:
DCHECK(false);
return std::make_pair(nullptr, -1);
}
#pragma pop_macro("UNPACK_VALUES_CASE")
}
+
template <typename OutType, int BIT_WIDTH>
std::pair<const uint8_t*, int64_t> BitPacking::UnpackAndDecodeValues(
const uint8_t* __restrict__ in, int64_t in_bytes, OutType* __restrict__ dict,
@@ -150,57 +149,49 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackAndDecodeValues(
// bpacking.c at https://github.com/lemire/FrameOfReference/, but is much more compact
// because it uses templates rather than source-level unrolling of all combinations.
//
-// After the template parameters are expanded and constants are propagated, all branches
+// After the template parameters is expanded and constants are propagated, all branches
// and offset/shift calculations should be optimized out, leaving only shifts by constants
// and bitmasks by constants. Calls to this must be stamped out manually or with
// BOOST_PP_REPEAT_FROM_TO: experimentation revealed that the GCC 4.9.2 optimiser was
// not able to fully propagate constants and remove branches when this was called from
// inside a for loop with constant bounds with VALUE_IDX changed to a function argument.
-//
-// We compute how many 32 bit words we have to read, which is either 1, 2 or 3. If it is
-// at least 2, the first two 32 bit words are read as one 64 bit word. Even if only one
-// word needs to be read, we try to read 64 bits if it does not lead to buffer overflow
-// because benchmarks show that it has a positive effect on performance.
template <int BIT_WIDTH, int VALUE_IDX>
-inline uint64_t ALWAYS_INLINE UnpackValue(const uint8_t* __restrict__ in_buf) {
- if (BIT_WIDTH == 0) return 0;
-
- constexpr int FIRST_BIT_IDX = VALUE_IDX * BIT_WIDTH;
- constexpr int FIRST_WORD_IDX = FIRST_BIT_IDX / 32;
- constexpr int LAST_BIT_IDX = FIRST_BIT_IDX + BIT_WIDTH;
- constexpr int LAST_WORD_IDX = BitUtil::RoundUpNumi32(LAST_BIT_IDX);
- constexpr int WORDS_TO_READ = LAST_WORD_IDX - FIRST_WORD_IDX;
- static_assert(WORDS_TO_READ <= 3, "At most three 32-bit words need to be loaded.");
-
- constexpr int FIRST_BIT_OFFSET = FIRST_BIT_IDX - FIRST_WORD_IDX * 32;
- constexpr uint64_t mask = BIT_WIDTH == 64 ? ~0L : (1UL << BIT_WIDTH) - 1;
- const uint32_t* const in = reinterpret_cast<const uint32_t*>(in_buf);
-
- // Avoid reading past the end of the buffer.
- constexpr bool CAN_READ_64 = FIRST_BIT_IDX - FIRST_BIT_OFFSET + 64 <= BIT_WIDTH * 32;
-
- // We do not try to read 64 bits when the bit width is a power of two (unless it is
- // necessary) because performance benchmarks show that it is better this way. We can
- // revisit it when we update the compiler version.
- constexpr bool READ_32_BITS = WORDS_TO_READ == 1
- && (!CAN_READ_64 || BitUtil::IsPowerOf2(BIT_WIDTH));
-
- if (READ_32_BITS) {
- uint32_t word = in[FIRST_WORD_IDX];
- word >>= FIRST_BIT_OFFSET < 32 ? FIRST_BIT_OFFSET : 0;
- return word & mask;
- }
-
- uint64_t word = *reinterpret_cast<const uint64_t*>(in + FIRST_WORD_IDX);
- word >>= FIRST_BIT_OFFSET;
-
- if (WORDS_TO_READ > 2) {
- constexpr int USEFUL_BITS = FIRST_BIT_OFFSET == 0 ? 0 : 64 - FIRST_BIT_OFFSET;
- uint64_t extra_word = in[FIRST_WORD_IDX + 2];
- word |= extra_word << USEFUL_BITS;
+inline uint32_t ALWAYS_INLINE UnpackValue(const uint8_t* __restrict__ in_buf) {
+ constexpr uint32_t LOAD_BIT_WIDTH = sizeof(uint32_t) * CHAR_BIT;
+ static_assert(BIT_WIDTH <= LOAD_BIT_WIDTH, "BIT_WIDTH > LOAD_BIT_WIDTH");
+ static_assert(VALUE_IDX >= 0 && VALUE_IDX < 32, "0 <= VALUE_IDX < 32");
+ // The index of the first bit of the value, relative to the start of 'in_buf'.
+ constexpr uint32_t FIRST_BIT = VALUE_IDX * BIT_WIDTH;
+ constexpr uint32_t IN_WORD_IDX = FIRST_BIT / LOAD_BIT_WIDTH;
+ constexpr uint32_t FIRST_BIT_OFFSET = FIRST_BIT % LOAD_BIT_WIDTH;
+ // Index of bit after last bit of this value, relative to start of IN_WORD_IDX.
+ constexpr uint32_t END_BIT_OFFSET = FIRST_BIT_OFFSET + BIT_WIDTH;
+
+ const uint32_t* in_words = reinterpret_cast<const uint32_t*>(in_buf);
+ // The lower bits of the value come from the first word.
+ const uint32_t lower_bits =
+ BIT_WIDTH > 0 ? in_words[IN_WORD_IDX] >> FIRST_BIT_OFFSET : 0U;
+ if (END_BIT_OFFSET < LOAD_BIT_WIDTH) {
+ // All bits of the value are in the first word, but we need to mask out upper bits
+ // that belong to the next value.
+ return lower_bits % (1UL << BIT_WIDTH);
+ } if (END_BIT_OFFSET == LOAD_BIT_WIDTH) {
+ // This value was exactly the uppermost bits of the first word - no masking required.
+ return lower_bits;
+ } else {
+ DCHECK_GT(END_BIT_OFFSET, LOAD_BIT_WIDTH);
+ DCHECK_LT(VALUE_IDX, 31)
+ << "Should not go down this branch for last value with no trailing bits.";
+ // Value is split between words, so grab trailing bits from the next word.
+ // Force into [0, LOAD_BIT_WIDTH) to avoid spurious shift >= width of type warning.
+ constexpr uint32_t NUM_TRAILING_BITS =
+ END_BIT_OFFSET < LOAD_BIT_WIDTH ? 0 : END_BIT_OFFSET - LOAD_BIT_WIDTH;
+ const uint32_t trailing_bits = in_words[IN_WORD_IDX + 1] % (1UL << NUM_TRAILING_BITS);
+ // Force into [0, LOAD_BIT_WIDTH) to avoid spurious shift >= width of type warning.
+ constexpr uint32_t TRAILING_BITS_SHIFT =
+ BIT_WIDTH == 32 ? 0 : (BIT_WIDTH - NUM_TRAILING_BITS);
+ return lower_bits | (trailing_bits << TRAILING_BITS_SHIFT);
}
-
- return word & mask;
}
template <typename OutType>
@@ -219,7 +210,7 @@ template <typename OutType, int BIT_WIDTH>
const uint8_t* BitPacking::Unpack32Values(
const uint8_t* __restrict__ in, int64_t in_bytes, OutType* __restrict__ out) {
static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
- static_assert(BIT_WIDTH <= MAX_BITWIDTH, "BIT_WIDTH too high");
+ static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
DCHECK_LE(BIT_WIDTH, sizeof(OutType) * CHAR_BIT) << "BIT_WIDTH too high for output";
constexpr int BYTES_TO_READ = BitUtil::RoundUpNumBytes(32 * BIT_WIDTH);
DCHECK_GE(in_bytes, BYTES_TO_READ);
@@ -242,8 +233,8 @@ const uint8_t* BitPacking::Unpack32Values(int bit_width, const uint8_t* __restri
case i: return Unpack32Values<OutType, i>(in, in_bytes, out);
switch (bit_width) {
- // Expand cases from 0 to 64.
- BOOST_PP_REPEAT_FROM_TO(0, 65, UNPACK_VALUES_CASE, ignore);
+ // Expand cases from 0 to 32.
+ BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
default: DCHECK(false); return in;
}
#pragma pop_macro("UNPACK_VALUES_CASE")
@@ -254,7 +245,7 @@ const uint8_t* BitPacking::UnpackAndDecode32Values(const uint8_t* __restrict__ i
int64_t in_bytes, OutType* __restrict__ dict, int64_t dict_len,
OutType* __restrict__ out, int64_t stride, bool* __restrict__ decode_error) {
static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
- static_assert(BIT_WIDTH <= MAX_BITWIDTH, "BIT_WIDTH too high");
+ static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
constexpr int BYTES_TO_READ = BitUtil::RoundUpNumBytes(32 * BIT_WIDTH);
DCHECK_GE(in_bytes, BYTES_TO_READ);
// TODO: this could be optimised further by using SIMD instructions.
@@ -278,7 +269,7 @@ template <typename OutType, int BIT_WIDTH>
const uint8_t* BitPacking::UnpackUpTo31Values(const uint8_t* __restrict__ in,
int64_t in_bytes, int num_values, OutType* __restrict__ out) {
static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
- static_assert(BIT_WIDTH <= MAX_BITWIDTH, "BIT_WIDTH too high");
+ static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
DCHECK_LE(BIT_WIDTH, sizeof(OutType) * CHAR_BIT) << "BIT_WIDTH too high for output";
constexpr int MAX_BATCH_SIZE = 31;
const int BYTES_TO_READ = BitUtil::RoundUpNumBytes(num_values * BIT_WIDTH);
@@ -319,7 +310,7 @@ const uint8_t* BitPacking::UnpackAndDecodeUpTo31Values(const uint8_t* __restrict
int64_t in_bytes, OutType* __restrict__ dict, int64_t dict_len, int num_values,
OutType* __restrict__ out, int64_t stride, bool* __restrict__ decode_error) {
static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
- static_assert(BIT_WIDTH <= MAX_BITWIDTH, "BIT_WIDTH too high");
+ static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
constexpr int MAX_BATCH_SIZE = 31;
const int BYTES_TO_READ = BitUtil::RoundUpNumBytes(num_values * BIT_WIDTH);
DCHECK_GE(in_bytes, BYTES_TO_READ);
diff --git a/be/src/util/bit-stream-utils-test.cc b/be/src/util/bit-stream-utils-test.cc
index aa157e3..e02e3c3 100644
--- a/be/src/util/bit-stream-utils-test.cc
+++ b/be/src/util/bit-stream-utils-test.cc
@@ -151,16 +151,15 @@ TEST(BitArray, TestIntSkip) {
TestSkipping<int>(buffer, bytes_written, bit_width, 8, 56);
}
-// Writes 'num_vals' values with width 'bit_width' starting from 'start' and increasing
-// and reads them back.
-void TestBitArrayValues(int bit_width, uint64_t start, uint64_t num_vals) {
+// Writes 'num_vals' values with width 'bit_width' and reads them back.
+void TestBitArrayValues(int bit_width, int num_vals) {
const int len = BitUtil::Ceil(bit_width * num_vals, 8);
- const uint64_t mask = bit_width == 64 ? ~0UL : (1UL << bit_width) - 1;
+ const int64_t mod = bit_width == 64 ? 1 : 1LL << bit_width;
uint8_t buffer[(len > 0) ? len : 1];
BitWriter writer(buffer, len);
- for (uint64_t i = 0; i < num_vals; ++i) {
- bool result = writer.PutValue((start + i) & mask, bit_width);
+ for (int i = 0; i < num_vals; ++i) {
+ bool result = writer.PutValue(i % mod, bit_width);
EXPECT_TRUE(result);
}
writer.Flush();
@@ -173,20 +172,19 @@ void TestBitArrayValues(int bit_width, uint64_t start, uint64_t num_vals) {
// Unpack all values at once with one batched reader and in small batches with the
// other batched reader.
vector<int64_t> batch_vals(num_vals);
- const uint64_t BATCH_SIZE = 32;
+ const int BATCH_SIZE = 32;
vector<int64_t> batch_vals2(BATCH_SIZE);
EXPECT_EQ(num_vals,
reader.UnpackBatch(bit_width, num_vals, batch_vals.data()));
- for (uint64_t i = 0; i < num_vals; ++i) {
+ for (int i = 0; i < num_vals; ++i) {
if (i % BATCH_SIZE == 0) {
int num_to_unpack = min(BATCH_SIZE, num_vals - i);
EXPECT_EQ(num_to_unpack,
reader2.UnpackBatch(bit_width, num_to_unpack, batch_vals2.data()));
}
- EXPECT_EQ((start + i) & mask, batch_vals[i]);
- EXPECT_EQ((start + i) & mask, batch_vals2[i % BATCH_SIZE]);
+ EXPECT_EQ(i % mod, batch_vals[i]);
+ EXPECT_EQ(i % mod, batch_vals2[i % BATCH_SIZE]);
}
-
EXPECT_EQ(reader.bytes_left(), 0);
EXPECT_EQ(reader2.bytes_left(), 0);
reader.Reset(buffer, len);
@@ -196,132 +194,13 @@ void TestBitArrayValues(int bit_width, uint64_t start, uint64_t num_vals) {
TEST(BitArray, TestValues) {
for (int width = 0; width <= MAX_WIDTH; ++width) {
- TestBitArrayValues(width, 0, 1);
- TestBitArrayValues(width, 0, 2);
+ TestBitArrayValues(width, 1);
+ TestBitArrayValues(width, 2);
// Don't write too many values
- TestBitArrayValues(width, 0, (width < 12) ? (1 << width) : 4096);
- TestBitArrayValues(width, 0, 1024);
-
- // Also test values that need bitwidth > 32.
- TestBitArrayValues(width, 1099511627775LL, 1024);
+ TestBitArrayValues(width, (width < 12) ? (1 << width) : 4096);
+ TestBitArrayValues(width, 1024);
}
}
-template<typename UINT_T>
-void TestUleb128Encode(const UINT_T value, std::vector<uint8_t> expected_bytes) {
- const int len = BatchedBitReader::max_vlq_byte_len<UINT_T>();
-
- std::vector<uint8_t> buffer(len, 0);
- BitWriter writer(buffer.data(), len);
-
- const bool success = writer.PutUleb128(value);
- EXPECT_TRUE(success);
-
- if (expected_bytes.size() < len) {
- // Allow the user to input fewer bytes than the maximum.
- expected_bytes.resize(len, 0);
- }
-
- EXPECT_EQ(expected_bytes, buffer);
}
-TEST(VLQInt, TestPutUleb128) {
- TestUleb128Encode<uint32_t>(5, {0x05});
- TestUleb128Encode<uint32_t>(2018, {0xe2, 0x0f});
- TestUleb128Encode<uint32_t>(25248, {0xa0, 0xc5, 0x01});
- TestUleb128Encode<uint32_t>(55442211, {0xa3, 0xf6, 0xb7, 0x1a});
- TestUleb128Encode<uint32_t>(4294967295, {0xff, 0xff, 0xff, 0xff, 0x0f});
-
- TestUleb128Encode<uint64_t>(5, {0x05});
- TestUleb128Encode<uint64_t>(55442211, {0xa3, 0xf6, 0xb7, 0x1a});
- TestUleb128Encode<uint64_t>(1649267441664, {0x80, 0x80, 0x80, 0x80, 0x80, 0x30});
- TestUleb128Encode<uint64_t>(60736917191292289,
- {0x81, 0xeb, 0xc1, 0xaf, 0xf8, 0xfc, 0xf1, 0x6b});
- TestUleb128Encode<uint64_t>(18446744073709551615U,
- {0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0x01});
-}
-
-template<typename UINT_T>
-void TestUleb128Decode(const UINT_T expected_value, const std::vector<uint8_t>& bytes) {
- BatchedBitReader reader(bytes.data(), bytes.size());
- UINT_T value;
- const bool success = reader.GetUleb128<UINT_T>(&value);
- EXPECT_TRUE(success);
- EXPECT_EQ(expected_value, value);
-}
-
-TEST(VLQInt, TestGetUleb128) {
- TestUleb128Decode<uint32_t>(5, {0x05});
- TestUleb128Decode<uint32_t>(2018, {0xe2, 0x0f});
- TestUleb128Decode<uint32_t>(25248, {0xa0, 0xc5, 0x01});
- TestUleb128Decode<uint32_t>(55442211, {0xa3, 0xf6, 0xb7, 0x1a});
- TestUleb128Decode<uint32_t>(4294967295, {0xff, 0xff, 0xff, 0xff, 0x0f});
-
- TestUleb128Decode<uint64_t>(5, {0x05});
- TestUleb128Decode<uint64_t>(55442211, {0xa3, 0xf6, 0xb7, 0x1a});
- TestUleb128Decode<uint64_t>(1649267441664, {0x80, 0x80, 0x80, 0x80, 0x80, 0x30});
- TestUleb128Decode<uint64_t>(60736917191292289,
- {0x81, 0xeb, 0xc1, 0xaf, 0xf8, 0xfc, 0xf1, 0x6b});
- TestUleb128Decode<uint64_t>(18446744073709551615U,
- {0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0x01});
-}
-
-template<typename INT_T>
-void TestZigZagEncode(const INT_T value, std::vector<uint8_t> expected_bytes) {
- const int len = BatchedBitReader::max_vlq_byte_len<INT_T>();
-
- std::vector<uint8_t> buffer(len, 0);
- BitWriter writer(buffer.data(), len);
-
- const bool success = writer.PutZigZagInteger(value);
- EXPECT_TRUE(success);
-
- if (expected_bytes.size() < len) {
- // Allow the user to input fewer bytes than the maximum.
- expected_bytes.resize(len, 0);
- }
-
- EXPECT_EQ(expected_bytes, buffer);
-}
-
-TEST(VLQInt, TestPutZigZagInteger) {
- TestZigZagEncode<int32_t>(-3, {0x05});
- TestZigZagEncode<int32_t>(-2485, {0xe9, 0x26});
- TestZigZagEncode<int32_t>(5648448, {0x80, 0xc1, 0xb1, 0x5});
-
- TestZigZagEncode<int64_t>(1629267541664, {0xc0, 0xfa, 0xcd, 0xfe, 0xea, 0x5e});
- TestZigZagEncode<int64_t>(-9223372036854775808U, // Most negative int64_t.
- {0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0x1});
-}
-
-template<typename INT_T>
-void TestZigZagDecode(const INT_T expected_value, std::vector<uint8_t> bytes) {
- BatchedBitReader reader(bytes.data(), bytes.size());
- INT_T value;
- const bool success = reader.GetZigZagInteger<INT_T>(&value);
- EXPECT_TRUE(success);
- EXPECT_EQ(expected_value, value);
-}
-
-TEST(VLQInt, TestGetZigZagInteger) {
- TestZigZagDecode<int32_t>(-3, {0x05});
- TestZigZagDecode<int32_t>(-2485, {0xe9, 0x26});
- TestZigZagDecode<int32_t>(5648448, {0x80, 0xc1, 0xb1, 0x5});
-
- TestZigZagDecode<int64_t>(1629267541664, {0xc0, 0xfa, 0xcd, 0xfe, 0xea, 0x5e});
- TestZigZagDecode<int64_t>(-9223372036854775808U, // Most negative int64_t.
- {0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0x1});
-}
-
-TEST(VLQInt, TestMaxVlqByteLen) {
- EXPECT_EQ(2, BatchedBitReader::max_vlq_byte_len<uint8_t>());
- EXPECT_EQ(3, BatchedBitReader::max_vlq_byte_len<uint16_t>());
- EXPECT_EQ(5, BatchedBitReader::max_vlq_byte_len<uint32_t>());
- EXPECT_EQ(10, BatchedBitReader::max_vlq_byte_len<uint64_t>());
-}
-
-}
diff --git a/be/src/util/bit-stream-utils.h b/be/src/util/bit-stream-utils.h
index 230c9dd..9889f35 100644
--- a/be/src/util/bit-stream-utils.h
+++ b/be/src/util/bit-stream-utils.h
@@ -23,17 +23,16 @@
#include <string.h>
#include "common/compiler-util.h"
#include "common/logging.h"
-#include "util/bit-packing.h"
#include "util/bit-util.h"
namespace impala {
-/// Utility class to write bit/byte streams. This class can write data to either be
+/// Utility class to write bit/byte streams. This class can write data to either be
/// bit packed or byte aligned (and a single stream that has a mix of both).
/// This class does not allocate memory.
class BitWriter {
public:
- /// buffer: buffer to write bits to. Buffer should be preallocated with
+ /// buffer: buffer to write bits to. Buffer should be preallocated with
/// 'buffer_len' bytes.
BitWriter(uint8_t* buffer, int buffer_len) :
buffer_(buffer),
@@ -65,16 +64,7 @@ class BitWriter {
/// Write an unsigned ULEB-128 encoded int to the buffer. Return false if there was not
/// enough room. The value is written byte aligned. For more details on ULEB-128:
/// https://en.wikipedia.org/wiki/LEB128
- /// UINT_T must be an unsigned integer type.
- template<typename UINT_T>
- bool PutUleb128(UINT_T v);
-
- /// Write a ZigZag encoded int to the buffer. Return false if there was not enough
- /// room. The value is written byte aligned. For more details on ZigZag encoding:
- /// https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
- /// INT_T must be a signed integer type.
- template<typename INT_T>
- bool PutZigZagInteger(INT_T v);
+ bool PutUleb128Int(uint32_t v);
/// Get a pointer to the next aligned byte and advance the underlying buffer
/// by num_bytes.
@@ -87,7 +77,7 @@ class BitWriter {
void Flush(bool align=false);
/// Maximum supported bitwidth for writer.
- static const int MAX_BITWIDTH = 64;
+ static const int MAX_BITWIDTH = 32;
private:
uint8_t* buffer_;
@@ -167,29 +157,16 @@ class BatchedBitReader {
/// at the beginning of a byte. Return false if there were not enough bytes in the
/// buffer or the int is invalid. For more details on ULEB-128:
/// https://en.wikipedia.org/wiki/LEB128
- /// UINT_T must be an unsigned integer type.
- template<typename UINT_T>
- bool GetUleb128(UINT_T* v);
-
- /// Read a ZigZag encoded int from the stream. The encoded int must start at the
- /// beginning of a byte. Return false if there were not enough bytes in the buffer or
- /// the int is invalid. For more details on ZigZag encoding:
- /// https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
- /// INT_T must be a signed integer type.
- template<typename INT_T>
- bool GetZigZagInteger(INT_T* v);
+ bool GetUleb128Int(uint32_t* v);
/// Returns the number of bytes left in the stream.
int bytes_left() { return buffer_end_ - buffer_pos_; }
- /// Maximum byte length of a vlq encoded integer of type T.
- template <typename T>
- static constexpr int max_vlq_byte_len() {
- return BitUtil::Ceil(sizeof(T) * 8, 7);
- }
+ /// Maximum byte length of a vlq encoded int
+ static const int MAX_VLQ_BYTE_LEN = 5;
/// Maximum supported bitwidth for reader.
- static const int MAX_BITWIDTH = BitPacking::MAX_BITWIDTH;
+ static const int MAX_BITWIDTH = 32;
private:
/// Current read position in the buffer.
diff --git a/be/src/util/bit-stream-utils.inline.h b/be/src/util/bit-stream-utils.inline.h
index 868b562..18df34c 100644
--- a/be/src/util/bit-stream-utils.inline.h
+++ b/be/src/util/bit-stream-utils.inline.h
@@ -21,13 +21,14 @@
#include "util/bit-stream-utils.h"
#include "common/compiler-util.h"
+#include "util/bit-packing.h"
namespace impala {
inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
+ // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
DCHECK_LE(num_bits, MAX_BITWIDTH);
- DCHECK(num_bits == MAX_BITWIDTH || v >> num_bits == 0)
- << "v = " << v << ", num_bits = " << num_bits;
+ DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
@@ -37,20 +38,11 @@ inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
if (UNLIKELY(bit_offset_ >= 64)) {
// Flush buffered_values_ and write out bits of v that did not fit
memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
+ buffered_values_ = 0;
byte_offset_ += 8;
bit_offset_ -= 64;
-
- // Shifting with the same or greater amount than the number of bits in the number is
- // undefined behaviour.
- int shift = num_bits - bit_offset_;
-
- if (LIKELY(shift < 64)) {
- buffered_values_ = v >> shift;
- } else {
- buffered_values_ = 0;
- }
+ buffered_values_ = v >> (num_bits - bit_offset_);
}
-
DCHECK_LT(bit_offset_, 64);
return true;
}
@@ -84,40 +76,16 @@ inline bool BitWriter::PutAligned(T val, int num_bytes) {
return true;
}
-template<typename UINT_T>
-inline bool BitWriter::PutUleb128(UINT_T v) {
- static_assert(std::is_integral<UINT_T>::value, "Integral type required.");
- static_assert(std::is_unsigned<UINT_T>::value, "Unsigned type required.");
- static_assert(!std::is_same<UINT_T, bool>::value, "Bools are not supported.");
-
- constexpr UINT_T mask_lowest_7_bits = std::numeric_limits<UINT_T>::max() ^ 0x7Fu;
-
+inline bool BitWriter::PutUleb128Int(uint32_t v) {
bool result = true;
- while ((v & mask_lowest_7_bits) != 0L) {
- result &= PutAligned<uint8_t>((v & 0x7Fu) | 0x80u, 1);
+ while ((v & 0xFFFFFF80) != 0L) {
+ result &= PutAligned<uint8_t>((v & 0x7F) | 0x80, 1);
v >>= 7;
}
- result &= PutAligned<uint8_t>(v & 0x7Fu, 1);
+ result &= PutAligned<uint8_t>(v & 0x7F, 1);
return result;
}
-template<typename INT_T>
-bool BitWriter::PutZigZagInteger(INT_T v) {
- static_assert(std::is_integral<INT_T>::value, "Integral type required.");
- static_assert(std::is_signed<INT_T>::value, "Signed type required.");
-
- using UINT_T = std::make_unsigned_t<INT_T>;
- constexpr int bit_length_minus_one = sizeof(INT_T) * 8 - 1;
-
- /// For negative values, << results in undefined behaviour (prior to C++20), so we treat
- /// `v` as unsigned.
- const UINT_T v_unsigned = v;
-
- /// For the right shift, we rely on implementation defined behaviour as we expect it to
- /// be an arithmetic right shift (bits equal to the MSB are shifted in).
- return PutUleb128((v_unsigned << 1) ^ (v >> bit_length_minus_one));
-}
-
template<typename T>
inline int BatchedBitReader::UnpackBatch(int bit_width, int num_values, T* v) {
DCHECK(buffer_pos_ != nullptr);
@@ -178,42 +146,19 @@ inline bool BatchedBitReader::GetBytes(int num_bytes, T* v) {
return true;
}
-template<typename UINT_T>
-inline bool BatchedBitReader::GetUleb128(UINT_T* v) {
- static_assert(std::is_integral<UINT_T>::value, "Integral type required.");
- static_assert(std::is_unsigned<UINT_T>::value, "Unsigned type required.");
- static_assert(!std::is_same<UINT_T, bool>::value, "Bools are not supported.");
-
+inline bool BatchedBitReader::GetUleb128Int(uint32_t* v) {
*v = 0;
int shift = 0;
uint8_t byte = 0;
do {
- if (UNLIKELY(shift >= max_vlq_byte_len<UINT_T>() * 7)) return false;
+ if (UNLIKELY(shift >= MAX_VLQ_BYTE_LEN * 7)) return false;
if (!GetBytes(1, &byte)) return false;
-
- /// We need to convert 'byte' to UINT_T so that the result of the bitwise and
- /// operation is at least as long an integer as '*v', otherwise the shift may be too
- /// big and lead to undefined behaviour.
- const UINT_T byte_as_UINT_T = byte;
- *v |= (byte_as_UINT_T & 0x7Fu) << shift;
+ // The constant below must be explicitly unsigned to ensure that the result of the
+ // bitwise-and is unsigned, so that the left shift is always defined behavior under
+ // the C++ standard.
+ *v |= (byte & 0x7Fu) << shift;
shift += 7;
- } while ((byte & 0x80u) != 0);
- return true;
-}
-
-template<typename INT_T>
-bool BatchedBitReader::GetZigZagInteger(INT_T* v) {
- static_assert(std::is_integral<INT_T>::value, "Integral type required.");
- static_assert(std::is_signed<INT_T>::value, "Signed type required.");
-
- using UINT_T = std::make_unsigned_t<INT_T>;
-
- UINT_T v_unsigned;
- if (UNLIKELY(!GetUleb128<UINT_T>(&v_unsigned))) return false;
-
- /// Here we rely on implementation defined behaviour in converting UINT_T to INT_T.
- *v = (v_unsigned >> 1) ^ -(v_unsigned & 1u);
-
+ } while ((byte & 0x80) != 0);
return true;
}
}
diff --git a/be/src/util/rle-encoding.h b/be/src/util/rle-encoding.h
index 322b5d7..e4b87d0 100644
--- a/be/src/util/rle-encoding.h
+++ b/be/src/util/rle-encoding.h
@@ -244,7 +244,7 @@ class RleEncoder {
1 + BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8);
/// Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value.
int max_repeated_run_size =
- BatchedBitReader::max_vlq_byte_len<uint32_t>() + BitUtil::Ceil(bit_width, 8);
+ BatchedBitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8);
return std::max(max_literal_run_size, max_repeated_run_size);
}
@@ -406,7 +406,7 @@ inline void RleEncoder::FlushRepeatedRun() {
bool result = true;
// The lsb of 0 indicates this is a repeated run
uint32_t indicator_value = static_cast<uint32_t>(repeat_count_) << 1;
- result &= bit_writer_.PutUleb128<uint32_t>(indicator_value);
+ result &= bit_writer_.PutUleb128Int(indicator_value);
result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
DCHECK(result);
num_buffered_values_ = 0;
@@ -665,7 +665,7 @@ inline void RleBatchDecoder<T>::NextCounts() {
// Read the next run's indicator int, it could be a literal or repeated run.
// The int is encoded as a ULEB128-encoded value.
uint32_t indicator_value = 0;
- if (UNLIKELY(!bit_reader_.GetUleb128<uint32_t>(&indicator_value))) return;
+ if (UNLIKELY(!bit_reader_.GetUleb128Int(&indicator_value))) return;
// lsb indicates if it is a literal run or repeated run
bool is_literal = indicator_value & 1;
diff --git a/be/src/util/rle-test.cc b/be/src/util/rle-test.cc
index f74b8c3..4bdcff1 100644
--- a/be/src/util/rle-test.cc
+++ b/be/src/util/rle-test.cc
@@ -623,7 +623,7 @@ TEST_F(RleTest, RepeatCountOverflow) {
// are 1.
const uint32_t REPEATED_RUN_HEADER = 0xfffffffe;
const uint32_t LITERAL_RUN_HEADER = 0xffffffff;
- writer.PutUleb128<uint32_t>(literal_run ? LITERAL_RUN_HEADER : REPEATED_RUN_HEADER);
+ writer.PutUleb128Int(literal_run ? LITERAL_RUN_HEADER : REPEATED_RUN_HEADER);
writer.Flush();
RleBatchDecoder<uint64_t> decoder(buffer, BUFFER_LEN, 1);