You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2023/01/13 20:06:41 UTC

[datasketches-cpp] branch theta_compression updated: bit packing tests

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch theta_compression
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/theta_compression by this push:
     new 9dfdc11  bit packing tests
9dfdc11 is described below

commit 9dfdc11d741975ed5da005aed21c2334b25961bf
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jan 13 12:06:35 2023 -0800

    bit packing tests
---
 theta/include/bit_packing.hpp   | 13 ++-----
 theta/test/CMakeLists.txt       |  1 +
 theta/test/bit_packing_test.cpp | 77 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/theta/include/bit_packing.hpp b/theta/include/bit_packing.hpp
index 48166e8..4de0d86 100644
--- a/theta/include/bit_packing.hpp
+++ b/theta/include/bit_packing.hpp
@@ -67,17 +67,10 @@ static inline uint8_t unpack_bits(uint64_t& value, uint8_t width, const uint8_t*
   return offset;
 }
 
-static inline size_t pack_ULEB128(uint64_t value, uint8_t* ptr) {
-  const uint8_t* start = ptr;
-  while (value >= 0x80) {
-    *ptr++ = value | 0x80;
-    value >>= 7;
-  }
-  *ptr++ = value;
-  return ptr - start;
-}
-
 // pack given number of bits from a block of 8 64-bit values into bytes
+// we don't need 0 and 64 bits
+// we assume that higher bits (which we are not packing) are zeros
+// this assumption allows to avoid masking operations
 
 static inline void pack_bits_1(const uint64_t* values, uint8_t* ptr) {
   *ptr = values[0] << 7;
diff --git a/theta/test/CMakeLists.txt b/theta/test/CMakeLists.txt
index 7b1f0de..8cffa84 100644
--- a/theta/test/CMakeLists.txt
+++ b/theta/test/CMakeLists.txt
@@ -44,4 +44,5 @@ target_sources(theta_test
     theta_a_not_b_test.cpp
     theta_jaccard_similarity_test.cpp
     theta_setop_test.cpp
+    bit_packing_test.cpp
 )
diff --git a/theta/test/bit_packing_test.cpp b/theta/test/bit_packing_test.cpp
new file mode 100644
index 0000000..41dbdb7
--- /dev/null
+++ b/theta/test/bit_packing_test.cpp
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch2/catch.hpp>
+#include <bit_packing.hpp>
+
+namespace datasketches {
+
+// for every number of bits from 1 to 63
+// generate pseudo-random data, pack, unpack and compare
+
+TEST_CASE("pack unpack bits") {
+  for (uint8_t bits = 1; bits <= 63; ++bits) {
+    const uint64_t mask = (1ULL << bits) - 1;
+    std::vector<uint64_t> input(8, 0);
+    const uint64_t golden64 = 0x9e3779b97f4a7c13ULL;  // the golden ratio
+    uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
+    for (int i = 0; i < 8; ++i) {
+      input[i] = value & mask;
+      value += golden64;
+    }
+    std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
+    uint8_t offset = 0;
+    uint8_t* ptr = bytes.data();
+    for (int i = 0; i < 8; ++i) {
+      offset = pack_bits(input[i], bits, ptr, offset);
+    }
+
+    std::vector<uint64_t> output(8, 0);
+    offset = 0;
+    const uint8_t* cptr = bytes.data();
+    for (int i = 0; i < 8; ++i) {
+      offset = unpack_bits(output[i], bits, cptr, offset);
+    }
+    for (int i = 0; i < 8; ++i) {
+      REQUIRE((input[i] & mask) == output[i]);
+    }
+  }
+}
+
+TEST_CASE("pack unpack blocks") {
+  for (uint8_t bits = 1; bits <= 63; ++bits) {
+    const uint64_t mask = (1ULL << bits) - 1;
+    std::vector<uint64_t> input(8, 0);
+    const uint64_t golden64 = 0x9e3779b97f4a7c13ULL;  // the golden ratio
+    uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
+    for (int i = 0; i < 8; ++i) {
+      input[i] = value & mask;
+      value += golden64;
+    }
+    std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
+    pack_bits_block8(input.data(), bytes.data(), bits);
+    std::vector<uint64_t> output(8, 0);
+    unpack_bits_block8(output.data(), bytes.data(), bits);
+    for (int i = 0; i < 8; ++i) {
+      REQUIRE((input[i] & mask) == output[i]);
+    }
+  }
+}
+
+} /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org