You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2023/01/13 20:06:41 UTC
[datasketches-cpp] branch theta_compression updated: bit packing tests
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch theta_compression
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
The following commit(s) were added to refs/heads/theta_compression by this push:
new 9dfdc11 bit packing tests
9dfdc11 is described below
commit 9dfdc11d741975ed5da005aed21c2334b25961bf
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jan 13 12:06:35 2023 -0800
bit packing tests
---
theta/include/bit_packing.hpp | 13 ++-----
theta/test/CMakeLists.txt | 1 +
theta/test/bit_packing_test.cpp | 77 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 81 insertions(+), 10 deletions(-)
diff --git a/theta/include/bit_packing.hpp b/theta/include/bit_packing.hpp
index 48166e8..4de0d86 100644
--- a/theta/include/bit_packing.hpp
+++ b/theta/include/bit_packing.hpp
@@ -67,17 +67,10 @@ static inline uint8_t unpack_bits(uint64_t& value, uint8_t width, const uint8_t*
return offset;
}
-static inline size_t pack_ULEB128(uint64_t value, uint8_t* ptr) {
- const uint8_t* start = ptr;
- while (value >= 0x80) {
- *ptr++ = value | 0x80;
- value >>= 7;
- }
- *ptr++ = value;
- return ptr - start;
-}
-
// pack given number of bits from a block of 8 64-bit values into bytes
+// we don't need 0 and 64 bits
+// we assume that higher bits (which we are not packing) are zeros
+// this assumption allows to avoid masking operations
static inline void pack_bits_1(const uint64_t* values, uint8_t* ptr) {
*ptr = values[0] << 7;
diff --git a/theta/test/CMakeLists.txt b/theta/test/CMakeLists.txt
index 7b1f0de..8cffa84 100644
--- a/theta/test/CMakeLists.txt
+++ b/theta/test/CMakeLists.txt
@@ -44,4 +44,5 @@ target_sources(theta_test
theta_a_not_b_test.cpp
theta_jaccard_similarity_test.cpp
theta_setop_test.cpp
+ bit_packing_test.cpp
)
diff --git a/theta/test/bit_packing_test.cpp b/theta/test/bit_packing_test.cpp
new file mode 100644
index 0000000..41dbdb7
--- /dev/null
+++ b/theta/test/bit_packing_test.cpp
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch2/catch.hpp>
+#include <bit_packing.hpp>
+
+namespace datasketches {
+
+// for every number of bits from 1 to 63
+// generate pseudo-random data, pack, unpack and compare
+
+TEST_CASE("pack unpack bits") {
+ for (uint8_t bits = 1; bits <= 63; ++bits) {
+ const uint64_t mask = (1ULL << bits) - 1;
+ std::vector<uint64_t> input(8, 0);
+ const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio
+ uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
+ for (int i = 0; i < 8; ++i) {
+ input[i] = value & mask;
+ value += golden64;
+ }
+ std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
+ uint8_t offset = 0;
+ uint8_t* ptr = bytes.data();
+ for (int i = 0; i < 8; ++i) {
+ offset = pack_bits(input[i], bits, ptr, offset);
+ }
+
+ std::vector<uint64_t> output(8, 0);
+ offset = 0;
+ const uint8_t* cptr = bytes.data();
+ for (int i = 0; i < 8; ++i) {
+ offset = unpack_bits(output[i], bits, cptr, offset);
+ }
+ for (int i = 0; i < 8; ++i) {
+ REQUIRE((input[i] & mask) == output[i]);
+ }
+ }
+}
+
+TEST_CASE("pack unpack blocks") {
+ for (uint8_t bits = 1; bits <= 63; ++bits) {
+ const uint64_t mask = (1ULL << bits) - 1;
+ std::vector<uint64_t> input(8, 0);
+ const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio
+ uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
+ for (int i = 0; i < 8; ++i) {
+ input[i] = value & mask;
+ value += golden64;
+ }
+ std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
+ pack_bits_block8(input.data(), bytes.data(), bits);
+ std::vector<uint64_t> output(8, 0);
+ unpack_bits_block8(output.data(), bytes.data(), bits);
+ for (int i = 0; i < 8; ++i) {
+ REQUIRE((input[i] & mask) == output[i]);
+ }
+ }
+}
+
+} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org