You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/23 03:27:51 UTC
[incubator-datasketches-cpp] 02/03: tests
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 67f90407590994e8d0a4a7b67ffcbef1ddbf0f2e
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 22 20:27:06 2020 -0700
tests
---
tuple/test/theta_sketch_experimental_test.cpp | 244 +++++++++++++++++++++++---
1 file changed, 216 insertions(+), 28 deletions(-)
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index 0fb0286..e0ccdf3 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -17,40 +17,228 @@
* under the License.
*/
-#include <iostream>
+#include <fstream>
+#include <sstream>
#include <catch.hpp>
#include <theta_sketch_experimental.hpp>
-#include <../../theta/include/theta_sketch.hpp>
namespace datasketches {
-TEST_CASE("theta_sketch_experimental: basics ", "[theta_sketch]") {
- auto update_sketch = update_theta_sketch_experimental<>::builder().build();
+#ifdef TEST_BINARY_INPUT_PATH
+const std::string inputPath = TEST_BINARY_INPUT_PATH; // prefix prepended to the serialized sketch file names opened below; macro presumably supplied by the build -- confirm
+#else
+const std::string inputPath = "test/"; // fallback prefix used when TEST_BINARY_INPUT_PATH is not defined
+#endif
+
+using update_theta_sketch = update_theta_sketch_experimental<>; // alias for the experimental update sketch template with its default template arguments
+using compact_theta_sketch = compact_theta_sketch_experimental<>; // the tests store the results of compact() and compact_theta_sketch::deserialize() in this type
+
+TEST_CASE("theta sketch: empty", "[theta_sketch]") { // a sketch that received no updates must report the empty state, both before and after compact()
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+ REQUIRE(update_sketch.is_empty());
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
+ REQUIRE(update_sketch.get_theta() == 1.0); // compared exactly, without a tolerance
+ REQUIRE(update_sketch.get_estimate() == 0.0);
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0); // the argument presumably selects the number of standard deviations -- confirm against the sketch API
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
+ // the compact form is expected to report exactly the same state
+ compact_theta_sketch compact_sketch = update_sketch.compact();
+ REQUIRE(compact_sketch.is_empty());
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
+ REQUIRE(compact_sketch.get_theta() == 1.0);
+ REQUIRE(compact_sketch.get_estimate() == 0.0);
+ REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
+ REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
+}
+
+TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") { // built with set_p(0.001): the single update is expected not to be retained, yet the sketch must not be empty
+ update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
+ update_sketch.update(1);
+ //std::cerr << update_sketch.to_string();
+ REQUIRE(update_sketch.get_num_retained() == 0);
+ REQUIRE_FALSE(update_sketch.is_empty()); // an update was seen, even though nothing was kept
+ REQUIRE(update_sketch.is_estimation_mode());
+ REQUIRE(update_sketch.get_estimate() == 0.0);
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
+ REQUIRE(update_sketch.get_upper_bound(1) > 0); // the upper bound must stay positive even though the estimate is 0
+ // the compact form is expected to report the same state
+ compact_theta_sketch compact_sketch = update_sketch.compact();
+ REQUIRE(compact_sketch.get_num_retained() == 0);
+ REQUIRE_FALSE(compact_sketch.is_empty());
+ REQUIRE(compact_sketch.is_estimation_mode());
+ REQUIRE(compact_sketch.get_estimate() == 0.0);
+ REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
+ REQUIRE(compact_sketch.get_upper_bound(1) > 0);
+}
+
+TEST_CASE("theta sketch: single item", "[theta_sketch]") { // after one update the sketch must be non-empty, not in estimation mode, and report exactly 1
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+ update_sketch.update(1);
+ REQUIRE_FALSE(update_sketch.is_empty());
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
+ REQUIRE(update_sketch.get_theta() == 1.0);
+ REQUIRE(update_sketch.get_estimate() == 1.0);
+ REQUIRE(update_sketch.get_lower_bound(1) == 1.0); // both bounds must coincide with the estimate here
+ REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
+ // the compact form is expected to report the same state
+ compact_theta_sketch compact_sketch = update_sketch.compact();
+ REQUIRE_FALSE(compact_sketch.is_empty());
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
+ REQUIRE(compact_sketch.get_theta() == 1.0);
+ REQUIRE(compact_sketch.get_estimate() == 1.0);
+ REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
+ REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
+}
+
+TEST_CASE("theta sketch: resize exact", "[theta_sketch]") { // 2000 distinct updates must all be counted exactly, with theta still 1.0
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+ for (int i = 0; i < 2000; i++) update_sketch.update(i); // presumably enough updates to make the internal table grow, per the test name -- confirm
+ REQUIRE_FALSE(update_sketch.is_empty());
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
+ REQUIRE(update_sketch.get_theta() == 1.0);
+ REQUIRE(update_sketch.get_estimate() == 2000.0);
+ REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
+ REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
+ // the compact form is expected to report the same state
+ compact_theta_sketch compact_sketch = update_sketch.compact();
+ REQUIRE_FALSE(compact_sketch.is_empty());
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
+ REQUIRE(compact_sketch.get_theta() == 1.0);
+ REQUIRE(compact_sketch.get_estimate() == 2000.0);
+ REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
+ REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
+}
+
+TEST_CASE("theta sketch: estimation", "[theta_sketch]") { // 8000 distinct updates into a sketch built with resize factor X1 must put it into estimation mode
+ update_theta_sketch update_sketch = update_theta_sketch::builder().set_resize_factor(update_theta_sketch::resize_factor::X1).build();
+ const int n = 8000;
+ for (int i = 0; i < n; i++) update_sketch.update(i);
+ //std::cerr << update_sketch.to_string();
+ REQUIRE_FALSE(update_sketch.is_empty());
+ REQUIRE(update_sketch.is_estimation_mode());
+ REQUIRE(update_sketch.get_theta() < 1.0);
+ REQUIRE(update_sketch.get_estimate() == Approx((double) n).margin(n * 0.01)); // estimate must be within n * 0.01 = 80 of n
+ REQUIRE(update_sketch.get_lower_bound(1) < n); // the bounds must strictly bracket the true count
+ REQUIRE(update_sketch.get_upper_bound(1) > n);
+ // trim() is expected to cut the retained entries down to exactly k = 2^DEFAULT_LG_K
+ const uint32_t k = 1 << update_theta_sketch::builder::DEFAULT_LG_K;
+ REQUIRE(update_sketch.get_num_retained() >= k);
+ update_sketch.trim();
+ REQUIRE(update_sketch.get_num_retained() == k);
+ // the compact form must be ordered and report the same estimation-mode state
+ compact_theta_sketch compact_sketch = update_sketch.compact();
+ REQUIRE_FALSE(compact_sketch.is_empty());
+ REQUIRE(compact_sketch.is_ordered());
+ REQUIRE(compact_sketch.is_estimation_mode());
+ REQUIRE(compact_sketch.get_theta() < 1.0);
+ REQUIRE(compact_sketch.get_estimate() == Approx((double) n).margin(n * 0.01));
+ REQUIRE(compact_sketch.get_lower_bound(1) < n);
+ REQUIRE(compact_sketch.get_upper_bound(1) > n);
+}
+
+TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") { // reads a serialized sketch file (presumably written by the Java library, per its name -- confirm) and expects the empty state
+ std::ifstream is;
+ is.exceptions(std::ios::failbit | std::ios::badbit); // set before open() so a missing or unreadable file throws instead of failing silently
+ is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
+ auto sketch = compact_theta_sketch::deserialize(is);
+ REQUIRE(sketch.is_empty());
+ REQUIRE_FALSE(sketch.is_estimation_mode());
+ REQUIRE(sketch.get_num_retained() == 0);
+ REQUIRE(sketch.get_theta() == 1.0);
+ REQUIRE(sketch.get_estimate() == 0.0);
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
+}
+
+TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") { // reads a serialized sketch file (presumably written by the Java library, per its name -- confirm) and expects exactly one retained entry
+ std::ifstream is;
+ is.exceptions(std::ios::failbit | std::ios::badbit); // set before open() so a missing or unreadable file throws instead of failing silently
+ is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
+ auto sketch = compact_theta_sketch::deserialize(is);
+ REQUIRE_FALSE(sketch.is_empty());
+ REQUIRE_FALSE(sketch.is_estimation_mode());
+ REQUIRE(sketch.get_num_retained() == 1);
+ REQUIRE(sketch.get_theta() == 1.0);
+ REQUIRE(sketch.get_estimate() == 1.0);
+ REQUIRE(sketch.get_lower_bound(1) == 1.0);
+ REQUIRE(sketch.get_upper_bound(1) == 1.0);
+}
+
+TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") { // checks a deserialized estimation-mode sketch against hard-coded values and against a sketch built locally from 8192 updates
+ std::ifstream is;
+ is.exceptions(std::ios::failbit | std::ios::badbit); // set before open() so a missing or unreadable file throws instead of failing silently
+ is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
+ auto sketch = compact_theta_sketch::deserialize(is);
+ REQUIRE_FALSE(sketch.is_empty());
+ REQUIRE(sketch.is_estimation_mode());
+ REQUIRE(sketch.is_ordered());
+ REQUIRE(sketch.get_num_retained() == 4342); // hard-coded expectations; presumably recorded from the sketch that produced the file -- confirm
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
+
+ // the same construction process in Java must have produced exactly the same sketch
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+ const int n = 8192;
+ for (int i = 0; i < n; i++) update_sketch.update(i);
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10)); // bounds are compared for arguments 1, 2 and 3
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
+ compact_theta_sketch compact_sketch = update_sketch.compact();
+ // the sketches are ordered, so the iteration sequence must match exactly
+ auto iter = sketch.begin(); // advanced in lockstep with the range-for; relies on the equal retained counts required above (assuming compact() keeps that count)
+ for (const auto& key: compact_sketch) {
+ REQUIRE(*iter == key);
+ ++iter;
+ }
+}
+
+TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") { // the stream and byte-buffer serialization paths must produce identical bytes and deserialize to matching sketches
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+ const int n = 8192;
+ for (int i = 0; i < n; i++) update_sketch.update(i);
+ // serialize the compact form twice: once into a stream, once into a byte container
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+ update_sketch.compact().serialize(s);
+ auto bytes = update_sketch.compact().serialize();
+ REQUIRE(bytes.size() == static_cast<size_t>(s.tellp())); // the write position equals the number of bytes written to the stream
+ for (size_t i = 0; i < bytes.size(); ++i) {
+ REQUIRE(((char*)bytes.data())[i] == (char)s.get()); // byte-by-byte comparison; s.get() advances the read position
+ }
+
+ s.seekg(0); // rewind
+ compact_theta_sketch deserialized_sketch1 = compact_theta_sketch::deserialize(s);
+ compact_theta_sketch deserialized_sketch2 = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
+ REQUIRE(bytes.size() == static_cast<size_t>(s.tellg())); // deserializing from the stream must have read exactly as many bytes as the buffer holds
+ REQUIRE(deserialized_sketch2.is_empty() == deserialized_sketch1.is_empty());
+ REQUIRE(deserialized_sketch2.is_ordered() == deserialized_sketch1.is_ordered());
+ REQUIRE(deserialized_sketch2.get_num_retained() == deserialized_sketch1.get_num_retained());
+ REQUIRE(deserialized_sketch2.get_theta() == deserialized_sketch1.get_theta());
+ REQUIRE(deserialized_sketch2.get_estimate() == deserialized_sketch1.get_estimate());
+ REQUIRE(deserialized_sketch2.get_lower_bound(1) == deserialized_sketch1.get_lower_bound(1));
+ REQUIRE(deserialized_sketch2.get_upper_bound(1) == deserialized_sketch1.get_upper_bound(1));
+ // the sketches are ordered, so the iteration sequence must match exactly
+ auto iter = deserialized_sketch1.begin(); // advanced in lockstep with the range-for; the retained counts were required to be equal above
+ for (auto key: deserialized_sketch2) {
+ REQUIRE(*iter == key);
+ ++iter;
+ }
+}
+
+TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") { // deserializing a single-item sketch from a truncated buffer must throw std::out_of_range
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
update_sketch.update(1);
- update_sketch.update(2);
- REQUIRE(update_sketch.get_num_retained() == 2);
- int count = 0;
- for (const auto& entry: update_sketch) ++count;
- REQUIRE(count == 2);
-
- auto compact_sketch = update_sketch.compact();
- REQUIRE(compact_sketch.get_num_retained() == 2);
-}
-
-//TEST_CASE("theta_sketch_experimental: compare with theta production", "[theta_sketch]") {
-// auto test = theta_sketch_experimental<>::builder().build();
-// update_theta_sketch prod = update_theta_sketch::builder().build();
-//
-// for (int i = 0; i < 1000000; ++i) {
-// test.update(i);
-// prod.update(i);
-// }
-//
-// std::cout << "--- theta production vs experimental ---" << std::endl;
-// std::cout << test.to_string(true);
-// std::cout << "sizeof(update_theta_sketch)=" << sizeof(update_theta_sketch) << std::endl;
-// std::cout << prod.to_string(true);
-//}
+ auto bytes = update_sketch.compact().serialize();
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range); // hard-coded length 7; presumably shorter than the smallest valid header -- confirm against the serialization format
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range); // one byte short of the full serialized image
+}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org