You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2019/08/19 23:47:31 UTC
[incubator-datasketches-cpp] 02/02: more tests for kll sketch of strings
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch kll_minor_cleanup
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit c868803ef6181ac3783905d968e6ff90aa3e82c5
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Aug 19 16:47:13 2019 -0700
more tests for kll sketch of strings
---
kll/test/kll_sketch_test.cpp | 96 +++++++++++++++++++++++++++++++-------------
1 file changed, 69 insertions(+), 27 deletions(-)
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index 8e12791..c14bb03 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -67,7 +67,9 @@ class kll_sketch_test: public CppUnit::TestFixture {
CPPUNIT_TEST(merge_min_value_from_other);
CPPUNIT_TEST(merge_min_and_max_from_other);
CPPUNIT_TEST(sketch_of_ints);
- CPPUNIT_TEST(sketch_of_strings);
+ CPPUNIT_TEST(sketch_of_strings_stream);
+ CPPUNIT_TEST(sketch_of_strings_bytes);
+ CPPUNIT_TEST(sketch_of_strings_single_item_bytes);
CPPUNIT_TEST(copy);
CPPUNIT_TEST_SUITE_END();
@@ -502,43 +504,83 @@ public:
CPPUNIT_ASSERT_EQUAL(sketch.get_rank(n), sketch2.get_rank(n));
}
- void sketch_of_strings() {
- kll_string_sketch sketch;
- CPPUNIT_ASSERT_THROW(sketch.get_quantile(0), std::runtime_error);
- CPPUNIT_ASSERT_THROW(sketch.get_min_value(), std::runtime_error);
- CPPUNIT_ASSERT_THROW(sketch.get_max_value(), std::runtime_error);
- CPPUNIT_ASSERT_EQUAL(8u, sketch.get_serialized_size_bytes());
+ void sketch_of_strings_stream() {
+ kll_string_sketch sketch1;
+ CPPUNIT_ASSERT_THROW(sketch1.get_quantile(0), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_min_value(), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_max_value(), std::runtime_error);
+ CPPUNIT_ASSERT_EQUAL(8u, sketch1.get_serialized_size_bytes());
- const int n(1000);
- for (int i = 0; i < n; i++) sketch.update(std::to_string(i));
+ const int n = 1000;
+ for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
- CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch.get_min_value());
- CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch.get_max_value());
+ CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch1.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch1.get_max_value());
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
- sketch.serialize(s);
- CPPUNIT_ASSERT_EQUAL(sketch.get_serialized_size_bytes(), (uint32_t) s.tellp());
+ sketch1.serialize(s);
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_serialized_size_bytes(), (uint32_t) s.tellp());
auto sketch2 = kll_string_sketch::deserialize(s);
CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) s.tellg());
CPPUNIT_ASSERT_EQUAL(s.tellp(), s.tellg());
- CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch2.is_empty());
- CPPUNIT_ASSERT_EQUAL(sketch.is_estimation_mode(), sketch2.is_estimation_mode());
- CPPUNIT_ASSERT_EQUAL(sketch.get_n(), sketch2.get_n());
- CPPUNIT_ASSERT_EQUAL(sketch.get_num_retained(), sketch2.get_num_retained());
- CPPUNIT_ASSERT_EQUAL(sketch.get_min_value(), sketch2.get_min_value());
- CPPUNIT_ASSERT_EQUAL(sketch.get_max_value(), sketch2.get_max_value());
- CPPUNIT_ASSERT_EQUAL(sketch.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
- CPPUNIT_ASSERT_EQUAL(sketch.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
- CPPUNIT_ASSERT_EQUAL(sketch.get_quantile(0.5), sketch2.get_quantile(0.5));
- CPPUNIT_ASSERT_EQUAL(sketch.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
- CPPUNIT_ASSERT_EQUAL(sketch.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_empty(), sketch2.is_empty());
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_estimation_mode(), sketch2.is_estimation_mode());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_n(), sketch2.get_n());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_num_retained(), sketch2.get_num_retained());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_min_value(), sketch2.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_max_value(), sketch2.get_max_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_quantile(0.5), sketch2.get_quantile(0.5));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
// to take a look using hexdump
- std::ofstream os("kll-string.bin");
- sketch.serialize(os);
+ //std::ofstream os("kll-string.bin");
+ //sketch1.serialize(os);
// debug print
- //sketch.to_stream(std::cout);
+ //sketch1.to_stream(std::cout);
+ }
+
+ void sketch_of_strings_bytes() {
+ kll_string_sketch sketch1;
+ CPPUNIT_ASSERT_THROW(sketch1.get_quantile(0), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_min_value(), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_max_value(), std::runtime_error);
+ CPPUNIT_ASSERT_EQUAL(8u, sketch1.get_serialized_size_bytes());
+
+ const int n = 1000;
+ for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
+
+ CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch1.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch1.get_max_value());
+
+ auto data = sketch1.serialize();
+ CPPUNIT_ASSERT_EQUAL((size_t) sketch1.get_serialized_size_bytes(), data.second);
+ auto sketch2 = kll_string_sketch::deserialize(data.first.get(), data.second);
+ CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) data.second);
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_empty(), sketch2.is_empty());
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_estimation_mode(), sketch2.is_estimation_mode());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_n(), sketch2.get_n());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_num_retained(), sketch2.get_num_retained());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_min_value(), sketch2.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_max_value(), sketch2.get_max_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_quantile(0.5), sketch2.get_quantile(0.5));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
+ }
+
+
+ void sketch_of_strings_single_item_bytes() {
+ kll_string_sketch sketch1;
+ sketch1.update("a");
+ auto data = sketch1.serialize();
+ CPPUNIT_ASSERT_EQUAL((size_t) sketch1.get_serialized_size_bytes(), data.second);
+ auto sketch2 = kll_string_sketch::deserialize(data.first.get(), data.second);
+ CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) data.second);
}
void copy() {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org