You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/03/13 00:50:56 UTC
[incubator-datasketches-website] branch master updated: HLL C++
example
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-website.git
The following commit(s) were added to refs/heads/master by this push:
new c1ffb43 HLL C++ example
c1ffb43 is described below
commit c1ffb43c44d58eaa3bb1be679b89eac5a9249aba
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Mar 12 17:50:13 2020 -0700
HLL C++ example
---
docs/HLL/HllCppExample.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 88 insertions(+)
diff --git a/docs/HLL/HllCppExample.md b/docs/HLL/HllCppExample.md
new file mode 100644
index 0000000..dd644a4
--- /dev/null
+++ b/docs/HLL/HllCppExample.md
@@ -0,0 +1,88 @@
+---
+layout: doc_page
+---
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+# HyperLogLog Sketch C++ Example
+
+ #include <iostream>
+ #include <fstream>
+
+ #include <hll.hpp>
+
+ //simplified file operations and no error handling for clarity
+ int main(int argc, char **argv) {
+   const int lg_k = 11;
+   const auto type = datasketches::HLL_4; // this is the default, but explicit here for illustration
+
+   // this section generates two sketches with some overlap and serializes them into files;
+   { // the output streams are closed when this scope ends, before the files are read back
+     // 100000 distinct keys: 0 to 99999
+     datasketches::hll_sketch sketch1(lg_k, type); // type is optional, defaults to HLL_4
+     for (int key = 0; key < 100000; key++) sketch1.update(key);
+     std::ofstream os1("hll_sketch1.bin", std::ios::binary); // binary mode: no newline translation of the serialized bytes
+     sketch1.serialize_compact(os1);
+
+     // 100000 distinct keys: 50000 to 149999 (50000 of them overlap with sketch1)
+     datasketches::hll_sketch sketch2(lg_k, type); // type is optional, defaults to HLL_4
+     for (int key = 50000; key < 150000; key++) sketch2.update(key);
+     std::ofstream os2("hll_sketch2.bin", std::ios::binary); // binary mode, same reason as above
+     sketch2.serialize_compact(os2);
+   }
+
+   // this section deserializes the sketches, produces union and prints the result
+   {
+     std::ifstream is1("hll_sketch1.bin", std::ios::binary); // must match the binary mode used for writing
+     datasketches::hll_sketch sketch1 = datasketches::hll_sketch::deserialize(is1);
+
+     std::ifstream is2("hll_sketch2.bin", std::ios::binary);
+     datasketches::hll_sketch sketch2 = datasketches::hll_sketch::deserialize(is2);
+
+     datasketches::hll_union u(lg_k);
+     u.update(sketch1);
+     u.update(sketch2);
+     datasketches::hll_sketch sketch = u.get_result(type); // type is optional, defaults to HLL_4
+
+     // debug summary of the union result sketch
+     sketch.to_string(std::cout);
+
+     std::cout << "Distinct count estimate: " << sketch.get_estimate() << std::endl;
+     std::cout << "Distinct count lower bound 95% confidence: " << sketch.get_lower_bound(2) << std::endl; // 2 is presumably the number of standard deviations - confirm in hll.hpp
+     std::cout << "Distinct count upper bound 95% confidence: " << sketch.get_upper_bound(2) << std::endl;
+   }
+
+   return 0;
+ }
+
+ ### HLL SKETCH SUMMARY:
+ Log Config K : 11
+ Hll Target : HLL_4
+ Current Mode : HLL
+ LB : 148634
+ Estimate : 152041
+ UB : 155614
+ OutOfOrder flag: true
+ CurMin : 4
+ NumAtCurMin : 21
+ HipAccum : 147291
+ KxQ0 : 19.889
+ KxQ1 : 0
+ Distinct count estimate: 152041
+ Distinct count lower bound 95% confidence: 145234
+ Distinct count upper bound 95% confidence: 159184
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org