You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/03/18 01:29:51 UTC
[incubator-datasketches-website] 01/01: frequent items c++ example
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch more_examples
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-website.git
commit ef6dffd56ae6534872a1bf0cf9e5f2d3cade3b9a
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Mar 17 18:29:20 2020 -0700
frequent items c++ example
---
docs/Frequency/FrequentItemsCppExample.md | 94 +++++++++++++++++++++++++++++++
src/main/resources/docgen/toc.json | 1 +
2 files changed, 95 insertions(+)
diff --git a/docs/Frequency/FrequentItemsCppExample.md b/docs/Frequency/FrequentItemsCppExample.md
new file mode 100644
index 0000000..d4ce69f
--- /dev/null
+++ b/docs/Frequency/FrequentItemsCppExample.md
@@ -0,0 +1,94 @@
+---
+layout: doc_page
+---
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+# Frequent Items Sketch C++ Example
+
+ #include <iostream>
+ #include <fstream>
+
+ #include <frequent_items_sketch.hpp>
+
+ //simplified file operations and no error handling for clarity
+ int main(int argc, char **argv) { // arguments are unused; the file names below are hard-coded
+   using frequent_strings_sketch = datasketches::frequent_items_sketch<std::string>;
+
+   // this section generates two sketches and serializes them into files
+   { // the output streams are closed at the end of this scope, before the files are read back
+     frequent_strings_sketch sketch1(64);
+     sketch1.update("a");
+     sketch1.update("a");
+     sketch1.update("b");
+     sketch1.update("c");
+     sketch1.update("a");
+     sketch1.update("d");
+     sketch1.update("a");
+     std::ofstream os1("freq_str_sketch1.bin", std::ios::binary); // raw bytes, so no newline translation
+     sketch1.serialize(os1);
+
+     frequent_strings_sketch sketch2(64);
+     sketch2.update("e");
+     sketch2.update("a");
+     sketch2.update("f");
+     sketch2.update("f");
+     sketch2.update("f");
+     sketch2.update("g");
+     sketch2.update("a");
+     sketch2.update("f");
+     std::ofstream os2("freq_str_sketch2.bin", std::ios::binary);
+     sketch2.serialize(os2);
+   }
+
+   // this section deserializes the sketches, produces a union and prints the result
+   {
+     std::ifstream is1("freq_str_sketch1.bin", std::ios::binary); // must match the mode used for writing
+     frequent_strings_sketch sketch1 = frequent_strings_sketch::deserialize(is1);
+
+     std::ifstream is2("freq_str_sketch2.bin", std::ios::binary);
+     frequent_strings_sketch sketch2 = frequent_strings_sketch::deserialize(is2);
+
+     // we could merge sketch2 into sketch1 or the other way around
+     // this is an example of using a new sketch as a union and keeping the original sketches intact
+     frequent_strings_sketch u(64);
+     u.merge(sketch1);
+     u.merge(sketch2);
+
+     const auto items = u.get_frequent_items(datasketches::NO_FALSE_POSITIVES);
+     std::cout << "Frequent strings: " << items.size() << '\n';
+     std::cout << "Str\tEst\tLB\tUB" << '\n';
+     for (const auto& row : items) { // bind by reference: each row holds a std::string item
+       std::cout << row.get_item() << "\t" << row.get_estimate() << "\t"
+         << row.get_lower_bound() << "\t" << row.get_upper_bound() << '\n';
+     }
+   }
+
+   return 0;
+ }
+
+ Output:
+ Frequent strings: 7
+ Str Est LB UB
+ a 6 6 6
+ f 4 4 4
+ c 1 1 1
+ d 1 1 1
+ e 1 1 1
+ b 1 1 1
+ g 1 1 1
diff --git a/src/main/resources/docgen/toc.json b/src/main/resources/docgen/toc.json
index 88099d8..304e58e 100644
--- a/src/main/resources/docgen/toc.json
+++ b/src/main/resources/docgen/toc.json
@@ -172,6 +172,7 @@
{ "class":"Dropdown", "desc" : "Most Frequent Examples", "array":
[
{"class":"Doc", "desc" : "Frequent Items Java Example", "dir" : "Frequency", "file": "FrequentItemsJavaExample" },
+ {"class":"Doc", "desc" : "Frequent Items C++ Example", "dir" : "Frequency", "file": "FrequentItemsCppExample" },
{"class":"Doc", "desc" : "Frequent Items Pig UDFs", "dir" : "Frequency", "file": "FrequentItemsPigUDFs" },
{"class":"Doc", "desc" : "Frequent Items Hive UDFs", "dir" : "Frequency", "file": "FrequentItemsHiveUDFs" },
{"class":"Doc", "desc" : "Using Sketches in Druid", "dir" : "", "file": "DruidIntegration" },
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org