You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/03/18 01:29:51 UTC

[incubator-datasketches-website] 01/01: frequent items c++ example

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch more_examples
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-website.git

commit ef6dffd56ae6534872a1bf0cf9e5f2d3cade3b9a
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Mar 17 18:29:20 2020 -0700

    frequent items c++ example
---
 docs/Frequency/FrequentItemsCppExample.md | 94 +++++++++++++++++++++++++++++++
 src/main/resources/docgen/toc.json        |  1 +
 2 files changed, 95 insertions(+)

diff --git a/docs/Frequency/FrequentItemsCppExample.md b/docs/Frequency/FrequentItemsCppExample.md
new file mode 100644
index 0000000..d4ce69f
--- /dev/null
+++ b/docs/Frequency/FrequentItemsCppExample.md
@@ -0,0 +1,94 @@
+---
+layout: doc_page
+---
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+# Frequent Items Sketch C++ Example
+
+    #include <iostream>
+    #include <fstream>
+
+    #include <frequent_items_sketch.hpp>
+
+    //simplified file operations and no error handling for clarity
+    int main(int argc, char **argv) {
+      typedef datasketches::frequent_items_sketch<std::string> frequent_strings_sketch;
+
+      // this section generates two sketches and serializes them into files
+      {
+        frequent_strings_sketch sketch1(64);
+        sketch1.update("a");
+        sketch1.update("a");
+        sketch1.update("b");
+        sketch1.update("c");
+        sketch1.update("a");
+        sketch1.update("d");
+        sketch1.update("a");
+        std::ofstream os1("freq_str_sketch1.bin");
+        sketch1.serialize(os1);
+
+        frequent_strings_sketch sketch2(64);
+        sketch2.update("e");
+        sketch2.update("a");
+        sketch2.update("f");
+        sketch2.update("f");
+        sketch2.update("f");
+        sketch2.update("g");
+        sketch2.update("a");
+        sketch2.update("f");
+        std::ofstream os2("freq_str_sketch2.bin");
+        sketch2.serialize(os2);
+      }
+
+      // this section deserializes the sketches, produces a union and prints the result
+      {
+        std::ifstream is1("freq_str_sketch1.bin");
+        frequent_strings_sketch sketch1 = frequent_strings_sketch::deserialize(is1);
+
+        std::ifstream is2("freq_str_sketch2.bin");
+        frequent_strings_sketch sketch2 = frequent_strings_sketch::deserialize(is2);
+
+        // we could merge sketch2 into sketch1 or the other way around
+        // this is an example of using a new sketch as a union and keeping the original sketches intact
+        frequent_strings_sketch u(64);
+        u.merge(sketch1);
+        u.merge(sketch2);
+
+        auto items = u.get_frequent_items(datasketches::NO_FALSE_POSITIVES);
+        std::cout << "Frequent strings: " << items.size() << std::endl;
+        std::cout << "Str\tEst\tLB\tUB" << std::endl;
+        for (auto row: items) {
+          std::cout << row.get_item() << "\t" << row.get_estimate() << "\t"
+            << row.get_lower_bound() << "\t" << row.get_upper_bound() << std::endl;
+        }
+      }
+
+      return 0;
+    }
+
+    Output:
+    Frequent strings: 7
+    Str	Est	LB	UB
+    a	6	6	6
+    f	4	4	4
+    c	1	1	1
+    d	1	1	1
+    e	1	1	1
+    b	1	1	1
+    g	1	1	1
diff --git a/src/main/resources/docgen/toc.json b/src/main/resources/docgen/toc.json
index 88099d8..304e58e 100644
--- a/src/main/resources/docgen/toc.json
+++ b/src/main/resources/docgen/toc.json
@@ -172,6 +172,7 @@
                 { "class":"Dropdown", "desc" : "Most Frequent Examples", "array":
                   [
                     {"class":"Doc",  "desc" : "Frequent Items Java Example",  "dir" : "Frequency", "file": "FrequentItemsJavaExample" },
+                    {"class":"Doc",  "desc" : "Frequent Items C++ Example",   "dir" : "Frequency", "file": "FrequentItemsCppExample" },
                     {"class":"Doc",  "desc" : "Frequent Items Pig UDFs",      "dir" : "Frequency", "file": "FrequentItemsPigUDFs" },
                     {"class":"Doc",  "desc" : "Frequent Items Hive UDFs",     "dir" : "Frequency", "file": "FrequentItemsHiveUDFs" },
                     {"class":"Doc",  "desc" : "Using Sketches in Druid",              "dir" : "",    "file": "DruidIntegration" },


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org