You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/08/20 23:10:44 UTC

[incubator-datasketches-cpp] branch tuple_sketch updated: theta to tuple adapter for mixed set operations

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/tuple_sketch by this push:
     new c2c7158  theta to tuple adapter for mixed set operations
c2c7158 is described below

commit c2c7158e9b6c790ff60ed1c996458092639ca4c6
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Aug 20 16:10:32 2020 -0700

    theta to tuple adapter for mixed set operations
---
 tuple/CMakeLists.txt                               |  3 +
 tuple/include/theta_to_tuple_sketch_adapter.hpp    | 77 ++++++++++++++++++
 .../include/theta_to_tuple_sketch_adapter_impl.hpp | 93 ++++++++++++++++++++++
 tuple/test/CMakeLists.txt                          |  1 +
 tuple/test/mixed_union_test.cpp                    | 74 +++++++++++++++++
 5 files changed, 248 insertions(+)

diff --git a/tuple/CMakeLists.txt b/tuple/CMakeLists.txt
index fce73ab..2a452f7 100644
--- a/tuple/CMakeLists.txt
+++ b/tuple/CMakeLists.txt
@@ -44,6 +44,7 @@ list(APPEND tuple_HEADERS "include/theta_set_difference_base.hpp;include/theta_s
 list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
 list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_to_tuple_sketch_adapter.hpp;include/theta_to_tuple_sketch_adapter_impl.hpp")
 
 install(TARGETS tuple
   EXPORT ${PROJECT_NAME}
@@ -76,4 +77,6 @@ target_sources(tuple
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_to_tuple_sketch_adapter.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_to_tuple_sketch_adapter_impl.hpp
 )
diff --git a/tuple/include/theta_to_tuple_sketch_adapter.hpp b/tuple/include/theta_to_tuple_sketch_adapter.hpp
new file mode 100644
index 0000000..2a3cb7e
--- /dev/null
+++ b/tuple/include/theta_to_tuple_sketch_adapter.hpp
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_TO_TUPLE_SKETCH_ADAPTER_HPP_
+#define THETA_TO_TUPLE_SKETCH_ADAPTER_HPP_
+
+#include <memory>
+
+#include "theta_sketch_experimental.hpp"
+
+namespace datasketches {
+
+template<typename Summary, typename Allocator = std::allocator<uint64_t>>
+class theta_to_tuple_sketch_adapter { // view over a theta sketch whose iterator yields (uint64_t, Summary) pairs
+public:
+  theta_to_tuple_sketch_adapter(const update_theta_sketch_experimental<Allocator>& sketch, const Summary& summary);
+  theta_to_tuple_sketch_adapter(const compact_theta_sketch_experimental<Allocator>& sketch, const Summary& summary);
+  bool is_empty() const; // this and the three accessors below forward to the wrapped sketch
+  bool is_ordered() const;
+  uint16_t get_seed_hash() const;
+  uint64_t get_theta64() const;
+
+  class const_iterator; // defined after this class
+  const_iterator begin(); // NOTE(review): begin()/end() are not const-qualified, so a const adapter cannot be iterated
+  const_iterator end();
+
+private:
+  const theta_sketch_experimental<Allocator>* sketch_ptr; // non-owning: address of the sketch given to the constructor
+  Summary summary; // copy of the constructor's summary, handed to every iterator
+};
+
+template<typename Summary, typename Allocator>
+class theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator {
+public:
+  using Entry = std::pair<uint64_t, Summary>; // value read from the theta iterator, paired with the summary
+  using theta_const_iterator = typename theta_sketch_experimental<Allocator>::const_iterator;
+
+  using iterator_category = std::forward_iterator_tag; // iterator traits member types
+  using value_type = Entry;
+  using difference_type = std::ptrdiff_t;
+  using pointer = Entry*;
+  using reference = Entry&;
+
+  const_iterator(const theta_const_iterator& it, const Summary& summary);
+  const_iterator& operator++();
+  const_iterator operator++(int);
+  bool operator==(const const_iterator& other) const; // compares the wrapped theta iterators only
+  bool operator!=(const const_iterator& other) const;
+  Entry& operator*() const; // returns a reference to the cached `entry`, which is rebuilt on each call
+
+private:
+  theta_const_iterator it; // current position in the wrapped theta sketch
+  Summary summary; // summary attached to every produced entry
+  mutable Entry entry; // mutable so that the const operator* can overwrite it
+};
+
+} /* namespace datasketches */
+
+#include "theta_to_tuple_sketch_adapter_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_to_tuple_sketch_adapter_impl.hpp b/tuple/include/theta_to_tuple_sketch_adapter_impl.hpp
new file mode 100644
index 0000000..3318700
--- /dev/null
+++ b/tuple/include/theta_to_tuple_sketch_adapter_impl.hpp
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace datasketches {
+
+template<typename Summary, typename Allocator>
+theta_to_tuple_sketch_adapter<Summary, Allocator>::theta_to_tuple_sketch_adapter(const update_theta_sketch_experimental<Allocator>& sketch, const Summary& summary):
+sketch_ptr(&sketch), summary(summary) {} // stores only the address of the update sketch (no copy); the summary is copied
+
+template<typename Summary, typename Allocator>
+theta_to_tuple_sketch_adapter<Summary, Allocator>::theta_to_tuple_sketch_adapter(const compact_theta_sketch_experimental<Allocator>& sketch, const Summary& summary):
+sketch_ptr(&sketch), summary(summary) {} // stores only the address of the compact sketch (no copy); the summary is copied
+
+template<typename Summary, typename Allocator>
+bool theta_to_tuple_sketch_adapter<Summary, Allocator>::is_empty() const {
+  return sketch_ptr->is_empty(); // forwarded unchanged from the wrapped theta sketch
+}
+
+template<typename Summary, typename Allocator>
+bool theta_to_tuple_sketch_adapter<Summary, Allocator>::is_ordered() const {
+  return sketch_ptr->is_ordered(); // forwarded unchanged from the wrapped theta sketch
+}
+
+template<typename Summary, typename Allocator>
+uint16_t theta_to_tuple_sketch_adapter<Summary, Allocator>::get_seed_hash() const {
+  return sketch_ptr->get_seed_hash(); // forwarded unchanged from the wrapped theta sketch
+}
+
+template<typename Summary, typename Allocator>
+uint64_t theta_to_tuple_sketch_adapter<Summary, Allocator>::get_theta64() const {
+  return sketch_ptr->get_theta64(); // forwarded unchanged from the wrapped theta sketch
+}
+
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::begin() -> const_iterator {
+  return const_iterator(sketch_ptr->begin(), summary); // wraps the sketch's begin iterator together with the stored summary
+}
+
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::end() -> const_iterator {
+  return const_iterator(sketch_ptr->end(), summary); // wraps the sketch's end iterator; only the position matters for comparison
+}
+
+template<typename Summary, typename Allocator>
+theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::const_iterator(const theta_const_iterator& it, const Summary& summary):
+it(it), summary(summary), entry(0, summary) {} // entry starts as a placeholder with key 0; operator* overwrites it before it is returned
+
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator++() -> const_iterator& {
+  ++it; // advance the wrapped theta iterator; the cached entry is not touched here, only in operator*
+  return *this;
+}
+
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator++(int) -> const_iterator {
+  const_iterator tmp(*this); // snapshot of the state before advancing (copies the summary and cached entry too)
+  operator++();
+  return tmp; // post-increment returns the pre-increment copy
+}
+
+template<typename Summary, typename Allocator>
+bool theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator==(const const_iterator& other) const {
+  return this->it == other.it; // decided by the wrapped theta iterators alone; summary and cached entry are ignored
+}
+
+template<typename Summary, typename Allocator>
+bool theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator!=(const const_iterator& other) const {
+  return this->it != other.it; // decided by the wrapped theta iterators alone; summary and cached entry are ignored
+}
+
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator*() const -> Entry& {
+  entry = Entry(*it, summary); // rebuilt on every dereference, so earlier writes through the returned reference are discarded
+  return entry; // reference to the iterator's own cached pair
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
index 53a3a7e..17be15b 100644
--- a/tuple/test/CMakeLists.txt
+++ b/tuple/test/CMakeLists.txt
@@ -46,4 +46,5 @@ target_sources(tuple_test
     theta_sketch_experimental_test.cpp
     theta_union_experimental_test.cpp
     array_of_doubles_sketch_test.cpp
+    mixed_union_test.cpp
 )
diff --git a/tuple/test/mixed_union_test.cpp b/tuple/test/mixed_union_test.cpp
new file mode 100644
index 0000000..7a5fdb3
--- /dev/null
+++ b/tuple/test/mixed_union_test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+
+#include <catch.hpp>
+#include <tuple_union.hpp>
+#include <theta_sketch_experimental.hpp>
+#include <theta_to_tuple_sketch_adapter.hpp>
+
+namespace datasketches {
+
+TEST_CASE("mixed_union float: empty", "[tuple union]") {
+  auto update_sketch = update_theta_sketch_experimental<>::builder().build(); // never updated
+
+  auto u = tuple_union<float>::builder().build();
+  u.update(theta_to_tuple_sketch_adapter<float>(update_sketch, 0)); // feed the theta sketch to the tuple union via the adapter, summary 0
+  auto result = u.get_result();
+//  std::cout << result.to_string(true);
+  REQUIRE(result.is_empty()); // the union of a single never-updated sketch is expected to be empty
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(!result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0);
+}
+
+TEST_CASE("mixed_union float: full overlap", "[tuple union]") {
+  auto u = tuple_union<float>::builder().build();
+
+  // theta update: keys 0..9 fed through the adapter, each with summary 1
+  auto update_theta = update_theta_sketch_experimental<>::builder().build();
+  for (unsigned i = 0; i < 10; ++i) update_theta.update(i);
+  u.update(theta_to_tuple_sketch_adapter<float>(update_theta, 1));
+
+  // theta compact: the same keys again, through the adapter's compact-sketch constructor
+  auto compact_theta = update_theta.compact();
+  u.update(theta_to_tuple_sketch_adapter<float>(compact_theta, 1));
+
+  // tuple update: the same keys from a native tuple sketch, summary 1 each
+  auto update_tuple = update_tuple_sketch<float>::builder().build();
+  for (unsigned i = 0; i < 10; ++i) update_tuple.update(i, 1);
+  u.update(update_tuple);
+
+  // tuple compact: the same keys a fourth time
+  auto compact_tuple = update_tuple.compact();
+  u.update(compact_tuple);
+
+  auto result = u.get_result(); // must be declared: every check below reads it
+//  std::cout << result.to_string(true);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.get_num_retained() == 10);
+  REQUIRE(!result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 10);
+  for (const auto& entry: result) {
+    REQUIRE(entry.second == 4); // four inputs each contributed summary 1 for every key
+  }
+}
+
+} /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org