You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/08/20 23:10:44 UTC
[incubator-datasketches-cpp] branch tuple_sketch updated: theta to
tuple adapter for mixed set operations
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/tuple_sketch by this push:
new c2c7158 theta to tuple adapter for mixed set operations
c2c7158 is described below
commit c2c7158e9b6c790ff60ed1c996458092639ca4c6
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Aug 20 16:10:32 2020 -0700
theta to tuple adapter for mixed set operations
---
tuple/CMakeLists.txt | 3 +
tuple/include/theta_to_tuple_sketch_adapter.hpp | 77 ++++++++++++++++++
.../include/theta_to_tuple_sketch_adapter_impl.hpp | 93 ++++++++++++++++++++++
tuple/test/CMakeLists.txt | 1 +
tuple/test/mixed_union_test.cpp | 74 +++++++++++++++++
5 files changed, 248 insertions(+)
diff --git a/tuple/CMakeLists.txt b/tuple/CMakeLists.txt
index fce73ab..2a452f7 100644
--- a/tuple/CMakeLists.txt
+++ b/tuple/CMakeLists.txt
@@ -44,6 +44,7 @@ list(APPEND tuple_HEADERS "include/theta_set_difference_base.hpp;include/theta_s
list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_to_tuple_sketch_adapter.hpp;include/theta_to_tuple_sketch_adapter_impl.hpp")
install(TARGETS tuple
EXPORT ${PROJECT_NAME}
@@ -76,4 +77,6 @@ target_sources(tuple
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_to_tuple_sketch_adapter.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_to_tuple_sketch_adapter_impl.hpp
)
diff --git a/tuple/include/theta_to_tuple_sketch_adapter.hpp b/tuple/include/theta_to_tuple_sketch_adapter.hpp
new file mode 100644
index 0000000..2a3cb7e
--- /dev/null
+++ b/tuple/include/theta_to_tuple_sketch_adapter.hpp
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_TO_TUPLE_SKETCH_ADAPTER_HPP_
+#define THETA_TO_TUPLE_SKETCH_ADAPTER_HPP_
+
+#include <memory>
+
+#include "theta_sketch_experimental.hpp"
+
+namespace datasketches {
+
+/**
+ * Adapter that exposes a theta sketch through the accessors and iteration
+ * interface used when a tuple sketch is consumed (for example when it is handed
+ * to tuple_union::update() for mixed set operations).
+ * Each value produced by the theta sketch iterator is paired with one and the
+ * same Summary, the one given to the constructor.
+ *
+ * Only a pointer to the sketch is stored (the sketch itself is not copied), so
+ * the sketch has to stay alive for as long as the adapter is in use.
+ * The summary, in contrast, is copied into the adapter.
+ */
+template<typename Summary, typename Allocator = std::allocator<uint64_t>>
+class theta_to_tuple_sketch_adapter {
+public:
+  class const_iterator;
+
+  /// Wraps an update theta sketch; every entry will carry a copy of the given summary.
+  theta_to_tuple_sketch_adapter(const update_theta_sketch_experimental<Allocator>& sketch, const Summary& summary);
+
+  /// Wraps a compact theta sketch; every entry will carry a copy of the given summary.
+  theta_to_tuple_sketch_adapter(const compact_theta_sketch_experimental<Allocator>& sketch, const Summary& summary);
+
+  // The accessors below simply forward to the wrapped theta sketch.
+  bool is_empty() const;
+  bool is_ordered() const;
+  uint16_t get_seed_hash() const;
+  uint64_t get_theta64() const;
+
+  /// Iterator positioned at the first entry of the wrapped sketch.
+  const_iterator begin();
+
+  /// Iterator positioned past the last entry of the wrapped sketch.
+  const_iterator end();
+
+private:
+  // non-owning pointer to the wrapped sketch
+  const theta_sketch_experimental<Allocator>* sketch_ptr;
+  // summary copied at construction and attached to every entry
+  Summary summary;
+};
+
+/**
+ * Forward iterator over the wrapped theta sketch that yields
+ * std::pair<uint64_t, Summary> entries.
+ * Dereferencing rebuilds an internal entry from the current value of the
+ * underlying theta iterator and the stored summary, and returns a reference to it.
+ * That reference therefore refers to storage inside the iterator and is
+ * overwritten by the next dereference of the same iterator.
+ */
+template<typename Summary, typename Allocator>
+class theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator {
+public:
+  using theta_const_iterator = typename theta_sketch_experimental<Allocator>::const_iterator;
+  using Entry = std::pair<uint64_t, Summary>;
+
+  // standard iterator traits
+  using difference_type = std::ptrdiff_t;
+  using value_type = Entry;
+  using reference = Entry&;
+  using pointer = Entry*;
+  using iterator_category = std::forward_iterator_tag;
+
+  /// Builds an iterator at the position of the given theta iterator, keeping a copy of the summary.
+  const_iterator(const theta_const_iterator& it, const Summary& summary);
+
+  const_iterator& operator++();
+  const_iterator operator++(int);
+
+  /// Comparisons look only at the position of the underlying theta iterator.
+  bool operator==(const const_iterator& other) const;
+  bool operator!=(const const_iterator& other) const;
+
+  /// Returns a reference to the internally stored entry, refreshed on every call.
+  Entry& operator*() const;
+
+private:
+  // current position in the wrapped theta sketch
+  theta_const_iterator it;
+  // summary attached to every produced entry
+  Summary summary;
+  // scratch storage for operator*(); mutable because operator*() is const
+  mutable Entry entry;
+};
+
+} /* namespace datasketches */
+
+#include "theta_to_tuple_sketch_adapter_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_to_tuple_sketch_adapter_impl.hpp b/tuple/include/theta_to_tuple_sketch_adapter_impl.hpp
new file mode 100644
index 0000000..3318700
--- /dev/null
+++ b/tuple/include/theta_to_tuple_sketch_adapter_impl.hpp
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace datasketches {
+
+// Adapter over an update theta sketch: remembers the address of the sketch
+// (no copy is made) and keeps its own copy of the summary.
+template<typename Summary, typename Allocator>
+theta_to_tuple_sketch_adapter<Summary, Allocator>::theta_to_tuple_sketch_adapter(
+  const update_theta_sketch_experimental<Allocator>& sketch,
+  const Summary& summary
+):
+  sketch_ptr(&sketch),
+  summary(summary)
+{}
+
+// Adapter over a compact theta sketch: remembers the address of the sketch
+// (no copy is made) and keeps its own copy of the summary.
+template<typename Summary, typename Allocator>
+theta_to_tuple_sketch_adapter<Summary, Allocator>::theta_to_tuple_sketch_adapter(
+  const compact_theta_sketch_experimental<Allocator>& sketch,
+  const Summary& summary
+):
+  sketch_ptr(&sketch),
+  summary(summary)
+{}
+
+// Forwards to is_empty() of the wrapped theta sketch.
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::is_empty() const -> bool {
+  const bool empty = sketch_ptr->is_empty();
+  return empty;
+}
+
+// Forwards to is_ordered() of the wrapped theta sketch.
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::is_ordered() const -> bool {
+  const bool ordered = sketch_ptr->is_ordered();
+  return ordered;
+}
+
+// Forwards to get_seed_hash() of the wrapped theta sketch.
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::get_seed_hash() const -> uint16_t {
+  const uint16_t seed_hash = sketch_ptr->get_seed_hash();
+  return seed_hash;
+}
+
+// Forwards to get_theta64() of the wrapped theta sketch.
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::get_theta64() const -> uint64_t {
+  const uint64_t theta = sketch_ptr->get_theta64();
+  return theta;
+}
+
+// Iterator at the start of the wrapped sketch, carrying the adapter's summary.
+template<typename Summary, typename Allocator>
+typename theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator
+theta_to_tuple_sketch_adapter<Summary, Allocator>::begin() {
+  const_iterator first(sketch_ptr->begin(), summary);
+  return first;
+}
+
+// Iterator past the end of the wrapped sketch, carrying the adapter's summary.
+template<typename Summary, typename Allocator>
+typename theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator
+theta_to_tuple_sketch_adapter<Summary, Allocator>::end() {
+  const_iterator past_last(sketch_ptr->end(), summary);
+  return past_last;
+}
+
+// Copies the theta iterator and the summary; the scratch entry starts out as
+// (0, summary) and is overwritten on the first dereference.
+template<typename Summary, typename Allocator>
+theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::const_iterator(
+  const theta_const_iterator& it,
+  const Summary& summary
+):
+  it(it),
+  summary(summary),
+  entry(0, summary)
+{}
+
+// Pre-increment: advances the underlying theta iterator by one position.
+template<typename Summary, typename Allocator>
+typename theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator&
+theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator++() {
+  ++(this->it);
+  return *this;
+}
+
+// Post-increment: advances this iterator and returns a copy of its previous state.
+template<typename Summary, typename Allocator>
+typename theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator
+theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator++(int) {
+  const_iterator previous(*this);
+  ++(*this);
+  return previous;
+}
+
+// Equality is decided by the underlying theta iterators alone; summaries are not compared.
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator==(const const_iterator& other) const -> bool {
+  return it == other.it;
+}
+
+// Inequality is decided by the underlying theta iterators alone; summaries are not compared.
+template<typename Summary, typename Allocator>
+auto theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator!=(const const_iterator& other) const -> bool {
+  return it != other.it;
+}
+
+// Rebuilds the scratch entry from the current theta iterator value and the stored
+// summary on every call, then hands out a reference to it. The entry is a mutable
+// member, which is what allows assigning to it inside this const function.
+template<typename Summary, typename Allocator>
+typename theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::Entry&
+theta_to_tuple_sketch_adapter<Summary, Allocator>::const_iterator::operator*() const {
+  return entry = Entry(*it, summary);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
index 53a3a7e..17be15b 100644
--- a/tuple/test/CMakeLists.txt
+++ b/tuple/test/CMakeLists.txt
@@ -46,4 +46,5 @@ target_sources(tuple_test
theta_sketch_experimental_test.cpp
theta_union_experimental_test.cpp
array_of_doubles_sketch_test.cpp
+ mixed_union_test.cpp
)
diff --git a/tuple/test/mixed_union_test.cpp b/tuple/test/mixed_union_test.cpp
new file mode 100644
index 0000000..7a5fdb3
--- /dev/null
+++ b/tuple/test/mixed_union_test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+
+#include <catch.hpp>
+#include <tuple_union.hpp>
+#include <theta_sketch_experimental.hpp>
+#include <theta_to_tuple_sketch_adapter.hpp>
+
+namespace datasketches {
+
+// Feeding an empty theta sketch (through the adapter) into a tuple union
+// must leave the union result empty, exact, and with a zero estimate.
+TEST_CASE("mixed_union float: empty", "[tuple union]") {
+  auto empty_theta = update_theta_sketch_experimental<>::builder().build();
+  auto merger = tuple_union<float>::builder().build();
+
+  merger.update(theta_to_tuple_sketch_adapter<float>(empty_theta, 0));
+
+  auto merged = merger.get_result();
+  // debug aid, disabled: std::cout << merged.to_string(true);
+  REQUIRE(merged.is_empty());
+  REQUIRE(merged.get_num_retained() == 0);
+  REQUIRE(!merged.is_estimation_mode());
+  REQUIRE(merged.get_estimate() == 0);
+}
+
+// The same ten keys are fed into one tuple union through four different inputs:
+// an update theta sketch and its compact form (both via the adapter, summary 1),
+// and an update tuple sketch and its compact form (summary 1 per key).
+// The result is expected to retain exactly those ten keys, in exact mode.
+TEST_CASE("mixed_union float: full overlap", "[tuple union]") {
+  auto u = tuple_union<float>::builder().build();
+
+  // theta update
+  auto update_theta = update_theta_sketch_experimental<>::builder().build();
+  for (unsigned i = 0; i < 10; ++i) update_theta.update(i);
+  u.update(theta_to_tuple_sketch_adapter<float>(update_theta, 1));
+
+  // theta compact
+  auto compact_theta = update_theta.compact();
+  u.update(theta_to_tuple_sketch_adapter<float>(compact_theta, 1));
+
+  // tuple update
+  auto update_tuple = update_tuple_sketch<float>::builder().build();
+  for (unsigned i = 0; i < 10; ++i) update_tuple.update(i, 1);
+  u.update(update_tuple);
+
+  // tuple compact
+  auto compact_tuple = update_tuple.compact();
+  u.update(compact_tuple);
+
+  // The result must be declared here: every check below reads it.
+  // The debug print stays disabled, as in the "empty" test case.
+  auto result = u.get_result();
+//  std::cout << result.to_string(true);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.get_num_retained() == 10);
+  REQUIRE(!result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 10);
+  // each of the four inputs contributed a summary of 1 per key; the test expects them combined to 4
+  for (const auto& entry: result) {
+    REQUIRE(entry.second == 4);
+  }
+}
+
+} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org