You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/06/02 21:51:01 UTC
[incubator-datasketches-cpp] branch patch_for_rc4 created (now
6f04d3f)
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a change to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.
at 6f04d3f added new header
This branch includes the following new commits:
new c60448e cherry-pick #151: no checking of seed hash for empty compact sketches
new baef5b8 fixed ordered a-not-b
new ef43c5b conditional back inserter
new 9b1b34b reference member is problematic
new 97dcd5b lambda as rvalue
new 4baf1db MSVC compatibility
new 98497af one more attempt at MSVC compatibility
new 680ddc1 another attempt at MSVC compatibility
new 6f04d3f added new header
The 9 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 08/09: another attempt at MSVC
compatibility
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 680ddc103443a0c5ada12e1ae9fa68496a3a34a3
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Jun 1 10:29:31 2020 -0700
another attempt at MSVC compatibility
---
theta/include/conditional_back_inserter.hpp | 9 +++++++++
theta/include/theta_a_not_b.hpp | 7 +++++++
theta/include/theta_a_not_b_impl.hpp | 4 ++--
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/theta/include/conditional_back_inserter.hpp b/theta/include/conditional_back_inserter.hpp
index e9b0590..9b33833 100644
--- a/theta/include/conditional_back_inserter.hpp
+++ b/theta/include/conditional_back_inserter.hpp
@@ -31,6 +31,15 @@ public:
template<typename P>
conditional_back_insert_iterator(Container& c, P&& p): std::back_insert_iterator<Container>(c), p(std::forward<P>(p)) {}
+ // MSVC seems to insist on having copy constructor and assignment
+ conditional_back_insert_iterator(const conditional_back_insert_iterator& other):
+ std::back_insert_iterator<Container>(other), p(other.p) {}
+ conditional_back_insert_iterator& operator=(const conditional_back_insert_iterator& other) {
+ std::back_insert_iterator<Container>::operator=(other);
+ p = other.p;
+ return *this;
+ }
+
conditional_back_insert_iterator& operator=(typename Container::const_reference value) {
if (p(value)) std::back_insert_iterator<Container>::operator=(value);
return *this;
diff --git a/theta/include/theta_a_not_b.hpp b/theta/include/theta_a_not_b.hpp
index 5287887..db66ac7 100644
--- a/theta/include/theta_a_not_b.hpp
+++ b/theta/include/theta_a_not_b.hpp
@@ -54,6 +54,13 @@ private:
typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
uint16_t seed_hash_;
+ class less_than {
+ public:
+ explicit less_than(uint64_t value): value(value) {}
+ bool operator()(uint64_t value) const { return value < this->value; }
+ private:
+ uint64_t value;
+ };
};
// alias with default allocator for convenience
diff --git a/theta/include/theta_a_not_b_impl.hpp b/theta/include/theta_a_not_b_impl.hpp
index 2ffca28..4343ee3 100644
--- a/theta/include/theta_a_not_b_impl.hpp
+++ b/theta/include/theta_a_not_b_impl.hpp
@@ -48,10 +48,10 @@ compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch
bool is_empty = a.is_empty();
if (b.get_num_retained() == 0) {
- std::copy_if(a.begin(), a.end(), std::back_inserter(keys), [theta](uint64_t key) { return key < theta; });
+ std::copy_if(a.begin(), a.end(), std::back_inserter(keys), less_than(theta));
} else {
if (a.is_ordered() && b.is_ordered()) { // sort-based
- std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, [theta](uint64_t key) { return key < theta; }));
+ std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, less_than(theta)));
} else { // hash-based
const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
vector_u64<A> b_hash_table(1 << lg_size, 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 04/09: reference member is problematic
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 9b1b34b7ba7f833d6e2278321535ea860383586e
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri May 29 16:59:36 2020 -0700
reference member is problematic
---
theta/include/conditional_back_inserter.hpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/theta/include/conditional_back_inserter.hpp b/theta/include/conditional_back_inserter.hpp
index f128a3b..2e40e9f 100644
--- a/theta/include/conditional_back_inserter.hpp
+++ b/theta/include/conditional_back_inserter.hpp
@@ -28,7 +28,7 @@ namespace datasketches {
template <class Container, class Predicate>
class conditional_back_insert_iterator: public std::back_insert_iterator<Container> {
public:
- conditional_back_insert_iterator(Container& c, Predicate& p): std::back_insert_iterator<Container>(c), p(p) {}
+ conditional_back_insert_iterator(Container& c, Predicate&& p): std::back_insert_iterator<Container>(c), p(std::forward<Predicate>(p)) {}
conditional_back_insert_iterator& operator=(typename Container::const_reference value) {
if (p(value)) std::back_insert_iterator<Container>::operator=(value);
@@ -40,12 +40,12 @@ public:
conditional_back_insert_iterator& operator++(int) { return *this; }
private:
- Predicate& p;
+ Predicate p;
};
template< class Container, class Predicate>
-conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate& p) {
- return conditional_back_insert_iterator<Container, Predicate>(c, p);
+conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate&& p) {
+ return conditional_back_insert_iterator<Container, Predicate>(c, std::forward<Predicate>(p));
}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 09/09: added new header
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 6f04d3f16938a3a7dc62a0c3c912b92bcb5d289f
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Jun 1 14:28:43 2020 -0700
added new header
---
theta/CMakeLists.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/theta/CMakeLists.txt b/theta/CMakeLists.txt
index ae02e79..69af9db 100644
--- a/theta/CMakeLists.txt
+++ b/theta/CMakeLists.txt
@@ -36,6 +36,7 @@ set(theta_HEADERS "")
list(APPEND theta_HEADERS "include/theta_sketch.hpp;include/theta_union.hpp;include/theta_intersection.hpp")
list(APPEND theta_HEADERS "include/theta_a_not_b.hpp;include/binomial_bounds.hpp;include/theta_sketch_impl.hpp")
list(APPEND theta_HEADERS "include/theta_union_impl.hpp;include/theta_intersection_impl.hpp;include/theta_a_not_b_impl.hpp")
+list(APPEND theta_HEADERS "include/conditional_back_inserter.hpp")
install(TARGETS theta
EXPORT ${PROJECT_NAME}
@@ -55,4 +56,5 @@ target_sources(theta
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 02/09: fixed ordered a-not-b
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit baef5b8947489d6f02d4499a1c8a617f2d69e16b
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri May 29 16:26:06 2020 -0700
fixed ordered a-not-b
---
theta/include/theta_a_not_b_impl.hpp | 65 ++++++++++++++----------------------
theta/test/theta_a_not_b_test.cpp | 24 +++++++++++++
2 files changed, 49 insertions(+), 40 deletions(-)
diff --git a/theta/include/theta_a_not_b_impl.hpp b/theta/include/theta_a_not_b_impl.hpp
index f080903..8951a8f 100644
--- a/theta/include/theta_a_not_b_impl.hpp
+++ b/theta/include/theta_a_not_b_impl.hpp
@@ -22,6 +22,8 @@
#include <algorithm>
+#include "conditional_back_inserter.hpp"
+
namespace datasketches {
/*
@@ -43,54 +45,37 @@ compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch
const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
vector_u64<A> keys;
- uint32_t keys_size = 0;
- uint32_t count = 0;
bool is_empty = a.is_empty();
+ auto less_than_theta = [theta](uint64_t key) { return key < theta; };
if (b.get_num_retained() == 0) {
- for (auto key: a) if (key < theta) ++count;
- keys.resize(count);
- std::copy_if(a.begin(), a.end(), &keys[0], [theta](uint64_t key) { return key < theta; });
- if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
- if (count == 0 && theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true;
- return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash_, a.is_ordered() || ordered);
- }
-
- keys_size = a.get_num_retained();
- keys.resize(keys_size);
-
- if (a.is_ordered() && b.is_ordered()) { // sort-based
- const auto end = std::set_difference(a.begin(), a.end(), b.begin(), b.end(), keys.begin());
- count = end - keys.begin();
- } else { // hash-based
- const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
- vector_u64<A> b_hash_table(1 << lg_size, 0);
- for (auto key: b) {
- if (key < theta) {
- update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table.data(), lg_size);
- } else if (b.is_ordered()) {
- break; // early stop
+ std::copy_if(a.begin(), a.end(), std::back_inserter(keys), less_than_theta);
+ } else {
+ if (a.is_ordered() && b.is_ordered()) { // sort-based
+ std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, less_than_theta));
+ } else { // hash-based
+ const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
+ vector_u64<A> b_hash_table(1 << lg_size, 0);
+ for (auto key: b) {
+ if (key < theta) {
+ update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table.data(), lg_size);
+ } else if (b.is_ordered()) {
+ break; // early stop
+ }
}
- }
- // scan A lookup B
- for (auto key: a) {
- if (key < theta) {
- if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table.data(), lg_size)) keys[count++] = key;
- } else if (a.is_ordered()) {
- break; // early stop
+ // scan A lookup B
+ for (auto key: a) {
+ if (key < theta) {
+ if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table.data(), lg_size)) keys.push_back(key);
+ } else if (a.is_ordered()) {
+ break; // early stop
+ }
}
}
}
-
- if (count == 0) {
- keys.resize(0);
- if (theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true;
- } else if (count < keys_size) {
- keys.resize(count);
- if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
- }
-
+ if (keys.empty() && theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true;
+ if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash_, a.is_ordered() || ordered);
}
diff --git a/theta/test/theta_a_not_b_test.cpp b/theta/test/theta_a_not_b_test.cpp
index 54bd3fa..1ef5255 100644
--- a/theta/test/theta_a_not_b_test.cpp
+++ b/theta/test/theta_a_not_b_test.cpp
@@ -217,4 +217,28 @@ TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") {
REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
}
+TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 25000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+}
+
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 06/09: MSVC compatibility
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 4baf1db31b3f4bdba1c43830935a476a7ef7c361
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri May 29 21:42:38 2020 -0700
MSVC compatibility
---
theta/include/conditional_back_inserter.hpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/theta/include/conditional_back_inserter.hpp b/theta/include/conditional_back_inserter.hpp
index 2e40e9f..5e7f2ef 100644
--- a/theta/include/conditional_back_inserter.hpp
+++ b/theta/include/conditional_back_inserter.hpp
@@ -25,9 +25,10 @@
namespace datasketches {
-template <class Container, class Predicate>
+template <typename Container, typename Predicate>
class conditional_back_insert_iterator: public std::back_insert_iterator<Container> {
public:
+ conditional_back_insert_iterator(Container& c, Predicate& p): std::back_insert_iterator<Container>(c), p(p) {}
conditional_back_insert_iterator(Container& c, Predicate&& p): std::back_insert_iterator<Container>(c), p(std::forward<Predicate>(p)) {}
conditional_back_insert_iterator& operator=(typename Container::const_reference value) {
@@ -43,7 +44,12 @@ private:
Predicate p;
};
-template< class Container, class Predicate>
+template<typename Container, typename Predicate>
+conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate& p) {
+ return conditional_back_insert_iterator<Container, Predicate>(c, p);
+}
+
+template<typename Container, typename Predicate>
conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate&& p) {
return conditional_back_insert_iterator<Container, Predicate>(c, std::forward<Predicate>(p));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 03/09: conditional back inserter
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit ef43c5ba50b9a4e901f06159946bb55535834e8e
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri May 29 16:28:09 2020 -0700
conditional back inserter
---
theta/include/conditional_back_inserter.hpp | 53 +++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/theta/include/conditional_back_inserter.hpp b/theta/include/conditional_back_inserter.hpp
new file mode 100644
index 0000000..f128a3b
--- /dev/null
+++ b/theta/include/conditional_back_inserter.hpp
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CONDITIONAL_BACK_INSERTER_HPP_
+#define CONDITIONAL_BACK_INSERTER_HPP_
+
+#include <iterator>
+#include <functional>
+
+namespace datasketches {
+
+template <class Container, class Predicate>
+class conditional_back_insert_iterator: public std::back_insert_iterator<Container> {
+public:
+ conditional_back_insert_iterator(Container& c, Predicate& p): std::back_insert_iterator<Container>(c), p(p) {}
+
+ conditional_back_insert_iterator& operator=(typename Container::const_reference value) {
+ if (p(value)) std::back_insert_iterator<Container>::operator=(value);
+ return *this;
+ }
+
+ conditional_back_insert_iterator& operator*() { return *this; }
+ conditional_back_insert_iterator& operator++() { return *this; }
+ conditional_back_insert_iterator& operator++(int) { return *this; }
+
+private:
+ Predicate& p;
+};
+
+template< class Container, class Predicate>
+conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate& p) {
+ return conditional_back_insert_iterator<Container, Predicate>(c, p);
+}
+
+} /* namespace datasketches */
+
+#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 05/09: lambda as rvalue
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 97dcd5b28e55f630d087c80e46a8c7a097f87e4b
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri May 29 17:57:25 2020 -0700
lambda as rvalue
---
theta/include/theta_a_not_b_impl.hpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/theta/include/theta_a_not_b_impl.hpp b/theta/include/theta_a_not_b_impl.hpp
index 8951a8f..2ffca28 100644
--- a/theta/include/theta_a_not_b_impl.hpp
+++ b/theta/include/theta_a_not_b_impl.hpp
@@ -46,13 +46,12 @@ compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch
const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
vector_u64<A> keys;
bool is_empty = a.is_empty();
- auto less_than_theta = [theta](uint64_t key) { return key < theta; };
if (b.get_num_retained() == 0) {
- std::copy_if(a.begin(), a.end(), std::back_inserter(keys), less_than_theta);
+ std::copy_if(a.begin(), a.end(), std::back_inserter(keys), [theta](uint64_t key) { return key < theta; });
} else {
if (a.is_ordered() && b.is_ordered()) { // sort-based
- std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, less_than_theta));
+ std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, [theta](uint64_t key) { return key < theta; }));
} else { // hash-based
const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
vector_u64<A> b_hash_table(1 << lg_size, 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 01/09: cherry-pick #151: no checking
of seed hash for empty compact sketches
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit c60448e0443f1bb40f84f378750f17ca65bc5541
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Jun 2 13:54:50 2020 -0700
cherry-pick #151: no checking of seed hash for empty compact sketches
---
theta/include/theta_a_not_b_impl.hpp | 3 +--
theta/include/theta_intersection_impl.hpp | 2 +-
theta/include/theta_sketch_impl.hpp | 14 ++++++++++----
theta/test/theta_compact_empty_from_java.sk | Bin 8 -> 8 bytes
4 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/theta/include/theta_a_not_b_impl.hpp b/theta/include/theta_a_not_b_impl.hpp
index cc171ce..f080903 100644
--- a/theta/include/theta_a_not_b_impl.hpp
+++ b/theta/include/theta_a_not_b_impl.hpp
@@ -37,10 +37,9 @@ seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
template<typename A>
compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch_alloc<A>& a, const theta_sketch_alloc<A>& b, bool ordered) const {
- if (a.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered);
+ if (a.is_empty() || a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered);
if (a.get_seed_hash() != seed_hash_) throw std::invalid_argument("A seed hash mismatch");
if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch");
- if (a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered);
const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
vector_u64<A> keys;
diff --git a/theta/include/theta_intersection_impl.hpp b/theta/include/theta_intersection_impl.hpp
index 79fea4e..6be6757 100644
--- a/theta/include/theta_intersection_impl.hpp
+++ b/theta/include/theta_intersection_impl.hpp
@@ -44,7 +44,7 @@ seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
template<typename A>
void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
if (is_empty_) return;
- if (sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch");
+ if (!sketch.is_empty() && sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch");
is_empty_ |= sketch.is_empty();
theta_ = std::min(theta_, sketch.get_theta64());
if (is_valid_ && num_keys_ == 0) return;
diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp
index 417dfa1..0514884 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -101,9 +101,9 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(st
is.read((char*)&seed_hash, sizeof(seed_hash));
check_serial_version(serial_version, SERIAL_VERSION);
- check_seed_hash(seed_hash, get_seed_hash(seed));
if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
+ check_seed_hash(seed_hash, get_seed_hash(seed));
typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
return unique_ptr(
@@ -114,6 +114,8 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(st
}
);
} else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+ if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
return unique_ptr(
static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))),
@@ -146,9 +148,9 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(co
ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
check_serial_version(serial_version, SERIAL_VERSION);
- check_seed_hash(seed_hash, get_seed_hash(seed));
if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
+ check_seed_hash(seed_hash, get_seed_hash(seed));
typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
return unique_ptr(
@@ -161,6 +163,8 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(co
}
);
} else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+ if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
return unique_ptr(
static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(
@@ -753,7 +757,8 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::is
is.read((char*)&seed_hash, sizeof(seed_hash));
theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
- theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+ if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
return internal_deserialize(is, preamble_longs, flags_byte, seed_hash);
}
@@ -801,7 +806,8 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
- theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+ if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash);
}
diff --git a/theta/test/theta_compact_empty_from_java.sk b/theta/test/theta_compact_empty_from_java.sk
index 44730d3..f6c647f 100644
Binary files a/theta/test/theta_compact_empty_from_java.sk and b/theta/test/theta_compact_empty_from_java.sk differ
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org
[incubator-datasketches-cpp] 07/09: one more attempt at MSVC
compatibility
Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch patch_for_rc4
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 98497af00743aaa183c4df7f9e5381ca47780232
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri May 29 22:02:22 2020 -0700
one more attempt at MSVC compatibility
---
theta/include/conditional_back_inserter.hpp | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/theta/include/conditional_back_inserter.hpp b/theta/include/conditional_back_inserter.hpp
index 5e7f2ef..e9b0590 100644
--- a/theta/include/conditional_back_inserter.hpp
+++ b/theta/include/conditional_back_inserter.hpp
@@ -28,8 +28,8 @@ namespace datasketches {
template <typename Container, typename Predicate>
class conditional_back_insert_iterator: public std::back_insert_iterator<Container> {
public:
- conditional_back_insert_iterator(Container& c, Predicate& p): std::back_insert_iterator<Container>(c), p(p) {}
- conditional_back_insert_iterator(Container& c, Predicate&& p): std::back_insert_iterator<Container>(c), p(std::forward<Predicate>(p)) {}
+ template<typename P>
+ conditional_back_insert_iterator(Container& c, P&& p): std::back_insert_iterator<Container>(c), p(std::forward<P>(p)) {}
conditional_back_insert_iterator& operator=(typename Container::const_reference value) {
if (p(value)) std::back_insert_iterator<Container>::operator=(value);
@@ -45,11 +45,6 @@ private:
};
template<typename Container, typename Predicate>
-conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate& p) {
- return conditional_back_insert_iterator<Container, Predicate>(c, p);
-}
-
-template<typename Container, typename Predicate>
conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate&& p) {
return conditional_back_insert_iterator<Container, Predicate>(c, std::forward<Predicate>(p));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org