You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/08/05 23:36:51 UTC
[arrow] branch master updated: ARROW-6135: [C++] Make
KeyValueMetadata::Equals() order-insensitive
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cf90ea9 ARROW-6135: [C++] Make KeyValueMetadata::Equals() order-insensitive
cf90ea9 is described below
commit cf90ea9dbc602b94414ec9c2bd310a03c15ce5b8
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Aug 5 18:36:42 2019 -0500
ARROW-6135: [C++] Make KeyValueMetadata::Equals() order-insensitive
Closes #5009 from pitrou/ARROW-6135-key-value-metadata-unordered and squashes the following commits:
673e8151c <Antoine Pitrou> ARROW-6135: Make KeyValueMetadata::Equals() order-insensitive
Authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: Wes McKinney <we...@apache.org>
---
cpp/src/arrow/util/key-value-metadata-test.cc | 13 +++++++++++++
cpp/src/arrow/util/key_value_metadata.cc | 19 ++++++++++++++++---
cpp/src/arrow/util/stl.h | 12 ++++++++++++
3 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/util/key-value-metadata-test.cc b/cpp/src/arrow/util/key-value-metadata-test.cc
index c1e6857..71cd3ad 100644
--- a/cpp/src/arrow/util/key-value-metadata-test.cc
+++ b/cpp/src/arrow/util/key-value-metadata-test.cc
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
@@ -104,6 +105,18 @@ TEST(KeyValueMetadataTest, Equals) {
ASSERT_TRUE(metadata.Equals(metadata2));
ASSERT_FALSE(metadata.Equals(metadata3));
+
+ // Key / value pairs are semantically unordered
+ std::reverse(keys.begin(), keys.end());
+ KeyValueMetadata metadata4(keys, values);
+ std::reverse(values.begin(), values.end());
+ KeyValueMetadata metadata5(keys, values);
+
+ ASSERT_FALSE(metadata.Equals(metadata4));
+ ASSERT_TRUE(metadata.Equals(metadata5));
+
+ KeyValueMetadata metadata6({"foo"}, {"bizz"});
+ ASSERT_FALSE(metadata.Equals(metadata6));
}
TEST(KeyValueMetadataTest, ToString) {
diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc
index 1c73e30..2335dd2 100644
--- a/cpp/src/arrow/util/key_value_metadata.cc
+++ b/cpp/src/arrow/util/key_value_metadata.cc
@@ -27,6 +27,7 @@
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/logging.h"
+#include "arrow/util/stl.h"
using std::size_t;
@@ -119,9 +120,21 @@ std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Copy() const {
}
bool KeyValueMetadata::Equals(const KeyValueMetadata& other) const {
- return size() == other.size() &&
- std::equal(keys_.cbegin(), keys_.cend(), other.keys_.cbegin()) &&
- std::equal(values_.cbegin(), values_.cend(), other.values_.cbegin());
+ if (size() != other.size()) {
+ return false;
+ }
+
+ auto indices = internal::ArgSort(keys_);
+ auto other_indices = internal::ArgSort(other.keys_);
+
+ for (int64_t i = 0; i < size(); ++i) {
+ auto j = indices[i];
+ auto k = other_indices[i];
+ if (keys_[j] != other.keys_[k] || values_[j] != other.values_[k]) {
+ return false;
+ }
+ }
+ return true;
}
std::string KeyValueMetadata::ToString() const {
diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h
index f1b1e18..d87cb17 100644
--- a/cpp/src/arrow/util/stl.h
+++ b/cpp/src/arrow/util/stl.h
@@ -18,7 +18,10 @@
#ifndef ARROW_UTIL_STL_H
#define ARROW_UTIL_STL_H
+#include <algorithm>
+#include <cstdint>
#include <memory>
+#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
@@ -89,6 +92,15 @@ inline std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t
return out;
}
+template <typename T>
+std::vector<int64_t> ArgSort(const std::vector<T>& values) {
+ std::vector<int64_t> indices(values.size());
+ std::iota(indices.begin(), indices.end(), 0);
+ std::sort(indices.begin(), indices.end(),
+ [&](int64_t i, int64_t j) -> bool { return values[i] < values[j]; });
+ return indices;
+}
+
} // namespace internal
} // namespace arrow