You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/08/05 23:36:51 UTC

[arrow] branch master updated: ARROW-6135: [C++] Make KeyValueMetadata::Equals() order-insensitive

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cf90ea9  ARROW-6135: [C++] Make KeyValueMetadata::Equals() order-insensitive
cf90ea9 is described below

commit cf90ea9dbc602b94414ec9c2bd310a03c15ce5b8
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Aug 5 18:36:42 2019 -0500

    ARROW-6135: [C++] Make KeyValueMetadata::Equals() order-insensitive
    
    Closes #5009 from pitrou/ARROW-6135-key-value-metadata-unordered and squashes the following commits:
    
    673e8151c <Antoine Pitrou> ARROW-6135:  Make KeyValueMetadata::Equals() order-insensitive
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 cpp/src/arrow/util/key-value-metadata-test.cc | 13 +++++++++++++
 cpp/src/arrow/util/key_value_metadata.cc      | 19 ++++++++++++++++---
 cpp/src/arrow/util/stl.h                      | 12 ++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/util/key-value-metadata-test.cc b/cpp/src/arrow/util/key-value-metadata-test.cc
index c1e6857..71cd3ad 100644
--- a/cpp/src/arrow/util/key-value-metadata-test.cc
+++ b/cpp/src/arrow/util/key-value-metadata-test.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <algorithm>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -104,6 +105,18 @@ TEST(KeyValueMetadataTest, Equals) {
 
   ASSERT_TRUE(metadata.Equals(metadata2));
   ASSERT_FALSE(metadata.Equals(metadata3));
+
+  // Key / value pairs are semantically unordered
+  std::reverse(keys.begin(), keys.end());
+  KeyValueMetadata metadata4(keys, values);
+  std::reverse(values.begin(), values.end());
+  KeyValueMetadata metadata5(keys, values);
+
+  ASSERT_FALSE(metadata.Equals(metadata4));
+  ASSERT_TRUE(metadata.Equals(metadata5));
+
+  KeyValueMetadata metadata6({"foo"}, {"bizz"});
+  ASSERT_FALSE(metadata.Equals(metadata6));
 }
 
 TEST(KeyValueMetadataTest, ToString) {
diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc
index 1c73e30..2335dd2 100644
--- a/cpp/src/arrow/util/key_value_metadata.cc
+++ b/cpp/src/arrow/util/key_value_metadata.cc
@@ -27,6 +27,7 @@
 
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/stl.h"
 
 using std::size_t;
 
@@ -119,9 +120,21 @@ std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Copy() const {
 }
 
 bool KeyValueMetadata::Equals(const KeyValueMetadata& other) const {
-  return size() == other.size() &&
-         std::equal(keys_.cbegin(), keys_.cend(), other.keys_.cbegin()) &&
-         std::equal(values_.cbegin(), values_.cend(), other.values_.cbegin());
+  if (size() != other.size()) {
+    return false;
+  }
+
+  auto indices = internal::ArgSort(keys_);
+  auto other_indices = internal::ArgSort(other.keys_);
+
+  for (int64_t i = 0; i < size(); ++i) {
+    auto j = indices[i];
+    auto k = other_indices[i];
+    if (keys_[j] != other.keys_[k] || values_[j] != other.values_[k]) {
+      return false;
+    }
+  }
+  return true;
 }
 
 std::string KeyValueMetadata::ToString() const {
diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h
index f1b1e18..d87cb17 100644
--- a/cpp/src/arrow/util/stl.h
+++ b/cpp/src/arrow/util/stl.h
@@ -18,7 +18,10 @@
 #ifndef ARROW_UTIL_STL_H
 #define ARROW_UTIL_STL_H
 
+#include <algorithm>
+#include <cstdint>
 #include <memory>
+#include <numeric>
 #include <type_traits>
 #include <utility>
 #include <vector>
@@ -89,6 +92,15 @@ inline std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t
   return out;
 }
 
+template <typename T>
+std::vector<int64_t> ArgSort(const std::vector<T>& values) {
+  std::vector<int64_t> indices(values.size());
+  std::iota(indices.begin(), indices.end(), 0);
+  std::sort(indices.begin(), indices.end(),
+            [&](int64_t i, int64_t j) -> bool { return values[i] < values[j]; });
+  return indices;
+}
+
 }  // namespace internal
 }  // namespace arrow