You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/08/05 19:37:29 UTC

[kudu] branch master updated: [common] Update PartitionSchema::operator==()

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 11cad7f  [common] Update PartitionSchema::operator==()
11cad7f is described below

commit 11cad7f7b7a9953850cc6cccef08c2b19fafbdac
Author: Mahesh Reddy <mr...@cloudera.com>
AuthorDate: Thu Jul 29 23:20:01 2021 -0700

    [common] Update PartitionSchema::operator==()
    
    This patch modifies the overloaded equals operator
    function to include checks for custom hash schemas
    per range and their range bounds. In addition,
    some minor style changes were made.
    
    Change-Id: I09c626ecdcbe7c0465e6483d6a23369fb7194bfb
    Reviewed-on: http://gerrit.cloudera.org:8080/17753
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/common/partition-test.cc   | 117 ++++++++++++++++++++++++++++++++++--
 src/kudu/common/partition.cc        |  40 ++++++++----
 src/kudu/common/partition.h         |  23 +++++++
 src/kudu/common/partition_pruner.cc |   2 +-
 4 files changed, 164 insertions(+), 18 deletions(-)

diff --git a/src/kudu/common/partition-test.cc b/src/kudu/common/partition-test.cc
index de465bf..411fd3c 100644
--- a/src/kudu/common/partition-test.cc
+++ b/src/kudu/common/partition-test.cc
@@ -1402,8 +1402,8 @@ TEST_F(PartitionTest, TestPartitionSchemaPB) {
     encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
     encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
 
-    auto range_hash_component = pb.add_range_hash_schemas();
-    auto hash_component = range_hash_component->add_hash_schemas();
+    auto* range_hash_component = pb.add_range_hash_schemas();
+    auto* hash_component = range_hash_component->add_hash_schemas();
     hash_component->add_columns()->set_name("a");
     hash_component->set_num_buckets(4);
   }
@@ -1420,11 +1420,11 @@ TEST_F(PartitionTest, TestPartitionSchemaPB) {
     encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
     encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
 
-    auto range_hash_component = pb.add_range_hash_schemas();
-    auto hash_component_1 = range_hash_component->add_hash_schemas();
+    auto* range_hash_component = pb.add_range_hash_schemas();
+    auto* hash_component_1 = range_hash_component->add_hash_schemas();
     hash_component_1->add_columns()->set_name("a");
     hash_component_1->set_num_buckets(2);
-    auto hash_component_2 = range_hash_component->add_hash_schemas();
+    auto* hash_component_2 = range_hash_component->add_hash_schemas();
     hash_component_2->add_columns()->set_name("b");
     hash_component_2->set_num_buckets(3);
   }
@@ -1545,4 +1545,111 @@ TEST_F(PartitionTest, TestMalformedPartitionSchemaPB) {
   ASSERT_EQ("Invalid argument: missing upper range bound in request",
             s2.ToString());
 }
+
+TEST_F(PartitionTest, TestOverloadedEqualsOperator) {
+  // CREATE TABLE t (a VARCHAR, b VARCHAR, c VARCHAR, PRIMARY KEY (a, b, c))
+  // PARTITION BY [RANGE (a, b, c)];
+  Schema schema({ ColumnSchema("a", STRING),
+                  ColumnSchema("b", STRING),
+                  ColumnSchema("c", STRING) },
+                { ColumnId(0), ColumnId(1), ColumnId(2) }, 3);
+
+  PartitionSchemaPB schema_builder;
+  PartitionSchema partition_schema;
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+
+  PartitionSchemaPB schema_builder_1;
+  PartitionSchema partition_schema_1;
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+
+  // Same object.
+  ASSERT_EQ(partition_schema, partition_schema);
+  ASSERT_EQ(partition_schema_1, partition_schema_1);
+
+  // Both schemas should be identical.
+  ASSERT_EQ(partition_schema, partition_schema_1);
+
+  // Clears the range schema of 'schema_builder_1'.
+  SetRangePartitionComponent(&schema_builder_1, {});
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+  ASSERT_NE(partition_schema, partition_schema_1);
+
+  // Resets range schema so both will be equal again.
+  SetRangePartitionComponent(&schema_builder_1, {"a", "b", "c"});
+
+  // Table wide hash schemas are different.
+  AddHashBucketComponent(&schema_builder, { "a" }, 2, 0);
+  AddHashBucketComponent(&schema_builder_1, { "b" }, 2, 0);
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+  ASSERT_NE(partition_schema, partition_schema_1);
+
+  // Resets table wide hash schemas so both will be equal again.
+  schema_builder_1.clear_hash_bucket_schemas();
+  AddHashBucketComponent(&schema_builder_1, { "a" }, 2, 0);
+
+  // Different sizes of field 'ranges_with_hash_schemas_'
+  // [(a, _, _), (b, _, _))
+  {
+    RowOperationsPBEncoder encoder(schema_builder_1.add_range_bounds());
+    KuduPartialRow lower(&schema);
+    KuduPartialRow upper(&schema);
+    ASSERT_OK(lower.SetStringCopy("a", "a"));
+    ASSERT_OK(upper.SetStringCopy("a", "b"));
+    encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
+    encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
+
+    auto* range_hash_component = schema_builder_1.add_range_hash_schemas();
+    auto* hash_component = range_hash_component->add_hash_schemas();
+    hash_component->add_columns()->set_name("a");
+    hash_component->set_num_buckets(4);
+  }
+
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+  ASSERT_NE(partition_schema, partition_schema_1);
+
+  // Different custom hash bucket schema but same range bounds.
+  // [(a, _, _), (b, _, _))
+  {
+    RowOperationsPBEncoder encoder(schema_builder.add_range_bounds());
+    KuduPartialRow lower(&schema);
+    KuduPartialRow upper(&schema);
+    ASSERT_OK(lower.SetStringCopy("a", "a"));
+    ASSERT_OK(upper.SetStringCopy("a", "b"));
+    encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
+    encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
+
+    auto* range_hash_component = schema_builder.add_range_hash_schemas();
+    auto* hash_component = range_hash_component->add_hash_schemas();
+    hash_component->add_columns()->set_name("a");
+    hash_component->set_num_buckets(2);
+  }
+
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+  ASSERT_NE(partition_schema, partition_schema_1);
+
+  // Clears custom hash schemas and range bounds field.
+  schema_builder.clear_range_hash_schemas();
+  schema_builder.clear_range_bounds();
+
+  // Different range bounds but same custom hash bucket schema.
+  // [(a, _, _), (c, _, _))
+  {
+    RowOperationsPBEncoder encoder(schema_builder.add_range_bounds());
+    KuduPartialRow lower(&schema);
+    KuduPartialRow upper(&schema);
+    ASSERT_OK(lower.SetStringCopy("a", "a"));
+    ASSERT_OK(upper.SetStringCopy("a", "c"));
+    encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
+    encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
+
+    auto* range_hash_component = schema_builder.add_range_hash_schemas();
+    auto* hash_component = range_hash_component->add_hash_schemas();
+    hash_component->add_columns()->set_name("a");
+    hash_component->set_num_buckets(4);
+  }
+
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+  ASSERT_NE(partition_schema, partition_schema_1);
+}
 } // namespace kudu
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index 950fdc1..8a5a846 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -1106,27 +1106,43 @@ bool PartitionSchema::operator==(const PartitionSchema& rhs) const {
     return false;
   }
 
-  // Compare hash bucket components.
-  const auto& hb_schemas = hash_bucket_schemas_;
-  const auto& rhs_hb_schemas = rhs.hash_bucket_schemas_;
-  if (hb_schemas.size() != rhs_hb_schemas.size()) {
+  if (hash_bucket_schemas_.size() != rhs.hash_bucket_schemas_.size() ||
+      ranges_with_hash_schemas_.size() != rhs.ranges_with_hash_schemas_.size()) {
     return false;
   }
-  for (size_t i = 0; i < hb_schemas.size(); ++i) {
-    if (hb_schemas[i].seed != rhs_hb_schemas[i].seed) {
+
+  // Compare table wide hash bucket schemas.
+  for (size_t i = 0; i < hash_bucket_schemas_.size(); ++i) {
+    if (hash_bucket_schemas_[i] != rhs.hash_bucket_schemas_[i]) {
       return false;
     }
-    if (hb_schemas[i].num_buckets != rhs_hb_schemas[i].num_buckets) {
+  }
+
+  // Compare range bounds and per range hash bucket schemas.
+  for (size_t i = 0; i < ranges_with_hash_schemas_.size(); ++i) {
+    if (ranges_with_hash_schemas_[i].lower != rhs.ranges_with_hash_schemas_[i].lower ||
+        ranges_with_hash_schemas_[i].upper != rhs.ranges_with_hash_schemas_[i].upper) {
       return false;
     }
-    if (hb_schemas[i].column_ids != rhs_hb_schemas[i].column_ids) {
+    const auto& lhs_hash_schemas = ranges_with_hash_schemas_[i].hash_schemas;
+    const auto& rhs_hash_schemas = rhs.ranges_with_hash_schemas_[i].hash_schemas;
+    if (lhs_hash_schemas.size() != rhs_hash_schemas.size()) {
       return false;
     }
+    for (size_t j = 0; j < lhs_hash_schemas.size(); ++j) {
+      if (lhs_hash_schemas[j] != rhs_hash_schemas[j]) {
+        return false;
+      }
+    }
   }
 
   return true;
 }
 
+bool PartitionSchema::operator!=(const PartitionSchema& rhs) const {
+  return !(*this == rhs);
+}
+
 // Encodes the specified primary key columns of the supplied row into the buffer.
 void PartitionSchema::EncodeColumns(const ConstContiguousRow& row,
                                     const vector<ColumnId>& column_ids,
@@ -1560,8 +1576,8 @@ Status PartitionSchema::GetRangeSchemaColumnIndexes(const Schema& schema,
 int32_t PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema& schema,
                                                               int32_t col_idx) const {
   const ColumnId column_id = schema.column_id(col_idx);
-  for (size_t i = 0; i < hash_partition_schemas().size(); ++i) {
-    auto hash_partition = hash_partition_schemas()[i];
+  for (int i = 0; i < hash_bucket_schemas_.size(); ++i) {
+    const auto& hash_partition = hash_bucket_schemas_[i];
     if (hash_partition.column_ids.size() == 1 && hash_partition.column_ids[0] == column_id) {
       return i;
     }
@@ -1571,8 +1587,8 @@ int32_t PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema& sche
 
 bool PartitionSchema::IsColumnSingleRangeSchema(const Schema& schema, int32_t col_idx) const {
   const ColumnId column_id = schema.column_id(col_idx);
-  return range_partition_schema().column_ids.size() == 1 &&
-         range_partition_schema().column_ids[0] == column_id;
+  return range_schema_.column_ids.size() == 1 &&
+         range_schema_.column_ids[0] == column_id;
 }
 
 } // namespace kudu
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index 0bfdae2..bb3e773 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -151,6 +151,26 @@ class PartitionSchema {
     std::vector<ColumnId> column_ids;
     int32_t num_buckets;
     uint32_t seed;
+
+    bool operator==(const HashBucketSchema& rhs) const {
+      if (this == &rhs) {
+        return true;
+      }
+      if (seed != rhs.seed) {
+        return false;
+      }
+      if (num_buckets != rhs.num_buckets) {
+        return false;
+      }
+      if (column_ids != rhs.column_ids) {
+        return false;
+      }
+      return true;
+    }
+
+    bool operator!=(const HashBucketSchema& rhs) const {
+      return !(*this == rhs);
+    }
   };
 
   typedef std::vector<HashBucketSchema> HashBucketSchemas;
@@ -272,6 +292,9 @@ class PartitionSchema {
   // Returns 'true' iff the partition schema 'rhs' is equivalent to this one.
   bool operator==(const PartitionSchema& rhs) const;
 
+  // Returns 'true' iff the partition schema 'rhs' is not equivalent to this one.
+  bool operator!=(const PartitionSchema& rhs) const;
+
   // Transforms an exclusive lower bound range partition key into an inclusive
   // lower bound range partition key.
   //
diff --git a/src/kudu/common/partition_pruner.cc b/src/kudu/common/partition_pruner.cc
index 191c035..93f225a 100644
--- a/src/kudu/common/partition_pruner.cc
+++ b/src/kudu/common/partition_pruner.cc
@@ -405,7 +405,7 @@ void PartitionPruner::Init(const Schema& schema,
   // the lower and upper bounds specified by the scan.
   string scan_range_lower_bound;
   string scan_range_upper_bound;
-  const vector<ColumnId> &range_columns = partition_schema.range_schema_.column_ids;
+  const vector<ColumnId>& range_columns = partition_schema.range_schema_.column_ids;
   if (!range_columns.empty()) {
     if (AreRangeColumnsPrefixOfPrimaryKey(schema, range_columns)) {
       EncodeRangeKeysFromPrimaryKeyBounds(schema,