You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/08/05 19:37:29 UTC
[kudu] branch master updated: [common] Update
PartitionSchema::operator==()
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 11cad7f [common] Update PartitionSchema::operator==()
11cad7f is described below
commit 11cad7f7b7a9953850cc6cccef08c2b19fafbdac
Author: Mahesh Reddy <mr...@cloudera.com>
AuthorDate: Thu Jul 29 23:20:01 2021 -0700
[common] Update PartitionSchema::operator==()
This patch modifies the overloaded equals operator
function to include checks for custom hash schemas
per range and their range bounds. In addition,
some minor style changes were made.
Change-Id: I09c626ecdcbe7c0465e6483d6a23369fb7194bfb
Reviewed-on: http://gerrit.cloudera.org:8080/17753
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/common/partition-test.cc | 117 ++++++++++++++++++++++++++++++++++--
src/kudu/common/partition.cc | 40 ++++++++----
src/kudu/common/partition.h | 23 +++++++
src/kudu/common/partition_pruner.cc | 2 +-
4 files changed, 164 insertions(+), 18 deletions(-)
diff --git a/src/kudu/common/partition-test.cc b/src/kudu/common/partition-test.cc
index de465bf..411fd3c 100644
--- a/src/kudu/common/partition-test.cc
+++ b/src/kudu/common/partition-test.cc
@@ -1402,8 +1402,8 @@ TEST_F(PartitionTest, TestPartitionSchemaPB) {
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
- auto range_hash_component = pb.add_range_hash_schemas();
- auto hash_component = range_hash_component->add_hash_schemas();
+ auto* range_hash_component = pb.add_range_hash_schemas();
+ auto* hash_component = range_hash_component->add_hash_schemas();
hash_component->add_columns()->set_name("a");
hash_component->set_num_buckets(4);
}
@@ -1420,11 +1420,11 @@ TEST_F(PartitionTest, TestPartitionSchemaPB) {
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
- auto range_hash_component = pb.add_range_hash_schemas();
- auto hash_component_1 = range_hash_component->add_hash_schemas();
+ auto* range_hash_component = pb.add_range_hash_schemas();
+ auto* hash_component_1 = range_hash_component->add_hash_schemas();
hash_component_1->add_columns()->set_name("a");
hash_component_1->set_num_buckets(2);
- auto hash_component_2 = range_hash_component->add_hash_schemas();
+ auto* hash_component_2 = range_hash_component->add_hash_schemas();
hash_component_2->add_columns()->set_name("b");
hash_component_2->set_num_buckets(3);
}
@@ -1545,4 +1545,111 @@ TEST_F(PartitionTest, TestMalformedPartitionSchemaPB) {
ASSERT_EQ("Invalid argument: missing upper range bound in request",
s2.ToString());
}
+
+TEST_F(PartitionTest, TestOverloadedEqualsOperator) {
+ // CREATE TABLE t (a VARCHAR, b VARCHAR, c VARCHAR, PRIMARY KEY (a, b, c))
+ // PARTITION BY [RANGE (a, b, c)];
+ Schema schema({ ColumnSchema("a", STRING),
+ ColumnSchema("b", STRING),
+ ColumnSchema("c", STRING) },
+ { ColumnId(0), ColumnId(1), ColumnId(2) }, 3);
+
+ PartitionSchemaPB schema_builder;
+ PartitionSchema partition_schema;
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+
+ PartitionSchemaPB schema_builder_1;
+ PartitionSchema partition_schema_1;
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+
+ // Same object.
+ ASSERT_EQ(partition_schema, partition_schema);
+ ASSERT_EQ(partition_schema_1, partition_schema_1);
+
+ // Both schemas should be identical.
+ ASSERT_EQ(partition_schema, partition_schema_1);
+
+ // Clears the range schema of 'schema_builder_1'.
+ SetRangePartitionComponent(&schema_builder_1, {});
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+ ASSERT_NE(partition_schema, partition_schema_1);
+
+ // Resets range schema so both will be equal again.
+ SetRangePartitionComponent(&schema_builder_1, {"a", "b", "c"});
+
+ // Table wide hash schemas are different.
+ AddHashBucketComponent(&schema_builder, { "a" }, 2, 0);
+ AddHashBucketComponent(&schema_builder_1, { "b" }, 2, 0);
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+ ASSERT_NE(partition_schema, partition_schema_1);
+
+ // Resets table wide hash schemas so both will be equal again.
+ schema_builder_1.clear_hash_bucket_schemas();
+ AddHashBucketComponent(&schema_builder_1, { "a" }, 2, 0);
+
+ // Different sizes of field 'ranges_with_hash_schemas_'
+ // [(a, _, _), (b, _, _))
+ {
+ RowOperationsPBEncoder encoder(schema_builder_1.add_range_bounds());
+ KuduPartialRow lower(&schema);
+ KuduPartialRow upper(&schema);
+ ASSERT_OK(lower.SetStringCopy("a", "a"));
+ ASSERT_OK(upper.SetStringCopy("a", "b"));
+ encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
+ encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
+
+ auto* range_hash_component = schema_builder_1.add_range_hash_schemas();
+ auto* hash_component = range_hash_component->add_hash_schemas();
+ hash_component->add_columns()->set_name("a");
+ hash_component->set_num_buckets(4);
+ }
+
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
+ ASSERT_NE(partition_schema, partition_schema_1);
+
+ // Different custom hash bucket schema but same range bounds.
+ // [(a, _, _), (b, _, _))
+ {
+ RowOperationsPBEncoder encoder(schema_builder.add_range_bounds());
+ KuduPartialRow lower(&schema);
+ KuduPartialRow upper(&schema);
+ ASSERT_OK(lower.SetStringCopy("a", "a"));
+ ASSERT_OK(upper.SetStringCopy("a", "b"));
+ encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
+ encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
+
+ auto* range_hash_component = schema_builder.add_range_hash_schemas();
+ auto* hash_component = range_hash_component->add_hash_schemas();
+ hash_component->add_columns()->set_name("a");
+ hash_component->set_num_buckets(2);
+ }
+
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+ ASSERT_NE(partition_schema, partition_schema_1);
+
+ // Clears custom hash schemas and range bounds field.
+ schema_builder.clear_range_hash_schemas();
+ schema_builder.clear_range_bounds();
+
+ // Different range bounds but same custom hash bucket schema.
+ // [(a, _, _), (c, _, _))
+ {
+ RowOperationsPBEncoder encoder(schema_builder.add_range_bounds());
+ KuduPartialRow lower(&schema);
+ KuduPartialRow upper(&schema);
+ ASSERT_OK(lower.SetStringCopy("a", "a"));
+ ASSERT_OK(upper.SetStringCopy("a", "c"));
+ encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
+ encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
+
+ auto* range_hash_component = schema_builder.add_range_hash_schemas();
+ auto* hash_component = range_hash_component->add_hash_schemas();
+ hash_component->add_columns()->set_name("a");
+ hash_component->set_num_buckets(4);
+ }
+
+ ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+ ASSERT_NE(partition_schema, partition_schema_1);
+}
} // namespace kudu
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index 950fdc1..8a5a846 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -1106,27 +1106,43 @@ bool PartitionSchema::operator==(const PartitionSchema& rhs) const {
return false;
}
- // Compare hash bucket components.
- const auto& hb_schemas = hash_bucket_schemas_;
- const auto& rhs_hb_schemas = rhs.hash_bucket_schemas_;
- if (hb_schemas.size() != rhs_hb_schemas.size()) {
+ if (hash_bucket_schemas_.size() != rhs.hash_bucket_schemas_.size() ||
+ ranges_with_hash_schemas_.size() != rhs.ranges_with_hash_schemas_.size()) {
return false;
}
- for (size_t i = 0; i < hb_schemas.size(); ++i) {
- if (hb_schemas[i].seed != rhs_hb_schemas[i].seed) {
+
+ // Compare table wide hash bucket schemas.
+ for (size_t i = 0; i < hash_bucket_schemas_.size(); ++i) {
+ if (hash_bucket_schemas_[i] != rhs.hash_bucket_schemas_[i]) {
return false;
}
- if (hb_schemas[i].num_buckets != rhs_hb_schemas[i].num_buckets) {
+ }
+
+ // Compare range bounds and per range hash bucket schemas.
+ for (size_t i = 0; i < ranges_with_hash_schemas_.size(); ++i) {
+ if (ranges_with_hash_schemas_[i].lower != rhs.ranges_with_hash_schemas_[i].lower ||
+ ranges_with_hash_schemas_[i].upper != rhs.ranges_with_hash_schemas_[i].upper) {
return false;
}
- if (hb_schemas[i].column_ids != rhs_hb_schemas[i].column_ids) {
+ const auto& lhs_hash_schemas = ranges_with_hash_schemas_[i].hash_schemas;
+ const auto& rhs_hash_schemas = rhs.ranges_with_hash_schemas_[i].hash_schemas;
+ if (lhs_hash_schemas.size() != rhs_hash_schemas.size()) {
return false;
}
+ for (size_t j = 0; j < lhs_hash_schemas.size(); ++j) {
+ if (lhs_hash_schemas[j] != rhs_hash_schemas[j]) {
+ return false;
+ }
+ }
}
return true;
}
+bool PartitionSchema::operator!=(const PartitionSchema& rhs) const {
+ return !(*this == rhs);
+}
+
// Encodes the specified primary key columns of the supplied row into the buffer.
void PartitionSchema::EncodeColumns(const ConstContiguousRow& row,
const vector<ColumnId>& column_ids,
@@ -1560,8 +1576,8 @@ Status PartitionSchema::GetRangeSchemaColumnIndexes(const Schema& schema,
int32_t PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema& schema,
int32_t col_idx) const {
const ColumnId column_id = schema.column_id(col_idx);
- for (size_t i = 0; i < hash_partition_schemas().size(); ++i) {
- auto hash_partition = hash_partition_schemas()[i];
+ for (int i = 0; i < hash_bucket_schemas_.size(); ++i) {
+ const auto& hash_partition = hash_bucket_schemas_[i];
if (hash_partition.column_ids.size() == 1 && hash_partition.column_ids[0] == column_id) {
return i;
}
@@ -1571,8 +1587,8 @@ int32_t PartitionSchema::TryGetSingleColumnHashPartitionIndex(const Schema& sche
bool PartitionSchema::IsColumnSingleRangeSchema(const Schema& schema, int32_t col_idx) const {
const ColumnId column_id = schema.column_id(col_idx);
- return range_partition_schema().column_ids.size() == 1 &&
- range_partition_schema().column_ids[0] == column_id;
+ return range_schema_.column_ids.size() == 1 &&
+ range_schema_.column_ids[0] == column_id;
}
} // namespace kudu
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index 0bfdae2..bb3e773 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -151,6 +151,26 @@ class PartitionSchema {
std::vector<ColumnId> column_ids;
int32_t num_buckets;
uint32_t seed;
+
+ bool operator==(const HashBucketSchema& rhs) const {
+ if (this == &rhs) {
+ return true;
+ }
+ if (seed != rhs.seed) {
+ return false;
+ }
+ if (num_buckets != rhs.num_buckets) {
+ return false;
+ }
+ if (column_ids != rhs.column_ids) {
+ return false;
+ }
+ return true;
+ }
+
+ bool operator!=(const HashBucketSchema& rhs) const {
+ return !(*this == rhs);
+ }
};
typedef std::vector<HashBucketSchema> HashBucketSchemas;
@@ -272,6 +292,9 @@ class PartitionSchema {
// Returns 'true' iff the partition schema 'rhs' is equivalent to this one.
bool operator==(const PartitionSchema& rhs) const;
+ // Returns 'true' iff the partition schema 'rhs' is not equivalent to this one.
+ bool operator!=(const PartitionSchema& rhs) const;
+
// Transforms an exclusive lower bound range partition key into an inclusive
// lower bound range partition key.
//
diff --git a/src/kudu/common/partition_pruner.cc b/src/kudu/common/partition_pruner.cc
index 191c035..93f225a 100644
--- a/src/kudu/common/partition_pruner.cc
+++ b/src/kudu/common/partition_pruner.cc
@@ -405,7 +405,7 @@ void PartitionPruner::Init(const Schema& schema,
// the lower and upper bounds specified by the scan.
string scan_range_lower_bound;
string scan_range_upper_bound;
- const vector<ColumnId> &range_columns = partition_schema.range_schema_.column_ids;
+ const vector<ColumnId>& range_columns = partition_schema.range_schema_.column_ids;
if (!range_columns.empty()) {
if (AreRangeColumnsPrefixOfPrimaryKey(schema, range_columns)) {
EncodeRangeKeysFromPrimaryKeyBounds(schema,