You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/08/18 06:22:03 UTC
[kudu] branch master updated: [partition] update naming of related
entities
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new b5cf9d9 [partition] update naming of related entities
b5cf9d9 is described below
commit b5cf9d9a5738d77f692a4dc1a33dd9c1a7a367e2
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Fri Aug 13 22:12:44 2021 -0700
[partition] update naming of related entities
With this patch, the naming for the partition-related entities
in the C++ code has changed:
* renamed HashBucketSchema into HashDimension
* renamed HashBucketSchemas into HashSchema, so HashSchema is now
a typedef for std::vector<HashDimension>
* partitioning schema --> partition schema
* hash bucket schema --> hash schema
* hash partitioning schema --> hash schema
* range partition schema --> range schema
* hash_bucket_schemas_ --> hash_schema_
* removed the PerRangeHashBucketSchemas typedef
The idea is to represent the hash-related component of the partition
schema as a set of hash bucket dimensions. Before, it was called
'hash bucket schemas' to be a 1-to-1 mapping with the
PartitionSchemaPB::HashBucketSchemaPB protobuf message defined in
common.proto. I found it a bit awkward to call a single dimension of
hash bucketing a 'schema' because it's semantically confusing:
'schema' usually means the whole set of rules, not just a part of them.
It would be nice to update the proto-related names as well, but doing so
would break backwards compatibility.
I also updated the in-line documentation for the partition-related
entities in the PartitionSchema's class-wide comment and added TODOs
where more updates are required to accommodate the recent changes
introduced in the context of KUDU-2671.
Change-Id: I6a858e97090930b21e9c767dac2f5cc8b9816033
Reviewed-on: http://gerrit.cloudera.org:8080/17775
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
Reviewed-by: Mahesh Reddy <mr...@cloudera.com>
---
src/kudu/client/client.cc | 26 +-
src/kudu/client/scan_token-test.cc | 2 +-
src/kudu/client/table_creator-internal.cc | 10 +-
src/kudu/client/table_creator-internal.h | 17 +-
src/kudu/common/partition-test.cc | 78 +++---
src/kudu/common/partition.cc | 310 +++++++++++----------
src/kudu/common/partition.h | 138 +++++----
src/kudu/common/partition_pruner-test.cc | 44 +--
src/kudu/common/partition_pruner.cc | 54 ++--
src/kudu/common/partition_pruner.h | 7 +-
.../integration-tests/table_locations-itest.cc | 50 ++--
src/kudu/integration-tests/txn_commit-itest.cc | 2 +-
src/kudu/master/catalog_manager.cc | 10 +-
src/kudu/master/master-test.cc | 42 ++-
src/kudu/master/master.proto | 2 +-
src/kudu/tools/kudu-tool-test.cc | 2 +-
src/kudu/tools/table_scanner.cc | 14 +-
src/kudu/tools/tool_action_perf.cc | 2 +-
18 files changed, 421 insertions(+), 389 deletions(-)
diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index 1befc90..897cc6e 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -832,8 +832,7 @@ KuduTableCreator& KuduTableCreator::add_hash_partitions(const vector<string>& co
KuduTableCreator& KuduTableCreator::add_hash_partitions(const vector<string>& columns,
int32_t num_buckets,
int32_t seed) {
- PartitionSchemaPB::HashBucketSchemaPB* bucket_schema =
- data_->partition_schema_.add_hash_bucket_schemas();
+ auto* bucket_schema = data_->partition_schema_.add_hash_bucket_schemas();
for (const string& col_name : columns) {
bucket_schema->add_columns()->set_name(col_name);
}
@@ -964,11 +963,9 @@ Status KuduTableCreator::Create() {
has_range_splits = true;
}
- // A preliminary pass over the ranges is here to check if any custom hash
- // partitioning schemas are present.
bool has_range_with_custom_hash_schema = false;
for (const auto& p : data_->range_partitions_) {
- if (!p->data_->hash_bucket_schemas_.empty()) {
+ if (!p->data_->hash_schema_.empty()) {
has_range_with_custom_hash_schema = true;
break;
}
@@ -1011,17 +1008,17 @@ Status KuduTableCreator::Create() {
encoder.Add(lower_bound_type, *range->lower_bound_);
encoder.Add(upper_bound_type, *range->upper_bound_);
// Populate corresponding element in 'range_hash_schemas' if there is at
- // least one range with custom hash partitioning schema.
+ // least one range with custom hash schema.
auto* schemas_pb = partition_schema->add_range_hash_schemas();
- if (range->hash_bucket_schemas_.empty()) {
+ if (range->hash_schema_.empty()) {
schemas_pb->mutable_hash_schemas()->CopyFrom(
data_->partition_schema_.hash_bucket_schemas());
} else {
- for (const auto& schema : range->hash_bucket_schemas_) {
+ for (const auto& hash_dimension : range->hash_schema_) {
auto* pb = schemas_pb->add_hash_schemas();
- pb->set_seed(schema.seed);
- pb->set_num_buckets(schema.num_buckets);
- for (const auto& column_name : schema.column_names) {
+ pb->set_seed(hash_dimension.seed);
+ pb->set_num_buckets(hash_dimension.num_buckets);
+ for (const auto& column_name : hash_dimension.column_names) {
pb->add_columns()->set_name(column_name);
}
}
@@ -1072,6 +1069,13 @@ Status KuduTableCreator::KuduRangePartition::add_hash_partitions(
const vector<string>& columns,
int32_t num_buckets,
int32_t seed) {
+ if (seed < 0) {
+ // TODO(aserbin): change the signature of
+ // KuduRangePartition::add_hash_partitions() to use uint32_t
+ // for the 'seed' parameter while it's still possible since
+ // the client API hasn't been released yet
+ return Status::InvalidArgument("hash seed must non-negative");
+ }
return data_->add_hash_partitions(columns, num_buckets, seed);
}
diff --git a/src/kudu/client/scan_token-test.cc b/src/kudu/client/scan_token-test.cc
index 71dbaf6..ac75b45 100644
--- a/src/kudu/client/scan_token-test.cc
+++ b/src/kudu/client/scan_token-test.cc
@@ -1049,7 +1049,7 @@ TEST_P(StaleScanTokensParamTest, DroppingFirstRange) {
// Start another tablet scan using the other identical set of scan tokens.
// The client metacache should not produce any errors: it should re-fetch
- // the information about the current partitioning scheme and scan the table
+ // the information about the current partition schema and scan the table
// within the range of the new partitions which correspond to the originally
// supplied range.
int64_t row_count_b = -1;
diff --git a/src/kudu/client/table_creator-internal.cc b/src/kudu/client/table_creator-internal.cc
index 7bfbddd..3d64b27 100644
--- a/src/kudu/client/table_creator-internal.cc
+++ b/src/kudu/client/table_creator-internal.cc
@@ -46,7 +46,7 @@ KuduTableCreator::KuduRangePartition::Data::Data(
Status KuduTableCreator::KuduRangePartition::Data::add_hash_partitions(
const vector<string>& column_names,
int32_t num_buckets,
- int32_t seed) {
+ uint32_t seed) {
if (column_names.empty()) {
return Status::InvalidArgument(
"set of columns for hash partitioning must not be empty");
@@ -56,10 +56,10 @@ Status KuduTableCreator::KuduRangePartition::Data::add_hash_partitions(
"at least two buckets are required to establish hash partitioning");
}
- // It's totally fine to have multiple hash levels with same parameters,
- // so there is no need to check for logical duplicates in the
- // 'hash_bucket_schemas_' vector.
- hash_bucket_schemas_.emplace_back(column_names, num_buckets, seed);
+ // If many hash dimensions use same columns, the server side will check
+ // for such a condition and report an error appropriately. So, to simplify the
+ // client-side code, there is no check for such a condition.
+ hash_schema_.emplace_back(column_names, num_buckets, seed);
return Status::OK();
}
diff --git a/src/kudu/client/table_creator-internal.h b/src/kudu/client/table_creator-internal.h
index d7fcb2f..4cfe477 100644
--- a/src/kudu/client/table_creator-internal.h
+++ b/src/kudu/client/table_creator-internal.h
@@ -37,19 +37,20 @@ namespace client {
class KuduSchema;
-struct HashBucketSchema {
- HashBucketSchema(std::vector<std::string> column_names,
- uint32_t num_buckets,
- int32_t seed)
+struct HashDimension {
+ HashDimension(std::vector<std::string> column_names,
+ uint32_t num_buckets,
+ uint32_t seed)
: column_names(std::move(column_names)),
num_buckets(num_buckets),
seed(seed) {
}
const std::vector<std::string> column_names;
- const uint32_t num_buckets;
- const int32_t seed;
+ const int32_t num_buckets;
+ const uint32_t seed;
};
+typedef std::vector<HashDimension> HashSchema;
class KuduTableCreator::Data {
public:
@@ -98,7 +99,7 @@ class KuduTableCreator::KuduRangePartition::Data {
Status add_hash_partitions(const std::vector<std::string>& column_names,
int32_t num_buckets,
- int32_t seed);
+ uint32_t seed);
const RangePartitionBound lower_bound_type_;
const RangePartitionBound upper_bound_type_;
@@ -106,7 +107,7 @@ class KuduTableCreator::KuduRangePartition::Data {
std::unique_ptr<KuduPartialRow> lower_bound_;
std::unique_ptr<KuduPartialRow> upper_bound_;
- std::vector<HashBucketSchema> hash_bucket_schemas_;
+ HashSchema hash_schema_;
private:
DISALLOW_COPY_AND_ASSIGN(Data);
diff --git a/src/kudu/common/partition-test.cc b/src/kudu/common/partition-test.cc
index 1666491..a9719de 100644
--- a/src/kudu/common/partition-test.cc
+++ b/src/kudu/common/partition-test.cc
@@ -52,11 +52,11 @@ using std::make_pair;
namespace kudu {
namespace {
-void AddHashBucketComponent(PartitionSchemaPB* partition_schema_pb,
- const vector<string>& columns,
- uint32_t num_buckets, int32_t seed) {
- PartitionSchemaPB::HashBucketSchemaPB* hash_bucket_schema =
- partition_schema_pb->add_hash_bucket_schemas();
+void AddHashDimension(PartitionSchemaPB* partition_schema_pb,
+ const vector<string>& columns,
+ int32_t num_buckets,
+ uint32_t seed) {
+ auto* hash_bucket_schema = partition_schema_pb->add_hash_bucket_schemas();
for (const string& column : columns) {
hash_bucket_schema->add_columns()->set_name(column);
}
@@ -206,8 +206,8 @@ TEST_F(PartitionTest, TestPartitionKeyEncoding) {
{ ColumnId(0), ColumnId(1), ColumnId(2) }, 3);
PartitionSchemaPB schema_builder;
- AddHashBucketComponent(&schema_builder, { "a", "b" }, 32, 0);
- AddHashBucketComponent(&schema_builder, { "c" }, 32, 42);
+ AddHashDimension(&schema_builder, { "a", "b" }, 32, 0);
+ AddHashDimension(&schema_builder, { "c" }, 32, 42);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
@@ -418,14 +418,14 @@ TEST_F(PartitionTest, TestCreateRangePartitions) {
}
}
-TEST_F(PartitionTest, TestCreateHashBucketPartitions) {
+TEST_F(PartitionTest, TestCreateHashPartitions) {
// CREATE TABLE t (a VARCHAR PRIMARY KEY),
// PARTITION BY [HASH BUCKET (a)];
Schema schema({ ColumnSchema("a", STRING) }, { ColumnId(0) }, 1);
PartitionSchemaPB schema_builder;
SetRangePartitionComponent(&schema_builder, vector<string>());
- AddHashBucketComponent(&schema_builder, { "a" }, 3, 42);
+ AddHashDimension(&schema_builder, { "a" }, 3, 42);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
@@ -481,8 +481,8 @@ TEST_F(PartitionTest, TestCreatePartitions) {
{ ColumnId(0), ColumnId(1), ColumnId(2) }, 3);
PartitionSchemaPB schema_builder;
- AddHashBucketComponent(&schema_builder, { "a" }, 2, 0);
- AddHashBucketComponent(&schema_builder, { "b" }, 2, 0);
+ AddHashDimension(&schema_builder, { "a" }, 2, 0);
+ AddHashDimension(&schema_builder, { "b" }, 2, 0);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
@@ -887,8 +887,8 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
PartitionSchemaPB schema_builder;
// Table-wide hash schema defined below, 3 by 2 buckets so 6 total.
- AddHashBucketComponent(&schema_builder, { "a", "c" }, 3, 0);
- AddHashBucketComponent(&schema_builder, { "b" }, 2, 0);
+ AddHashDimension(&schema_builder, { "a", "c" }, 3, 0);
+ AddHashDimension(&schema_builder, { "b" }, 2, 0);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
CheckSerializationFunctions(schema_builder, partition_schema, schema);
@@ -897,9 +897,9 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
partition_schema.DebugString(schema));
vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
vector<pair<pair<KuduPartialRow, KuduPartialRow>,
- PartitionSchema::HashBucketSchemas>> bounds_with_hash_schemas;
+ PartitionSchema::HashSchema>> bounds_with_hash_schemas;
{ // [(a1, _, c1), (a2, _, c2))
KuduPartialRow lower(&schema);
@@ -908,7 +908,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
ASSERT_OK(lower.SetStringCopy("c", "c1"));
ASSERT_OK(upper.SetStringCopy("a", "a2"));
ASSERT_OK(upper.SetStringCopy("c", "c2"));
- PartitionSchema::HashBucketSchemas hash_schema_4_buckets = {{{ColumnId(0)}, 4, 0}};
+ PartitionSchema::HashSchema hash_schema_4_buckets = {{{ColumnId(0)}, 4, 0}};
bounds.emplace_back(lower, upper);
range_hash_schemas.emplace_back(hash_schema_4_buckets);
bounds_with_hash_schemas.emplace_back(make_pair(std::move(lower), std::move(upper)),
@@ -923,9 +923,9 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
ASSERT_OK(upper.SetStringCopy("a", "a4"));
ASSERT_OK(upper.SetStringCopy("b", "b4"));
bounds.emplace_back(lower, upper);
- range_hash_schemas.emplace_back(PartitionSchema::HashBucketSchemas());
+ range_hash_schemas.emplace_back(PartitionSchema::HashSchema());
bounds_with_hash_schemas.emplace_back(make_pair(std::move(lower), std::move(upper)),
- PartitionSchema::HashBucketSchemas());
+ PartitionSchema::HashSchema());
}
{ // [(a5, b5, _), (a6, _, c6))
@@ -935,7 +935,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerRange) {
ASSERT_OK(lower.SetStringCopy("b", "b5"));
ASSERT_OK(upper.SetStringCopy("a", "a6"));
ASSERT_OK(upper.SetStringCopy("c", "c6"));
- PartitionSchema::HashBucketSchemas hash_schema_2_buckets_by_3 = {
+ PartitionSchema::HashSchema hash_schema_2_buckets_by_3 = {
{{ColumnId(0)}, 2, 0},
{{ColumnId(1)}, 3, 0}
};
@@ -1182,8 +1182,8 @@ TEST_F(PartitionTest, CustomHashSchemasPerRangeOnly) {
typedef pair<KuduPartialRow, KuduPartialRow> RangeBound;
vector<RangeBound> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
- vector<pair<RangeBound, PartitionSchema::HashBucketSchemas>>
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
+ vector<pair<RangeBound, PartitionSchema::HashSchema>>
bounds_with_hash_schemas;
// [(a1, b1), (a2, b2))
@@ -1194,7 +1194,7 @@ TEST_F(PartitionTest, CustomHashSchemasPerRangeOnly) {
ASSERT_OK(lower.SetStringNoCopy("b", "b1"));
ASSERT_OK(upper.SetStringNoCopy("a", "a2"));
ASSERT_OK(upper.SetStringNoCopy("b", "b2"));
- PartitionSchema::HashBucketSchemas hash_schema_2_buckets =
+ PartitionSchema::HashSchema hash_schema_2_buckets =
{ { { ColumnId(0) }, 2, 0 } };
bounds.emplace_back(lower, upper);
range_hash_schemas.emplace_back(hash_schema_2_buckets);
@@ -1236,7 +1236,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
PartitionSchemaPB schema_builder;
// Table-wide hash schema defined below.
- AddHashBucketComponent(&schema_builder, { "b" }, 2, 0);
+ AddHashDimension(&schema_builder, { "b" }, 2, 0);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
CheckSerializationFunctions(schema_builder, partition_schema, schema);
@@ -1245,14 +1245,14 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
partition_schema.DebugString(schema));
vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
{ // [(_, _, _), (a1, _, c1))
KuduPartialRow lower(&schema);
KuduPartialRow upper(&schema);
ASSERT_OK(upper.SetStringCopy("a", "a1"));
ASSERT_OK(upper.SetStringCopy("c", "c1"));
- PartitionSchema::HashBucketSchemas hash_schema_4_buckets = {{{ColumnId(0)}, 4, 0}};
+ PartitionSchema::HashSchema hash_schema_4_buckets = {{{ColumnId(0)}, 4, 0}};
bounds.emplace_back(lower, upper);
range_hash_schemas.emplace_back(hash_schema_4_buckets);
}
@@ -1265,7 +1265,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
ASSERT_OK(upper.SetStringCopy("a", "a3"));
ASSERT_OK(upper.SetStringCopy("b", "b3"));
bounds.emplace_back(lower, upper);
- range_hash_schemas.emplace_back(PartitionSchema::HashBucketSchemas());
+ range_hash_schemas.emplace_back(PartitionSchema::HashSchema());
}
{ // [(a4, b4, _), (_, _, _))
@@ -1273,7 +1273,7 @@ TEST_F(PartitionTest, TestVaryingHashSchemasPerUnboundedRanges) {
KuduPartialRow upper(&schema);
ASSERT_OK(lower.SetStringCopy("a", "a4"));
ASSERT_OK(lower.SetStringCopy("b", "b4"));
- PartitionSchema::HashBucketSchemas hash_schema_2_buckets_by_3 = {
+ PartitionSchema::HashSchema hash_schema_2_buckets_by_3 = {
{{ColumnId(0)}, 2, 0},
{{ColumnId(2)}, 3, 0}
};
@@ -1388,7 +1388,7 @@ TEST_F(PartitionTest, TestPartitionSchemaPB) {
PartitionSchemaPB pb;
// Table-wide hash schema defined below.
- AddHashBucketComponent(&pb, { "b" }, 2, 0);
+ AddHashDimension(&pb, { "b" }, 2, 0);
// [(a0, _, c0), (a0, _, c1))
{
@@ -1449,36 +1449,36 @@ TEST_F(PartitionTest, TestPartitionSchemaPB) {
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
// Check fields of 'partition_schema' to verify decoder function.
- ASSERT_EQ(1, partition_schema.hash_partition_schemas().size());
+ ASSERT_EQ(1, partition_schema.hash_schema().size());
const auto& ranges_with_hash_schemas = partition_schema.ranges_with_hash_schemas();
ASSERT_EQ(3, ranges_with_hash_schemas.size());
EXPECT_EQ(string("a0\0\0\0\0c0", 8), ranges_with_hash_schemas[0].lower);
EXPECT_EQ(string("a0\0\0\0\0c1", 8), ranges_with_hash_schemas[0].upper);
- EXPECT_EQ(1, ranges_with_hash_schemas[0].hash_schemas.size());
+ EXPECT_EQ(1, ranges_with_hash_schemas[0].hash_schema.size());
- const auto& range1_hash_schema = ranges_with_hash_schemas[0].hash_schemas[0];
+ const auto& range1_hash_schema = ranges_with_hash_schemas[0].hash_schema[0];
EXPECT_EQ(1, range1_hash_schema.column_ids.size());
EXPECT_EQ(0, range1_hash_schema.column_ids[0]);
EXPECT_EQ(4, range1_hash_schema.num_buckets);
EXPECT_EQ(string("a1\0\0\0\0c2", 8), ranges_with_hash_schemas[1].lower);
EXPECT_EQ(string("a1\0\0\0\0c3", 8), ranges_with_hash_schemas[1].upper);
- EXPECT_EQ(2, ranges_with_hash_schemas[1].hash_schemas.size());
+ EXPECT_EQ(2, ranges_with_hash_schemas[1].hash_schema.size());
- const auto& range2_hash_schema_1 = ranges_with_hash_schemas[1].hash_schemas[0];
+ const auto& range2_hash_schema_1 = ranges_with_hash_schemas[1].hash_schema[0];
EXPECT_EQ(1, range2_hash_schema_1.column_ids.size());
EXPECT_EQ(0, range2_hash_schema_1.column_ids[0]);
EXPECT_EQ(2, range2_hash_schema_1.num_buckets);
- const auto& range2_hash_schema_2 = ranges_with_hash_schemas[1].hash_schemas[1];
+ const auto& range2_hash_schema_2 = ranges_with_hash_schemas[1].hash_schema[1];
EXPECT_EQ(1, range2_hash_schema_2.column_ids.size());
EXPECT_EQ(1, range2_hash_schema_2.column_ids[0]);
EXPECT_EQ(3, range2_hash_schema_2.num_buckets);
EXPECT_EQ(string("a2\0\0\0\0c4", 8), ranges_with_hash_schemas[2].lower);
EXPECT_EQ(string("a2\0\0\0\0c5", 8), ranges_with_hash_schemas[2].upper);
- EXPECT_EQ(0, ranges_with_hash_schemas[2].hash_schemas.size());
+ EXPECT_EQ(0, ranges_with_hash_schemas[2].hash_schema.size());
CheckSerializationFunctions(pb, partition_schema, schema);
}
@@ -1510,7 +1510,7 @@ TEST_F(PartitionTest, TestMalformedPartitionSchemaPB) {
PartitionSchema partition_schema;
Status s = PartitionSchema::FromPB(pb, schema, &partition_schema);
ASSERT_EQ("Invalid argument: 3 ops were provided; "
- "Only two ops are expected for this pair of range bounds.",
+ "only two ops are expected for this pair of range bounds",
s.ToString());
pb.Clear();
@@ -1578,15 +1578,15 @@ TEST_F(PartitionTest, TestOverloadedEqualsOperator) {
SetRangePartitionComponent(&schema_builder_1, {"a", "b", "c"});
// Table wide hash schemas are different.
- AddHashBucketComponent(&schema_builder, { "a" }, 2, 0);
- AddHashBucketComponent(&schema_builder_1, { "b" }, 2, 0);
+ AddHashDimension(&schema_builder, { "a" }, 2, 0);
+ AddHashDimension(&schema_builder_1, { "b" }, 2, 0);
ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
ASSERT_OK(PartitionSchema::FromPB(schema_builder_1, schema, &partition_schema_1));
ASSERT_NE(partition_schema, partition_schema_1);
// Resets table wide hash schemas so both will be equal again.
schema_builder_1.clear_hash_bucket_schemas();
- AddHashBucketComponent(&schema_builder_1, { "a" }, 2, 0);
+ AddHashDimension(&schema_builder_1, { "a" }, 2, 0);
// Different sizes of field 'ranges_with_hash_schemas_'
// [(a, _, _), (b, _, _))
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index d52b09e..6b949e6 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -163,23 +163,24 @@ void SetColumnIdentifiers(const vector<ColumnId>& column_ids,
} // anonymous namespace
-Status PartitionSchema::ExtractHashBucketSchemasFromPB(
+Status PartitionSchema::ExtractHashSchemaFromPB(
const Schema& schema,
const RepeatedPtrField<PartitionSchemaPB_HashBucketSchemaPB>& hash_buckets_pb,
- HashBucketSchemas* hash_bucket_schemas) {
- for (const PartitionSchemaPB_HashBucketSchemaPB& hash_bucket_pb : hash_buckets_pb) {
- HashBucketSchema hash_bucket;
- RETURN_NOT_OK(ExtractColumnIds(hash_bucket_pb.columns(), schema, &hash_bucket.column_ids));
+ HashSchema* hash_schema) {
+ for (const auto& hash_bucket_pb : hash_buckets_pb) {
+ HashDimension hash_dimension;
+ RETURN_NOT_OK(ExtractColumnIds(
+ hash_bucket_pb.columns(), schema, &hash_dimension.column_ids));
// Hashing is column-order dependent, so sort the column_ids to ensure that
// hash components with the same columns hash consistently. This is
// important when deserializing a user-supplied partition schema during
// table creation; after that the columns should remain in sorted order.
- std::sort(hash_bucket.column_ids.begin(), hash_bucket.column_ids.end());
+ std::sort(hash_dimension.column_ids.begin(), hash_dimension.column_ids.end());
- hash_bucket.seed = hash_bucket_pb.seed();
- hash_bucket.num_buckets = hash_bucket_pb.num_buckets();
- hash_bucket_schemas->push_back(std::move(hash_bucket));
+ hash_dimension.seed = hash_bucket_pb.seed();
+ hash_dimension.num_buckets = hash_bucket_pb.num_buckets();
+ hash_schema->push_back(std::move(hash_dimension));
}
return Status::OK();
}
@@ -188,13 +189,14 @@ Status PartitionSchema::FromPB(const PartitionSchemaPB& pb,
const Schema& schema,
PartitionSchema* partition_schema) {
partition_schema->Clear();
- RETURN_NOT_OK(ExtractHashBucketSchemasFromPB(schema, pb.hash_bucket_schemas(),
- &partition_schema->hash_bucket_schemas_));
- PerRangeHashBucketSchemas range_hash_schema;
- range_hash_schema.resize(pb.range_hash_schemas_size());
+ RETURN_NOT_OK(ExtractHashSchemaFromPB(
+ schema, pb.hash_bucket_schemas(), &partition_schema->hash_schema_));
+ vector<HashSchema> range_hash_schemas;
+ range_hash_schemas.resize(pb.range_hash_schemas_size());
for (int i = 0; i < pb.range_hash_schemas_size(); i++) {
- RETURN_NOT_OK(ExtractHashBucketSchemasFromPB(schema, pb.range_hash_schemas(i).hash_schemas(),
- &range_hash_schema[i]));
+ RETURN_NOT_OK(ExtractHashSchemaFromPB(schema,
+ pb.range_hash_schemas(i).hash_schemas(),
+ &range_hash_schemas[i]));
}
vector<pair<KuduPartialRow, KuduPartialRow>> range_bounds;
@@ -203,8 +205,9 @@ Status PartitionSchema::FromPB(const PartitionSchemaPB& pb,
vector<DecodedRowOperation> ops;
RETURN_NOT_OK(decoder.DecodeOperations<DecoderMode::SPLIT_ROWS>(&ops));
if (ops.size() != 2) {
- return Status::InvalidArgument(Substitute("$0 ops were provided; Only two ops are expected "
- "for this pair of range bounds.", ops.size()));
+ return Status::InvalidArgument(Substitute(
+ "$0 ops were provided; only two ops are expected "
+ "for this pair of range bounds", ops.size()));
}
const DecodedRowOperation& op1 = ops[0];
const DecodedRowOperation& op2 = ops[1];
@@ -252,7 +255,7 @@ Status PartitionSchema::FromPB(const PartitionSchemaPB& pb,
auto* ranges_ptr = &partition_schema->ranges_with_hash_schemas_;
if (!range_bounds.empty()) {
RETURN_NOT_OK(partition_schema->EncodeRangeBounds(
- range_bounds, range_hash_schema, schema, ranges_ptr));
+ range_bounds, range_hash_schemas, schema, ranges_ptr));
}
if (ranges_ptr != nullptr) {
auto& dict = partition_schema->hash_schema_idx_by_encoded_range_start_;
@@ -271,12 +274,13 @@ Status PartitionSchema::FromPB(const PartitionSchemaPB& pb,
Status PartitionSchema::ToPB(const Schema& schema, PartitionSchemaPB* pb) const {
pb->Clear();
- pb->mutable_hash_bucket_schemas()->Reserve(hash_bucket_schemas_.size());
- for (const HashBucketSchema& hash_bucket : hash_bucket_schemas_) {
- PartitionSchemaPB_HashBucketSchemaPB* hash_bucket_pb = pb->add_hash_bucket_schemas();
- SetColumnIdentifiers(hash_bucket.column_ids, hash_bucket_pb->mutable_columns());
- hash_bucket_pb->set_num_buckets(hash_bucket.num_buckets);
- hash_bucket_pb->set_seed(hash_bucket.seed);
+ pb->mutable_hash_bucket_schemas()->Reserve(hash_schema_.size());
+ for (const auto& hash_dimension : hash_schema_) {
+ auto* hash_schema_pb = pb->add_hash_bucket_schemas();
+ SetColumnIdentifiers(hash_dimension.column_ids,
+ hash_schema_pb->mutable_columns());
+ hash_schema_pb->set_num_buckets(hash_dimension.num_buckets);
+ hash_schema_pb->set_seed(hash_dimension.seed);
}
if (!ranges_with_hash_schemas_.empty()) {
@@ -296,16 +300,18 @@ Status PartitionSchema::ToPB(const Schema& schema, PartitionSchemaPB* pb) const
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
auto* range_hash_schema_pb = pb->add_range_hash_schemas();
- for (const auto& hash_bucket : range_hash_schema.hash_schemas) {
- auto* hash_bucket_pb = range_hash_schema_pb->add_hash_schemas();
- SetColumnIdentifiers(hash_bucket.column_ids, hash_bucket_pb->mutable_columns());
- hash_bucket_pb->set_num_buckets(hash_bucket.num_buckets);
- hash_bucket_pb->set_seed(hash_bucket.seed);
+ for (const auto& hash_dimension : range_hash_schema.hash_schema) {
+ auto* hash_schema_pb = range_hash_schema_pb->add_hash_schemas();
+ SetColumnIdentifiers(hash_dimension.column_ids,
+ hash_schema_pb->mutable_columns());
+ hash_schema_pb->set_num_buckets(hash_dimension.num_buckets);
+ hash_schema_pb->set_seed(hash_dimension.seed);
}
}
}
- SetColumnIdentifiers(range_schema_.column_ids, pb->mutable_range_schema()->mutable_columns());
+ SetColumnIdentifiers(range_schema_.column_ids,
+ pb->mutable_range_schema()->mutable_columns());
return Status::OK();
}
@@ -313,10 +319,10 @@ template<typename Row>
void PartitionSchema::EncodeKeyImpl(const Row& row, string* buf) const {
string range_key;
EncodeColumns(row, range_schema_.column_ids, &range_key);
- const auto& hash_schemas = GetHashBucketSchemasForRange(range_key);
+ const auto& hash_schema = GetHashSchemaForRange(range_key);
const auto& hash_encoder = GetKeyEncoder<string>(GetTypeInfo(UINT32));
- for (const auto& hash_bucket_schema : hash_schemas) {
- int32_t bucket = BucketForRow(row, hash_bucket_schema);
+ for (const auto& hash_dimension : hash_schema) {
+ const auto bucket = HashValueForRow(row, hash_dimension);
hash_encoder.Encode(&bucket, buf);
}
// The range portion of the key has been already encoded -- append it to the
@@ -350,7 +356,7 @@ Status PartitionSchema::EncodeRangeKey(const KuduPartialRow& row,
contains_no_columns = false;
} else {
return Status::InvalidArgument(
- "split rows may only contain values for range partitioned columns", column.name());
+ "split rows may only contain values for range partition columns", column.name());
}
}
}
@@ -389,14 +395,14 @@ Status PartitionSchema::EncodeRangeSplits(const vector<KuduPartialRow>& split_ro
Status PartitionSchema::EncodeRangeBounds(
const vector<pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
- const PerRangeHashBucketSchemas& range_hash_schemas,
+ const vector<HashSchema>& range_hash_schemas,
const Schema& schema,
RangesWithHashSchemas* bounds_with_hash_schemas) const {
DCHECK(bounds_with_hash_schemas);
auto& bounds_whs = *bounds_with_hash_schemas;
DCHECK(bounds_whs.empty());
if (range_bounds.empty()) {
- bounds_whs.emplace_back(RangeWithHashSchemas{"", "", {}});
+ bounds_whs.emplace_back(RangeWithHashSchema{"", "", {}});
return Status::OK();
}
@@ -420,15 +426,15 @@ Status PartitionSchema::EncodeRangeBounds(
"range partition lower bound must be less than the upper bound",
RangePartitionDebugString(bound.first, bound.second));
}
- RangeWithHashSchemas temp{std::move(lower), std::move(upper), {}};
+ RangeWithHashSchema temp{std::move(lower), std::move(upper), {}};
if (!range_hash_schemas.empty()) {
- temp.hash_schemas = range_hash_schemas[j++];
+ temp.hash_schema = range_hash_schemas[j++];
}
bounds_whs.emplace_back(std::move(temp));
}
std::sort(bounds_whs.begin(), bounds_whs.end(),
- [](const RangeWithHashSchemas& s1, const RangeWithHashSchemas& s2) {
+ [](const RangeWithHashSchema& s1, const RangeWithHashSchema& s2) {
return s1.lower < s2.lower;
});
@@ -485,11 +491,13 @@ Status PartitionSchema::SplitRangeBounds(
}
// Split the current bound. Add the lower section to the result list,
// and continue iterating on the upper section.
- new_bounds_with_hash_schemas.emplace_back(RangeWithHashSchemas{std::move(lower), *split, {}});
- lower = std::move(*split);
+ new_bounds_with_hash_schemas.emplace_back(
+ RangeWithHashSchema{std::move(lower), *split, {}});
+ lower = *split;
}
- new_bounds_with_hash_schemas.emplace_back(RangeWithHashSchemas{std::move(lower), upper, {}});
+ new_bounds_with_hash_schemas.emplace_back(
+ RangeWithHashSchema{std::move(lower), upper, {}});
}
if (split != splits.end()) {
@@ -504,26 +512,25 @@ Status PartitionSchema::SplitRangeBounds(
Status PartitionSchema::CreatePartitions(
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
- const PerRangeHashBucketSchemas& range_hash_schemas,
+ const vector<HashSchema>& ranges_hash_schemas,
const Schema& schema,
vector<Partition>* partitions) const {
const auto& hash_encoder = GetKeyEncoder<string>(GetTypeInfo(UINT32));
- if (!range_hash_schemas.empty()) {
+ if (!ranges_hash_schemas.empty()) {
if (!split_rows.empty()) {
return Status::InvalidArgument("Both 'split_rows' and 'range_hash_schemas' cannot be "
"populated at the same time.");
}
- if (range_bounds.size() != range_hash_schemas.size()) {
+ if (range_bounds.size() != ranges_hash_schemas.size()) {
return Status::InvalidArgument(
Substitute("$0 vs $1: per range hash schemas and range bounds "
"must have the same size",
- range_hash_schemas.size(), range_bounds.size()));
+ ranges_hash_schemas.size(), range_bounds.size()));
}
}
- vector<Partition> base_hash_partitions = GenerateHashPartitions(
- hash_bucket_schemas_, hash_encoder);
+ auto base_hash_partitions = GenerateHashPartitions(hash_schema_, hash_encoder);
std::unordered_set<int> range_column_idxs;
for (const ColumnId& column_id : range_schema_.column_ids) {
@@ -540,7 +547,7 @@ Status PartitionSchema::CreatePartitions(
RangesWithHashSchemas bounds_with_hash_schemas;
vector<string> splits;
- RETURN_NOT_OK(EncodeRangeBounds(range_bounds, range_hash_schemas, schema,
+ RETURN_NOT_OK(EncodeRangeBounds(range_bounds, ranges_hash_schemas, schema,
&bounds_with_hash_schemas));
RETURN_NOT_OK(EncodeRangeSplits(split_rows, schema, &splits));
RETURN_NOT_OK(SplitRangeBounds(schema, splits, &bounds_with_hash_schemas));
@@ -548,7 +555,7 @@ Status PartitionSchema::CreatePartitions(
// Maps each partition to its respective hash schemas within 'bounds_with_hash_schemas',
// needed for logic later in function for filling in holes in partition key space. Will be
// empty if no per range hash schemas are used.
- vector<int> partition_idx_to_hash_schemas_idx;
+ vector<int> partition_idx_to_hash_schema_idx;
// Even if no hash partitioning for a table is specified, there must be at
// least one element in 'base_hash_partitions': it's used to build the result
@@ -558,7 +565,7 @@ Status PartitionSchema::CreatePartitions(
DCHECK(base_hash_partitions.size() > 1 ||
base_hash_partitions.front().hash_buckets().empty());
- if (range_hash_schemas.empty()) {
+ if (ranges_hash_schemas.empty()) {
// Create a partition per range bound and hash bucket combination.
vector<Partition> new_partitions;
for (const Partition& base_partition : base_hash_partitions) {
@@ -572,34 +579,34 @@ Status PartitionSchema::CreatePartitions(
*partitions = std::move(new_partitions);
} else {
// The number of ranges should match the size of range_hash_schemas.
- DCHECK_EQ(range_hash_schemas.size(), bounds_with_hash_schemas.size());
+ DCHECK_EQ(ranges_hash_schemas.size(), bounds_with_hash_schemas.size());
// No split rows should be defined if range_hash_schemas is populated.
DCHECK(split_rows.empty());
vector<Partition> result_partitions;
// Iterate through each bound and its hash schemas to generate hash partitions.
- for (size_t i = 0; i < bounds_with_hash_schemas.size(); ++i) {
+ for (int i = 0; i < bounds_with_hash_schemas.size(); ++i) {
const auto& bound = bounds_with_hash_schemas[i];
- const auto& current_range_hash_schemas = bound.hash_schemas;
- // If current bound's HashBucketSchema is empty, implies use of default
+ const auto& current_range_hash_schema = bound.hash_schema;
+ // If current bound's HashSchema is empty, implies use of default
// table-wide schema. If not empty, generate hash partitions for all the
// provided hash schemas in this range.
vector<Partition> current_bound_hash_partitions =
- current_range_hash_schemas.empty() ? base_hash_partitions
- : GenerateHashPartitions(
- current_range_hash_schemas,
+ current_range_hash_schema.empty() ? base_hash_partitions
+ : GenerateHashPartitions(
+ current_range_hash_schema,
hash_encoder);
// Add range information to the partition key.
for (Partition& partition : current_bound_hash_partitions) {
partition.partition_key_start_.append(bound.lower);
partition.partition_key_end_.append(bound.upper);
- int index = current_range_hash_schemas.empty() ? -1 : i;
- partition_idx_to_hash_schemas_idx.emplace_back(index);
+ int index = current_range_hash_schema.empty() ? -1 : i;
+ partition_idx_to_hash_schema_idx.emplace_back(index);
}
result_partitions.insert(result_partitions.end(),
std::make_move_iterator(current_bound_hash_partitions.begin()),
std::make_move_iterator(current_bound_hash_partitions.end()));
}
- DCHECK_EQ(partition_idx_to_hash_schemas_idx.size(), result_partitions.size());
+ DCHECK_EQ(partition_idx_to_hash_schema_idx.size(), result_partitions.size());
*partitions = std::move(result_partitions);
}
// Note: the following discussion and logic only takes effect when the table's
@@ -620,7 +627,7 @@ Status PartitionSchema::CreatePartitions(
// the absolute start and end case, these holes are filled by clearing the
// partition key beginning at the hash component. For a concrete example,
// see PartitionTest::TestCreatePartitions.
- const HashBucketSchema* hash_bucket_schema;
+ const HashDimension* hash_dimension;
for (int j = 0; j < partitions->size(); j++) {
Partition& partition = (*partitions)[j];
// Find the first zero-valued bucket from the end and truncate the partition key
@@ -646,13 +653,13 @@ Status PartitionSchema::CreatePartitions(
for (int i = static_cast<int>(partition.hash_buckets().size()) - 1; i >= 0; i--) {
partition.partition_key_end_.erase(kEncodedBucketSize * i);
int32_t hash_bucket = partition.hash_buckets()[i] + 1;
- if (range_hash_schemas.empty() || partition_idx_to_hash_schemas_idx[j] == -1) {
- hash_bucket_schema = &hash_bucket_schemas_[i];
+ if (ranges_hash_schemas.empty() || partition_idx_to_hash_schema_idx[j] == -1) {
+ hash_dimension = &hash_schema_[i];
} else {
- const auto& hash_schemas_idx = partition_idx_to_hash_schemas_idx[j];
- hash_bucket_schema = &bounds_with_hash_schemas[hash_schemas_idx].hash_schemas[i];
+ const auto& hash_schemas_idx = partition_idx_to_hash_schema_idx[j];
+ hash_dimension = &bounds_with_hash_schemas[hash_schemas_idx].hash_schema[i];
}
- if (hash_bucket != hash_bucket_schema->num_buckets) {
+ if (hash_bucket != hash_dimension->num_buckets) {
hash_encoder.Encode(&hash_bucket, &partition.partition_key_end_);
break;
}
@@ -668,9 +675,9 @@ bool PartitionSchema::PartitionContainsRowImpl(const Partition& partition,
const Row& row) const {
string range_key;
EncodeColumns(row, range_schema_.column_ids, &range_key);
- const auto& hash_schemas = GetHashBucketSchemasForRange(range_key);
- for (size_t i = 0; i < hash_schemas.size(); ++i) {
- if (!HashPartitionContainsRowImpl(partition, row, hash_schemas, i)) {
+ const auto& hash_schema = GetHashSchemaForRange(range_key);
+ for (size_t i = 0; i < hash_schema.size(); ++i) {
+ if (!HashPartitionContainsRowImpl(partition, row, hash_schema, i)) {
return false;
}
}
@@ -682,14 +689,14 @@ template<typename Row>
bool PartitionSchema::HashPartitionContainsRowImpl(
const Partition& partition,
const Row& row,
- const HashBucketSchemas& hash_bucket_schemas,
- int hash_idx) const {
- DCHECK_GE(hash_idx, 0);
- DCHECK_LT(hash_idx, hash_bucket_schemas.size());
- DCHECK_EQ(partition.hash_buckets().size(), hash_bucket_schemas.size());
- const HashBucketSchema& hash_bucket_schema = hash_bucket_schemas[hash_idx];
- const int32_t bucket = BucketForRow(row, hash_bucket_schema);
- return partition.hash_buckets()[hash_idx] == bucket;
+ const HashSchema& hash_schema,
+ int hash_value) const {
+ DCHECK_GE(hash_value, 0);
+ DCHECK_LT(hash_value, hash_schema.size());
+ DCHECK_EQ(partition.hash_buckets().size(), hash_schema.size());
+ const HashDimension& hash_dimension = hash_schema[hash_value];
+ const auto bucket = HashValueForRow(row, hash_dimension);
+ return partition.hash_buckets()[hash_value] == bucket;
}
template<typename Row>
@@ -755,12 +762,12 @@ bool PartitionSchema::PartitionMayContainRow(const Partition& partition,
string range_key;
EncodeColumns(row, range_schema_.column_ids, &range_key);
- const auto& hash_schemas = GetHashBucketSchemasForRange(range_key);
- for (size_t i = 0; i < hash_schemas.size(); ++i) {
- const auto& hash_partition = hash_schemas[i];
- if (hash_partition.column_ids.size() == 1 &&
- hash_partition.column_ids[0] == single_column_id &&
- !HashPartitionContainsRowImpl(partition, row, hash_schemas, i)) {
+ const auto& hash_schema = GetHashSchemaForRange(range_key);
+ for (size_t i = 0; i < hash_schema.size(); ++i) {
+ const auto& hash_dimension = hash_schema[i];
+ if (hash_dimension.column_ids.size() == 1 &&
+ hash_dimension.column_ids[0] == single_column_id &&
+ !HashPartitionContainsRowImpl(partition, row, hash_schema, i)) {
return false;
}
}
@@ -811,14 +818,14 @@ Status PartitionSchema::DecodeRangeKey(Slice* encoded_key,
// remove the hash components.
Status PartitionSchema::DecodeHashBuckets(Slice* encoded_key,
vector<int32_t>* buckets) const {
- size_t hash_components_size = kEncodedBucketSize * hash_bucket_schemas_.size();
+ size_t hash_components_size = kEncodedBucketSize * hash_schema_.size();
if (encoded_key->size() < hash_components_size) {
return Status::InvalidArgument(
Substitute("expected encoded hash key to be at least $0 bytes (only found $1)",
hash_components_size, encoded_key->size()));
}
- for (const auto& schema : hash_bucket_schemas_) {
- (void) schema; // quiet unused variable warning
+ for (const auto& _ : hash_schema_) {
+ (void) _; // quiet unused variable warning
uint32_t big_endian;
memcpy(&big_endian, encoded_key->data(), sizeof(uint32_t));
buckets->push_back(BigEndian::ToHost32(big_endian));
@@ -880,13 +887,13 @@ string PartitionSchema::PartitionDebugString(const Partition& partition,
ScopedDisableRedaction no_redaction;
vector<string> components;
- if (partition.hash_buckets_.size() != hash_bucket_schemas_.size()) {
+ if (partition.hash_buckets_.size() != hash_schema_.size()) {
return "<hash-partition-error>";
}
- for (int i = 0; i < hash_bucket_schemas_.size(); i++) {
+ for (size_t i = 0; i < hash_schema_.size(); ++i) {
string s = Substitute("HASH ($0) PARTITION $1",
- ColumnIdsToColumnNames(schema, hash_bucket_schemas_[i].column_ids),
+ ColumnIdsToColumnNames(schema, hash_schema_[i].column_ids),
partition.hash_buckets_[i]);
components.emplace_back(std::move(s));
}
@@ -905,14 +912,14 @@ string PartitionSchema::PartitionDebugString(const Partition& partition,
template<typename Row>
string PartitionSchema::PartitionKeyDebugStringImpl(const Row& row) const {
+ // TODO(aserbin): update this to adapt to custom hash schemas per range
vector<string> components;
- components.reserve(hash_bucket_schemas_.size() +
- range_schema_.column_ids.size());
- for (const HashBucketSchema& hash_bucket_schema : hash_bucket_schemas_) {
+ components.reserve(hash_schema_.size() + range_schema_.column_ids.size());
+ for (const auto& hash_dimension : hash_schema_) {
components.emplace_back(Substitute(
"HASH ($0): $1",
- ColumnIdsToColumnNames(*row.schema(), hash_bucket_schema.column_ids),
- BucketForRow(row, hash_bucket_schema)));
+ ColumnIdsToColumnNames(*row.schema(), hash_dimension.column_ids),
+ HashValueForRow(row, hash_dimension)));
}
if (!range_schema_.column_ids.empty()) {
@@ -956,7 +963,7 @@ string PartitionSchema::PartitionKeyDebugString(Slice key, const Schema& schema)
return "<range-decode-error>";
}
} else {
- if (key.size() < kEncodedBucketSize * hash_bucket_schemas_.size()) {
+ if (key.size() < kEncodedBucketSize * hash_schema_.size()) {
return "<hash-decode-error>";
}
}
@@ -968,8 +975,7 @@ string PartitionSchema::PartitionKeyDebugString(Slice key, const Schema& schema)
}
const auto& hash_schemas = has_ranges
- ? GetHashBucketSchemasForRange(range_key.ToString())
- : hash_bucket_schemas_;
+ ? GetHashSchemaForRange(range_key.ToString()) : hash_schema_;
vector<string> components;
components.reserve(hash_schemas.size() + 1);
for (const auto& hash_schema : hash_schemas) {
@@ -1080,21 +1086,22 @@ string PartitionSchema::RangeKeyDebugString(const ConstContiguousRow& key) const
}
vector<string> PartitionSchema::DebugStringComponents(const Schema& schema) const {
+ // TODO(aserbin): adapt this to custom hash schemas per range
vector<string> components;
-
- for (const auto& hash_bucket_schema : hash_bucket_schemas_) {
+ for (const auto& hash_dimension : hash_schema_) {
string s;
SubstituteAndAppend(&s, "HASH ($0) PARTITIONS $1",
- ColumnIdsToColumnNames(schema, hash_bucket_schema.column_ids),
- hash_bucket_schema.num_buckets);
- if (hash_bucket_schema.seed != 0) {
- SubstituteAndAppend(&s, " SEED $0", hash_bucket_schema.seed);
+ ColumnIdsToColumnNames(schema, hash_dimension.column_ids),
+ hash_dimension.num_buckets);
+ if (hash_dimension.seed != 0) {
+ SubstituteAndAppend(&s, " SEED $0", hash_dimension.seed);
}
components.emplace_back(std::move(s));
}
if (!range_schema_.column_ids.empty()) {
- string s = Substitute("RANGE ($0)", ColumnIdsToColumnNames(schema, range_schema_.column_ids));
+ string s = Substitute("RANGE ($0)", ColumnIdsToColumnNames(
+ schema, range_schema_.column_ids));
components.emplace_back(std::move(s));
}
@@ -1132,10 +1139,10 @@ string PartitionSchema::DisplayString(const Schema& schema,
string PartitionSchema::PartitionTableHeader(const Schema& schema) const {
string header;
- for (const auto& hash_bucket_schema : hash_bucket_schemas_) {
+ for (const auto& hash_schema : hash_schema_) {
SubstituteAndAppend(&header, "<th>HASH ($0) PARTITION</th>",
- EscapeForHtmlToString(
- ColumnIdsToColumnNames(schema, hash_bucket_schema.column_ids)));
+ EscapeForHtmlToString(ColumnIdsToColumnNames(
+ schema, hash_schema.column_ids)));
}
if (!range_schema_.column_ids.empty()) {
SubstituteAndAppend(&header, "<th>RANGE ($0) PARTITION</th>",
@@ -1172,14 +1179,14 @@ bool PartitionSchema::operator==(const PartitionSchema& rhs) const {
return false;
}
- if (hash_bucket_schemas_.size() != rhs.hash_bucket_schemas_.size() ||
+ if (hash_schema_.size() != rhs.hash_schema_.size() ||
ranges_with_hash_schemas_.size() != rhs.ranges_with_hash_schemas_.size()) {
return false;
}
// Compare table wide hash bucket schemas.
- for (size_t i = 0; i < hash_bucket_schemas_.size(); ++i) {
- if (hash_bucket_schemas_[i] != rhs.hash_bucket_schemas_[i]) {
+ for (size_t i = 0; i < hash_schema_.size(); ++i) {
+ if (hash_schema_[i] != rhs.hash_schema_[i]) {
return false;
}
}
@@ -1190,13 +1197,13 @@ bool PartitionSchema::operator==(const PartitionSchema& rhs) const {
ranges_with_hash_schemas_[i].upper != rhs.ranges_with_hash_schemas_[i].upper) {
return false;
}
- const auto& lhs_hash_schemas = ranges_with_hash_schemas_[i].hash_schemas;
- const auto& rhs_hash_schemas = rhs.ranges_with_hash_schemas_[i].hash_schemas;
- if (lhs_hash_schemas.size() != rhs_hash_schemas.size()) {
+ const auto& lhs_hash_schema = ranges_with_hash_schemas_[i].hash_schema;
+ const auto& rhs_hash_schema = rhs.ranges_with_hash_schemas_[i].hash_schema;
+ if (lhs_hash_schema.size() != rhs_hash_schema.size()) {
return false;
}
- for (size_t j = 0; j < lhs_hash_schemas.size(); ++j) {
- if (lhs_hash_schemas[j] != rhs_hash_schemas[j]) {
+ for (size_t j = 0; j < lhs_hash_schema.size(); ++j) {
+ if (lhs_hash_schema[j] != rhs_hash_schema[j]) {
return false;
}
}
@@ -1241,26 +1248,27 @@ void PartitionSchema::EncodeColumns(const KuduPartialRow& row,
}
}
-int32_t PartitionSchema::BucketForEncodedColumns(const string& encoded_hash_columns,
- const HashBucketSchema& hash_bucket_schema) {
+uint32_t PartitionSchema::HashValueForEncodedColumns(
+ const string& encoded_hash_columns,
+ const HashDimension& hash_dimension) {
uint64_t hash = HashUtil::MurmurHash2_64(encoded_hash_columns.data(),
encoded_hash_columns.length(),
- hash_bucket_schema.seed);
- return hash % static_cast<uint64_t>(hash_bucket_schema.num_buckets);
+ hash_dimension.seed);
+ return hash % hash_dimension.num_buckets;
}
vector<Partition> PartitionSchema::GenerateHashPartitions(
- const HashBucketSchemas& hash_schemas,
+ const HashSchema& hash_schema,
const KeyEncoder<string>& hash_encoder) {
vector<Partition> hash_partitions(1);
// Create a partition for each hash bucket combination.
- for (const HashBucketSchema& bucket_schema : hash_schemas) {
+ for (const auto& hash_dimension : hash_schema) {
vector<Partition> new_partitions;
- new_partitions.reserve(hash_partitions.size() * bucket_schema.num_buckets);
+ new_partitions.reserve(hash_partitions.size() * hash_dimension.num_buckets);
// For each of the partitions created so far, replicate it
// by the number of buckets in the next hash bucketing component.
for (const Partition& base_partition : hash_partitions) {
- for (auto bucket = 0; bucket < bucket_schema.num_buckets; ++bucket) {
+ for (uint32_t bucket = 0; bucket < hash_dimension.num_buckets; ++bucket) {
Partition partition = base_partition;
partition.hash_buckets_.push_back(bucket);
hash_encoder.Encode(&bucket, &partition.partition_key_start_);
@@ -1273,19 +1281,19 @@ vector<Partition> PartitionSchema::GenerateHashPartitions(
return hash_partitions;
}
-Status PartitionSchema::ValidateHashBucketSchemas(const Schema& schema,
- const HashBucketSchemas& hash_schemas) {
+Status PartitionSchema::ValidateHashSchema(const Schema& schema,
+ const HashSchema& hash_schema) {
set<ColumnId> hash_columns;
- for (const PartitionSchema::HashBucketSchema& hash_schema : hash_schemas) {
- if (hash_schema.num_buckets < 2) {
+ for (const PartitionSchema::HashDimension& hash_dimension : hash_schema) {
+ if (hash_dimension.num_buckets < 2) {
return Status::InvalidArgument("must have at least two hash buckets");
}
- if (hash_schema.column_ids.empty()) {
+ if (hash_dimension.column_ids.empty()) {
return Status::InvalidArgument("must have at least one hash column");
}
- for (const ColumnId& hash_column : hash_schema.column_ids) {
+ for (const ColumnId& hash_column : hash_dimension.column_ids) {
if (!hash_columns.insert(hash_column).second) {
return Status::InvalidArgument("hash bucket schema components must not "
"contain columns in common");
@@ -1305,13 +1313,13 @@ Status PartitionSchema::ValidateHashBucketSchemas(const Schema& schema,
}
template<typename Row>
-int32_t PartitionSchema::BucketForRow(
- const Row& row, const HashBucketSchema& hash_bucket_schema) {
+uint32_t PartitionSchema::HashValueForRow(const Row& row,
+ const HashDimension& hash_dimension) {
string buf;
- EncodeColumns(row, hash_bucket_schema.column_ids, &buf);
+ EncodeColumns(row, hash_dimension.column_ids, &buf);
uint64_t hash = HashUtil::MurmurHash2_64(
- buf.data(), buf.length(), hash_bucket_schema.seed);
- return hash % static_cast<uint64_t>(hash_bucket_schema.num_buckets);
+ buf.data(), buf.length(), hash_dimension.seed);
+ return hash % hash_dimension.num_buckets;
}
//------------------------------------------------------------
@@ -1320,25 +1328,24 @@ int32_t PartitionSchema::BucketForRow(
//------------------------------------------------------------
template
-int32_t PartitionSchema::BucketForRow(const KuduPartialRow& row,
- const HashBucketSchema& hash_bucket_schema);
-
+uint32_t PartitionSchema::HashValueForRow(const KuduPartialRow& row,
+ const HashDimension& hash_dimension);
template
-int32_t PartitionSchema::BucketForRow(const ConstContiguousRow& row,
- const HashBucketSchema& hash_bucket_schema);
+uint32_t PartitionSchema::HashValueForRow(const ConstContiguousRow& row,
+ const HashDimension& hash_dimension);
void PartitionSchema::Clear() {
hash_schema_idx_by_encoded_range_start_.clear();
ranges_with_hash_schemas_.clear();
- hash_bucket_schemas_.clear();
+ hash_schema_.clear();
range_schema_.column_ids.clear();
}
Status PartitionSchema::Validate(const Schema& schema) const {
- RETURN_NOT_OK(ValidateHashBucketSchemas(schema, hash_bucket_schemas_));
+ RETURN_NOT_OK(ValidateHashSchema(schema, hash_schema_));
- for (const auto& range_with_hash_schemas : ranges_with_hash_schemas_) {
- RETURN_NOT_OK(ValidateHashBucketSchemas(schema, range_with_hash_schemas.hash_schemas));
+ for (const auto& range_with_hash_schema : ranges_with_hash_schemas_) {
+ RETURN_NOT_OK(ValidateHashSchema(schema, range_with_hash_schema.hash_schema));
}
for (const ColumnId& column_id : range_schema_.column_ids) {
@@ -1543,25 +1550,24 @@ Status PartitionSchema::IncrementRangePartitionKey(KuduPartialRow* row, bool* in
return Status::OK();
}
-const PartitionSchema::HashBucketSchemas& PartitionSchema::GetHashBucketSchemasForRange(
+const PartitionSchema::HashSchema& PartitionSchema::GetHashSchemaForRange(
const string& range_key) const {
// Find proper hash bucket schema corresponding to the specified range key.
- const auto* entry = FindFloorOrNull(
+ const auto* idx = FindFloorOrNull(
hash_schema_idx_by_encoded_range_start_, range_key);
- bool has_custom_range = (entry != nullptr);
+ bool has_custom_range = (idx != nullptr);
// Check for the case of a non-covered range between two covered ranges.
// TODO(aserbin): maybe, it's better to build ranges_with_hash_schemas_ not
// having any range gaps?
if (has_custom_range) {
- DCHECK_LT(*entry, ranges_with_hash_schemas_.size());
- const auto& upper = ranges_with_hash_schemas_[*entry].upper;
- // TODO(aserbin): is the upper bound always exclusive?
+ DCHECK_LT(*idx, ranges_with_hash_schemas_.size());
+ const auto& upper = ranges_with_hash_schemas_[*idx].upper;
if (!upper.empty() && upper <= range_key) {
has_custom_range = false;
}
}
- return has_custom_range ? ranges_with_hash_schemas_[*entry].hash_schemas
- : hash_bucket_schemas_;
+ return has_custom_range ? ranges_with_hash_schemas_[*idx].hash_schema
+ : hash_schema_;
}
Status PartitionSchema::MakeLowerBoundRangePartitionKeyInclusive(KuduPartialRow* row) const {
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index 6d77328..85b7d62 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -108,13 +108,21 @@ class Partition {
// primary key column values of a row into a partition key that can be used to
// determine the tablet containing the key.
//
-// The partition schema is made up of zero or more hash bucket components,
-// followed by a single range component. In addition, the partition schema can
-// contain multiple ranges and their per-range custom bucket schemas.
+// The partition schema for a table is made up of a single range component, and
+// per-range hash components, where a hash component consists of zero or more
+// hash bucket dimensions. When all the ranges have the same hash component,
+// the partition schema for a table reduces into a single range component and
+// a table-wide hash component. The range component is called 'range schema',
+// and the collection of per-range hash components is called 'hash schema'. With
+// that, the partition schema for a table consists of the range schema and
+// the hash schema.
//
-// Each hash bucket component includes one or more columns from the primary key
-// column set, with the restriction that an individual primary key column may
-// only be included in a single hash component.
+// Each hash bucket dimension includes one or more columns from the set of the
+// table's primary key columns, with the restriction that an individual primary
+// key column may only be included in a single hash dimension.
+//
+// In encoded partition keys (they are represented as a sequence of bytes),
+// first comes the hash-related part, and then comes the range-related part.
//
// To determine the hash bucket of an individual row, the values of the columns
// of the hash component are encoded into bytes (in PK or lexicographic
@@ -148,53 +156,65 @@ class Partition {
// the methods which format individual partition keys do redact.
class PartitionSchema {
public:
+ // This structure represents the range component of the table's partition
+ // schema. It consists of at least one column, and every column must be one
+ // of the primary key's columns.
struct RangeSchema {
std::vector<ColumnId> column_ids;
};
- struct HashBucketSchema {
+ // This structure represents one dimension of the hash bucketing. To find the
+ // hash value (which directly corresponds to the hash bucket index) for a
+ // particular row in the given hash dimension, it's necessary to compute the
+ // hash value for a row by calling
+ // HashFunction(value_of_column_ids[0], ..., value_of_column_ids[N-1]), where
+ // N = column_ids.size().
+ //
+ // NOTE: this structure corresponds to PartitionSchemaPB::HashBucketSchemaPB
+ struct HashDimension {
std::vector<ColumnId> column_ids;
int32_t num_buckets;
uint32_t seed;
- bool operator==(const HashBucketSchema& rhs) const {
- if (this == &rhs) {
+ bool operator==(const HashDimension& other) const {
+ if (this == &other) {
return true;
}
- if (seed != rhs.seed) {
+ if (seed != other.seed) {
return false;
}
- if (num_buckets != rhs.num_buckets) {
+ if (num_buckets != other.num_buckets) {
return false;
}
- if (column_ids != rhs.column_ids) {
+ if (column_ids != other.column_ids) {
return false;
}
return true;
}
- bool operator!=(const HashBucketSchema& rhs) const {
- return !(*this == rhs);
+ bool operator!=(const HashDimension& other) const {
+ return !(*this == other);
}
};
- typedef std::vector<HashBucketSchema> HashBucketSchemas;
- // Holds each bound's HashBucketSchemas.
- typedef std::vector<HashBucketSchemas> PerRangeHashBucketSchemas;
+ // A hash schema consists of zero or more hash dimensions. With that,
+ // N-dimensional hash bucketing for a row is defined by N hash values computed
+ // in each dimension of the hash schema.
+ typedef std::vector<HashDimension> HashSchema;
- struct RangeWithHashSchemas {
+ struct RangeWithHashSchema {
std::string lower;
std::string upper;
- HashBucketSchemas hash_schemas;
+ HashSchema hash_schema;
};
- typedef std::vector<RangeWithHashSchemas> RangesWithHashSchemas;
+ typedef std::vector<RangeWithHashSchema> RangesWithHashSchemas;
- // Extracts HashBucketSchemas from a protobuf repeated field of hash buckets.
- static Status ExtractHashBucketSchemasFromPB(
+ // Extracts HashSchema from a protobuf repeated field of hash buckets.
+ static Status ExtractHashSchemaFromPB(
const Schema& schema,
const google::protobuf::RepeatedPtrField<PartitionSchemaPB_HashBucketSchemaPB>&
hash_buckets_pb,
- HashBucketSchemas* hash_bucket_schemas);
+ HashSchema* hash_schema);
// Deserializes a protobuf message into a partition schema.
static Status FromPB(const PartitionSchemaPB& pb,
@@ -219,19 +239,19 @@ class PartitionSchema {
// of resulting partitions is the product of the number of hash buckets for
// each hash bucket component, multiplied by
// (split_rows.size() + max(1, range_bounds.size())).
- // 'range_hash_schemas' contains each range's HashBucketSchemas,
+ // 'range_hash_schemas' contains each range's HashSchema,
// its order corresponds to the bounds in 'range_bounds'.
// If 'range_hash_schemas' is empty, the table wide hash schema is used per range.
// Size of 'range_hash_schemas' and 'range_bounds' are equal if 'range_hash_schema' isn't empty.
Status CreatePartitions(
const std::vector<KuduPartialRow>& split_rows,
const std::vector<std::pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
- const PerRangeHashBucketSchemas& range_hash_schemas,
+ const std::vector<HashSchema>& ranges_hash_schemas,
const Schema& schema,
std::vector<Partition>* partitions) const WARN_UNUSED_RESULT;
// Check if the given partition contains the specified row. The row must have
- // all the columns participating in the table's partitioning schema
+ // all the columns participating in the table's partition schema
// set to particular values.
bool PartitionContainsRow(const Partition& partition,
const KuduPartialRow& row) const;
@@ -240,12 +260,12 @@ class PartitionSchema {
// Check if the specified row is probably in the given partition.
// The collection of columns set to particular values in the row can be a
- // subset of all the columns participating in the table's partitioning schema.
- // This method can be used to optimize the collection of values for IN list
+ // subset of all the columns participating in the table's partition schema.
+ // This method can be used to optimize the set of values for IN list
// predicates. As of now, this method is effectively implemented only for
- // single-column hash and single-column range partitioning schemas, meaning
+ // single-column hash and single-column range partition schemas, meaning
// that it can return false positives in case of other than single-row range
- // and hash partitioning schemas.
+ // and hash schemas.
//
// NOTE: this method returns false positives in some cases (see above)
//
@@ -329,12 +349,15 @@ class PartitionSchema {
KuduPartialRow* partial_row,
Arena* arena) const;
- const RangeSchema& range_partition_schema() const {
+ const RangeSchema& range_schema() const {
return range_schema_;
}
- const HashBucketSchemas& hash_partition_schemas() const {
- return hash_bucket_schemas_;
+ // TODO(aserbin): this method is becoming obsolete with the introduction of
+ // custom per-range hash schemas -- update this or remove
+ // completely
+ const HashSchema& hash_schema() const {
+ return hash_schema_;
}
const RangesWithHashSchemas& ranges_with_hash_schemas() const {
@@ -381,25 +404,26 @@ class PartitionSchema {
const std::vector<ColumnId>& column_ids,
std::string* buf);
- // Returns the hash bucket of the encoded hash column. The encoded columns must match the
- // columns of the hash bucket schema.
- static int32_t BucketForEncodedColumns(const std::string& encoded_hash_columns,
- const HashBucketSchema& hash_bucket_schema);
+ // Returns the hash value of the encoded hash columns. The encoded columns
+ // must match the columns of the hash dimension.
+ static uint32_t HashValueForEncodedColumns(
+ const std::string& encoded_hash_columns,
+ const HashDimension& hash_dimension);
+
+ // Assigns the row to a bucket according to the hash rules.
+ template<typename Row>
+ static uint32_t HashValueForRow(const Row& row,
+ const HashDimension& hash_dimension);
- // Helper function that validates the hash bucket schemas.
- static Status ValidateHashBucketSchemas(const Schema& schema,
- const HashBucketSchemas& hash_schemas);
+ // Helper function that validates the given hash schema.
+ static Status ValidateHashSchema(const Schema& schema,
+ const HashSchema& hash_schema);
// Generates hash partitions for each combination of hash buckets in hash_schemas.
static std::vector<Partition> GenerateHashPartitions(
- const HashBucketSchemas& hash_schemas,
+ const HashSchema& hash_schema,
const KeyEncoder<std::string>& hash_encoder);
- // Assigns the row to a hash bucket according to the hash schema.
- template<typename Row>
- static int32_t BucketForRow(const Row& row,
- const HashBucketSchema& hash_bucket_schema);
-
// PartitionKeyDebugString implementation for row types.
template<typename Row>
std::string PartitionKeyDebugStringImpl(const Row& row) const;
@@ -411,11 +435,10 @@ class PartitionSchema {
// Private templated helper for HashPartitionContainsRow.
template<typename Row>
- bool HashPartitionContainsRowImpl(
- const Partition& partition,
- const Row& row,
- const HashBucketSchemas& hash_bucket_schemas,
- int hash_idx) const;
+ bool HashPartitionContainsRowImpl(const Partition& partition,
+ const Row& row,
+ const HashSchema& hash_schema,
+ int hash_value) const;
// Private templated helper for RangePartitionContainsRow.
template<typename Row>
@@ -478,7 +501,7 @@ class PartitionSchema {
// it indicates that the table wide hash schema will be used per range.
Status EncodeRangeBounds(
const std::vector<std::pair<KuduPartialRow, KuduPartialRow>>& range_bounds,
- const PerRangeHashBucketSchemas& range_hash_schemas,
+ const std::vector<HashSchema>& range_hash_schemas,
const Schema& schema,
RangesWithHashSchemas* bounds_with_hash_schemas) const;
@@ -495,14 +518,13 @@ class PartitionSchema {
// maximum value. Unset columns will be incremented to increment(min_value).
Status IncrementRangePartitionKey(KuduPartialRow* row, bool* increment) const;
- // Find hash bucket schemas for the given encoded range key. Depending
- // on the partitioning schema and the key, it might be either table-wide
- // or a custom hash bucket schema for a particular range.
- const HashBucketSchemas& GetHashBucketSchemasForRange(
- const std::string& range_key) const;
+ // Find hash schema for the given encoded range key. Depending on the
+ // partition schema and the key, it might be either table-wide or a custom
+ // hash schema for a particular range.
+ const HashSchema& GetHashSchemaForRange(const std::string& range_key) const;
- HashBucketSchemas hash_bucket_schemas_;
RangeSchema range_schema_;
+ HashSchema hash_schema_;
RangesWithHashSchemas ranges_with_hash_schemas_;
// Encoded start of the range --> index of the hash bucket schemas for the
diff --git a/src/kudu/common/partition_pruner-test.cc b/src/kudu/common/partition_pruner-test.cc
index 1789154..7ba9cc1 100644
--- a/src/kudu/common/partition_pruner-test.cc
+++ b/src/kudu/common/partition_pruner-test.cc
@@ -74,7 +74,7 @@ class PartitionPrunerTest : public KuduTest {
const vector<ColumnNameAndIntValue>& upper_int_cols,
const vector<ColumnNamesNumBucketsAndSeed>& hash_schemas,
vector<pair<KuduPartialRow, KuduPartialRow>>* bounds,
- PartitionSchema::PerRangeHashBucketSchemas* range_hash_schemas,
+ vector<PartitionSchema::HashSchema>* range_hash_schemas,
PartitionSchemaPB* pb);
};
@@ -121,13 +121,13 @@ void PartitionPrunerTest::CreatePartitionSchemaPB(
for (const auto& range_column : range_columns) {
range_schema->add_columns()->set_name(range_column);
}
- for (const auto& hash_schema : table_hash_schema) {
+ for (const auto& hash_dimension : table_hash_schema) {
auto* hash_schema_component = partition_schema_pb->add_hash_bucket_schemas();
- for (const auto& hash_schema_columns : get<0>(hash_schema)) {
+ for (const auto& hash_schema_columns : get<0>(hash_dimension)) {
hash_schema_component->add_columns()->set_name(hash_schema_columns);
}
- hash_schema_component->set_num_buckets(get<1>(hash_schema));
- hash_schema_component->set_seed(get<2>(hash_schema));
+ hash_schema_component->set_num_buckets(get<1>(hash_dimension));
+ hash_schema_component->set_seed(get<2>(hash_dimension));
}
}
@@ -137,9 +137,9 @@ void PartitionPrunerTest::AddRangePartitionWithSchema(
const vector<ColumnNameAndStringValue>& upper_string_cols,
const vector<ColumnNameAndIntValue>& lower_int_cols,
const vector<ColumnNameAndIntValue>& upper_int_cols,
- const vector<ColumnNamesNumBucketsAndSeed>& hash_schemas,
+ const vector<ColumnNamesNumBucketsAndSeed>& hash_buckets_info,
vector<pair<KuduPartialRow, KuduPartialRow>>* bounds,
- PartitionSchema::PerRangeHashBucketSchemas* range_hash_schemas,
+ vector<PartitionSchema::HashSchema>* range_hash_schemas,
PartitionSchemaPB* pb) {
RowOperationsPBEncoder encoder(pb->add_range_bounds());
KuduPartialRow lower(&schema);
@@ -159,21 +159,21 @@ void PartitionPrunerTest::AddRangePartitionWithSchema(
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
auto* range_hash_component = pb->add_range_hash_schemas();
- PartitionSchema::HashBucketSchemas hash_bucket_schemas;
- for (const auto& hash_schema : hash_schemas) {
+ PartitionSchema::HashSchema hash_schema;
+ for (const auto& hash_bucket_info : hash_buckets_info) {
auto* hash_component_pb = range_hash_component->add_hash_schemas();
- PartitionSchema::HashBucketSchema hash_bucket_schema;
- for (const auto& hash_schema_columns : get<0>(hash_schema)) {
+ PartitionSchema::HashDimension hash_dimension;
+ for (const auto& hash_schema_columns : get<0>(hash_bucket_info)) {
hash_component_pb->add_columns()->set_name(hash_schema_columns);
- hash_bucket_schema.column_ids.emplace_back(schema.find_column(hash_schema_columns));
+ hash_dimension.column_ids.emplace_back(schema.find_column(hash_schema_columns));
}
- hash_component_pb->set_num_buckets(get<1>(hash_schema));
- hash_bucket_schema.num_buckets = get<1>(hash_schema);
- hash_component_pb->set_seed(get<2>(hash_schema));
- hash_bucket_schema.seed = get<2>(hash_schema);
- hash_bucket_schemas.emplace_back(hash_bucket_schema);
+ hash_component_pb->set_num_buckets(get<1>(hash_bucket_info));
+ hash_dimension.num_buckets = get<1>(hash_bucket_info);
+ hash_component_pb->set_seed(get<2>(hash_bucket_info));
+ hash_dimension.seed = get<2>(hash_bucket_info);
+ hash_schema.emplace_back(hash_dimension);
}
- range_hash_schemas->emplace_back(hash_bucket_schemas);
+ range_hash_schemas->emplace_back(hash_schema);
bounds->emplace_back(lower, upper);
}
@@ -1116,7 +1116,7 @@ TEST_F(PartitionPrunerTest, TestHashSchemasPerRangePruning) {
CreatePartitionSchemaPB({"C"}, { {{"A"}, 2, 0}, {{"B"}, 2, 0} }, &pb);
vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
// Need to add per range hash schema components to the field 'range_with_hash_schemas_'
// of PartitionSchema because PartitionPruner will use them to construct partition key ranges.
@@ -1298,7 +1298,7 @@ TEST_F(PartitionPrunerTest, TestHashSchemasPerRangeWithPartialPrimaryKeyRangePru
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
// [(0, 0, _), (2, 2, _))
{
@@ -1412,7 +1412,7 @@ TEST_F(PartitionPrunerTest, TestInListHashPruningPerRange) {
CreatePartitionSchemaPB({"A"}, { {{"B", "C"}, 3, 0} }, &pb);
vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
// [(a, _, _), (c, _, _))
{
@@ -1514,7 +1514,7 @@ TEST_F(PartitionPrunerTest, TestSingleRangeElementAndBoundaryCase) {
CreatePartitionSchemaPB({"A"}, { {{"B"}, 2, 0} }, &pb);
vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
// [(0, _), (1, _))
{
diff --git a/src/kudu/common/partition_pruner.cc b/src/kudu/common/partition_pruner.cc
index 93f225a..6549001 100644
--- a/src/kudu/common/partition_pruner.cc
+++ b/src/kudu/common/partition_pruner.cc
@@ -158,8 +158,8 @@ void EncodeRangeKeysFromPredicates(const Schema& schema,
col_idxs.reserve(range_columns.size());
for (const auto& column : range_columns) {
int32_t col_idx = schema.find_column_by_id(column);
- CHECK(col_idx != Schema::kColumnNotFound);
- CHECK(col_idx < schema.num_key_columns());
+ CHECK_NE(Schema::kColumnNotFound, col_idx);
+ CHECK_LT(col_idx, schema.num_key_columns());
col_idxs.push_back(col_idx);
}
@@ -180,14 +180,14 @@ void EncodeRangeKeysFromPredicates(const Schema& schema,
} // anonymous namespace
vector<bool> PartitionPruner::PruneHashComponent(
- const PartitionSchema::HashBucketSchema& hash_bucket_schema,
+ const PartitionSchema::HashDimension& hash_dimension,
const Schema& schema,
const ScanSpec& scan_spec) {
- vector<bool> hash_bucket_bitset(hash_bucket_schema.num_buckets, false);
+ vector<bool> hash_bucket_bitset(hash_dimension.num_buckets, false);
vector<string> encoded_strings(1, "");
- for (size_t col_offset = 0; col_offset < hash_bucket_schema.column_ids.size(); ++col_offset) {
+ for (size_t col_offset = 0; col_offset < hash_dimension.column_ids.size(); ++col_offset) {
vector<string> new_encoded_strings;
- const ColumnSchema& column = schema.column_by_id(hash_bucket_schema.column_ids[col_offset]);
+ const ColumnSchema& column = schema.column_by_id(hash_dimension.column_ids[col_offset]);
const ColumnPredicate& predicate = FindOrDie(scan_spec.predicates(), column.name());
const KeyEncoder<string>& encoder = GetKeyEncoder<string>(column.type_info());
@@ -206,7 +206,7 @@ vector<bool> PartitionPruner::PruneHashComponent(
for (const void* predicate_value : predicate_values) {
string new_encoded_string = encoded_string;
encoder.Encode(predicate_value,
- col_offset + 1 == hash_bucket_schema.column_ids.size(),
+ col_offset + 1 == hash_dimension.column_ids.size(),
&new_encoded_string);
new_encoded_strings.emplace_back(new_encoded_string);
}
@@ -214,8 +214,9 @@ vector<bool> PartitionPruner::PruneHashComponent(
encoded_strings.swap(new_encoded_strings);
}
for (const string& encoded_string : encoded_strings) {
- uint32_t hash = PartitionSchema::BucketForEncodedColumns(encoded_string, hash_bucket_schema);
- hash_bucket_bitset[hash] = true;
+ uint32_t hash_value = PartitionSchema::HashValueForEncodedColumns(
+ encoded_string, hash_dimension);
+ hash_bucket_bitset[hash_value] = true;
}
return hash_bucket_bitset;
}
@@ -223,17 +224,17 @@ vector<bool> PartitionPruner::PruneHashComponent(
void PartitionPruner::ConstructPartitionKeyRanges(
const Schema& schema,
const ScanSpec& scan_spec,
- const PartitionSchema::HashBucketSchemas& hash_bucket_schemas,
+ const PartitionSchema::HashSchema& hash_schema,
const RangeBounds& range_bounds,
vector<PartitionKeyRange>* partition_key_ranges) {
// Create the hash bucket portion of the partition key.
// The list of hash buckets bitset per hash component
vector<vector<bool>> hash_bucket_bitsets;
- hash_bucket_bitsets.reserve(hash_bucket_schemas.size());
- for (const auto& hash_bucket_schema : hash_bucket_schemas) {
+ hash_bucket_bitsets.reserve(hash_schema.size());
+ for (const auto& hash_dimension : hash_schema) {
bool can_prune = true;
- for (const auto& column_id : hash_bucket_schema.column_ids) {
+ for (const auto& column_id : hash_dimension.column_ids) {
const ColumnSchema& column = schema.column_by_id(column_id);
const ColumnPredicate* predicate = FindOrNull(scan_spec.predicates(), column.name());
if (predicate == nullptr ||
@@ -244,12 +245,11 @@ void PartitionPruner::ConstructPartitionKeyRanges(
}
}
if (can_prune) {
- auto hash_bucket_bitset = PruneHashComponent(hash_bucket_schema,
- schema,
- scan_spec);
+ auto hash_bucket_bitset = PruneHashComponent(
+ hash_dimension, schema, scan_spec);
hash_bucket_bitsets.emplace_back(std::move(hash_bucket_bitset));
} else {
- hash_bucket_bitsets.emplace_back(hash_bucket_schema.num_buckets, true);
+ hash_bucket_bitsets.emplace_back(hash_dimension.num_buckets, true);
}
}
@@ -257,11 +257,11 @@ void PartitionPruner::ConstructPartitionKeyRanges(
size_t constrained_index;
if (!range_bounds.lower.empty() || !range_bounds.upper.empty()) {
// The range component is constrained.
- constrained_index = hash_bucket_schemas.size();
+ constrained_index = hash_schema.size();
} else {
// Search the hash bucket constraints from right to left, looking for the
// first constrained component.
- constrained_index = hash_bucket_schemas.size() -
+ constrained_index = hash_schema.size() -
distance(hash_bucket_bitsets.rbegin(),
find_if(hash_bucket_bitsets.rbegin(),
hash_bucket_bitsets.rend(),
@@ -426,7 +426,7 @@ void PartitionPruner::Init(const Schema& schema,
// the range bounds specified by the scan.
if (partition_schema.ranges_with_hash_schemas_.empty()) {
vector<PartitionKeyRange> partition_key_ranges(1);
- ConstructPartitionKeyRanges(schema, scan_spec, partition_schema.hash_bucket_schemas_,
+ ConstructPartitionKeyRanges(schema, scan_spec, partition_schema.hash_schema_,
{scan_range_lower_bound, scan_range_upper_bound},
&partition_key_ranges);
// Reverse the order of the partition key ranges, so that it is
@@ -438,35 +438,35 @@ void PartitionPruner::Init(const Schema& schema,
first_range.partition_key_ranges.begin());
} else {
vector<RangeBounds> range_bounds;
- vector<PartitionSchema::HashBucketSchemas> hash_schemas_per_range;
+ vector<PartitionSchema::HashSchema> hash_schemas_per_range;
for (const auto& range : partition_schema.ranges_with_hash_schemas_) {
- const auto& hash_schemas = range.hash_schemas.empty() ?
- partition_schema.hash_bucket_schemas_ : range.hash_schemas;
+ const auto& hash_schema = range.hash_schema.empty() ?
+ partition_schema.hash_schema_ : range.hash_schema;
// Both lower and upper bounds are unbounded.
if (scan_range_lower_bound.empty() && scan_range_upper_bound.empty()) {
range_bounds.emplace_back(RangeBounds{range.lower, range.upper});
- hash_schemas_per_range.emplace_back(hash_schemas);
+ hash_schemas_per_range.emplace_back(hash_schema);
continue;
}
// Only one of the lower/upper bounds is unbounded.
if (scan_range_lower_bound.empty()) {
if (scan_range_upper_bound > range.lower) {
range_bounds.emplace_back(RangeBounds{range.lower, range.upper});
- hash_schemas_per_range.emplace_back(hash_schemas);
+ hash_schemas_per_range.emplace_back(hash_schema);
}
continue;
}
if (scan_range_upper_bound.empty()) {
if (scan_range_lower_bound < range.upper) {
range_bounds.emplace_back(RangeBounds{range.lower, range.upper});
- hash_schemas_per_range.emplace_back(hash_schemas);
+ hash_schemas_per_range.emplace_back(hash_schema);
}
continue;
}
// Both lower and upper ranges are bounded.
if (scan_range_lower_bound < range.upper && scan_range_upper_bound > range.lower) {
range_bounds.emplace_back(RangeBounds{range.lower, range.upper});
- hash_schemas_per_range.emplace_back(hash_schemas);
+ hash_schemas_per_range.emplace_back(hash_schema);
}
}
DCHECK_EQ(range_bounds.size(), hash_schemas_per_range.size());
diff --git a/src/kudu/common/partition_pruner.h b/src/kudu/common/partition_pruner.h
index 4e3a1f4..2730409 100644
--- a/src/kudu/common/partition_pruner.h
+++ b/src/kudu/common/partition_pruner.h
@@ -97,15 +97,16 @@ class PartitionPruner {
// Search all combinations of in-list and equality predicates.
// Return hash values bitset of these combinations.
static std::vector<bool> PruneHashComponent(
- const PartitionSchema::HashBucketSchema& hash_bucket_schema,
+ const PartitionSchema::HashDimension& hash_dimension,
const Schema& schema,
const ScanSpec& scan_spec);
- // Given the range bounds and the hash bucket schemas, constructs a set of partition key ranges.
+ // Given the range bounds and the hash schema, constructs a set of partition
+ // key ranges.
static void ConstructPartitionKeyRanges(
const Schema& schema,
const ScanSpec& scan_spec,
- const PartitionSchema::HashBucketSchemas& hash_bucket_schemas,
+ const PartitionSchema::HashSchema& hash_schema,
const RangeBounds& range_bounds,
std::vector<PartitionKeyRange>* partition_key_ranges);
diff --git a/src/kudu/integration-tests/table_locations-itest.cc b/src/kudu/integration-tests/table_locations-itest.cc
index 67dc370..0fa4745 100644
--- a/src/kudu/integration-tests/table_locations-itest.cc
+++ b/src/kudu/integration-tests/table_locations-itest.cc
@@ -154,20 +154,19 @@ class TableLocationsTest : public KuduTest {
virtual void SetUpConfig() {}
- struct HashBucketSchema {
+ struct HashDimension {
vector<string> columns;
int32_t num_buckets;
uint32_t seed;
};
- typedef vector<HashBucketSchema> HashBucketSchemas;
- typedef vector<HashBucketSchemas> PerRangeHashBucketSchemas;
+ typedef vector<HashDimension> HashSchema;
Status CreateTable(const string& table_name,
const Schema& schema,
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& bounds,
- const PerRangeHashBucketSchemas& range_hash_schema,
- const HashBucketSchemas& table_hash_schema);
+ const vector<HashSchema>& ranges_hash_schemas,
+ const HashSchema& table_hash_schema);
void CreateTable(const string& table_name, int num_splits);
@@ -185,8 +184,8 @@ Status TableLocationsTest::CreateTable(
const Schema& schema,
const vector<KuduPartialRow>& split_rows = {},
const vector<pair<KuduPartialRow, KuduPartialRow>>& bounds = {},
- const PerRangeHashBucketSchemas& range_hash_schema = {},
- const HashBucketSchemas& table_hash_schema = {}) {
+ const vector<HashSchema>& ranges_hash_schemas = {},
+ const HashSchema& table_hash_schema = {}) {
CreateTableRequestPB req;
req.set_name(table_name);
@@ -199,33 +198,33 @@ Status TableLocationsTest::CreateTable(
for (const auto& bound : bounds) {
splits_encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, bound.first);
splits_encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, bound.second);
- if (!range_hash_schema.empty()) {
+ if (!ranges_hash_schemas.empty()) {
RowOperationsPBEncoder encoder(partition_schema_pb->add_range_bounds());
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, bound.first);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, bound.second);
}
}
- for (const auto& hash_schemas : range_hash_schema) {
+ for (const auto& hash_schema : ranges_hash_schemas) {
auto* range_hash_schemas_pb = partition_schema_pb->add_range_hash_schemas();
- for (const auto& hash_schema : hash_schemas) {
+ for (const auto& hash_dimension : hash_schema) {
auto* hash_schema_pb = range_hash_schemas_pb->add_hash_schemas();
- for (const string& col_name : hash_schema.columns) {
+ for (const string& col_name : hash_dimension.columns) {
hash_schema_pb->add_columns()->set_name(col_name);
}
- hash_schema_pb->set_num_buckets(hash_schema.num_buckets);
- hash_schema_pb->set_seed(hash_schema.seed);
+ hash_schema_pb->set_num_buckets(hash_dimension.num_buckets);
+ hash_schema_pb->set_seed(hash_dimension.seed);
}
}
if (!table_hash_schema.empty()) {
- for (const auto& hash_schema : table_hash_schema) {
+ for (const auto& hash_dimension : table_hash_schema) {
auto* hash_schema_pb = partition_schema_pb->add_hash_bucket_schemas();
- for (const string& col_name : hash_schema.columns) {
+ for (const string& col_name : hash_dimension.columns) {
hash_schema_pb->add_columns()->set_name(col_name);
}
- hash_schema_pb->set_num_buckets(hash_schema.num_buckets);
- hash_schema_pb->set_seed(hash_schema.seed);
+ hash_schema_pb->set_num_buckets(hash_dimension.num_buckets);
+ hash_schema_pb->set_seed(hash_dimension.seed);
}
}
@@ -477,17 +476,18 @@ TEST_F(TableLocationsTest, TestRangeSpecificHashing) {
ASSERT_OK(bounds[2].first.SetStringNoCopy(0, "e"));
ASSERT_OK(bounds[2].second.SetStringNoCopy(0, "f"));
- PerRangeHashBucketSchemas range_hash_schema;
- HashBucketSchemas hash_schema_4_by_2 = { { { "key" }, 4, 0 }, { { "val" }, 2, 0} };
- range_hash_schema.emplace_back(hash_schema_4_by_2);
- HashBucketSchemas hash_schema_6 = { { { "key" }, 6, 2 } };
- range_hash_schema.emplace_back(hash_schema_6);
+ vector<HashSchema> range_hash_schemas;
+ HashSchema hash_schema_4_by_2 = { { { "key" }, 4, 0 }, { { "val" }, 2, 0} };
+ range_hash_schemas.emplace_back(hash_schema_4_by_2);
+ HashSchema hash_schema_6 = { { { "key" }, 6, 2 } };
+ range_hash_schemas.emplace_back(hash_schema_6);
// Table-wide hash schema, applied to range by default if no per-range schema is specified.
- HashBucketSchemas table_hash_schema_5 = { { { "val" }, 5, 4 } };
- range_hash_schema.push_back({});
+ HashSchema table_hash_schema_5 = { { { "val" }, 5, 4 } };
+ range_hash_schemas.emplace_back();
- ASSERT_OK(CreateTable(table_name, schema, {}, bounds, range_hash_schema, table_hash_schema_5));
+ ASSERT_OK(CreateTable(
+ table_name, schema, {}, bounds, range_hash_schemas, table_hash_schema_5));
NO_FATALS(CheckMasterTableCreation(table_name, 19));
GetTableLocationsRequestPB req;
diff --git a/src/kudu/integration-tests/txn_commit-itest.cc b/src/kudu/integration-tests/txn_commit-itest.cc
index 1a01867..b833959 100644
--- a/src/kudu/integration-tests/txn_commit-itest.cc
+++ b/src/kudu/integration-tests/txn_commit-itest.cc
@@ -170,7 +170,7 @@ class TxnCommitITest : public KuduTest {
table_name_ = w.table_name();
initial_row_count_ = w.rows_inserted();
- // Since the test table uses the hash partitioning scheme, every tablet gets
+ // Since the test table uses hash partitioning, every tablet gets
// at least one write operation when inserting several rows into the test
// table. So, for every transaction inserting several rows into the test
// table, it's easy to build the list of transaction participants.
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 92690ad..ab29a95 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1859,7 +1859,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
}
}
- PartitionSchema::PerRangeHashBucketSchemas range_hash_schemas;
+ vector<PartitionSchema::HashSchema> range_hash_schemas;
if (FLAGS_enable_per_range_hash_schemas) {
// TODO(aserbin): the signature of CreatePartitions() require the
// 'range_hash_schemas' parameters: update its signature
@@ -1869,10 +1869,10 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
// CreatePartitions() should be updated correspondingly.
const auto& ps = req.partition_schema();
for (int i = 0; i < ps.range_hash_schemas_size(); i++) {
- PartitionSchema::HashBucketSchemas hash_bucket_schemas;
- RETURN_NOT_OK(PartitionSchema::ExtractHashBucketSchemasFromPB(
- schema, ps.range_hash_schemas(i).hash_schemas(), &hash_bucket_schemas));
- range_hash_schemas.emplace_back(std::move(hash_bucket_schemas));
+ PartitionSchema::HashSchema hash_schema;
+ RETURN_NOT_OK(PartitionSchema::ExtractHashSchemaFromPB(
+ schema, ps.range_hash_schemas(i).hash_schemas(), &hash_schema));
+ range_hash_schemas.emplace_back(std::move(hash_schema));
}
}
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index 3d218ee..4a622ad 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -171,13 +171,12 @@ class MasterTest : public KuduTest {
KuduTest::TearDown();
}
- struct HashBucketSchema {
+ struct HashDimension {
vector<string> columns;
int32_t num_buckets;
uint32_t seed;
};
- typedef vector<HashBucketSchema> HashBucketSchemas;
- typedef vector<HashBucketSchemas> PerRangeHashBucketSchemas;
+ typedef vector<HashDimension> HashSchema;
void DoListTables(const ListTablesRequestPB& req, ListTablesResponsePB* resp);
void DoListAllTables(ListTablesResponsePB* resp);
@@ -192,7 +191,7 @@ class MasterTest : public KuduTest {
const Schema& schema,
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& bounds = {},
- const PerRangeHashBucketSchemas& range_hash_schema = {});
+ const vector<HashSchema>& range_hash_schemas = {});
Status CreateTable(const string& name,
const Schema& schema,
@@ -201,7 +200,7 @@ class MasterTest : public KuduTest {
const optional<string>& comment,
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& bounds,
- const PerRangeHashBucketSchemas& range_hash_schema);
+ const vector<HashSchema>& range_hash_schemas);
shared_ptr<Messenger> client_messenger_;
unique_ptr<MiniMaster> mini_master_;
@@ -229,9 +228,9 @@ Status MasterTest::CreateTable(
const Schema& schema,
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& bounds,
- const PerRangeHashBucketSchemas& range_hash_schema) {
+ const vector<HashSchema>& range_hash_schemas) {
return CreateTable(
- name, schema, none, none, none, split_rows, bounds, range_hash_schema);
+ name, schema, none, none, none, split_rows, bounds, range_hash_schemas);
}
Status MasterTest::CreateTable(
@@ -242,7 +241,7 @@ Status MasterTest::CreateTable(
const optional<string>& comment,
const vector<KuduPartialRow>& split_rows,
const vector<pair<KuduPartialRow, KuduPartialRow>>& bounds,
- const PerRangeHashBucketSchemas& range_hash_schema) {
+ const vector<HashSchema>& range_hash_schemas) {
CreateTableRequestPB req;
req.set_name(name);
if (type) {
@@ -257,22 +256,22 @@ Status MasterTest::CreateTable(
for (const pair<KuduPartialRow, KuduPartialRow>& bound : bounds) {
splits_encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, bound.first);
splits_encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, bound.second);
- if (!range_hash_schema.empty()) {
+ if (!range_hash_schemas.empty()) {
RowOperationsPBEncoder encoder(partition_schema_pb->add_range_bounds());
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, bound.first);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, bound.second);
}
}
- for (const auto& hash_schemas : range_hash_schema) {
+ for (const auto& range_hash_schema : range_hash_schemas) {
auto* hash_schemas_pb = partition_schema_pb->add_range_hash_schemas();
- for (const auto& hash_schema : hash_schemas) {
+ for (const auto& hash_dimension : range_hash_schema) {
auto* hash_bucket_schema_pb = hash_schemas_pb->add_hash_schemas();
- for (const string& col_name : hash_schema.columns) {
+ for (const string& col_name : hash_dimension.columns) {
hash_bucket_schema_pb->add_columns()->set_name(col_name);
}
- hash_bucket_schema_pb->set_num_buckets(hash_schema.num_buckets);
- hash_bucket_schema_pb->set_seed(hash_schema.seed);
+ hash_bucket_schema_pb->set_num_buckets(hash_dimension.num_buckets);
+ hash_bucket_schema_pb->set_seed(hash_dimension.seed);
}
}
@@ -919,7 +918,7 @@ TEST_F(MasterTest, TestCreateTableCheckRangeInvariants) {
KuduPartialRow a_upper(&kTableSchema);
ASSERT_OK(a_lower.SetInt32("key", 0));
ASSERT_OK(a_upper.SetInt32("key", 100));
- PerRangeHashBucketSchemas range_hash_schemas = {{}};
+ vector<HashSchema> range_hash_schemas = {{}};
Status s = CreateTable(kTableName,
kTableSchema,
{ split1 },
@@ -943,13 +942,12 @@ TEST_F(MasterTest, TestCreateTableCheckRangeInvariants) {
KuduPartialRow b_upper(&kTableSchema);
ASSERT_OK(b_lower.SetInt32("key", 100));
ASSERT_OK(b_upper.SetInt32("key", 200));
- HashBucketSchemas hash_schemas_4 = { { {"key"}, 4, 0 } };
- PerRangeHashBucketSchemas range_hash_schema = { std::move(hash_schemas_4) };
+ vector<HashSchema> range_hash_schemas = { { { {"key"}, 4, 0 } } };
Status s = CreateTable(kTableName,
kTableSchema,
{},
{ { a_lower, a_upper }, { b_lower, b_upper }, },
- range_hash_schema);
+ range_hash_schemas);
ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
ASSERT_STR_CONTAINS(s.ToString(),
"1 vs 2: per range hash schemas and range bounds "
@@ -964,8 +962,8 @@ TEST_F(MasterTest, TestCreateTableCheckRangeInvariants) {
Status s = CreateTable(kTableName, kTableSchema, { split });
ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
ASSERT_STR_CONTAINS(s.ToString(),
- "split rows may only contain values "
- "for range partitioned columns: val");
+ "split rows may only contain values for "
+ "range partition columns: val");
}
{ // Overlapping bounds.
@@ -1198,9 +1196,9 @@ TEST_F(MasterTest, NonPrimaryKeyColumnsForPerRangeCustomHashSchema) {
KuduPartialRow upper(&kTableSchema);
ASSERT_OK(lower.SetInt32("key", 0));
ASSERT_OK(upper.SetInt32("key", 100));
- PerRangeHashBucketSchemas range_hash_schema{{{{"int32_val"}, 2, 0}}};
+ vector<HashSchema> range_hash_schemas{{{{"int32_val"}, 2, 0}}};
const auto s = CreateTable(
- kTableName, kTableSchema, {}, { { lower, upper } }, range_hash_schema);
+ kTableName, kTableSchema, {}, { { lower, upper } }, range_hash_schemas);
ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
ASSERT_STR_CONTAINS(s.ToString(),
"must specify only primary key columns for "
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index 28c4180..e043d2a 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -180,7 +180,7 @@ message SysTablesEntryPB {
// This is the schema provided to the user on client->GetSchema(tableName).
optional SchemaPB fully_applied_schema = 4;
- // The table's partitioning schema.
+ // The table's partition schema.
optional PartitionSchemaPB partition_schema = 9;
// The next column ID to assign to newly added columns in this table.
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index c8015df..88c0bfd 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -2543,7 +2543,7 @@ TEST_F(ToolTest, TestLoadgenHmsEnabled) {
HostPort::ToCommaSeparatedString(cluster_->master_rpc_addrs())), &out));
}
-// Run the loadgen, generating a few different partitioning schemas.
+// Run the loadgen, generating a few different partition schemas.
TEST_F(ToolTest, TestLoadgenAutoGenTablePartitioning) {
{
ExternalMiniClusterOptions opts;
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 61ca244..c9038c1 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -420,18 +420,18 @@ Status CreateDstTableIfNeeded(const client::sp::shared_ptr<KuduTable>& src_table
.schema(&dst_table_schema)
.num_replicas(src_table->num_replicas());
- // Add hash partition schemas.
- for (const auto& hash_partition_schema : partition_schema.hash_partition_schemas()) {
- auto hash_columns = convert_column_ids_to_names(hash_partition_schema.column_ids);
+ // Add hash partition schema.
+ for (const auto& hash_dimension : partition_schema.hash_schema()) {
+ auto hash_columns = convert_column_ids_to_names(hash_dimension.column_ids);
table_creator->add_hash_partitions(hash_columns,
- hash_partition_schema.num_buckets,
- hash_partition_schema.seed);
+ hash_dimension.num_buckets,
+ hash_dimension.seed);
}
// Add range partition schema.
- if (!partition_schema.range_partition_schema().column_ids.empty()) {
+ if (!partition_schema.range_schema().column_ids.empty()) {
auto range_columns
- = convert_column_ids_to_names(partition_schema.range_partition_schema().column_ids);
+ = convert_column_ids_to_names(partition_schema.range_schema().column_ids);
table_creator->set_range_partition_columns(range_columns);
}
diff --git a/src/kudu/tools/tool_action_perf.cc b/src/kudu/tools/tool_action_perf.cc
index 1b36bfa..ace1c63 100644
--- a/src/kudu/tools/tool_action_perf.cc
+++ b/src/kudu/tools/tool_action_perf.cc
@@ -340,7 +340,7 @@ DEFINE_int32(table_num_hash_partitions, 8,
"greater than 1.");
DEFINE_int32(table_num_range_partitions, 1,
"The number of range partitions to create when this tool creates "
- "a new table. A range partitioning schema will be determined to "
+ "a new table. A range partition schema will be determined to "
"evenly split a sequential workload across ranges, leaving "
"the outermost ranges unbounded to ensure coverage of the entire "
"keyspace. Note: The total number of partitions must be greater "