You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2022/08/03 19:12:05 UTC
[kudu] 02/02: KUDU-2671 range-specific hash schemas in 'kudu table create' CLI
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 18d40679a441068072856995a3fb6d4cabfe6d68
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Fri Jul 29 14:07:37 2022 -0700
KUDU-2671 range-specific hash schemas in 'kudu table create' CLI
This patch adds support for range-specific hash schemas to the
'kudu table create' CLI tool. This patch also contains a test scenario
to cover the newly introduced functionality.
Change-Id: I94aab482792ef93754b6475e1390b8f0c4a05678
Reviewed-on: http://gerrit.cloudera.org:8080/18809
Reviewed-by: Mahesh Reddy <mr...@cloudera.com>
Tested-by: Alexey Serbin <al...@apache.org>
Reviewed-by: Yingchun Lai <ac...@gmail.com>
Reviewed-by: Abhishek Chennaka <ac...@cloudera.com>
Reviewed-by: Attila Bukor <ab...@apache.org>
---
src/kudu/tools/create-table-tool-test.cc | 130 +++++++++++++++++++++++++++----
src/kudu/tools/tool.proto | 40 ++++++----
src/kudu/tools/tool_action_table.cc | 64 +++++++++++++--
3 files changed, 198 insertions(+), 36 deletions(-)
diff --git a/src/kudu/tools/create-table-tool-test.cc b/src/kudu/tools/create-table-tool-test.cc
index d592638d7..9ce59a98d 100644
--- a/src/kudu/tools/create-table-tool-test.cc
+++ b/src/kudu/tools/create-table-tool-test.cc
@@ -15,13 +15,14 @@
// specific language governing permissions and limitations
// under the License.
+#include <algorithm>
+#include <cstdint>
#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
-#include <utility>
#include <vector>
#include <gtest/gtest.h>
@@ -99,28 +100,32 @@ TEST_F(CreateTableToolTest, TestCreateTable) {
// Test a few good cases.
const auto check_good_input = [&](const string& json_str,
- const string& master,
+ const string& master_rpc_addr,
const string& table_name,
- const string& schema,
- const string& partition,
+ const string& schema_str,
+ const string& partition_str,
const map<string, string>& extra_configs,
- KuduClient* client) {
+ KuduClient* client,
+ shared_ptr<KuduTable>* table_out = nullptr) {
const vector<string> table_args = {
- "table", "create", master, json_str
+ "table", "create", master_rpc_addr, json_str
};
bool table_exists = false;
ASSERT_OK(RunKuduTool(table_args));
ASSERT_EVENTUALLY([&] {
- ASSERT_OK(client->TableExists(table_name, &table_exists));
- ASSERT_TRUE(table_exists);
+ ASSERT_OK(client->TableExists(table_name, &table_exists));
+ ASSERT_TRUE(table_exists);
});
shared_ptr<KuduTable> table;
ASSERT_OK(client->OpenTable(table_name, &table));
- ASSERT_EQ(table->name(), table_name);
- ASSERT_EQ(table->schema().ToString(), schema);
- ASSERT_EQ(table->partition_schema().DebugString(KuduSchema::ToSchema(
- table->schema())), partition);
- ASSERT_EQ(table->extra_configs(), extra_configs);
+ ASSERT_EQ(table_name, table->name());
+ ASSERT_EQ(schema_str, table->schema().ToString());
+ ASSERT_EQ(partition_str, table->partition_schema().DebugString(
+ KuduSchema::ToSchema(table->schema())));
+ ASSERT_EQ(extra_configs, table->extra_configs());
+ if (table_out) {
+ *table_out = std::move(table);
+ }
};
// Create a simple table.
@@ -613,6 +618,105 @@ TEST_F(CreateTableToolTest, TestCreateTable) {
NO_FATALS(check_good_input(encoding_type_unknown, master_addr,
"encoding_type_unknown", schema, partition, {}, client.get()));
+ // Create a table with a range having custom hash schema.
+ const string range_with_custom_hash_schema = R"(
+ {
+ "table_name": "range_with_custom_hash_schema",
+ "schema": {
+ "columns": [
+ {
+ "column_name": "id",
+ "column_type": "INT32",
+ "is_nullable": false,
+ },
+ {
+ "column_name": "name",
+ "column_type": "STRING",
+ "is_nullable": true,
+ }
+ ],
+ "key_column_names": [
+ "id"
+ ]
+ },
+ "partition": {
+ "hash_partitions": [
+ {
+ "columns": ["id"],
+ "num_buckets": 2,
+ "seed": 1
+ }
+ ],
+ "range_partition": {
+ "columns": ["id"],
+ "range_bounds": [
+ {
+ "upper_bound": {
+ "bound_values": ["-100"],
+ "bound_type": "EXCLUSIVE"
+ }
+ },
+ {
+ "lower_bound": {
+ "bound_values": ["100"],
+ "bound_type": "INCLUSIVE"
+ }
+ }
+ ],
+ "custom_hash_schema_ranges": [
+ {
+ "range_bounds": {
+ "lower_bound": {
+ "bound_values": ["-100"],
+ "bound_type": "INCLUSIVE"
+ },
+ "upper_bound": {
+ "bound_values": ["100"],
+ "bound_type": "EXCLUSIVE"
+ }
+ },
+ "hash_schema": {
+ "columns": ["id"],
+ "num_buckets": 5,
+ "seed": 8
+ }
+ }
+ ]
+ }
+ }
+ }
+ )";
+ {
+ constexpr const char* const kRefSchema =
+ "(\n"
+ " id INT32 NOT NULL,\n"
+ " name STRING NULLABLE,\n"
+ " PRIMARY KEY (id)\n)";
+ constexpr const char* const kRefPartitionInfo =
+ "HASH (id) PARTITIONS 2 SEED 1, RANGE (id)";
+ shared_ptr<KuduTable> table;
+ NO_FATALS(check_good_input(range_with_custom_hash_schema,
+ master_addr,
+ "range_with_custom_hash_schema",
+ kRefSchema,
+ kRefPartitionInfo,
+ {},
+ client.get(),
+ &table));
+ vector<Partition> partitions;
+ ASSERT_OK(table->ListPartitions(&partitions));
+ ASSERT_EQ(9, partitions.size());
+ vector<int32_t> bucket_nums;
+ for (const auto& p : partitions) {
+ // All hash schemas in this table are one-dimensional.
+ ASSERT_EQ(1, p.hash_buckets().size());
+ bucket_nums.emplace_back(p.hash_buckets().front());
+ }
+ std::sort(bucket_nums.begin(), bucket_nums.end());
+ const vector<int32_t> ref_bucket_nums{0, 0, 0, 1, 1, 1, 2, 3, 4};
+ ASSERT_EQ(ref_bucket_nums, bucket_nums);
+ }
+
// Test a few error cases.
const auto check_bad_input = [&](const string& json_str,
const string& master,
diff --git a/src/kudu/tools/tool.proto b/src/kudu/tools/tool.proto
index dc735e137..f98d8cd84 100644
--- a/src/kudu/tools/tool.proto
+++ b/src/kudu/tools/tool.proto
@@ -398,6 +398,20 @@ message ColumnPB {
}
message PartitionPB {
+ message HashPartitionPB {
+ // Column names of columns included in the hash. Every column must be
+ // a component of the primary key.
+ repeated string columns = 1;
+ // Number of buckets into which columns will be hashed. Must be at least 2.
+ optional int32 num_buckets = 2;
+ // Seed value for hash calculation. Administrators may set a seed value
+ // on a per-table basis in order to randomize the mapping of rows to
+ // buckets. Setting a seed provides some amount of protection against denial
+ // of service attacks when the hash bucket columns contain user provided
+ // input.
+ optional uint32 seed = 3;
+ }
+
message RangePartitionPB {
message BoundPB {
enum Type {
@@ -422,6 +436,12 @@ message PartitionPB {
// exact string value for the bound.
repeated string split_values = 1;
}
+ message RangeWithHashSchemaPB {
+ // The bounds of this range.
+ optional RangeBoundPB range_bounds = 1;
+ // Hash schema for this range.
+ repeated HashPartitionPB hash_schema = 2;
+ }
// Column names of columns included in the range. All columns must be
// a component of the primary key.
@@ -430,25 +450,13 @@ message PartitionPB {
repeated RangeBoundPB range_bounds = 2;
// Range splits.
repeated SplitValuePB range_splits = 3;
+ // Ranges with custom hash schemas.
+ repeated RangeWithHashSchemaPB custom_hash_schema_ranges = 4;
}
- message HashPartitionPB {
- // Column names of columns included in the hash. Every column must be
- // a component of the primary key.
- repeated string columns = 1;
- // Number of buckets into which columns will be hashed. Must be at least 2.
- optional int32 num_buckets = 2;
- // Seed value for hash calculation. Administrators may set a seed value
- // on a per-table basis in order to randomize the mapping of rows to
- // buckets. Setting a seed provides some amount of protection against denial
- // of service attacks when the hash bucket columns contain user provided
- // input.
- optional uint32 seed = 3;
- }
-
- // Hash partition message. Support zero or more hash partition levels .
+ // Table-wide hash schema.
repeated HashPartitionPB hash_partitions = 1;
- // range partition message.
+ // Range partitioning information.
optional RangePartitionPB range_partition = 2;
}
diff --git a/src/kudu/tools/tool_action_table.cc b/src/kudu/tools/tool_action_table.cc
index a03c23574..b764781de 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -72,6 +72,7 @@ using kudu::client::KuduColumnSchema;
using kudu::client::KuduColumnSpec;
using kudu::client::KuduColumnStorageAttributes;
using kudu::client::KuduPredicate;
+using kudu::client::KuduRangePartition;
using kudu::client::KuduScanToken;
using kudu::client::KuduScanTokenBuilder;
using kudu::client::KuduScanner;
@@ -1491,11 +1492,8 @@ Status ParseTablePartition(const PartitionPB& partition,
string bound_partial_row_json;
for (const auto& bound : partition.range_partition().range_bounds()) {
unique_ptr<KuduPartialRow> lower_bound(kudu_schema.NewRow());
- unique_ptr<KuduPartialRow> upper_bound(kudu_schema.NewRow());
KuduTableCreator::RangePartitionBound lower_bound_type =
KuduTableCreator::INCLUSIVE_BOUND;
- KuduTableCreator::RangePartitionBound upper_bound_type =
- KuduTableCreator::EXCLUSIVE_BOUND;
if (bound.has_lower_bound()) {
RETURN_NOT_OK(ToJsonPartialRow(bound.lower_bound().bound_values(),
range_col_names_and_types,
@@ -1503,9 +1501,13 @@ Status ParseTablePartition(const PartitionPB& partition,
RETURN_NOT_OK(ConvertToKuduPartialRow(range_col_names_and_types,
bound_partial_row_json,
lower_bound.get()));
- RETURN_NOT_OK(ToClientRangePartitionBound(bound.lower_bound().bound_type(),
- &lower_bound_type));
+ RETURN_NOT_OK(ToClientRangePartitionBound(
+ bound.lower_bound().bound_type(), &lower_bound_type));
}
+
+ unique_ptr<KuduPartialRow> upper_bound(kudu_schema.NewRow());
+ KuduTableCreator::RangePartitionBound upper_bound_type =
+ KuduTableCreator::EXCLUSIVE_BOUND;
if (bound.has_upper_bound()) {
RETURN_NOT_OK(ToJsonPartialRow(bound.upper_bound().bound_values(),
range_col_names_and_types,
@@ -1513,12 +1515,60 @@ Status ParseTablePartition(const PartitionPB& partition,
RETURN_NOT_OK(ConvertToKuduPartialRow(range_col_names_and_types,
bound_partial_row_json,
upper_bound.get()));
- RETURN_NOT_OK(ToClientRangePartitionBound(bound.upper_bound().bound_type(),
- &upper_bound_type));
+ RETURN_NOT_OK(ToClientRangePartitionBound(
+ bound.upper_bound().bound_type(), &upper_bound_type));
}
+
table_creator->add_range_partition(lower_bound.release(), upper_bound.release(),
lower_bound_type, upper_bound_type);
}
+
+ for (const auto& range : partition.range_partition().custom_hash_schema_ranges()) {
+ const auto& bounds = range.range_bounds();
+
+ unique_ptr<KuduPartialRow> lower_bound(kudu_schema.NewRow());
+ KuduTableCreator::RangePartitionBound lower_bound_type =
+ KuduTableCreator::INCLUSIVE_BOUND;
+ if (bounds.has_lower_bound()) {
+ RETURN_NOT_OK(ToJsonPartialRow(bounds.lower_bound().bound_values(),
+ range_col_names_and_types,
+ &bound_partial_row_json));
+ RETURN_NOT_OK(ConvertToKuduPartialRow(range_col_names_and_types,
+ bound_partial_row_json,
+ lower_bound.get()));
+ RETURN_NOT_OK(ToClientRangePartitionBound(
+ bounds.lower_bound().bound_type(), &lower_bound_type));
+ }
+
+ unique_ptr<KuduPartialRow> upper_bound(kudu_schema.NewRow());
+ KuduTableCreator::RangePartitionBound upper_bound_type =
+ KuduTableCreator::EXCLUSIVE_BOUND;
+ if (bounds.has_upper_bound()) {
+ RETURN_NOT_OK(ToJsonPartialRow(bounds.upper_bound().bound_values(),
+ range_col_names_and_types,
+ &bound_partial_row_json));
+ RETURN_NOT_OK(ConvertToKuduPartialRow(range_col_names_and_types,
+ bound_partial_row_json,
+ upper_bound.get()));
+ RETURN_NOT_OK(ToClientRangePartitionBound(
+ bounds.upper_bound().bound_type(), &upper_bound_type));
+ }
+
+ unique_ptr<KuduRangePartition> partition(
+ new KuduRangePartition(lower_bound.release(), upper_bound.release(),
+ lower_bound_type, upper_bound_type));
+ for (const auto& hash_dimension : range.hash_schema()) {
+ vector<string> hash_columns;
+ for (const auto& c : hash_dimension.columns()) {
+ hash_columns.emplace_back(c);
+ }
+ const int32_t seed = hash_dimension.has_seed() ? hash_dimension.seed() : 0;
+ partition->add_hash_partitions(
+ hash_columns, hash_dimension.num_buckets(), seed);
+ }
+
+ table_creator->add_custom_range_partition(partition.release());
+ }
for (const auto& split_pb : partition.range_partition().range_splits()) {
RETURN_NOT_OK(ToJsonPartialRow(split_pb.split_values(),
range_col_names_and_types,