You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/12/01 18:51:10 UTC
kudu git commit: KUDU-1775 (part 2). Enforce a max number of columns
and reasonable identifier lengths
Repository: kudu
Updated Branches:
refs/heads/master b0209c16d -> b30d4fc15
KUDU-1775 (part 2). Enforce a max number of columns and reasonable identifier lengths
* Disallows unreasonable identifiers such as empty strings or strings
longer than a configurable maximum size. I set the default max size to
256. MySQL[1] and PostgreSQL[2] seem to limit identifiers to 64
characters, and Oracle[3] seems to limit to 30. So 256 seemed like it
should be plenty. The flag is marked as unsafe but if anyone has a
compatibility issue on upgrade, the limit can be raised.
* Enforces a maximum number of columns, set here to 300 as a default.
Really wide tables are an anti-pattern, and although some initial
testing shows that we won't completely fail until a few thousand
columns, we probably want to put a guard rail up well before the
failure point.
Again, if users are depending on a higher number, this can be worked
around by raising the default through an unsafe flag.
[1] http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
[2] https://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html
[3] http://stackoverflow.com/questions/18248318/change-table-column-index-names-size-in-oracle-11g-or-12c
Change-Id: I9cab30dd1ea78a160403c79442b08739ddc92f12
Reviewed-on: http://gerrit.cloudera.org:8080/5290
Reviewed-by: Todd Lipcon <to...@apache.org>
Reviewed-by: Dan Burkert <da...@apache.org>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b30d4fc1
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b30d4fc1
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b30d4fc1
Branch: refs/heads/master
Commit: b30d4fc154151aaa5583dbbd8ef83ac044236ee9
Parents: b0209c1
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Nov 30 15:09:01 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Dec 1 18:50:14 2016 +0000
----------------------------------------------------------------------
java/kudu-client/pom.xml | 10 +-
.../org/apache/kudu/client/TestKuduClient.java | 27 ++++
java/pom.xml | 3 +-
src/kudu/client/client-test.cc | 89 +++++++++++++
src/kudu/master/catalog_manager.cc | 132 +++++++++++++------
src/kudu/master/master-test.cc | 23 +---
6 files changed, 225 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/kudu-client/pom.xml
----------------------------------------------------------------------
diff --git a/java/kudu-client/pom.xml b/java/kudu-client/pom.xml
index cb5b55f..81866d9 100644
--- a/java/kudu-client/pom.xml
+++ b/java/kudu-client/pom.xml
@@ -64,8 +64,14 @@
</dependency>
<dependency>
<groupId>org.mockito</groupId>
- <artifactId>mockito-all</artifactId>
- <version>${mockito-all.version}</version>
+ <artifactId>mockito-core</artifactId>
+ <version>${mockito-core.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-core</artifactId>
+ <version>${hamcrest-core.version}</version>
<scope>test</scope>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
index aeba4bb..64951b2 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
@@ -24,8 +24,10 @@ import static org.apache.kudu.client.RowResult.timestampToString;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.junit.matchers.JUnitMatchers.containsString;
import java.util.ArrayList;
import java.util.Collections;
@@ -116,6 +118,31 @@ public class TestKuduClient extends BaseKuduTest {
newSchema.getColumn("column3_s").getCompressionAlgorithm());
}
+
+ /**
+ * Test creating a table with various invalid schema cases.
+ */
+ @Test(timeout = 100000)
+ public void testCreateTableTooManyColumns() throws Exception {
+ List<ColumnSchema> cols = new ArrayList<>();
+ cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING)
+ .key(true)
+ .build());
+ for (int i = 0; i < 1000; i++) {
+ // not null with default
+ cols.add(new ColumnSchema.ColumnSchemaBuilder("c" + i, Type.STRING)
+ .build());
+ }
+ Schema schema = new Schema(cols);
+ try {
+ syncClient.createTable(tableName, schema, getBasicCreateTableOptions());
+ } catch (NonRecoverableException nre) {
+ assertThat(nre.toString(), containsString(
+ "number of columns 1001 is greater than the permitted maximum"));
+ }
+ }
+
+
/**
* Test creating a table with columns with different combinations of NOT NULL and
* default values, inserting rows, and checking the results are as expected.
http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index c9fb510..6575ef6 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -62,9 +62,10 @@
<guava.version>18.0</guava.version>
<hadoop.version>2.7.2</hadoop.version>
<jsr305.version>3.0.1</jsr305.version>
+ <hamcrest-core.version>1.3</hamcrest-core.version>
<junit.version>4.11</junit.version>
<log4j.version>1.2.17</log4j.version>
- <mockito-all.version>1.9.0</mockito-all.version>
+ <mockito-core.version>1.9.0</mockito-core.version>
<murmur.version>1.0.0</murmur.version>
<netty.version>3.10.5.Final</netty.version>
<protobuf.version>2.6.1</protobuf.version>
http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 77aa1a1..3c4f962 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -3109,6 +3109,8 @@ TEST_F(ClientTest, TestWriteWithBadSchema) {
}
TEST_F(ClientTest, TestBasicAlterOperations) {
+ const vector<string> kBadNames = {"", string(1000, 'x')};
+
// test that having no steps throws an error
{
gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
@@ -3154,6 +3156,38 @@ TEST_F(ClientTest, TestBasicAlterOperations) {
ASSERT_STR_CONTAINS(s.ToString(), "The column already exists: string_val");
}
+ // Test that renaming a column to an invalid name throws an error.
+ for (const string& bad_name : kBadNames) {
+ gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
+ table_alterer->AlterColumn("int_val")->RenameTo(bad_name);
+ Status s = table_alterer->Alter();
+ ASSERT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid column name");
+ }
+
+ // Test that renaming a table to an invalid name throws an error.
+ for (const string& bad_name : kBadNames) {
+ gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
+ table_alterer->RenameTo(bad_name);
+ Status s = table_alterer->Alter();
+ ASSERT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid table name");
+ }
+
+ // Test trying to add columns to a table such that it exceeds the permitted
+ // maximum.
+ {
+ gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
+ for (int i = 0; i < 1000; i++) {
+ table_alterer->AddColumn(Substitute("c$0", i))->Type(KuduColumnSchema::INT32);
+ }
+ Status s = table_alterer->Alter();
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(),
+ "number of columns 1004 is greater than the "
+ "permitted maximum 300");
+ }
+
// Need a tablet peer for the next set of tests.
string tablet_id = GetFirstTabletId(client_table_.get());
scoped_refptr<TabletPeer> tablet_peer;
@@ -3800,6 +3834,61 @@ TEST_F(ClientTest, TestCreateTableWithInvalidEncodings) {
"DICT_ENCODING not supported for type INT32");
}
+TEST_F(ClientTest, TestCreateTableWithTooManyColumns) {
+ unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+ KuduSchema schema;
+ KuduSchemaBuilder schema_builder;
+ schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+ for (int i = 0; i < 1000; i++) {
+ schema_builder.AddColumn(Substitute("c$0", i))
+ ->Type(KuduColumnSchema::INT32)->NotNull();
+ }
+ ASSERT_OK(schema_builder.Build(&schema));
+ Status s = table_creator->table_name("foobar")
+ .schema(&schema)
+ .set_range_partition_columns({ "key" })
+ .Create();
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(),
+ "number of columns 1001 is greater than the "
+ "permitted maximum 300");
+}
+
+TEST_F(ClientTest, TestCreateTableWithTooLongTableName) {
+ const string kLongName(1000, 'x');
+ unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+ KuduSchema schema;
+ KuduSchemaBuilder schema_builder;
+ schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+ ASSERT_OK(schema_builder.Build(&schema));
+ Status s = table_creator->table_name(kLongName)
+ .schema(&schema)
+ .set_range_partition_columns({ "key" })
+ .Create();
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_MATCHES(s.ToString(),
+ "invalid table name: identifier 'xxx*' "
+ "longer than maximum permitted length 256");
+}
+
+TEST_F(ClientTest, TestCreateTableWithTooLongColumnName) {
+ const string kLongName(1000, 'x');
+ unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+ KuduSchema schema;
+ KuduSchemaBuilder schema_builder;
+ schema_builder.AddColumn(kLongName)->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+ ASSERT_OK(schema_builder.Build(&schema));
+ Status s = table_creator->table_name("foobar")
+ .schema(&schema)
+ .set_range_partition_columns({ kLongName })
+ .Create();
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_MATCHES(s.ToString(),
+ "invalid column name: identifier 'xxx*' "
+ "longer than maximum permitted length 256");
+}
+
+
// Check the behavior of the latest observed timestamp when performing
// write and read operations.
TEST_F(ClientTest, TestLatestObservedTimestamp) {
http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index d5af69c..33fcc92 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -121,6 +121,20 @@ DEFINE_int32(max_num_replicas, 7,
// Tag as unsafe since we have done very limited testing of higher than 5 replicas.
TAG_FLAG(max_num_replicas, unsafe);
+DEFINE_int32(max_num_columns, 300,
+ "Maximum number of columns that may be in a table.");
+// Tag as unsafe since we have done very limited testing of higher than 300 columns.
+TAG_FLAG(max_num_columns, unsafe);
+
+
+DEFINE_int32(max_identifier_length, 256,
+ "Maximum length of the name of a column or table.");
+// Tag as unsafe because we end up writing schemas in every WAL entry, etc,
+// and having very long column names would enter untested territory and affect
+// performance.
+TAG_FLAG(max_identifier_length, unsafe);
+
+
DEFINE_bool(allow_unsafe_replication_factor, false,
"Allow creating tables with even replication factor.");
TAG_FLAG(allow_unsafe_replication_factor, unsafe);
@@ -807,6 +821,65 @@ Status CatalogManager::CheckOnline() const {
return Status::OK();
}
+namespace {
+
+// Validate a table or column name to ensure that it is a valid identifier.
+Status ValidateIdentifier(const string& id) {
+ if (id.empty()) {
+ return Status::InvalidArgument("empty string not a valid identifier");
+ }
+
+ if (id.length() > FLAGS_max_identifier_length) {
+ return Status::InvalidArgument(Substitute(
+ "identifier '$0' longer than maximum permitted length $1",
+ id, FLAGS_max_identifier_length));
+ }
+
+ return Status::OK();
+}
+
+// Validate the client-provided schema and name.
+Status ValidateClientSchema(const boost::optional<string>& name,
+ const Schema& schema) {
+ if (name != boost::none) {
+ RETURN_NOT_OK_PREPEND(ValidateIdentifier(name.get()), "invalid table name");
+ }
+ for (int i = 0; i < schema.num_columns(); i++) {
+ RETURN_NOT_OK_PREPEND(ValidateIdentifier(schema.column(i).name()),
+ "invalid column name");
+ }
+ if (schema.num_key_columns() <= 0) {
+ return Status::InvalidArgument("must specify at least one key column");
+ }
+ if (schema.num_columns() > FLAGS_max_num_columns) {
+ return Status::InvalidArgument(Substitute(
+ "number of columns $0 is greater than the permitted maximum $1",
+ schema.num_columns(), FLAGS_max_num_columns));
+ }
+ for (int i = 0; i < schema.num_key_columns(); i++) {
+ if (!IsTypeAllowableInKey(schema.column(i).type_info())) {
+ return Status::InvalidArgument(
+ "key column may not have type of BOOL, FLOAT, or DOUBLE");
+ }
+ }
+
+ // Check that the encodings are valid for the specified types.
+ for (int i = 0; i < schema.num_columns(); i++) {
+ const auto& col = schema.column(i);
+ const TypeEncodingInfo *dummy;
+ Status s = TypeEncodingInfo::Get(col.type_info(),
+ col.attributes().encoding,
+ &dummy);
+ if (!s.ok()) {
+ return s.CloneAndPrepend(
+ Substitute("invalid encoding for column '$0'", col.name()));
+ }
+ }
+ return Status::OK();
+}
+
+} // anonymous namespace
+
// Create a new table.
// See README file in this directory for a description of the design.
Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
@@ -842,35 +915,13 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
// a. Validate the user request.
Schema client_schema;
RETURN_NOT_OK(SchemaFromPB(req.schema(), &client_schema));
- if (client_schema.has_column_ids()) {
- return SetError(MasterErrorPB::INVALID_SCHEMA,
- Status::InvalidArgument("User requests should not have Column IDs"));
+ s = ValidateClientSchema(req.name(), client_schema);
+ if (s.ok() && client_schema.has_column_ids()) {
+ s = Status::InvalidArgument("User requests should not have Column IDs");
}
- if (PREDICT_FALSE(client_schema.num_key_columns() <= 0)) {
- return SetError(MasterErrorPB::INVALID_SCHEMA,
- Status::InvalidArgument("Must specify at least one key column"));
- }
- for (int i = 0; i < client_schema.num_key_columns(); i++) {
- if (!IsTypeAllowableInKey(client_schema.column(i).type_info())) {
- return SetError(MasterErrorPB::INVALID_SCHEMA,
- Status::InvalidArgument(
- "Key column may not have type of BOOL, FLOAT, or DOUBLE"));
- }
- }
- // Check that the encodings are valid for the specified types.
- for (int i = 0; i < client_schema.num_columns(); i++) {
- const auto& col = client_schema.column(i);
- const TypeEncodingInfo *dummy;
- Status s = TypeEncodingInfo::Get(col.type_info(),
- col.attributes().encoding,
- &dummy);
- if (!s.ok()) {
- return SetError(MasterErrorPB::INVALID_SCHEMA,
- s.CloneAndPrepend(
- Substitute("invalid encoding for column '$0'", col.name())));
- }
+ if (!s.ok()) {
+ return SetError(MasterErrorPB::INVALID_SCHEMA, s);
}
-
Schema schema = client_schema.CopyWithColumnIds();
// If the client did not set a partition schema in the create table request,
@@ -1262,15 +1313,8 @@ Status CatalogManager::ApplyAlterSchemaSteps(const SysTablesEntryPB& current_pb,
}
RETURN_NOT_OK(ProcessColumnPBDefaults(&new_col_pb));
- // Verify that encoding is appropriate for the new column's
- // type
+ // Can't accept a NOT NULL column without a default.
ColumnSchema new_col = ColumnSchemaFromPB(new_col_pb);
- const TypeEncodingInfo *dummy;
- RETURN_NOT_OK(TypeEncodingInfo::Get(new_col.type_info(),
- new_col.attributes().encoding,
- &dummy));
-
- // can't accept a NOT NULL column without a default
if (!new_col.is_nullable() && !new_col.has_read_default()) {
return Status::InvalidArgument(
Substitute("column `$0`: NOT NULL columns must have a default", new_col.name()));
@@ -1548,7 +1592,7 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req,
string table_name = l.data().name();
*resp->mutable_table_id() = table->id();
- // 4. Calculate new schema for the on-disk state, not persisted yet.
+ // 4. Calculate and validate new schema for the on-disk state, not persisted yet.
Schema new_schema;
ColumnId next_col_id = ColumnId(l.data().pb.next_column_id());
if (!alter_schema_steps.empty()) {
@@ -1561,10 +1605,24 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req,
DCHECK_NE(next_col_id, 0);
DCHECK_EQ(new_schema.find_column_by_id(next_col_id),
static_cast<int>(Schema::kColumnNotFound));
+
+ // Just validate the schema, not the name (validated below).
+ s = ValidateClientSchema(boost::none, new_schema);
+ if (!s.ok()) {
+ SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s);
+ return s;
+ }
}
- // 5. Try to acquire the new table name.
+ // 5. Validate and try to acquire the new table name.
if (req->has_new_table_name()) {
+ Status s = ValidateIdentifier(req->new_table_name());
+ if (!s.ok()) {
+ SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA,
+ s.CloneAndPrepend("invalid table name"));
+ return s;
+ }
+
std::lock_guard<LockType> catalog_lock(lock_);
TRACE("Acquired catalog manager lock");
http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index d454ed1..80b4ecb 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -581,28 +581,13 @@ TEST_F(MasterTest, TestCreateTableCheckRangeInvariants) {
TEST_F(MasterTest, TestCreateTableInvalidKeyType) {
const char *kTableName = "testtb";
- {
- const Schema kTableSchema({ ColumnSchema("key", BOOL) }, 1);
- Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {});
- ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
- ASSERT_STR_CONTAINS(s.ToString(),
- "Key column may not have type of BOOL, FLOAT, or DOUBLE");
- }
-
- {
- const Schema kTableSchema({ ColumnSchema("key", FLOAT) }, 1);
- Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {});
- ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
- ASSERT_STR_CONTAINS(s.ToString(),
- "Key column may not have type of BOOL, FLOAT, or DOUBLE");
- }
-
- {
- const Schema kTableSchema({ ColumnSchema("key", DOUBLE) }, 1);
+ const DataType types[] = { BOOL, FLOAT, DOUBLE };
+ for (DataType type : types) {
+ const Schema kTableSchema({ ColumnSchema("key", type) }, 1);
Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {});
ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
ASSERT_STR_CONTAINS(s.ToString(),
- "Key column may not have type of BOOL, FLOAT, or DOUBLE");
+ "key column may not have type of BOOL, FLOAT, or DOUBLE");
}
}