You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/12/01 18:51:10 UTC

kudu git commit: KUDU-1775 (part 2). Enforce a max number of columns and reasonable identifier lengths

Repository: kudu
Updated Branches:
  refs/heads/master b0209c16d -> b30d4fc15


KUDU-1775 (part 2). Enforce a max number of columns and reasonable identifier lengths

* Disallows unreasonable identifiers such as empty strings or strings
  longer than a configurable maximum size. I set the default max size to
  256. MySQL[1] and PostgreSQL[2] seem to limit identifiers to 64
  characters, and Oracle[3] seems to limit to 30. So 256 seemed like it
  should be plenty. The flag is marked as unsafe but if anyone has a
  compatibility issue on upgrade, the limit can be raised.

* Enforces a maximum number of columns, set here to 300 as a default.
  Really wide tables are an anti-pattern, and although some initial
  testing shows that we won't completely fail until a few thousand
  columns, we probably want to put a guard rail up well before the
  failure point.

  Again, if users are depending on a higher number, this can be worked
  around by raising the default through an unsafe flag.

[1] http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
[2] https://www.postgresql.org/docs/9.1/static/sql-syntax-lexical.html
[3] http://stackoverflow.com/questions/18248318/change-table-column-index-names-size-in-oracle-11g-or-12c

Change-Id: I9cab30dd1ea78a160403c79442b08739ddc92f12
Reviewed-on: http://gerrit.cloudera.org:8080/5290
Reviewed-by: Todd Lipcon <to...@apache.org>
Reviewed-by: Dan Burkert <da...@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b30d4fc1
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b30d4fc1
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b30d4fc1

Branch: refs/heads/master
Commit: b30d4fc154151aaa5583dbbd8ef83ac044236ee9
Parents: b0209c1
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Nov 30 15:09:01 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Dec 1 18:50:14 2016 +0000

----------------------------------------------------------------------
 java/kudu-client/pom.xml                        |  10 +-
 .../org/apache/kudu/client/TestKuduClient.java  |  27 ++++
 java/pom.xml                                    |   3 +-
 src/kudu/client/client-test.cc                  |  89 +++++++++++++
 src/kudu/master/catalog_manager.cc              | 132 +++++++++++++------
 src/kudu/master/master-test.cc                  |  23 +---
 6 files changed, 225 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/kudu-client/pom.xml
----------------------------------------------------------------------
diff --git a/java/kudu-client/pom.xml b/java/kudu-client/pom.xml
index cb5b55f..81866d9 100644
--- a/java/kudu-client/pom.xml
+++ b/java/kudu-client/pom.xml
@@ -64,8 +64,14 @@
         </dependency>
         <dependency>
             <groupId>org.mockito</groupId>
-            <artifactId>mockito-all</artifactId>
-            <version>${mockito-all.version}</version>
+            <artifactId>mockito-core</artifactId>
+            <version>${mockito-core.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.hamcrest</groupId>
+            <artifactId>hamcrest-core</artifactId>
+            <version>${hamcrest-core.version}</version>
             <scope>test</scope>
         </dependency>
         <dependency>

http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
index aeba4bb..64951b2 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
@@ -24,8 +24,10 @@ import static org.apache.kudu.client.RowResult.timestampToString;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.junit.matchers.JUnitMatchers.containsString;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -116,6 +118,31 @@ public class TestKuduClient extends BaseKuduTest {
                  newSchema.getColumn("column3_s").getCompressionAlgorithm());
   }
 
+
+  /**
+   * Test creating a table with various invalid schema cases.
+   */
+  @Test(timeout = 100000)
+  public void testCreateTableTooManyColumns() throws Exception {
+    List<ColumnSchema> cols = new ArrayList<>();
+    cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING)
+             .key(true)
+             .build());
+    for (int i = 0; i < 1000; i++) {
+      // not null with default
+      cols.add(new ColumnSchema.ColumnSchemaBuilder("c" + i, Type.STRING)
+               .build());
+    }
+    Schema schema = new Schema(cols);
+    try {
+      syncClient.createTable(tableName, schema, getBasicCreateTableOptions());
+    } catch (NonRecoverableException nre) {
+      assertThat(nre.toString(), containsString(
+          "number of columns 1001 is greater than the permitted maximum"));
+    }
+  }
+
+
   /**
    * Test creating a table with columns with different combinations of NOT NULL and
    * default values, inserting rows, and checking the results are as expected.

http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index c9fb510..6575ef6 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -62,9 +62,10 @@
         <guava.version>18.0</guava.version>
         <hadoop.version>2.7.2</hadoop.version>
         <jsr305.version>3.0.1</jsr305.version>
+        <hamcrest-core.version>1.3</hamcrest-core.version>
         <junit.version>4.11</junit.version>
         <log4j.version>1.2.17</log4j.version>
-        <mockito-all.version>1.9.0</mockito-all.version>
+        <mockito-core.version>1.9.0</mockito-core.version>
         <murmur.version>1.0.0</murmur.version>
         <netty.version>3.10.5.Final</netty.version>
         <protobuf.version>2.6.1</protobuf.version>

http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 77aa1a1..3c4f962 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -3109,6 +3109,8 @@ TEST_F(ClientTest, TestWriteWithBadSchema) {
 }
 
 TEST_F(ClientTest, TestBasicAlterOperations) {
+  const vector<string> kBadNames = {"", string(1000, 'x')};
+
   // test that having no steps throws an error
   {
     gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
@@ -3154,6 +3156,38 @@ TEST_F(ClientTest, TestBasicAlterOperations) {
     ASSERT_STR_CONTAINS(s.ToString(), "The column already exists: string_val");
   }
 
+  // Test that renaming a column to an invalid name throws an error.
+  for (const string& bad_name : kBadNames) {
+    gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
+    table_alterer->AlterColumn("int_val")->RenameTo(bad_name);
+    Status s = table_alterer->Alter();
+    ASSERT_TRUE(s.IsInvalidArgument());
+    ASSERT_STR_CONTAINS(s.ToString(), "invalid column name");
+  }
+
+  // Test that renaming a table to an invalid name throws an error.
+  for (const string& bad_name : kBadNames) {
+    gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
+    table_alterer->RenameTo(bad_name);
+    Status s = table_alterer->Alter();
+    ASSERT_TRUE(s.IsInvalidArgument());
+    ASSERT_STR_CONTAINS(s.ToString(), "invalid table name");
+  }
+
+  // Test trying to add columns to a table such that it exceeds the permitted
+  // maximum.
+  {
+    gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(kTableName));
+    for (int i = 0; i < 1000; i++) {
+      table_alterer->AddColumn(Substitute("c$0", i))->Type(KuduColumnSchema::INT32);
+    }
+    Status s = table_alterer->Alter();
+    ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+    ASSERT_STR_CONTAINS(s.ToString(),
+                        "number of columns 1004 is greater than the "
+                        "permitted maximum 300");
+  }
+
   // Need a tablet peer for the next set of tests.
   string tablet_id = GetFirstTabletId(client_table_.get());
   scoped_refptr<TabletPeer> tablet_peer;
@@ -3800,6 +3834,61 @@ TEST_F(ClientTest, TestCreateTableWithInvalidEncodings) {
                       "DICT_ENCODING not supported for type INT32");
 }
 
+TEST_F(ClientTest, TestCreateTableWithTooManyColumns) {
+  unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+  KuduSchema schema;
+  KuduSchemaBuilder schema_builder;
+  schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+  for (int i = 0; i < 1000; i++) {
+    schema_builder.AddColumn(Substitute("c$0", i))
+        ->Type(KuduColumnSchema::INT32)->NotNull();
+  }
+  ASSERT_OK(schema_builder.Build(&schema));
+  Status s = table_creator->table_name("foobar")
+      .schema(&schema)
+      .set_range_partition_columns({ "key" })
+      .Create();
+  ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+  ASSERT_STR_CONTAINS(s.ToString(),
+                      "number of columns 1001 is greater than the "
+                      "permitted maximum 300");
+}
+
+TEST_F(ClientTest, TestCreateTableWithTooLongTableName) {
+  const string kLongName(1000, 'x');
+  unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+  KuduSchema schema;
+  KuduSchemaBuilder schema_builder;
+  schema_builder.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+  ASSERT_OK(schema_builder.Build(&schema));
+  Status s = table_creator->table_name(kLongName)
+      .schema(&schema)
+      .set_range_partition_columns({ "key" })
+      .Create();
+  ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+  ASSERT_STR_MATCHES(s.ToString(),
+                     "invalid table name: identifier 'xxx*' "
+                     "longer than maximum permitted length 256");
+}
+
+TEST_F(ClientTest, TestCreateTableWithTooLongColumnName) {
+  const string kLongName(1000, 'x');
+  unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+  KuduSchema schema;
+  KuduSchemaBuilder schema_builder;
+  schema_builder.AddColumn(kLongName)->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+  ASSERT_OK(schema_builder.Build(&schema));
+  Status s = table_creator->table_name("foobar")
+      .schema(&schema)
+      .set_range_partition_columns({ kLongName })
+      .Create();
+  ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+  ASSERT_STR_MATCHES(s.ToString(),
+                     "invalid column name: identifier 'xxx*' "
+                     "longer than maximum permitted length 256");
+}
+
+
 // Check the behavior of the latest observed timestamp when performing
 // write and read operations.
 TEST_F(ClientTest, TestLatestObservedTimestamp) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index d5af69c..33fcc92 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -121,6 +121,20 @@ DEFINE_int32(max_num_replicas, 7,
 // Tag as unsafe since we have done very limited testing of higher than 5 replicas.
 TAG_FLAG(max_num_replicas, unsafe);
 
+DEFINE_int32(max_num_columns, 300,
+             "Maximum number of columns that may be in a table.");
+// Tag as unsafe since we have done very limited testing of higher than 300 columns.
+TAG_FLAG(max_num_columns, unsafe);
+
+
+DEFINE_int32(max_identifier_length, 256,
+             "Maximum length of the name of a column or table.");
+// Tag as unsafe because we end up writing schemas in every WAL entry, etc,
+// and having very long column names would enter untested territory and affect
+// performance.
+TAG_FLAG(max_identifier_length, unsafe);
+
+
 DEFINE_bool(allow_unsafe_replication_factor, false,
             "Allow creating tables with even replication factor.");
 TAG_FLAG(allow_unsafe_replication_factor, unsafe);
@@ -807,6 +821,65 @@ Status CatalogManager::CheckOnline() const {
   return Status::OK();
 }
 
+namespace {
+
+// Validate a table or column name to ensure that it is a valid identifier.
+Status ValidateIdentifier(const string& id) {
+  if (id.empty()) {
+    return Status::InvalidArgument("empty string not a valid identifier");
+  }
+
+  if (id.length() > FLAGS_max_identifier_length) {
+    return Status::InvalidArgument(Substitute(
+        "identifier '$0' longer than maximum permitted length $1",
+        id, FLAGS_max_identifier_length));
+  }
+
+  return Status::OK();
+}
+
+// Validate the client-provided schema and name.
+Status ValidateClientSchema(const boost::optional<string>& name,
+                            const Schema& schema) {
+  if (name != boost::none) {
+    RETURN_NOT_OK_PREPEND(ValidateIdentifier(name.get()), "invalid table name");
+  }
+  for (int i = 0; i < schema.num_columns(); i++) {
+    RETURN_NOT_OK_PREPEND(ValidateIdentifier(schema.column(i).name()),
+                          "invalid column name");
+  }
+  if (schema.num_key_columns() <= 0) {
+    return Status::InvalidArgument("must specify at least one key column");
+  }
+  if (schema.num_columns() > FLAGS_max_num_columns) {
+    return Status::InvalidArgument(Substitute(
+        "number of columns $0 is greater than the permitted maximum $1",
+        schema.num_columns(), FLAGS_max_num_columns));
+  }
+  for (int i = 0; i < schema.num_key_columns(); i++) {
+    if (!IsTypeAllowableInKey(schema.column(i).type_info())) {
+      return Status::InvalidArgument(
+          "key column may not have type of BOOL, FLOAT, or DOUBLE");
+    }
+  }
+
+  // Check that the encodings are valid for the specified types.
+  for (int i = 0; i < schema.num_columns(); i++) {
+    const auto& col = schema.column(i);
+    const TypeEncodingInfo *dummy;
+    Status s = TypeEncodingInfo::Get(col.type_info(),
+                                     col.attributes().encoding,
+                                     &dummy);
+    if (!s.ok()) {
+      return s.CloneAndPrepend(
+          Substitute("invalid encoding for column '$0'", col.name()));
+    }
+  }
+  return Status::OK();
+}
+
+} // anonymous namespace
+
 // Create a new table.
 // See README file in this directory for a description of the design.
 Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
@@ -842,35 +915,13 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   // a. Validate the user request.
   Schema client_schema;
   RETURN_NOT_OK(SchemaFromPB(req.schema(), &client_schema));
-  if (client_schema.has_column_ids()) {
-    return SetError(MasterErrorPB::INVALID_SCHEMA,
-                    Status::InvalidArgument("User requests should not have Column IDs"));
+  s = ValidateClientSchema(req.name(), client_schema);
+  if (s.ok() && client_schema.has_column_ids()) {
+    s = Status::InvalidArgument("User requests should not have Column IDs");
   }
-  if (PREDICT_FALSE(client_schema.num_key_columns() <= 0)) {
-    return SetError(MasterErrorPB::INVALID_SCHEMA,
-                    Status::InvalidArgument("Must specify at least one key column"));
-  }
-  for (int i = 0; i < client_schema.num_key_columns(); i++) {
-    if (!IsTypeAllowableInKey(client_schema.column(i).type_info())) {
-      return SetError(MasterErrorPB::INVALID_SCHEMA,
-                      Status::InvalidArgument(
-                          "Key column may not have type of BOOL, FLOAT, or DOUBLE"));
-    }
-  }
-  // Check that the encodings are valid for the specified types.
-  for (int i = 0; i < client_schema.num_columns(); i++) {
-    const auto& col = client_schema.column(i);
-    const TypeEncodingInfo *dummy;
-    Status s = TypeEncodingInfo::Get(col.type_info(),
-                                     col.attributes().encoding,
-                                     &dummy);
-    if (!s.ok()) {
-      return SetError(MasterErrorPB::INVALID_SCHEMA,
-                      s.CloneAndPrepend(
-                          Substitute("invalid encoding for column '$0'", col.name())));
-    }
+  if (!s.ok()) {
+    return SetError(MasterErrorPB::INVALID_SCHEMA, s);
   }
-
   Schema schema = client_schema.CopyWithColumnIds();
 
   // If the client did not set a partition schema in the create table request,
@@ -1262,15 +1313,8 @@ Status CatalogManager::ApplyAlterSchemaSteps(const SysTablesEntryPB& current_pb,
         }
         RETURN_NOT_OK(ProcessColumnPBDefaults(&new_col_pb));
 
-        // Verify that encoding is appropriate for the new column's
-        // type
+        // Can't accept a NOT NULL column without a default.
         ColumnSchema new_col = ColumnSchemaFromPB(new_col_pb);
-        const TypeEncodingInfo *dummy;
-        RETURN_NOT_OK(TypeEncodingInfo::Get(new_col.type_info(),
-                                            new_col.attributes().encoding,
-                                            &dummy));
-
-        // can't accept a NOT NULL column without a default
         if (!new_col.is_nullable() && !new_col.has_read_default()) {
           return Status::InvalidArgument(
               Substitute("column `$0`: NOT NULL columns must have a default", new_col.name()));
@@ -1548,7 +1592,7 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req,
   string table_name = l.data().name();
   *resp->mutable_table_id() = table->id();
 
-  // 4. Calculate new schema for the on-disk state, not persisted yet.
+  // 4. Calculate and validate new schema for the on-disk state, not persisted yet.
   Schema new_schema;
   ColumnId next_col_id = ColumnId(l.data().pb.next_column_id());
   if (!alter_schema_steps.empty()) {
@@ -1561,10 +1605,24 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req,
     DCHECK_NE(next_col_id, 0);
     DCHECK_EQ(new_schema.find_column_by_id(next_col_id),
               static_cast<int>(Schema::kColumnNotFound));
+
+    // Just validate the schema, not the name (validated below).
+    s = ValidateClientSchema(boost::none, new_schema);
+    if (!s.ok()) {
+      SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA, s);
+      return s;
+    }
   }
 
-  // 5. Try to acquire the new table name.
+  // 5. Validate and try to acquire the new table name.
   if (req->has_new_table_name()) {
+    Status s = ValidateIdentifier(req->new_table_name());
+    if (!s.ok()) {
+      SetupError(resp->mutable_error(), MasterErrorPB::INVALID_SCHEMA,
+                 s.CloneAndPrepend("invalid table name"));
+      return s;
+    }
+
     std::lock_guard<LockType> catalog_lock(lock_);
     TRACE("Acquired catalog manager lock");
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b30d4fc1/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index d454ed1..80b4ecb 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -581,28 +581,13 @@ TEST_F(MasterTest, TestCreateTableCheckRangeInvariants) {
 TEST_F(MasterTest, TestCreateTableInvalidKeyType) {
   const char *kTableName = "testtb";
 
-  {
-    const Schema kTableSchema({ ColumnSchema("key", BOOL) }, 1);
-    Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {});
-    ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
-    ASSERT_STR_CONTAINS(s.ToString(),
-        "Key column may not have type of BOOL, FLOAT, or DOUBLE");
-  }
-
-  {
-    const Schema kTableSchema({ ColumnSchema("key", FLOAT) }, 1);
-    Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {});
-    ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
-    ASSERT_STR_CONTAINS(s.ToString(),
-        "Key column may not have type of BOOL, FLOAT, or DOUBLE");
-  }
-
-  {
-    const Schema kTableSchema({ ColumnSchema("key", DOUBLE) }, 1);
+  const DataType types[] = { BOOL, FLOAT, DOUBLE };
+  for (DataType type : types) {
+    const Schema kTableSchema({ ColumnSchema("key", type) }, 1);
     Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>(), {});
     ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
     ASSERT_STR_CONTAINS(s.ToString(),
-        "Key column may not have type of BOOL, FLOAT, or DOUBLE");
+        "key column may not have type of BOOL, FLOAT, or DOUBLE");
   }
 }