You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2018/07/11 23:02:08 UTC

kudu git commit: KUDU-2191: downcase/normalize table names during DDL

Repository: kudu
Updated Branches:
  refs/heads/master 4eefc8d4e -> cee17c03b


KUDU-2191: downcase/normalize table names during DDL

This is a followup to 7b048b8dbe which changed the catalog manager to be
case preserving, but insensitive on lookup when the HMS integration is
enabled. It turns out this was only possible because the HMS failed to
downcase/normalize table names in notification log events[1]. This is an
oversight, and probably could be considered a bug, and HMS developers
have suggested that Kudu should not rely on it.

After a lot of consideration I haven't been able to come up with a way
to keep the case preserving semantics without changes to the HMS APIs,
so instead this commit throws in the towel and adopts HMS-style case
normalization during CREATE TABLE and ALTER TABLE RENAME operations.

Existing tables with uppercase table names will not be altered
automatically (this is consistent with the current handling of non-ascii
chars in table names), so the upgrade CLI tool will be extended in a
follow up commit to handle this.

[1] In particular, renaming a table is problematic if the notification
log listener events doesn't preserve case due to how the catalog manager
/ notification log listener handles table renames.

Change-Id: Ie32a209d9d85851562691ddbc30f7dd02886bad7
Reviewed-on: http://gerrit.cloudera.org:8080/10903
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cee17c03
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cee17c03
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cee17c03

Branch: refs/heads/master
Commit: cee17c03bc30037bf2a7d97c6bb3aa39cec34a4c
Parents: 4eefc8d
Author: Dan Burkert <da...@apache.org>
Authored: Tue Jul 10 11:07:03 2018 -0700
Committer: Dan Burkert <da...@apache.org>
Committed: Wed Jul 11 23:01:51 2018 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/master_hms-itest.cc | 19 ++---
 src/kudu/master/catalog_manager.cc             | 91 +++++++++------------
 2 files changed, 46 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/cee17c03/src/kudu/integration-tests/master_hms-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_hms-itest.cc b/src/kudu/integration-tests/master_hms-itest.cc
index 4b639f5..2fd415c 100644
--- a/src/kudu/integration-tests/master_hms-itest.cc
+++ b/src/kudu/integration-tests/master_hms-itest.cc
@@ -499,23 +499,16 @@ TEST_F(MasterHmsTest, TestUppercaseIdentifiers) {
     ASSERT_EQ(name, table->name());
   }
 
-  // Listing tables shows the preserved case.
+  // Listing tables shows the normalized case.
   vector<string> tables;
   ASSERT_OK(client_->ListTables(&tables));
-  ASSERT_EQ(tables, vector<string>({ "default.MyTable" }));
+  ASSERT_EQ(tables, vector<string>({ "default.mytable" }));
 
   // Rename the table to the same normalized name, but with a different case.
   unique_ptr<KuduTableAlterer> table_alterer;
   table_alterer.reset(client_->NewTableAlterer("default.mytable"));
-  ASSERT_OK(table_alterer->RenameTo("DEFAULT.MYTABLE")->Alter());
-  NO_FATALS(CheckTable("default", "MyTable"));
-  NO_FATALS(CheckTable("default", "mytable"));
-  NO_FATALS(CheckTable("default", "MYTABLE"));
-
-  // The master should retain the new case.
-  tables.clear();
-  ASSERT_OK(client_->ListTables(&tables));
-  ASSERT_EQ(tables, vector<string>({ "DEFAULT.MYTABLE" }));
+  Status s = table_alterer->RenameTo("DEFAULT.MYTABLE")->Alter();
+  ASSERT_TRUE(s.IsAlreadyPresent()) << s.ToString();
 
   // Rename the table to something different.
   table_alterer.reset(client_->NewTableAlterer("DEFAULT.MYTABLE"));
@@ -530,10 +523,10 @@ TEST_F(MasterHmsTest, TestUppercaseIdentifiers) {
     NO_FATALS(CheckTable("default", "AbC"));
   });
 
-  // Listing tables shows the preserved case.
+  // Listing tables shows the normalized case.
   tables.clear();
   ASSERT_OK(client_->ListTables(&tables));
-  ASSERT_EQ(tables, vector<string>({ "default.AbC" }));
+  ASSERT_EQ(tables, vector<string>({ "default.abc" }));
 
   // Drop the table.
   ASSERT_OK(client_->DeleteTable("DEFAULT.abc"));

http://git-wip-us.apache.org/repos/asf/kudu/blob/cee17c03/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 8f58924..b10a4cc 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1363,10 +1363,9 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   // a. Validate the user request.
   Schema client_schema;
   RETURN_NOT_OK(SchemaFromPB(req.schema(), &client_schema));
-  const string& table_name = req.name();
-  string normalized_table_name = NormalizeTableName(table_name);
+  string normalized_table_name = NormalizeTableName(req.name());
 
-  RETURN_NOT_OK(SetupError(ValidateClientSchema(table_name, client_schema),
+  RETURN_NOT_OK(SetupError(ValidateClientSchema(normalized_table_name, client_schema),
                            resp, MasterErrorPB::INVALID_SCHEMA));
   if (client_schema.has_column_ids()) {
     return SetupError(Status::InvalidArgument("user requests should not have Column IDs"),
@@ -1493,7 +1492,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
         "tablet replica of the newly created table $0 in case of a replica "
         "failure: $1 tablet servers are needed, $2 are alive. "
         "Consider bringing up additional tablet server(s)$3",
-        table_name, num_ts_needed_for_rereplication, num_live_tservers,
+        normalized_table_name, num_ts_needed_for_rereplication, num_live_tservers,
         is_off_by_one ? " or running both the masters and all tablet servers"
                         " with --raft_prepare_replacement_before_eviction=false"
                         " flag (not recommended)." : ".");
@@ -1509,7 +1508,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
     table = FindPtrOrNull(normalized_table_names_map_, normalized_table_name);
     if (table != nullptr) {
       return SetupError(Status::AlreadyPresent(Substitute(
-              "table $0 already exists with id $1", table_name, table->id())),
+              "table $0 already exists with id $1", normalized_table_name, table->id())),
           resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
     }
 
@@ -1521,7 +1520,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
       // in the case of an error. Instead, we force the client to retry at a
       // later time.
       return SetupError(Status::ServiceUnavailable(Substitute(
-              "new table name $0 is already reserved", table_name)),
+              "new table name $0 is already reserved", normalized_table_name)),
           resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
     }
   }
@@ -1564,10 +1563,10 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   // It is critical that this step happen before writing the table to the sys catalog,
   // since this step validates that the table name is available in the HMS catalog.
   if (hms_catalog_) {
-    Status s = hms_catalog_->CreateTable(table->id(), req.name(), schema);
+    Status s = hms_catalog_->CreateTable(table->id(), normalized_table_name, schema);
     if (!s.ok()) {
       s = s.CloneAndPrepend(Substitute("an error occurred while creating table $0 in the HMS",
-                                       req.name()));
+                                       normalized_table_name));
       LOG(WARNING) << s.ToString();
       return SetupError(std::move(s), resp, MasterErrorPB::HIVE_METASTORE_ERROR);
     }
@@ -1577,7 +1576,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
       // TODO(dan): figure out how to test this.
       if (hms_catalog_) {
         TRACE("Rolling back HMS table creation");
-        WARN_NOT_OK(hms_catalog_->DropTable(table->id(), req.name()),
+        WARN_NOT_OK(hms_catalog_->DropTable(table->id(), normalized_table_name),
                     "an error occurred while attempting to delete orphaned HMS table entry");
       }
   });
@@ -1661,7 +1660,7 @@ scoped_refptr<TableInfo> CatalogManager::CreateTableInfo(const CreateTableReques
   table->mutable_metadata()->StartMutation();
   SysTablesEntryPB *metadata = &table->mutable_metadata()->mutable_dirty()->pb;
   metadata->set_state(SysTablesEntryPB::PREPARING);
-  metadata->set_name(req.name());
+  metadata->set_name(NormalizeTableName(req.name()));
   metadata->set_version(0);
   metadata->set_next_column_id(ColumnId(schema.max_col_id() + 1));
   metadata->set_num_replicas(req.num_replicas());
@@ -2163,15 +2162,16 @@ Status CatalogManager::AlterTableRpc(const AlterTableRequestPB& req,
     TableMetadataLock l;
     RETURN_NOT_OK(FindAndLockTable(req, resp, LockMode::READ, &table, &l));
     RETURN_NOT_OK(CheckIfTableDeletedOrNotRunning(&l, resp));
+    string normalized_new_table_name = NormalizeTableName(req.new_table_name());
 
     // The HMS allows renaming a table to the same name (ALTER TABLE t RENAME TO t),
     // however Kudu does not, so we must enforce this constraint ourselves before
     // altering the table in the HMS. The comparison is on the non-normalized
     // table names, since we want to allow changing the case of a table name.
-    if (l.data().name() == req.new_table_name()) {
+    if (l.data().name() == normalized_new_table_name) {
       return SetupError(
           Status::AlreadyPresent(Substitute("table $0 already exists with id $1",
-              req.new_table_name(), table->id())),
+              normalized_new_table_name, table->id())),
           resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
     }
 
@@ -2180,7 +2180,7 @@ Status CatalogManager::AlterTableRpc(const AlterTableRequestPB& req,
 
     // Rename the table in the HMS.
     RETURN_NOT_OK(SetupError(hms_catalog_->AlterTable(
-            table->id(), l.data().name(), req.new_table_name(),
+            table->id(), l.data().name(), normalized_new_table_name,
             schema),
         resp, MasterErrorPB::HIVE_METASTORE_ERROR));
 
@@ -2262,7 +2262,6 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB& req,
   }
 
   string table_name = l.data().name();
-  string normalized_table_name = NormalizeTableName(table_name);
   *resp->mutable_table_id() = table->id();
 
   // 3. Calculate and validate new schema for the on-disk state, not persisted yet.
@@ -2287,55 +2286,45 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB& req,
         resp, MasterErrorPB::INVALID_SCHEMA));
 
   // 4. Validate and try to acquire the new table name.
-  bool do_cleanup = false;
   string normalized_new_table_name = NormalizeTableName(req.new_table_name());
   if (req.has_new_table_name()) {
     RETURN_NOT_OK(SetupError(
-          ValidateIdentifier(req.new_table_name()).CloneAndPrepend("invalid table name"),
+          ValidateIdentifier(normalized_new_table_name).CloneAndPrepend("invalid table name"),
           resp, MasterErrorPB::INVALID_SCHEMA));
 
     // Validate the new table name.
-    //
-    // Special case: the new table name and existing table names are different,
-    // but map to the same normalized name. In this case we don't need to
-    // reserve the new table name, since it's already reserved by way of
-    // existing in the by-name map.
-    if (!(table_name != req.new_table_name() &&
-          normalized_table_name == normalized_new_table_name)) {
-      std::lock_guard<LockType> catalog_lock(lock_);
-      TRACE("Acquired catalog manager lock");
-
-      // Verify that the table does not exist.
-      scoped_refptr<TableInfo> other_table = FindPtrOrNull(normalized_table_names_map_,
-                                                           normalized_new_table_name);
-
-      if (other_table != nullptr) {
-        return SetupError(
-            Status::AlreadyPresent(Substitute("table $0 already exists with id $1",
-                req.new_table_name(), table->id())),
-            resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
-      }
+    std::lock_guard<LockType> catalog_lock(lock_);
+    TRACE("Acquired catalog manager lock");
 
-      // Reserve the new table name if possible.
-      if (!InsertIfNotPresent(&reserved_normalized_table_names_, normalized_new_table_name)) {
-        // ServiceUnavailable will cause the client to retry the create table
-        // request. We don't want to outright fail the request with
-        // 'AlreadyPresent', because a table name reservation can be rolled back
-        // in the case of an error. Instead, we force the client to retry at a
-        // later time.
-        return SetupError(Status::ServiceUnavailable(Substitute(
-                "table name $0 is already reserved", req.new_table_name())),
-            resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
-      }
-      do_cleanup = true;
+    // Verify that the table does not exist.
+    scoped_refptr<TableInfo> other_table = FindPtrOrNull(normalized_table_names_map_,
+                                                         normalized_new_table_name);
+
+    if (other_table != nullptr) {
+      return SetupError(
+          Status::AlreadyPresent(Substitute("table $0 already exists with id $1",
+              normalized_new_table_name, table->id())),
+          resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
+    }
+
+    // Reserve the new table name if possible.
+    if (!InsertIfNotPresent(&reserved_normalized_table_names_, normalized_new_table_name)) {
+      // ServiceUnavailable will cause the client to retry the create table
+      // request. We don't want to outright fail the request with
+      // 'AlreadyPresent', because a table name reservation can be rolled back
+      // in the case of an error. Instead, we force the client to retry at a
+      // later time.
+      return SetupError(Status::ServiceUnavailable(Substitute(
+              "table name $0 is already reserved", normalized_new_table_name)),
+          resp, MasterErrorPB::TABLE_ALREADY_PRESENT);
     }
 
-    l.mutable_data()->pb.set_name(req.new_table_name());
+    l.mutable_data()->pb.set_name(normalized_new_table_name);
   }
 
   // Ensure that we drop our reservation upon return.
   auto cleanup = MakeScopedCleanup([&] () {
-    if (do_cleanup) {
+    if (req.has_new_table_name()) {
       std::lock_guard<LockType> l(lock_);
       CHECK_EQ(1, reserved_normalized_table_names_.erase(normalized_new_table_name));
     }
@@ -2434,7 +2423,7 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB& req,
     // and tablets indices.
     std::lock_guard<LockType> lock(lock_);
     if (req.has_new_table_name()) {
-      if (normalized_table_names_map_.erase(normalized_table_name) != 1) {
+      if (normalized_table_names_map_.erase(table_name) != 1) {
         LOG(FATAL) << "Could not remove table " << table->ToString()
                    << " from map in response to AlterTable request: "
                    << SecureShortDebugString(req);