You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/02/22 07:39:02 UTC

[impala] 02/04: IMPALA-4052: CREATE TABLE LIKE for Kudu tables

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 89cc20717eb0f054db59b4de06f7f01279eeb252
Author: gaoxq <ga...@gmail.com>
AuthorDate: Wed Jul 13 20:02:29 2022 +0800

    IMPALA-4052: CREATE TABLE LIKE for Kudu tables
    
    This commit implements cloning between Kudu tables, including cloning the
    schema and hash partitions. There is one limitation: cloning Kudu tables
    with range partitions is not supported yet; that work is tracked by
    IMPALA-11912.
    
    Cloning Kudu tables from other types of tables is not implemented,
    because the table creation statements are different.
    
    Testing:
     - e2e tests
     - AnalyzeDDLTest tests
    
    Change-Id: Ia3d276a6465301dbcfed17bb713aca06367d9a42
    Reviewed-on: http://gerrit.cloudera.org:8080/18729
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/analysis/CreateTableLikeStmt.java       |  38 +++++--
 .../apache/impala/service/CatalogOpExecutor.java   |  54 +++++++++-
 .../main/java/org/apache/impala/util/KuduUtil.java |   5 +
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |  22 +++-
 .../functional/functional_schema_template.sql      |   2 +-
 .../QueryTest/kudu_create_table_like_table.test    | 115 +++++++++++++++++++++
 tests/custom_cluster/test_kudu.py                  |   6 ++
 tests/query_test/test_kudu.py                      |   7 ++
 8 files changed, 232 insertions(+), 17 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java
index 727e3c3d0..0b66bd1a6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java
@@ -166,21 +166,13 @@ public class CreateTableLikeStmt extends StatementBase {
   public void analyze(Analyzer analyzer) throws AnalysisException {
     Preconditions.checkState(tableName_ != null && !tableName_.isEmpty());
     Preconditions.checkState(srcTableName_ != null && !srcTableName_.isEmpty());
-    // We currently don't support creating a Kudu table using a CREATE TABLE LIKE
-    // statement (see IMPALA-4052).
-    if (fileFormat_ == THdfsFileFormat.KUDU) {
-      throw new AnalysisException("CREATE TABLE LIKE is not supported for Kudu tables");
-    }
 
     // Make sure the source table exists and the user has permission to access it.
     FeTable srcTable = analyzer.getTable(srcTableName_, Privilege.VIEW_METADATA);
 
     analyzer.ensureTableNotBucketed(srcTable);
 
-    if (KuduTable.isKuduTable(srcTable.getMetaStoreTable())) {
-      throw new AnalysisException("Cloning a Kudu table using CREATE TABLE LIKE is " +
-          "not supported.");
-    }
+    validateCreateKuduTableParams(srcTable);
 
     // Only clone between Iceberg tables because the Data Types of Iceberg and Impala
     // do not correspond one by one, the transformation logic is in
@@ -215,4 +207,32 @@ public class CreateTableLikeStmt extends StatementBase {
       TableDef.analyzeSortColumns(sortColumns_, srcTable, sortingOrder_);
     }
   }
+
+  /**
+   * Rejects CREATE TABLE LIKE combinations that are unsupported for Kudu: cloning
+   * between Kudu and non-Kudu tables, SORT BY, and range-partitioned Kudu sources.
+   */
+  private void validateCreateKuduTableParams(FeTable srcTable) throws AnalysisException {
+    boolean srcIsKudu = KuduTable.isKuduTable(srcTable.getMetaStoreTable());
+    // Only clone between Kudu tables because the table creation statements are different.
+    if (fileFormat_ != null && (fileFormat_ == THdfsFileFormat.KUDU) != srcIsKudu) {
+      throw new AnalysisException(String.format(
+          "%s cannot be cloned into a %s table: CREATE TABLE LIKE is not supported "
+              + "between Kudu tables and non-Kudu tables.",
+          srcTable.getFullName(), fileFormat_.toString()));
+    }
+    if (sortColumns_ != null && srcIsKudu) {
+      throw new AnalysisException(srcTable.getFullName()
+          + " cannot be cloned because SORT BY is not supported for Kudu tables.");
+    }
+    // Check against FeKuduTable so every frontend Kudu table implementation is covered.
+    if (!(srcTable instanceof org.apache.impala.catalog.FeKuduTable)) return;
+    // TODO: IMPALA-11912: Add support for cloning a Kudu table with range partitions
+    for (KuduPartitionParam partitionParam :
+        ((org.apache.impala.catalog.FeKuduTable) srcTable).getPartitionBy()) {
+      if (partitionParam.getType() != KuduPartitionParam.Type.RANGE) continue;
+      throw new AnalysisException("CREATE TABLE LIKE is not supported for Kudu tables "
+          + "having range partitions.");
+    }
+  }
 }
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 21c2ac28f..a99f8a00e 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -86,6 +86,7 @@ import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.impala.analysis.AlterTableSortByStmt;
 import org.apache.impala.analysis.FunctionName;
+import org.apache.impala.analysis.KuduPartitionParam;
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.authorization.AuthorizationConfig;
@@ -115,6 +116,7 @@ import org.apache.impala.catalog.HdfsPartition;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.HiveStorageDescriptorFactory;
 import org.apache.impala.catalog.IncompleteTable;
+import org.apache.impala.catalog.KuduColumn;
 import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient;
 import org.apache.impala.catalog.PartitionNotFoundException;
@@ -212,6 +214,7 @@ import org.apache.impala.thrift.THdfsFileFormat;
 import org.apache.impala.thrift.TIcebergCatalog;
 import org.apache.impala.thrift.TImpalaTableType;
 import org.apache.impala.thrift.TIcebergPartitionSpec;
+import org.apache.impala.thrift.TKuduPartitionParam;
 import org.apache.impala.thrift.TOwnerType;
 import org.apache.impala.thrift.TPartitionDef;
 import org.apache.impala.thrift.TPartitionKeyValue;
@@ -3772,7 +3775,6 @@ public class CatalogOpExecutor {
   private void createTableLike(TCreateTableLikeParams params, TDdlExecResponse response,
       boolean syncDdl, boolean wantMinimalResult) throws ImpalaException {
     Preconditions.checkNotNull(params);
-
     THdfsFileFormat fileFormat =
         params.isSetFile_format() ? params.getFile_format() : null;
     String comment = params.isSetComment() ? params.getComment() : null;
@@ -3819,8 +3821,6 @@ public class CatalogOpExecutor {
         "Load source for CREATE TABLE LIKE");
     org.apache.hadoop.hive.metastore.api.Table tbl =
         srcTable.getMetaStoreTable().deepCopy();
-    Preconditions.checkState(!KuduTable.isKuduTable(tbl),
-        "CREATE TABLE LIKE is not supported for Kudu tables.");
     tbl.setDbName(tblName.getDb());
     tbl.setTableName(tblName.getTbl());
     tbl.setOwner(params.getOwner());
@@ -3908,6 +3908,10 @@ public class CatalogOpExecutor {
           .toThrift();
       createIcebergTable(tbl, wantMinimalResult, response, params.if_not_exists, columns,
           partitionSpec, tableProperties, params.getComment());
+    } else if (srcTable instanceof KuduTable && KuduTable.isKuduTable(tbl)) {
+      TCreateTableParams createTableParams =
+          extractKuduCreateTableParams(params, tblName, (KuduTable) srcTable, tbl);
+      createKuduTable(tbl, createTableParams, wantMinimalResult, response);
     } else {
       MetastoreShim.setTableLocation(catalog_.getDb(tbl.getDbName()), tbl);
       createTable(tbl, params.if_not_exists, null, params.server_name, null, null,
@@ -3915,6 +3919,50 @@ public class CatalogOpExecutor {
     }
   }
 
+  /**
+   * Derives the TCreateTableParams used to create a clone of the Kudu table 'kuduTable'
+   * (columns, primary key and partitioning) and rewrites the Kudu properties of 'tbl'
+   * so that they refer to the new table 'tblName' instead of the source table.
+   */
+  private TCreateTableParams extractKuduCreateTableParams(TCreateTableLikeParams params,
+      TableName tblName, KuduTable kuduTable,
+      org.apache.hadoop.hive.metastore.api.Table tbl) throws ImpalaRuntimeException {
+    TCreateTableParams result = new TCreateTableParams();
+    result.if_not_exists = params.if_not_exists;
+    result.setComment(params.getComment());
+    // The auto-incrementing column is created by the Kudu engine itself, so it is left
+    // out of both the cloned column list and the cloned primary key.
+    List<TColumn> clonedColumns = new ArrayList<>();
+    for (Column column : kuduTable.getColumns()) {
+      if (!((KuduColumn) column).isAutoIncrementing()) {
+        clonedColumns.add(column.toThrift());
+      }
+    }
+    result.setColumns(clonedColumns);
+    List<String> keyColumnNames = new ArrayList<>(kuduTable.getPrimaryKeyColumnNames());
+    if (kuduTable.hasAutoIncrementingColumn()) {
+      keyColumnNames.remove(KuduUtil.getAutoIncrementingColumnName());
+    }
+    result.setPrimary_key_column_names(keyColumnNames);
+
+    List<TKuduPartitionParam> clonedPartitioning = new ArrayList<>();
+    for (KuduPartitionParam partitionParam : kuduTable.getPartitionBy()) {
+      clonedPartitioning.add(partitionParam.toThrift());
+    }
+    result.setPartition_by(clonedPartitioning);
+
+    // Drop the source table's Kudu identity and point the clone at its own Kudu table.
+    Map<String, String> msTblProps = tbl.getParameters();
+    msTblProps.remove(KuduTable.KEY_TABLE_NAME);
+    msTblProps.remove(KuduTable.KEY_TABLE_ID);
+    boolean hmsIntegrationEnabled =
+        KuduTable.isHMSIntegrationEnabled(msTblProps.get(KuduTable.KEY_MASTER_HOSTS));
+    msTblProps.put(KuduTable.KEY_TABLE_NAME, KuduUtil.getDefaultKuduTableName(
+        tblName.getDb(), tblName.getTbl(), hmsIntegrationEnabled));
+    tbl.setParameters(msTblProps);
+    return result;
+  }
+
   private static void setDefaultTableCapabilities(
       org.apache.hadoop.hive.metastore.api.Table tbl) {
     if (MetastoreShim.getMajorVersion() > 2) {
diff --git a/fe/src/main/java/org/apache/impala/util/KuduUtil.java b/fe/src/main/java/org/apache/impala/util/KuduUtil.java
index c8da69055..ece4f3726 100644
--- a/fe/src/main/java/org/apache/impala/util/KuduUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/KuduUtil.java
@@ -493,4 +493,9 @@ public class KuduUtil {
     sb.append("PRIMARY KEY");
     return sb.toString();
   }
+
+  /** Returns the auto-incrementing column name of Kudu tables, as given by Schema. */
+  public static String getAutoIncrementingColumnName() {
+    return Schema.getAutoIncrementingColumnName();
+  }
 }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 30f541c03..5986fb1c8 100755
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2506,11 +2506,25 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create table functional.baz like functional.alltypes location '  '",
         "URI path cannot be empty.");
 
-    // CREATE TABLE LIKE is not currently supported for Kudu tables (see IMPALA-4052)
+    // CREATE TABLE LIKE only supports cloning between Kudu tables (see IMPALA-4052)
     AnalysisError("create table kudu_tbl like functional.alltypestiny stored as kudu",
-        "CREATE TABLE LIKE is not supported for Kudu tables");
-    AnalysisError("create table tbl like functional_kudu.dimtbl", "Cloning a Kudu " +
-        "table using CREATE TABLE LIKE is not supported.");
+        "functional.alltypestiny cannot be cloned into a KUDU table: " +
+        "CREATE TABLE LIKE is not supported between Kudu tables and non-Kudu tables.");
+    AnalysisError(
+        "create table kudu_to_parquet like functional_kudu.alltypes stored as parquet",
+        "functional_kudu.alltypes cannot be cloned into a PARQUET table: CREATE "
+            + "TABLE LIKE is not supported between Kudu tables and non-Kudu tables.");
+    AnalysisError("create table kudu_decimal_tbl_clone sort by (d1, d2) like "
+            + "functional_kudu.decimal_tbl",
+        "functional_kudu.decimal_tbl cannot be cloned "
+            + "because SORT BY is not supported for Kudu tables.");
+    AnalysisError(
+        "create table alltypestiny_clone sort by (d1, d2) like functional.alltypestiny " +
+        "stored as kudu", "functional.alltypestiny cannot be cloned into a KUDU table: " +
+        "CREATE TABLE LIKE is not supported between Kudu tables and non-Kudu tables.");
+    // Kudu tables with range partitions cannot be cloned
+    AnalysisError("create table kudu_jointbl_clone like functional_kudu.jointbl",
+        "CREATE TABLE LIKE is not supported for Kudu tables having range partitions.");
 
     // Test sort columns.
     AnalyzesOk("create table tbl sort by (int_col,id) like functional.alltypes");
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 8ac045275..27b7f3cee 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -263,7 +263,7 @@ CREATE TABLE {db_name}{db_suffix}.{table_name} (
   year INT,
   month INT
 )
-PARTITION BY HASH (id) PARTITIONS 3 STORED AS KUDU;
+PARTITION BY HASH (id) PARTITIONS 3 COMMENT 'Tiny table' STORED AS KUDU;
 ---- DEPENDENT_LOAD_KUDU
 INSERT INTO TABLE {db_name}{db_suffix}.{table_name}
 SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col,
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_create_table_like_table.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_create_table_like_table.test
new file mode 100644
index 000000000..96dd13a75
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_create_table_like_table.test
@@ -0,0 +1,115 @@
+====
+---- QUERY
+# CREATE TABLE LIKE on Kudu table
+create table kudu_alltypes_clone like functional_kudu.alltypes
+stored as kudu
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+# Make sure no data exists for this table
+select count(*) from kudu_alltypes_clone
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
+---- QUERY
+describe formatted kudu_alltypes_clone
+---- RESULTS: VERIFY_IS_SUBSET
+'# col_name            ','data_type           ','comment             '
+'','NULL','NULL'
+'id','int','NULL'
+'bool_col','boolean','NULL'
+'tinyint_col','tinyint','NULL'
+'smallint_col','smallint','NULL'
+'int_col','int','NULL'
+'bigint_col','bigint','NULL'
+'float_col','float','NULL'
+'double_col','double','NULL'
+'date_string_col','string','NULL'
+'string_col','string','NULL'
+'timestamp_col','timestamp','NULL'
+'year','int','NULL'
+'month','int','NULL'
+'','NULL','NULL'
+'# Detailed Table Information','NULL','NULL'
+'OwnerType:          ','USER                ','NULL'
+'LastAccessTime:     ','UNKNOWN             ','NULL'
+'Retention:          ','0                   ','NULL'
+'Table Parameters:','NULL','NULL'
+'','storage_handler     ','org.apache.hadoop.hive.kudu.KuduStorageHandler'
+'','NULL','NULL'
+'# Storage Information','NULL','NULL'
+'Compressed:         ','No                  ','NULL'
+'Num Buckets:        ','0                   ','NULL'
+'Bucket Columns:     ','[]                  ','NULL'
+'Sort Columns:       ','[]                  ','NULL'
+'','NULL','NULL'
+'# Constraints','NULL','NULL'
+---- TYPES
+string, string, string
+====
+---- QUERY
+# Should be able to insert into this table
+insert into kudu_alltypes_clone
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
+float_col, double_col, date_string_col, string_col, timestamp_col,
+year, month
+from functional.alltypes where year=2009 and month=4
+---- RESULTS
+: 300
+====
+---- QUERY
+# Make sure we can read the new data.
+select count(*) from kudu_alltypes_clone
+---- RESULTS
+300
+---- TYPES
+BIGINT
+====
+---- QUERY
+# create table like having comment.
+create table kudu_alltypestiny_clone like functional_kudu.alltypestiny comment 'Tiny clone table'
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe formatted kudu_alltypestiny_clone
+---- RESULTS: VERIFY_IS_SUBSET
+'# col_name            ','data_type           ','comment             '
+'','comment             ','Tiny clone table    '
+---- TYPES
+string, string, string
+====
+---- QUERY
+# No error is thrown when IF NOT EXISTS is specified and the table already exists.
+create table if not exists kudu_alltypes_clone like functional_kudu.alltypes
+---- RESULTS
+'Table already exists.'
+====
+---- QUERY
+# Create Kudu table with non unique primary key
+create table non_unique_key_create_tbl1 (id int non unique primary key, name string)
+partition by hash (id) partitions 3
+stored as kudu
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+# create table like on Kudu table with non unique primary key
+create table non_unique_key_create_tbl1_clone like non_unique_key_create_tbl1
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+describe non_unique_key_create_tbl1_clone
+---- LABELS
+NAME,TYPE,COMMENT,PRIMARY_KEY,KEY_UNIQUE,NULLABLE,DEFAULT_VALUE,ENCODING,COMPRESSION,BLOCK_SIZE
+---- RESULTS
+'id','int','','true','false','false','','AUTO_ENCODING','DEFAULT_COMPRESSION','0'
+'auto_incrementing_id','bigint','','true','false','false','','AUTO_ENCODING','DEFAULT_COMPRESSION','0'
+'name','string','','false','','true','','AUTO_ENCODING','DEFAULT_COMPRESSION','0'
+---- TYPES
+STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index d05517061..74155da95 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -336,6 +336,12 @@ class TestKuduHMSIntegration(CustomKuduTest):
   def test_kudu_alter_table(self, vector, unique_database):
     self.run_test_case('QueryTest/kudu_hms_alter', vector, use_db=unique_database)
 
+  @SkipIfKudu.no_hybrid_clock
+  def test_create_kudu_table_like(self, vector, unique_database):
+    """Runs the Kudu CREATE TABLE LIKE test cases as part of the Kudu HMS integration
+    suite (see QueryTest/kudu_create_table_like_table.test)."""
+    self.run_test_case('QueryTest/kudu_create_table_like_table', vector,
+        use_db=unique_database)
 
 class TestKuduTransactionBase(CustomClusterTestSuite):
   """
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index f9f728900..587431dd0 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -85,6 +85,13 @@ class TestKuduBasicDML(KuduTestSuite):
   def test_kudu_delete(self, vector, unique_database):
     self.run_test_case('QueryTest/kudu_delete', vector, use_db=unique_database)
 
+  @SkipIfKudu.no_hybrid_clock
+  def test_kudu_create_table_like_table(self, vector, unique_database):
+    """Runs the Kudu CREATE TABLE LIKE test cases against the 'unique_database'
+    fixture (see QueryTest/kudu_create_table_like_table.test)."""
+    self.run_test_case('QueryTest/kudu_create_table_like_table', vector,
+        use_db=unique_database)
+
 # TODO(IMPALA-8614): parameterize some tests to run with HMS integration enabled.
 class TestKuduOperations(KuduTestSuite):
   """