You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/06/13 06:37:20 UTC

[impala] 06/07: IMPALA-8629: (part 2) Adjust new KuduStorageHandler package

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ff4e0a3a459b233495e71f9bcb584ac200caf23a
Author: Grant Henke <gh...@cloudera.com>
AuthorDate: Thu Jun 6 12:30:39 2019 -0500

    IMPALA-8629: (part 2) Adjust new KuduStorageHandler package
    
    This patch changes the new KuduStorageHandler
    package from “org.apache.kudu.hive” to
    “org.apache.hadoop.hive.kudu”.
    
    This is done to ensure the stand-in storage handler
    can be a real storage handler when a Hive integration
    is added in the future. The “org.apache.hadoop.hive”
    package is the standard package all Hive storage
    handlers lives under.
    
    Additionally this patch updates the stand-in InputFormat,
    OutputFormat, and SerDe entries for Kudu. This allows a
    future Hive integration to read HMS tables/entries created
    by Impala.
    
    This patch also includes PlannerTest fixes to handle the
    ScanToken stringification changes in the newer version
    of Kudu.
    
    Change-Id: I4d0c505643247498383472704a37d27c9e1ce473
    Reviewed-on: http://gerrit.cloudera.org:8080/13541
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh                               |  4 ++--
 .../org/apache/impala/catalog/HdfsFileFormat.java  |  7 +++---
 .../java/org/apache/impala/catalog/KuduTable.java  |  9 +------
 .../apache/impala/analysis/AnalyzeKuduDDLTest.java | 20 ++++++++--------
 .../java/org/apache/impala/analysis/ToSqlTest.java |  5 ++--
 .../org/apache/impala/planner/PlannerTestBase.java |  7 ++++--
 .../queries/PlannerTest/kudu.test                  | 28 +++++++++++-----------
 tests/custom_cluster/test_kudu.py                  | 10 ++++----
 tests/query_test/test_kudu.py                      |  5 ++--
 9 files changed, 45 insertions(+), 50 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2386bbd..86d9c0d 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -162,7 +162,7 @@ fi
 : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
 export IMPALA_TOOLCHAIN_HOST
 export CDH_MAJOR_VERSION=6
-export CDH_BUILD_NUMBER=1137441
+export CDH_BUILD_NUMBER=1173663
 export CDP_BUILD_NUMBER=1153860
 export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
 export CDP_HADOOP_VERSION=3.1.1.7.0.0.0-107
@@ -666,7 +666,7 @@ if $USE_CDH_KUDU; then
   export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"1.10.0-cdh6.x-SNAPSHOT"}
   export IMPALA_KUDU_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
 else
-  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"84086fe"}
+  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"988296d"}
   export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION
 fi
 export IMPALA_KUDU_JAVA_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java b/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
index 25684cb..2cdbc30 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
@@ -67,9 +67,10 @@ public enum HdfsFileFormat {
       "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
       "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
       true, true, false),
-  KUDU("org.apache.kudu.mapreduce.KuduTableInputFormat",
-      "org.apache.kudu.mapreduce.KuduTableOutputFormat",
-      "", false, false, false);
+  KUDU("org.apache.hadoop.hive.kudu.KuduInputFormat",
+       "org.apache.hadoop.hive.kudu.KuduOutputFormat",
+       "org.apache.hadoop.hive.kudu.KuduSerDe",
+       false, false, false);
 
   private final String inputFormat_;
   private final String outputFormat_;
diff --git a/fe/src/main/java/org/apache/impala/catalog/KuduTable.java b/fe/src/main/java/org/apache/impala/catalog/KuduTable.java
index 1626236..329c5aa 100644
--- a/fe/src/main/java/org/apache/impala/catalog/KuduTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/KuduTable.java
@@ -85,12 +85,6 @@ public class KuduTable extends Table implements FeKuduTable {
   // Kudu specific value for the storage handler table property keyed by
   // KEY_STORAGE_HANDLER.
   public static final String KUDU_STORAGE_HANDLER =
-      "org.apache.kudu.hive.KuduStorageHandler";
-
-  // TODO(IMPALA-8629): Remove this after Kudu adjusts its StorageHandler logic.
-  // Once that is done KUDU_STORAGE_HANDLER will be
-  // "org.apache.hadoop.hive.kudu.KuduStorageHandler".
-  public static final String TEMP_KUDU_STORAGE_HANDLER =
       "org.apache.hadoop.hive.kudu.KuduStorageHandler";
 
   // Key to specify the number of tablet replicas.
@@ -146,8 +140,7 @@ public class KuduTable extends Table implements FeKuduTable {
 
   public static boolean isKuduStorageHandler(String handler) {
     return handler != null && (handler.equals(KUDU_LEGACY_STORAGE_HANDLER) ||
-                               handler.equals(KUDU_STORAGE_HANDLER) ||
-                               handler.equals(TEMP_KUDU_STORAGE_HANDLER));
+                               handler.equals(KUDU_STORAGE_HANDLER));
   }
 
   public static boolean isKuduTable(org.apache.hadoop.hive.metastore.api.Table msTbl) {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java
index 20e8f4e..6dfa163 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java
@@ -198,14 +198,13 @@ public class AnalyzeKuduDDLTest extends FrontendTestBase {
         "partition 30 < values) stored as kudu tblproperties" +
         "('kudu.master_addresses' = '%s')", kuduMasters));
     // Not using the STORED AS KUDU syntax to specify a Kudu table
-    // TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
-    // AnalysisError("create table tab (x int) tblproperties (" +
-    //     "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler')",
-    //     CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
+    AnalysisError("create table tab (x int) tblproperties (" +
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+        CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
     // Creating unpartitioned table results in a warning.
-    // AnalyzesOk("create table tab (x int primary key) stored as kudu " +
-    //     "tblproperties ('storage_handler'='org.apache.kudu.hive.KuduStorageHandler')",
-    //     "Unpartitioned Kudu tables are inefficient for large data sizes.");
+    AnalyzesOk("create table tab (x int primary key) stored as kudu tblproperties (" +
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+        "Unpartitioned Kudu tables are inefficient for large data sizes.");
     // Invalid value for number of replicas
     AnalysisError("create table t (x int primary key) stored as kudu tblproperties (" +
         "'kudu.num_tablet_replicas'='1.1')",
@@ -445,14 +444,15 @@ public class AnalyzeKuduDDLTest extends FrontendTestBase {
     // Use all allowed optional table props.
     AnalyzesOk(String.format("create external table t stored as kudu tblproperties (" +
         "'kudu.table_name'='tab', 'kudu.master_addresses' = '%s', " +
-        "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler')", kuduMasters));
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+        kuduMasters));
     // Kudu table should be specified using the STORED AS KUDU syntax.
     AnalysisError("create external table t tblproperties (" +
         "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'," +
         "'kudu.table_name'='t')",
         CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
     AnalysisError("create external table t tblproperties (" +
-        "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler'," +
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler'," +
         "'kudu.table_name'='t')",
         CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
     // Columns should not be specified in an external Kudu table
@@ -468,7 +468,7 @@ public class AnalyzeKuduDDLTest extends FrontendTestBase {
         "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'," +
         "'kudu.table_name'='t')", CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
     AnalysisError("create external table t (x int) stored as parquet tblproperties (" +
-        "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler'," +
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler'," +
         "'kudu.table_name'='t')", CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
     AnalysisError("create external table t stored as kudu tblproperties (" +
         "'storage_handler'='foo', 'kudu.table_name'='t')",
diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
index 78eb8e6..5e7c92b 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
@@ -338,7 +338,8 @@ public class ToSqlTest extends FrontendTestBase {
         String.format("CREATE TABLE default.p ( a BIGINT PRIMARY KEY, b TIMESTAMP " +
         "DEFAULT '1987-05-19' ) PARTITION BY HASH (a) PARTITIONS 3 " +
         "STORED AS KUDU TBLPROPERTIES ('kudu.master_addresses'='%s', " +
-        "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler')", kuduMasters),
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+        kuduMasters),
         true);
   }
 
@@ -372,7 +373,7 @@ public class ToSqlTest extends FrontendTestBase {
         String.format("CREATE TABLE default.p PRIMARY KEY (a, b) " +
         "PARTITION BY HASH (a) PARTITIONS 3, RANGE (b) (PARTITION VALUE = 1) " +
         "STORED AS KUDU TBLPROPERTIES ('kudu.master_addresses'='%s', " +
-        "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler') AS SELECT " +
+        "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler') AS SELECT " +
         "int_col a, bigint_col b FROM functional.alltypes", kuduMasters), true);
   }
 
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index 3adb01f..430c352 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -315,8 +315,11 @@ public class PlannerTestBase extends FrontendTestBase {
             Preconditions.checkNotNull(kuduClient_,
                 "Test should not be invoked on platforms that do not support Kudu.");
             try {
-              result.append(KuduScanToken.stringifySerializedToken(
-                  locations.scan_range.kudu_scan_token.array(), kuduClient_));
+              String token = KuduScanToken.stringifySerializedToken(
+                  locations.scan_range.kudu_scan_token.array(), kuduClient_);
+              // Don't match against the table id as its nondeterministic.
+              token = token.replaceAll(" table-id=.*?,", "");
+              result.append(token);
             } catch (IOException e) {
               throw new IllegalStateException("Unable to parse Kudu scan token", e);
             }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index d8b50dc..3258fae 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -6,9 +6,9 @@ PLAN-ROOT SINK
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -26,9 +26,9 @@ PLAN-ROOT SINK
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -147,7 +147,7 @@ PLAN-ROOT SINK
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -181,7 +181,7 @@ PLAN-ROOT SINK
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -204,9 +204,9 @@ PLAN-ROOT SINK
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -230,9 +230,9 @@ PLAN-ROOT SINK
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
-  ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+  ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index eb87a1e..38ce51b 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -168,9 +168,8 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
         assert ["", "EXTERNAL", "TRUE"] in table_desc
         assert ["", "kudu.master_addresses", KUDU_MASTER_HOSTS] in table_desc
         assert ["", "kudu.table_name", kudu_table.name] in table_desc
-        # TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
-        # assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
-        #     in table_desc
+        assert ["", "storage_handler", "org.apache.hadoop.hive.kudu.KuduStorageHandler"] \
+            in table_desc
 
   @pytest.mark.execute_serially
   def test_implicit_managed_table_props(self, cursor, kudu_client, unique_database):
@@ -191,9 +190,8 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
     assert any("kudu.master_addresses" in s for s in table_desc)
     assert ["Table Type:", "MANAGED_TABLE", None] in table_desc
     assert ["", "kudu.table_name", "%s.foo" % unique_database] in table_desc
-    # TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
-    # assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
-    #     in table_desc
+    assert ["", "storage_handler", "org.apache.hadoop.hive.kudu.KuduStorageHandler"] \
+        in table_desc
 
   @pytest.mark.execute_serially
   def test_drop_non_empty_db(self, unique_cursor, kudu_client):
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 875b5d0..df347c6 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -560,9 +560,8 @@ class TestCreateExternalTable(KuduTestSuite):
         assert ["", "EXTERNAL", "TRUE"] in table_desc
         assert ["", "kudu.master_addresses", KUDU_MASTER_HOSTS] in table_desc
         assert ["", "kudu.table_name", kudu_table.name] in table_desc
-        # TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
-        # assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
-        #     in table_desc
+        assert ["", "storage_handler", "org.apache.hadoop.hive.kudu.KuduStorageHandler"] \
+            in table_desc
 
   @SkipIfKudu.hms_integration_enabled
   def test_col_types(self, cursor, kudu_client):