You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/06/13 06:37:20 UTC
[impala] 06/07: IMPALA-8629: (part 2) Adjust new KuduStorageHandler
package
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit ff4e0a3a459b233495e71f9bcb584ac200caf23a
Author: Grant Henke <gh...@cloudera.com>
AuthorDate: Thu Jun 6 12:30:39 2019 -0500
IMPALA-8629: (part 2) Adjust new KuduStorageHandler package
This patch changes the new KuduStorageHandler
package from “org.apache.kudu.hive” to
“org.apache.hadoop.hive.kudu”.
This is done to ensure the stand-in storage handler
can be a real storage handler when a Hive integration
is added in the future. The “org.apache.hadoop.hive”
package is the standard package all Hive storage
handlers lives under.
Additionally this patch updates the stand-in InputFormat,
OutputFormat, and SerDe entries for Kudu. This allows a
future Hive integration to read HMS tables/entries created
by Impala.
This patch also includes PlannerTest fixes to handle the
ScanToken stringification changes in the newer version
of Kudu.
Change-Id: I4d0c505643247498383472704a37d27c9e1ce473
Reviewed-on: http://gerrit.cloudera.org:8080/13541
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/impala-config.sh | 4 ++--
.../org/apache/impala/catalog/HdfsFileFormat.java | 7 +++---
.../java/org/apache/impala/catalog/KuduTable.java | 9 +------
.../apache/impala/analysis/AnalyzeKuduDDLTest.java | 20 ++++++++--------
.../java/org/apache/impala/analysis/ToSqlTest.java | 5 ++--
.../org/apache/impala/planner/PlannerTestBase.java | 7 ++++--
.../queries/PlannerTest/kudu.test | 28 +++++++++++-----------
tests/custom_cluster/test_kudu.py | 10 ++++----
tests/query_test/test_kudu.py | 5 ++--
9 files changed, 45 insertions(+), 50 deletions(-)
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2386bbd..86d9c0d 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -162,7 +162,7 @@ fi
: ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
export IMPALA_TOOLCHAIN_HOST
export CDH_MAJOR_VERSION=6
-export CDH_BUILD_NUMBER=1137441
+export CDH_BUILD_NUMBER=1173663
export CDP_BUILD_NUMBER=1153860
export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
export CDP_HADOOP_VERSION=3.1.1.7.0.0.0-107
@@ -666,7 +666,7 @@ if $USE_CDH_KUDU; then
export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"1.10.0-cdh6.x-SNAPSHOT"}
export IMPALA_KUDU_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
else
- export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"84086fe"}
+ export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"988296d"}
export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION
fi
export IMPALA_KUDU_JAVA_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java b/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
index 25684cb..2cdbc30 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsFileFormat.java
@@ -67,9 +67,10 @@ public enum HdfsFileFormat {
"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
"org.apache.hadoop.hive.ql.io.orc.OrcSerde",
true, true, false),
- KUDU("org.apache.kudu.mapreduce.KuduTableInputFormat",
- "org.apache.kudu.mapreduce.KuduTableOutputFormat",
- "", false, false, false);
+ KUDU("org.apache.hadoop.hive.kudu.KuduInputFormat",
+ "org.apache.hadoop.hive.kudu.KuduOutputFormat",
+ "org.apache.hadoop.hive.kudu.KuduSerDe",
+ false, false, false);
private final String inputFormat_;
private final String outputFormat_;
diff --git a/fe/src/main/java/org/apache/impala/catalog/KuduTable.java b/fe/src/main/java/org/apache/impala/catalog/KuduTable.java
index 1626236..329c5aa 100644
--- a/fe/src/main/java/org/apache/impala/catalog/KuduTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/KuduTable.java
@@ -85,12 +85,6 @@ public class KuduTable extends Table implements FeKuduTable {
// Kudu specific value for the storage handler table property keyed by
// KEY_STORAGE_HANDLER.
public static final String KUDU_STORAGE_HANDLER =
- "org.apache.kudu.hive.KuduStorageHandler";
-
- // TODO(IMPALA-8629): Remove this after Kudu adjusts its StorageHandler logic.
- // Once that is done KUDU_STORAGE_HANDLER will be
- // "org.apache.hadoop.hive.kudu.KuduStorageHandler".
- public static final String TEMP_KUDU_STORAGE_HANDLER =
"org.apache.hadoop.hive.kudu.KuduStorageHandler";
// Key to specify the number of tablet replicas.
@@ -146,8 +140,7 @@ public class KuduTable extends Table implements FeKuduTable {
public static boolean isKuduStorageHandler(String handler) {
return handler != null && (handler.equals(KUDU_LEGACY_STORAGE_HANDLER) ||
- handler.equals(KUDU_STORAGE_HANDLER) ||
- handler.equals(TEMP_KUDU_STORAGE_HANDLER));
+ handler.equals(KUDU_STORAGE_HANDLER));
}
public static boolean isKuduTable(org.apache.hadoop.hive.metastore.api.Table msTbl) {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java
index 20e8f4e..6dfa163 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeKuduDDLTest.java
@@ -198,14 +198,13 @@ public class AnalyzeKuduDDLTest extends FrontendTestBase {
"partition 30 < values) stored as kudu tblproperties" +
"('kudu.master_addresses' = '%s')", kuduMasters));
// Not using the STORED AS KUDU syntax to specify a Kudu table
- // TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
- // AnalysisError("create table tab (x int) tblproperties (" +
- // "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler')",
- // CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
+ AnalysisError("create table tab (x int) tblproperties (" +
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+ CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
// Creating unpartitioned table results in a warning.
- // AnalyzesOk("create table tab (x int primary key) stored as kudu " +
- // "tblproperties ('storage_handler'='org.apache.kudu.hive.KuduStorageHandler')",
- // "Unpartitioned Kudu tables are inefficient for large data sizes.");
+ AnalyzesOk("create table tab (x int primary key) stored as kudu tblproperties (" +
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+ "Unpartitioned Kudu tables are inefficient for large data sizes.");
// Invalid value for number of replicas
AnalysisError("create table t (x int primary key) stored as kudu tblproperties (" +
"'kudu.num_tablet_replicas'='1.1')",
@@ -445,14 +444,15 @@ public class AnalyzeKuduDDLTest extends FrontendTestBase {
// Use all allowed optional table props.
AnalyzesOk(String.format("create external table t stored as kudu tblproperties (" +
"'kudu.table_name'='tab', 'kudu.master_addresses' = '%s', " +
- "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler')", kuduMasters));
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+ kuduMasters));
// Kudu table should be specified using the STORED AS KUDU syntax.
AnalysisError("create external table t tblproperties (" +
"'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'," +
"'kudu.table_name'='t')",
CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
AnalysisError("create external table t tblproperties (" +
- "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler'," +
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler'," +
"'kudu.table_name'='t')",
CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
// Columns should not be specified in an external Kudu table
@@ -468,7 +468,7 @@ public class AnalyzeKuduDDLTest extends FrontendTestBase {
"'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'," +
"'kudu.table_name'='t')", CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
AnalysisError("create external table t (x int) stored as parquet tblproperties (" +
- "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler'," +
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler'," +
"'kudu.table_name'='t')", CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
AnalysisError("create external table t stored as kudu tblproperties (" +
"'storage_handler'='foo', 'kudu.table_name'='t')",
diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
index 78eb8e6..5e7c92b 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
@@ -338,7 +338,8 @@ public class ToSqlTest extends FrontendTestBase {
String.format("CREATE TABLE default.p ( a BIGINT PRIMARY KEY, b TIMESTAMP " +
"DEFAULT '1987-05-19' ) PARTITION BY HASH (a) PARTITIONS 3 " +
"STORED AS KUDU TBLPROPERTIES ('kudu.master_addresses'='%s', " +
- "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler')", kuduMasters),
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler')",
+ kuduMasters),
true);
}
@@ -372,7 +373,7 @@ public class ToSqlTest extends FrontendTestBase {
String.format("CREATE TABLE default.p PRIMARY KEY (a, b) " +
"PARTITION BY HASH (a) PARTITIONS 3, RANGE (b) (PARTITION VALUE = 1) " +
"STORED AS KUDU TBLPROPERTIES ('kudu.master_addresses'='%s', " +
- "'storage_handler'='org.apache.kudu.hive.KuduStorageHandler') AS SELECT " +
+ "'storage_handler'='org.apache.hadoop.hive.kudu.KuduStorageHandler') AS SELECT " +
"int_col a, bigint_col b FROM functional.alltypes", kuduMasters), true);
}
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index 3adb01f..430c352 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -315,8 +315,11 @@ public class PlannerTestBase extends FrontendTestBase {
Preconditions.checkNotNull(kuduClient_,
"Test should not be invoked on platforms that do not support Kudu.");
try {
- result.append(KuduScanToken.stringifySerializedToken(
- locations.scan_range.kudu_scan_token.array(), kuduClient_));
+ String token = KuduScanToken.stringifySerializedToken(
+ locations.scan_range.kudu_scan_token.array(), kuduClient_);
+ // Don't match against the table id as its nondeterministic.
+ token = token.replaceAll(" table-id=.*?,", "");
+ result.append(token);
} catch (IOException e) {
throw new IllegalStateException("Unable to parse Kudu scan token", e);
}
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index d8b50dc..3258fae 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -6,9 +6,9 @@ PLAN-ROOT SINK
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -26,9 +26,9 @@ PLAN-ROOT SINK
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -147,7 +147,7 @@ PLAN-ROOT SINK
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -181,7 +181,7 @@ PLAN-ROOT SINK
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -204,9 +204,9 @@ PLAN-ROOT SINK
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -230,9 +230,9 @@ PLAN-ROOT SINK
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
- ScanToken{table=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [(int64 id=1008), <end>)}
+ ScanToken{table-name=impala::functional_kudu.testtbl, range-partition: [<start>, (int64 id=1004))}
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index eb87a1e..38ce51b 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -168,9 +168,8 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
assert ["", "EXTERNAL", "TRUE"] in table_desc
assert ["", "kudu.master_addresses", KUDU_MASTER_HOSTS] in table_desc
assert ["", "kudu.table_name", kudu_table.name] in table_desc
- # TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
- # assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
- # in table_desc
+ assert ["", "storage_handler", "org.apache.hadoop.hive.kudu.KuduStorageHandler"] \
+ in table_desc
@pytest.mark.execute_serially
def test_implicit_managed_table_props(self, cursor, kudu_client, unique_database):
@@ -191,9 +190,8 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
assert any("kudu.master_addresses" in s for s in table_desc)
assert ["Table Type:", "MANAGED_TABLE", None] in table_desc
assert ["", "kudu.table_name", "%s.foo" % unique_database] in table_desc
- # TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
- # assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
- # in table_desc
+ assert ["", "storage_handler", "org.apache.hadoop.hive.kudu.KuduStorageHandler"] \
+ in table_desc
@pytest.mark.execute_serially
def test_drop_non_empty_db(self, unique_cursor, kudu_client):
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 875b5d0..df347c6 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -560,9 +560,8 @@ class TestCreateExternalTable(KuduTestSuite):
assert ["", "EXTERNAL", "TRUE"] in table_desc
assert ["", "kudu.master_addresses", KUDU_MASTER_HOSTS] in table_desc
assert ["", "kudu.table_name", kudu_table.name] in table_desc
- # TODO(IMPALA-8629): Uncomment tests after Kudu adjusts its StorageHandler logic.
- # assert ["", "storage_handler", "org.apache.kudu.hive.KuduStorageHandler"] \
- # in table_desc
+ assert ["", "storage_handler", "org.apache.hadoop.hive.kudu.KuduStorageHandler"] \
+ in table_desc
@SkipIfKudu.hms_integration_enabled
def test_col_types(self, cursor, kudu_client):