You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/09/05 22:26:50 UTC

[5/9] incubator-impala git commit: IMPALA-5871: KuduPartitionExpr incorrectly handles its child types

IMPALA-5871: KuduPartitionExpr incorrectly handles its child types

KuduPartitionExpr takes input rows and sends them to Kudu to determine
the partition the rows correspond to in a particular table's
partitioning scheme. This is then used to partition and sort rows
before sending them to Kudu when performing an INSERT.

If the input types are not the same as (but are compatible with) the
types of the columns in the table, we need to cast the input rows.
KuduPartitionExpr.analyze() actually already does this, but the casts
are dropped for the sort step during the INSERT in most cases.

As a result, attempting to insert a string value into a Kudu timestamp
column causes a crash.

Inserting a numeric value into a different but compatibly typed col
(eg. tinyint into an int col) will cause the sort during a Kudu INSERT
to operate on garbage values, potentially degrading performance and
causing INSERTs to fail due to Kudu timeouts (see IMPALA-3742).

Testing:
- Added an e2e test in kudu_insert.test

Change-Id: I44cf31e46a77f3e7c92cf6b9112653808a001705
Reviewed-on: http://gerrit.cloudera.org:8080/7922
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/64e28021
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/64e28021
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/64e28021

Branch: refs/heads/master
Commit: 64e28021957f6993aea5ceb3ad626fb577597107
Parents: 1b70eb6
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu Aug 31 09:15:22 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Sep 1 21:31:55 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/kudu-partition-expr.cc             |  1 +
 .../impala/analysis/KuduPartitionExpr.java      |  4 +--
 .../queries/QueryTest/kudu_insert.test          | 27 +++++++++++++++-----
 3 files changed, 23 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64e28021/be/src/exprs/kudu-partition-expr.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/kudu-partition-expr.cc b/be/src/exprs/kudu-partition-expr.cc
index 6f35268..2cc96f0 100644
--- a/be/src/exprs/kudu-partition-expr.cc
+++ b/be/src/exprs/kudu-partition-expr.cc
@@ -71,6 +71,7 @@ IntVal KuduPartitionExpr::GetIntVal(ScalarExprEvaluator* eval,
     int col = tkudu_partition_expr_.referenced_columns[i];
     const ColumnDescriptor& col_desc = table_desc_->col_descs()[col];
     PrimitiveType type = col_desc.type().type;
+    DCHECK_EQ(GetChild(i)->type().type, type);
     Status s = WriteKuduValue(col, type, val, false, row_.get());
     // This can only fail if we set a col to an incorect type, which would be a bug in
     // planning, so we can DCHECK.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64e28021/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
index cc42804..8d52d59 100644
--- a/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
@@ -73,8 +73,8 @@ public class KuduPartitionExpr extends Expr {
     // IMPALA-5294: If one of the children is a NullLiteral, it has to be cast to a type
     // to be passed to the BE.
     for (int i = 0; i < children_.size(); ++i) {
-      children_.get(i).castTo(
-          targetTable_.getColumns().get(partitionColPos_.get(i)).getType());
+      children_.set(i, children_.get(i).castTo(
+          targetTable_.getColumns().get(partitionColPos_.get(i)).getType()));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64e28021/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
index 76ad779..1150898 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
@@ -287,22 +287,35 @@ NumRowErrors: 7299
 ---- QUERY
 # Table with all supported types as primary key and distribution columns
 create table allkeytypes (i1 tinyint, i2 smallint, i3 int, i4 bigint, name string,
-  valf float, vald double, primary key (i1, i2, i3, i4, name)) partition by
-  hash partitions 3, range (partition value = (1,1,1,1,'1'),
-  partition value = (2,2,2,2,'2'), partition value = (3,3,3,3,'3')) stored as kudu
+  valt timestamp, valf float, vald double, primary key (i1, i2, i3, i4, name, valt))
+  partition by hash partitions 3, range
+  (partition value = (1,1,1,1,'1','2009-01-01 00:01:00'),
+  partition value = (2,2,2,2,'2','2009-01-01 00:02:00.100000000'),
+  partition value = (3,3,3,3,'3','2009-01-01 00:03:00.300000000')) stored as kudu
 ---- RESULTS
 ====
 ---- QUERY
 insert into allkeytypes select cast(id as tinyint), smallint_col, int_col,
-  cast (bigint_col/10 as bigint), string_col, float_col, double_col
-  from functional.alltypes where id > 0 and id < 10
+  cast (bigint_col/10 as bigint), string_col, timestamp_col, float_col, double_col
+  from functional.alltypes where id > 1 and id < 10
 ---- RESULTS
-: 3
+: 2
 ---- RUNTIME_PROFILE
-NumModifiedRows: 3
+NumModifiedRows: 2
 NumRowErrors: 6
 ====
 ---- QUERY
+# IMPALA-5871 - test that a cast is correctly added when inserting a string into a Kudu
+# timestamp partition column with distributed exec.
+set exec_single_node_rows_threshold=0;
+insert into allkeytypes values (1,1,1,1,'1','2009-01-01 00:01:00',null,null)
+---- RESULTS
+: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
+NumRowErrors: 0
+====
+---- QUERY
 # Table with default values
 create table tbl_with_defaults (a int primary key, b int null default 10,
   c int not null default 100, d int default 1000, e int null, f int not null,