You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/05/11 00:04:29 UTC

[1/3] incubator-impala git commit: IMPALA-5294: Kudu INSERT partitioning fails with constants

Repository: incubator-impala
Updated Branches:
  refs/heads/master d6b5f82e3 -> e7cb80b66


IMPALA-5294: Kudu INSERT partitioning fails with constants

An INSERT into a Kudu table with a constant value being inserted
into a partition column causes an IllegalStateExcaption. This is
because DistributedPlanner removes constants from the list of
partition exprs before creating the KuduPartitionExpr, but
KuduPartitionExpr expects to get one expr per partition column.

The fix is to pass the full list of partition exprs into the
KuduPartitionExpr, instead of the list that has had constants
removed. This preserves the behavior that if all of the partition
exprs are constant we fall back to UNPARTITIONED.

One complication is that if a partition expr is a NullLiteral, it
must be cast to a specific type to be passed to the BE. The
InsertStmt will cast the partition exprs to the partition column
types, but these casts may be lost from the copies of the partition
exprs stored by the KuduPartitionExpr during reset(). To fix this,
the KuduPartitionExpr can store the types of the partition cols and
recast the partition exprs to those types during analyze().

Change-Id: I12cbb319f9a5c47fdbfee347b47650186b27f8f9
Reviewed-on: http://gerrit.cloudera.org:8080/6828
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b8c8fb1b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b8c8fb1b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b8c8fb1b

Branch: refs/heads/master
Commit: b8c8fb1b439ceffc6e167089184b14559f03699c
Parents: d6b5f82
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Mon May 8 19:10:37 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed May 10 22:53:02 2017 +0000

----------------------------------------------------------------------
 .../impala/analysis/KuduPartitionExpr.java      | 14 ++++++++--
 .../impala/planner/DistributedPlanner.java      |  7 ++++-
 .../queries/QueryTest/kudu_insert.test          | 29 ++++++++++++++++++++
 3 files changed, 47 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b8c8fb1b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
index 88644e2..cc42804 100644
--- a/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java
@@ -20,6 +20,7 @@ package org.apache.impala.analysis;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.thrift.TExprNode;
@@ -42,14 +43,16 @@ public class KuduPartitionExpr extends Expr {
 
   // The table to use the partitioning scheme from.
   private final int targetTableId_;
+  private final KuduTable targetTable_;
   // Maps from this Epxrs children to column positions in the table, i.e. children_[i]
   // produces the value for column partitionColPos_[i].
   private List<Integer> partitionColPos_;
 
-  public KuduPartitionExpr(
-      int targetTableId, List<Expr> partitionKeyExprs, List<Integer> partitionKeyIdxs) {
+  public KuduPartitionExpr(int targetTableId, KuduTable targetTable,
+      List<Expr> partitionKeyExprs, List<Integer> partitionKeyIdxs) {
     Preconditions.checkState(partitionKeyExprs.size() == partitionKeyIdxs.size());
     targetTableId_ = targetTableId;
+    targetTable_ = targetTable;
     partitionColPos_ = partitionKeyIdxs;
     children_.addAll(Expr.cloneList(partitionKeyExprs));
   }
@@ -60,12 +63,19 @@ public class KuduPartitionExpr extends Expr {
   protected KuduPartitionExpr(KuduPartitionExpr other) {
     super(other);
     targetTableId_ = other.targetTableId_;
+    targetTable_ = other.targetTable_;
     partitionColPos_ = other.partitionColPos_;
   }
 
   @Override
   protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {
     type_ = Type.INT;
+    // IMPALA-5294: If one of the children is a NullLiteral, it has to be cast to a type
+    // to be passed to the BE.
+    for (int i = 0; i < children_.size(); ++i) {
+      children_.get(i).castTo(
+          targetTable_.getColumns().get(partitionColPos_.get(i)).getType());
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b8c8fb1b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
index 83c8ccb..ac0355e 100644
--- a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
@@ -246,8 +246,13 @@ public class DistributedPlanner {
     if (partitionExprs.isEmpty()) {
       partition = DataPartition.UNPARTITIONED;
     } else if (insertStmt.getTargetTable() instanceof KuduTable) {
+      // IMPALA-5294: Don't use 'partitionExpr' here because the constants were removed.
+      // We need all of the partition exprs to fill in the partition column values for
+      // each row to send to Kudu to determine the partition number.
       Expr kuduPartitionExpr = new KuduPartitionExpr(DescriptorTable.TABLE_SINK_ID,
-          partitionExprs, insertStmt.getPartitionColPos());
+          (KuduTable) insertStmt.getTargetTable(),
+          Lists.newArrayList(insertStmt.getPartitionKeyExprs()),
+          insertStmt.getPartitionColPos());
       kuduPartitionExpr.analyze(ctx_.getRootAnalyzer());
       partition = DataPartition.kuduPartitioned(kuduPartitionExpr);
     } else {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b8c8fb1b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
index 71224ce..9779c7f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
@@ -394,3 +394,32 @@ A, B, C, D, E, F, G, H, I, J
 ---- TYPES
 INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
 ====
+---- QUERY
+create table multiple_partition_cols (x bigint, y bigint, z string, primary key(x, y))
+partition by hash(x, y) partitions 8 stored as kudu
+---- RESULTS
+====
+---- QUERY
+# SELECT with constant
+insert into multiple_partition_cols select 0, bigint_col, string_col
+  from functional.alltypes where id = 0
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
+NumRowErrors: 0
+---- LABELS
+X,Y,Z
+---- DML_RESULTS: multiple_partition_cols
+0,0,'0'
+---- TYPES
+BIGINT,BIGINT,STRING
+====
+---- QUERY
+# SELECT with constant NULL
+insert into multiple_partition_cols select bigint_col, null, string_col
+  from functional.alltypes where id = 1
+---- RESULTS
+: 0
+---- RUNTIME_PROFILE
+NumModifiedRows: 0
+NumRowErrors: 1
+====
\ No newline at end of file


[2/3] incubator-impala git commit: IMPALA-4499: Table name missing from exec summary

Posted by mj...@apache.org.
IMPALA-4499: Table name missing from exec summary

For scan nodes, previously only HDFS tables showed the name
of the table in the 'Detail' section for the scan node. This
change adds the table name for all scan node types (Kudu,
HBase, and DataSource).

Testing:
- Added an e2e test in test_observability.

Change-Id: If4fd13f893aea4e7df8a2474d7136770660e4324
Reviewed-on: http://gerrit.cloudera.org:8080/6832
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/49b6af54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/49b6af54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/49b6af54

Branch: refs/heads/master
Commit: 49b6af54c8194af34eacd5322dd6e2ea831f27f8
Parents: b8c8fb1
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Tue May 9 13:12:38 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed May 10 22:56:38 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/HdfsScanNode.java | 14 ------------
 .../org/apache/impala/planner/ScanNode.java     | 15 +++++++++++++
 tests/query_test/test_observability.py          | 23 ++++++++++++++++++++
 3 files changed, 38 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/49b6af54/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 40d750b..296dfb4 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -781,20 +781,6 @@ public class HdfsScanNode extends ScanNode {
   }
 
   @Override
-  protected String getDisplayLabelDetail() {
-    HdfsTable table = (HdfsTable) desc_.getTable();
-    List<String> path = Lists.newArrayList();
-    path.add(table.getDb().getName());
-    path.add(table.getName());
-    Preconditions.checkNotNull(desc_.getPath());
-    if (desc_.hasExplicitAlias()) {
-      return desc_.getPath().toString() + " " + desc_.getAlias();
-    } else {
-      return desc_.getPath().toString();
-    }
-  }
-
-  @Override
   protected String getNodeExplainString(String prefix, String detailPrefix,
       TExplainLevel detailLevel) {
     StringBuilder output = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/49b6af54/fe/src/main/java/org/apache/impala/planner/ScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/ScanNode.java b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
index 985a03a..c1b6858 100644
--- a/fe/src/main/java/org/apache/impala/planner/ScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
@@ -22,6 +22,7 @@ import java.util.List;
 import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.catalog.HdfsFileFormat;
+import org.apache.impala.catalog.Table;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.NotImplementedException;
 import org.apache.impala.thrift.TExplainLevel;
@@ -188,4 +189,18 @@ abstract public class ScanNode extends PlanNode {
     if (getConjuncts().isEmpty() && hasLimit()) return getLimit();
     return inputCardinality_;
   }
+
+  @Override
+  protected String getDisplayLabelDetail() {
+    Table table = desc_.getTable();
+    List<String> path = Lists.newArrayList();
+    path.add(table.getDb().getName());
+    path.add(table.getName());
+    Preconditions.checkNotNull(desc_.getPath());
+    if (desc_.hasExplicitAlias()) {
+      return desc_.getPath().toString() + " " + desc_.getAlias();
+    } else {
+      return desc_.getPath().toString();
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/49b6af54/tests/query_test/test_observability.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index 59e6a73..d2b27c3 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -50,3 +50,26 @@ class TestObservability(ImpalaTestSuite):
     assert result.exec_summary[5]['operator'] == '04:EXCHANGE'
     assert result.exec_summary[5]['num_rows'] == 25
     assert result.exec_summary[5]['est_num_rows'] == 25
+
+  def test_scan_summary(self):
+    """IMPALA-4499: Checks that the exec summary for scans show the table name."""
+    # HDFS table
+    query = "select count(*) from functional.alltypestiny"
+    result = self.execute_query(query)
+    scan_idx = len(result.exec_summary) - 1
+    assert result.exec_summary[scan_idx]['operator'] == '00:SCAN HDFS'
+    assert result.exec_summary[scan_idx]['detail'] == 'functional.alltypestiny'
+
+    # KUDU table
+    query = "select count(*) from functional_kudu.alltypestiny"
+    result = self.execute_query(query)
+    scan_idx = len(result.exec_summary) - 1
+    assert result.exec_summary[scan_idx]['operator'] == '00:SCAN KUDU'
+    assert result.exec_summary[scan_idx]['detail'] == 'functional_kudu.alltypestiny'
+
+    # HBASE table
+    query = "select count(*) from functional_hbase.alltypestiny"
+    result = self.execute_query(query)
+    scan_idx = len(result.exec_summary) - 1
+    assert result.exec_summary[scan_idx]['operator'] == '00:SCAN HBASE'
+    assert result.exec_summary[scan_idx]['detail'] == 'functional_hbase.alltypestiny'


[3/3] incubator-impala git commit: IMPALA-5297: Reduce free-pool-test mem requirement to avoid OOM

Posted by mj...@apache.org.
IMPALA-5297: Reduce free-pool-test mem requirement to avoid OOM

On jenkins.impala.io gerrit-verify-dryrun jobs,
free-pool-test started running out of memory after updating
Kudu to a newer version which can have slightly higher
memory requirements (Kudu will reject writes when mem is 80%
full rather than 60% full). free-pool-test can allocate up
to 12gb, and the VMs have a CommitLimit of only 16gb.

While larger VMs could be used, or the minicluster could be
tuned further, free-pool-test can also be modified to
reduce the actual RSS usage. Instead of memsetting the
entire allocation in the test, we only scribble the first
byte. This reduces the max RSS from 14gb to 88mb in some
local tests.

Change-Id: I31f03e7a4d5d237a1183277c988f85a992396a43
Reviewed-on: http://gerrit.cloudera.org:8080/6842
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e7cb80b6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e7cb80b6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e7cb80b6

Branch: refs/heads/master
Commit: e7cb80b66f6ff653fa41713c090a9284c5bbc1f7
Parents: 49b6af5
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Wed May 10 10:13:38 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed May 10 23:08:48 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/free-pool-test.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e7cb80b6/be/src/runtime/free-pool-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/free-pool-test.cc b/be/src/runtime/free-pool-test.cc
index 63153c9..07b676f 100644
--- a/be/src/runtime/free-pool-test.cc
+++ b/be/src/runtime/free-pool-test.cc
@@ -135,8 +135,9 @@ TEST(FreePoolTest, Loop) {
     EXPECT_TRUE(p1 != NULL);
     EXPECT_TRUE(p2 != NULL);
     EXPECT_TRUE(p1 != p2);
-    memset(p1, 1, size); // Scribble the expected value (used below).
-    memset(p2, 1, size);
+    // Scribble the expected value (used below).
+    *p1 = 1;
+    *p2 = 1;
     primed_allocations[size] = make_pair(p1, p2);
     pool.Free(p1);
     pool.Free(p2);