You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by kw...@apache.org on 2016/12/02 02:16:43 UTC

[5/6] incubator-impala git commit: IMPALA-4564, IMPALA-4565: mt_dop fixes for old aggs and joins

IMPALA-4564, IMPALA-4565: mt_dop fixes for old aggs and joins

Fix a test bug: the nested types tests must be skipped when running with
the old aggs and joins.

Fix a product bug where *eos is not initialised by the MT scan node.
This causes incorrect results when the calling ExecNode does not
initialise the eos variable, e.g. the sort node and the old agg and join
nodes.

Testing:
Added a test that, when run under ASAN, reproduces the incorrect results
with the sort node.

Tested the mt_dop tests locally with old aggs and joins to ensure they
pass.

Change-Id: I48c50c8aa0c23710eb099fba252bc3c0cb74b313
Reviewed-on: http://gerrit.cloudera.org:8080/5302
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b3740612
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b3740612
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b3740612

Branch: refs/heads/master
Commit: b3740612065d742f7ddb96154da64c590867759e
Parents: 56f4d0f
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Nov 30 21:53:27 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 2 01:46:55 2016 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scan-node-mt.cc                |  1 +
 .../QueryTest/mt-dop-parquet-nested.test        | 34 ++++++++++++++++++++
 .../queries/QueryTest/mt-dop-parquet.test       | 33 -------------------
 .../queries/QueryTest/mt-dop.test               | 19 +++++++++++
 tests/query_test/test_mt_dop.py                 |  6 ++++
 5 files changed, 60 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b3740612/be/src/exec/hdfs-scan-node-mt.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-mt.cc b/be/src/exec/hdfs-scan-node-mt.cc
index bde9f81..50936b8 100644
--- a/be/src/exec/hdfs-scan-node-mt.cc
+++ b/be/src/exec/hdfs-scan-node-mt.cc
@@ -67,6 +67,7 @@ Status HdfsScanNodeMt::GetNext(RuntimeState* state, RowBatch* row_batch, bool* e
   RETURN_IF_ERROR(ExecDebugAction(TExecNodePhase::GETNEXT, state));
   RETURN_IF_CANCELLED(state);
   RETURN_IF_ERROR(QueryMaintenance(state));
+  *eos = false;
 
   DCHECK(scan_range_ == NULL || scanner_ != NULL);
   if (scan_range_ == NULL || scanner_->eos()) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b3740612/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-nested.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-nested.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-nested.test
new file mode 100644
index 0000000..9be983d
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-nested.test
@@ -0,0 +1,34 @@
+====
+---- QUERY
+# IMPALA-4554: Memory corruption of nested collection with MT_DOP > 0.
+select id, cnt
+from functional_parquet.complextypestbl t,
+  (select count(item) cnt from t.int_array) v
+order by id
+limit 10
+---- RESULTS
+1,3
+2,3
+3,0
+4,0
+5,0
+6,0
+7,0
+8,1
+---- TYPES
+bigint,bigint
+====
+---- QUERY
+# IMPALA-4458: Test proper resource cleanup for cancelled fragments.
+# This test is duplicated from nested-types-subplan.test
+select c_custkey, c_mktsegment, o_orderkey, o_orderdate
+from tpch_nested_parquet.customer c, c.c_orders o
+where c_custkey = 1
+limit 3
+---- RESULTS
+1,regex:.*,regex:.*,regex:.*
+1,regex:.*,regex:.*,regex:.*
+1,regex:.*,regex:.*,regex:.*
+---- TYPES
+bigint,string,bigint,string
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b3740612/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test
index 0523f1d..39ec4b3 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test
@@ -5,36 +5,3 @@ select * from functional_parquet.bad_metadata_len
 ---- CATCH
 Invalid metadata size in file footer
 ====
----- QUERY
-# IMPALA-4554: Memory corruption of nested collection with MT_DOP > 0.
-select id, cnt
-from functional_parquet.complextypestbl t,
-  (select count(item) cnt from t.int_array) v
-order by id
-limit 10
----- RESULTS
-1,3
-2,3
-3,0
-4,0
-5,0
-6,0
-7,0
-8,1
----- TYPES
-bigint,bigint
-====
----- QUERY
-# IMPALA-4458: Test proper resource cleanup for cancelled fragments.
-# This test is duplicated from nested-types-subplan.test
-select c_custkey, c_mktsegment, o_orderkey, o_orderdate
-from tpch_nested_parquet.customer c, c.c_orders o
-where c_custkey = 1
-limit 3
----- RESULTS
-1,regex:.*,regex:.*,regex:.*
-1,regex:.*,regex:.*,regex:.*
-1,regex:.*,regex:.*,regex:.*
----- TYPES
-bigint,string,bigint,string
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b3740612/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
index ac453ca..a46693a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
@@ -7,3 +7,22 @@ select count(*) from alltypes
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+# IMPALA-4565: incorrect results because mt scan node does not set eos
+# correctly and sort node only gets the first row batch.
+set batch_size=1;
+select id
+from alltypestiny
+order by id
+---- TYPES
+int
+---- RESULTS
+0
+1
+2
+3
+4
+5
+6
+7
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b3740612/tests/query_test/test_mt_dop.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_mt_dop.py b/tests/query_test/test_mt_dop.py
index d05d6b4..ff60b60 100644
--- a/tests/query_test/test_mt_dop.py
+++ b/tests/query_test/test_mt_dop.py
@@ -21,6 +21,7 @@ import pytest
 
 from copy import deepcopy
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.skip import SkipIfOldAggsJoins
 from tests.common.test_vector import TestDimension
 from tests.common.test_vector import TestVector
 
@@ -88,3 +89,8 @@ class TestMtDopParquet(ImpalaTestSuite):
   def test_parquet(self, vector):
     vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/mt-dop-parquet', vector)
+
+  @SkipIfOldAggsJoins.nested_types
+  def test_parquet_nested(self, vector):
+    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
+    self.run_test_case('QueryTest/mt-dop-parquet-nested', vector)