Posted to commits@impala.apache.org by ta...@apache.org on 2016/10/18 16:43:24 UTC

[01/32] incubator-impala git commit: Bump Bzip2 version

Repository: incubator-impala
Updated Branches:
  refs/heads/hadoop-next 75a857c0c -> ee2a06d82


Bump Bzip2 version

This picks up the latest toolchain version. The only change is a fix for
some symlinks that were broken in the previous version.

Change-Id: I0c5e9ef10984fc8c6840acf285a04e472fc8b304
Reviewed-on: http://gerrit.cloudera.org:8080/4716
Reviewed-by: Alex Behm <al...@cloudera.com>
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/67a0451e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/67a0451e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/67a0451e

Branch: refs/heads/hadoop-next
Commit: 67a0451e3b9422681cc78bad6886f130a89fef30
Parents: 75a857c
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Oct 13 08:08:16 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Thu Oct 13 21:25:06 2016 +0000

----------------------------------------------------------------------
 bin/impala-config.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/67a0451e/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index ef179f4..90e8fc0 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -246,13 +246,12 @@ else
 fi
 export NUM_CONCURRENT_TESTS="${NUM_CONCURRENT_TESTS-${CORES}}"
 
-# Versions of toolchain dependencies (or if toolchain is not used of dependencies in
-# thirdparty)
+# Versions of toolchain dependencies.
 export IMPALA_AVRO_VERSION=1.7.4-p4
 export IMPALA_BINUTILS_VERSION=2.26-p1
 export IMPALA_BOOST_VERSION=1.57.0
 export IMPALA_BREAKPAD_VERSION=20150612-p1
-export IMPALA_BZIP2_VERSION=1.0.6-p1
+export IMPALA_BZIP2_VERSION=1.0.6-p2
 export IMPALA_CMAKE_VERSION=3.2.3-p1
 export IMPALA_CYRUS_SASL_VERSION=2.1.23
 export IMPALA_GCC_VERSION=4.9.2
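
For context, here is a minimal sketch of how a toolchain version variable such
as IMPALA_BZIP2_VERSION is typically consumed further down the build scripts.
This is illustrative only: the download host, directory layout, and the
fetch_toolchain_package helper are assumptions, not code from this patch.

  # Hypothetical sketch -- the real fetch logic lives elsewhere in Impala's
  # build scripts; the host, layout, and helper name below are assumptions.
  export IMPALA_BZIP2_VERSION=1.0.6-p2               # value this patch introduces
  TOOLCHAIN_HOST="https://example.invalid/toolchain" # placeholder host

  fetch_toolchain_package() {
    local name=$1 version=$2
    local tarball="${name}-${version}.tar.gz"
    mkdir -p toolchain
    # Bumping IMPALA_<PKG>_VERSION is enough to point the build at a newly
    # published toolchain artifact; no other source change is needed.
    wget -nv "${TOOLCHAIN_HOST}/${name}/${tarball}" -O "toolchain/${tarball}"
    tar -xzf "toolchain/${tarball}" -C toolchain/
  }

  fetch_toolchain_package bzip2 "${IMPALA_BZIP2_VERSION}"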


[16/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
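
The hunks below all make the same mechanical update to the planner test
expectations: with the coordinator fragment now handled like every other
fragment, each expected PLAN and DISTRIBUTEDPLAN section shows a PLAN-ROOT SINK
node at its root. The pattern looks like this (a generic illustration in the
test-file format, not a hunk copied verbatim from the patch):

  select name from functional.testtbl order by name
  ---- PLAN
  PLAN-ROOT SINK
  |
  01:SORT
  |  order by: name ASC
  |
  00:SCAN HDFS [functional.testtbl]
     partitions=1/1 files=0 size=0B
  ====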
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/order.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/order.test b/testdata/workloads/functional-planner/queries/PlannerTest/order.test
index a39b9b2..266c517 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/order.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/order.test
@@ -2,12 +2,16 @@ select name, zip
 from functional.testtbl
 order by name offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: name ASC
 |
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: name ASC
@@ -22,12 +26,16 @@ select name, zip
 from functional.testtbl
 order by name
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: name ASC
 |
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: name ASC
 |
@@ -43,6 +51,8 @@ where name like 'm%'
 group by 1
 order by 2 desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: count(*) DESC
 |
@@ -54,6 +64,8 @@ order by 2 desc
    partitions=1/1 files=0 size=0B
    predicates: name LIKE 'm%'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC
 |
@@ -80,6 +92,8 @@ where id < 5
 group by 1
 order by 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: sum(float_col) ASC
 |
@@ -90,6 +104,8 @@ order by 2
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(float_col) ASC
 |
@@ -114,6 +130,8 @@ from functional_hbase.alltypessmall
 group by 1
 order by 2,3 desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: sum(float_col) ASC, min(float_col) DESC
 |
@@ -123,6 +141,8 @@ order by 2,3 desc
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(float_col) ASC, min(float_col) DESC
 |
@@ -144,12 +164,16 @@ order by 2,3 desc
 # Test that the sort is on int_col and not on the id column
 select int_col as id from functional.alltypessmall order by id
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: int_col ASC
 |
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: int_col ASC
 |
@@ -162,12 +186,16 @@ select int_col as id from functional.alltypessmall order by id
 # Test that the sort is on id and not on int_col
 select int_col as id from functional.alltypessmall order by functional.alltypessmall.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: id ASC
 |
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC
 |
@@ -184,6 +212,8 @@ select int_col, bigint_col from
    select * from functional.alltypessmall) t
 order by int_col desc offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SORT
 |  order by: int_col DESC
 |
@@ -195,6 +225,8 @@ order by int_col desc offset 5
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: int_col DESC
@@ -217,6 +249,8 @@ select int_col, bigint_col from
    select * from functional.alltypessmall) t
 order by int_col desc offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: int_col DESC
 |
@@ -231,6 +265,8 @@ order by int_col desc offset 5
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: int_col DESC
@@ -259,6 +295,8 @@ select j.*, d.* from functional.JoinTbl j full outer join functional.DimTbl d
 on (j.test_id = d.id)
 order by j.test_id, j.test_name, j.test_zip, j.alltypes_id, d.name
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SORT
 |  order by: test_id ASC, test_name ASC, test_zip ASC, alltypes_id ASC, name ASC
 |
@@ -271,6 +309,8 @@ order by j.test_id, j.test_name, j.test_zip, j.alltypes_id, d.name
 00:SCAN HDFS [functional.jointbl j]
    partitions=1/1 files=1 size=433B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: test_id ASC, test_name ASC, test_zip ASC, alltypes_id ASC, name ASC
 |
@@ -303,6 +343,8 @@ and c.string_col < '7'
 and a.int_col + b.float_col + cast(c.string_col as float) < 1000
 order by c.string_col desc, a.smallint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SORT
 |  order by: string_col DESC, smallint_col ASC
 |
@@ -329,6 +371,8 @@ order by c.string_col desc, a.smallint_col
    predicates: c.string_col < '7'
    runtime filters: RF000 -> c.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: string_col DESC, smallint_col ASC
 |
@@ -370,6 +414,8 @@ from functional.alltypesagg
 group by 1
 order by avg(tinyint_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: avg(tinyint_col) ASC
 |
@@ -380,6 +426,8 @@ order by avg(tinyint_col)
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: avg(tinyint_col) ASC
 |
@@ -406,6 +454,8 @@ left outer join functional.alltypessmall t2
   on (t1.int_col = t2.int_col)
 order by t1.id,t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SORT
 |  order by: id ASC, id ASC
 |
@@ -418,6 +468,8 @@ order by t1.id,t2.id
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC, id ASC
 |
@@ -440,6 +492,8 @@ select t1.id, t2.id from functional.alltypestiny t1 cross join functional.alltyp
 where (t1.id < 3 and t2.id < 3)
 order by t1.id, t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SORT
 |  order by: id ASC, id ASC
 |
@@ -453,6 +507,8 @@ order by t1.id, t2.id
    partitions=4/4 files=4 size=460B
    predicates: t1.id < 3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC, id ASC
 |
@@ -481,6 +537,8 @@ union distinct
 (select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2)
 order by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:SORT
 |  order by: id ASC
 |
@@ -501,6 +559,8 @@ order by 1
 01:SCAN HDFS [functional.alltypestiny]
    partitions=1/4 files=1 size=115B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC
 |
@@ -538,6 +598,8 @@ union all
 (select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2)
 order by 1,2
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: id ASC, bool_col ASC
 |
@@ -560,6 +622,8 @@ order by 1,2
 05:SCAN HDFS [functional.alltypestiny]
    partitions=1/4 files=1 size=115B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC, bool_col ASC
 |
@@ -611,6 +675,8 @@ union all
    order by 1 limit 3)
 order by 12, 13, 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:SORT
 |  order by: year ASC, month ASC, id ASC
 |
@@ -658,6 +724,8 @@ order by 12, 13, 1
 10:SCAN HDFS [functional.alltypestiny]
    partitions=1/4 files=1 size=115B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: year ASC, month ASC, id ASC
 |
@@ -729,6 +797,8 @@ order by 12, 13, 1
 select * from (select * from functional.alltypes order by bigint_col limit 10) t
 order by int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: int_col ASC
 |
@@ -738,6 +808,8 @@ order by int_col
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: int_col ASC
 |
@@ -758,6 +830,8 @@ select * from
    (select * from functional.alltypessmall) order by bigint_col limit 10) t
 order by int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: int_col ASC
 |
@@ -772,6 +846,8 @@ order by int_col
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: int_col ASC
 |
@@ -796,6 +872,8 @@ select * from
 (select * from functional.alltypes order by bigint_col) A
 join B on (A.string_col = B.string_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.string_col = functional.alltypes.string_col
 |  runtime filters: RF000 <- functional.alltypes.string_col
@@ -807,6 +885,8 @@ join B on (A.string_col = B.string_col)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.string_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -863,6 +943,8 @@ select * from functional.alltypes
    union all
 select * from functional.alltypessmall order by bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HDFS [functional.alltypessmall]
@@ -871,6 +953,8 @@ select * from functional.alltypessmall order by bigint_col
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -886,6 +970,8 @@ select * from functional.alltypes
    union all
 (select * from functional.alltypessmall) order by bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SORT
 |  order by: bigint_col ASC
 |
@@ -897,6 +983,8 @@ select * from functional.alltypes
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: bigint_col ASC
 |
@@ -917,6 +1005,8 @@ select int_col from functional.alltypes order by int_col
  union (select int_col from functional.alltypes order by int_col limit 10 offset 5)
 order by int_col offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: int_col ASC
 |
@@ -940,6 +1030,8 @@ order by int_col offset 5
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 14:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: int_col ASC
@@ -1003,6 +1095,8 @@ select * from
   having sum(float_col) > 10) t3
 order by x
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:SORT
 |  order by: x ASC
 |
@@ -1040,6 +1134,8 @@ order by x
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:SORT
 |  order by: x ASC
 |
@@ -1098,6 +1194,8 @@ select int_col from
   (select int_col, bigint_col from functional.alltypesagg)
 order by bigint_col limit 10) A
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=10]
 |  order by: bigint_col ASC
 |
@@ -1109,6 +1207,8 @@ order by bigint_col limit 10) A
 01:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: bigint_col ASC
 |  limit: 10
@@ -1127,6 +1227,8 @@ order by bigint_col limit 10) A
 # Sort node is unnecessary (IMPALA-1148).
 select 1 from functional.alltypes order by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ====
@@ -1135,6 +1237,8 @@ select a from
   (select 1 as a, int_col, bigint_col
    from functional.alltypes order by 1 limit 1) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
    limit: 1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
index c6ccc81..f3e43bd 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
@@ -27,6 +27,8 @@ and t1.zip + t2.zip = 10
 # join predicate between t1, t2 and t3 applied after last join
 and t1.zip + t2.zip + t3.zip= 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t1.id = t3.id
 |  other join predicates: t2.id = 15, t1.id - t2.id = 0
@@ -49,6 +51,8 @@ and t1.zip + t2.zip + t3.zip= 20
    partitions=1/1 files=0 size=0B
    predicates: t1.id > 0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
@@ -87,6 +91,8 @@ from (select * from functional.testtbl a1) t1
 where t1.id > 0 and t2.id is null and t3.id is not null
 and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a1.id = a3.id
 |  other join predicates: a2.id = 15, a1.id - a2.id = 0
@@ -109,6 +115,8 @@ and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
    partitions=1/1 files=0 size=0B
    predicates: a1.id > 0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
@@ -145,6 +153,8 @@ from functional.testtbl t1
     t1.id = t2.id and t1.id = 17)
   join functional.testtbl t3 on (t1.id = t3.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t3.id
 |  runtime filters: RF000 <- t3.id
@@ -195,6 +205,8 @@ and t1.zip + t2.zip = 10
 # join predicate between t1, t2 and t3 applied after last join
 and t1.zip + t2.zip + t3.zip= 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: t1.id = t3.id
 |  other join predicates: t3.zip = 94720, t1.id - t2.id = 0
@@ -219,6 +231,8 @@ and t1.zip + t2.zip + t3.zip= 20
    predicates: t1.id IS NOT NULL, t1.id > 0
    runtime filters: RF000 -> t1.id, RF001 -> t1.id - 1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -262,6 +276,8 @@ from (select * from functional.testtbl a1) t1 right outer join (select * from fu
 where t1.id > 0 and t2.id is null and t3.id is not null
 and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a1.id = a3.id
 |  other join predicates: a3.zip = 94720, a1.id - a2.id = 0
@@ -286,6 +302,8 @@ and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
    predicates: a1.id IS NOT NULL, a1.id > 0
    runtime filters: RF000 -> a1.id, RF001 -> a1.id - 1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -327,6 +345,8 @@ from functional.alltypesagg a
 right outer join functional.alltypestiny b on (a.tinyint_col = b.id)
 where a.tinyint_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a.tinyint_col = b.id
 |  other predicates: a.tinyint_col IS NULL
@@ -339,6 +359,8 @@ where a.tinyint_col is null
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> a.tinyint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -363,6 +385,8 @@ from functional.alltypesagg a
 full outer join functional.alltypestiny b on (a.tinyint_col = b.id)
 where a.tinyint_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.tinyint_col = b.id
 |  other predicates: a.tinyint_col IS NULL
@@ -373,6 +397,8 @@ where a.tinyint_col is null
 00:SCAN HDFS [functional.alltypesagg a]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
@@ -398,6 +424,8 @@ from functional.alltypes a full outer join functional.alltypes b
 # also to the full outer join
 where b.bigint_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.id = b.id
 |  other join predicates: a.int_col < 10, b.tinyint_col != 5
@@ -420,6 +448,8 @@ inner join functional.alltypes c
 # first full outer join
 where b.tinyint_col > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: c.int_col = a.int_col
 |  other predicates: a.tinyint_col < 10
@@ -453,6 +483,8 @@ full outer join functional.alltypes c
 # re-assigned to the full outer join.
 where a.smallint_col = 100 and a.float_col > b.float_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: c.int_col = a.int_col
 |  other join predicates: a.bigint_col < 10, a.tinyint_col < b.tinyint_col
@@ -487,6 +519,8 @@ full outer join functional.alltypes d
 # predicate on b from the where clause is assigned to the first full outer join
 where a.bool_col = false and a.float_col < b.float_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.tinyint_col = d.tinyint_col
 |  other join predicates: b.int_col < 20
@@ -536,6 +570,8 @@ from (
 # tuple ids
 where x != y
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: c.id = d.id
 |  other join predicates: a.bigint_col > b.bigint_col
@@ -575,6 +611,8 @@ full outer join functional.alltypes c
 # that materializes the corresponding tuple ids
 where a.bigint_col = b.bigint_col and a.tinyint_col < b.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: b.int_col = c.int_col
 |  other join predicates: c.int_col < 10
@@ -606,6 +644,8 @@ full outer join functional.alltypes d
   on (b.string_col = d.string_col and a.tinyint_col < b.tinyint_col)
 where a.float_col = b.float_col and b.smallint_col = 1 and d.tinyint_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: b.string_col = d.string_col
 |  other join predicates: a.tinyint_col < b.tinyint_col
@@ -645,6 +685,8 @@ group by a.bool_col, a.int_col, b.bool_col, b.int_col
 having a.bool_col is null and a.int_col is not null
   and b.bool_col is null and b.int_col is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(b.double_col)
 |  group by: a.bool_col, a.int_col, b.bool_col, b.int_col
@@ -669,6 +711,8 @@ group by a.bool_col, a.int_col, b.bool_col, b.int_col
 having a.bool_col is null and a.int_col is not null
   and b.bool_col is null and b.int_col is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(b.double_col)
 |  group by: a.bool_col, a.int_col, b.bool_col, b.int_col
@@ -697,6 +741,8 @@ where (
     a.timestamp_col
   end) >= cast('2001-01-01 00:00:00' as timestamp);
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: b.id = a.id
 |  runtime filters: RF000 <- a.id
@@ -717,6 +763,8 @@ left outer join functional.alltypestiny b
 inner join functional.alltypestiny c
   on b.id = c.id and b.int_col < 0 and a.int_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: c.id = b.id
 |  other predicates: b.int_col < 0
@@ -744,6 +792,8 @@ right outer join functional.alltypestiny b
 inner join functional.alltypestiny c
   on b.id = c.id and b.int_col < 0 and a.int_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: c.id = b.id
 |  other predicates: a.int_col > 10
@@ -773,6 +823,8 @@ full outer join functional.alltypestiny b
 inner join functional.alltypestiny c
   on b.id = c.id and b.int_col < 0 and a.int_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = c.id
 |  other predicates: a.int_col > 10, b.int_col < 0
@@ -805,6 +857,8 @@ inner join functional.alltypestiny d
 full outer join functional.alltypestiny e
   on d.id = e.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: e.id = d.id
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/partition-key-scans.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/partition-key-scans.test b/testdata/workloads/functional-planner/queries/PlannerTest/partition-key-scans.test
index c99a90b..deda7e9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/partition-key-scans.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/partition-key-scans.test
@@ -1,12 +1,16 @@
 # Test with aggregate expressions which ignore the distinct keyword.
 select min(month), max(year), ndv(day) from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: min(month), max(year), ndv(day)
 |
 00:UNION
    constant-operands=11
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: min(month), max(year), ndv(day)
 |
@@ -16,6 +20,8 @@ select min(month), max(year), ndv(day) from functional.alltypesagg
 # Test with explicit distinct keyword.
 select count(distinct year), ndv(day) from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(year), ndv:merge(day)
 |
@@ -26,6 +32,8 @@ select count(distinct year), ndv(day) from functional.alltypesagg
 00:UNION
    constant-operands=11
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(year), ndv:merge(day)
 |
@@ -39,6 +47,8 @@ select count(distinct year), ndv(day) from functional.alltypesagg
 # Test static partition pruning.
 select min(month), max(day) from functional.alltypesagg where year = 2010 and day = 1;
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: min(month), max(day)
 |
@@ -50,6 +60,8 @@ select c1, c2 from
   (select min(year) c1, max(month) c2, count(int_col) c3
    from functional.alltypes where year = 2000) t;
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: min(year), max(month)
 |
@@ -58,6 +70,8 @@ select c1, c2 from
 # Test with group by and having clauses.
 select ndv(month) from functional.alltypesagg group by year having max(day)=10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: ndv(month), max(day)
 |  group by: year
@@ -69,6 +83,8 @@ select ndv(month) from functional.alltypesagg group by year having max(day)=10
 # Test with group-by clauses (no aggregate expressions) only.
 select month from functional.alltypes group by month
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: month
 |
@@ -78,6 +94,8 @@ select month from functional.alltypes group by month
 # Test with distinct select list.
 select distinct month from functional.alltypes where month % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: month
 |
@@ -89,6 +107,8 @@ select min(a.month)
 from functional.alltypes as a, functional.alltypesagg as b
 where a.year = b.year
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: min(a.month)
 |
@@ -101,6 +121,8 @@ where a.year = b.year
 00:UNION
    constant-operands=24
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: min(a.month)
 |
@@ -122,6 +144,8 @@ select * from
   (select year, count(month) from functional.alltypes group by year) b
 on (a.year = b.year)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: year = year
 |  runtime filters: RF000 <- year
@@ -144,6 +168,8 @@ on (a.year = b.year)
 select min(a.year), ndv(b.timestamp_col) from
 functional.alltypes a, functional.alltypesnopart b
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: min(a.year), ndv(b.timestamp_col)
 |
@@ -161,6 +187,8 @@ select c1, c2 from
   (select ndv(a.year + b.year) c1, min(a.month + b.month) c2, count(a.int_col) c3 from
    functional.alltypes a, functional.alltypesagg b) t
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: ndv(a.year + b.year), min(a.month + b.month)
 |
@@ -179,6 +207,8 @@ from functional.alltypestiny t1 inner join
           min(t2.year) as int_col from functional.alltypestiny t2) t3
 on (t1.int_col = t3.int_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.int_col = min(t2.year)
 |  runtime filters: RF000 <- min(t2.year)
@@ -198,6 +228,8 @@ with c1 as (select distinct month from functional.alltypes),
      c2 as (select distinct year from functional.alltypes)
 select ndv(month) from (select * from c1 union all select * from c2) t
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: ndv(month)
 |
@@ -218,6 +250,8 @@ select ndv(month) from (select * from c1 union all select * from c2) t
 # If slots other than partition keys are accessed, make sure scan nodes are generated.
 select date_string_col, min(month) from functional.alltypes group by date_string_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: min(month)
 |  group by: date_string_col
@@ -228,6 +262,8 @@ select date_string_col, min(month) from functional.alltypes group by date_string
 # Make sure non-distinct aggregation functions will generate scan nodes.
 select count(month) from functional.alltypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(month)
 |
@@ -237,6 +273,8 @@ select count(month) from functional.alltypes
 # Make sure that queries without any aggregation will generate scan nodes.
 select month from functional.alltypes order by year
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: year ASC
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
index 605f157..eeb9b97 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
@@ -3,6 +3,8 @@ select straight_join count(*)
 from functional.alltypes a join functional.alltypes b on (a.double_col = b.bigint_col)
 where b.bigint_col div 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -26,6 +28,8 @@ from
    inner join functional.alltypessmall b on (a.cnt = b.id)
 where b.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: count(id) = b.id
 |
@@ -45,6 +49,8 @@ where b.id < 10
 select count(*) from functional.alltypes
 where month = id and id = int_col and tinyint_col = int_col and int_col < 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -60,6 +66,8 @@ from
    left outer join (select id, string_col from functional.alltypes) b
    on (a.id = b.id and a.string_col = 'a' and b.string_col = 'b')
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: id = id
 |  other join predicates: string_col = 'a'
@@ -80,6 +88,8 @@ from
    on (a.id = b.id)
 where a.string_col = 'a' and b.string_col = 'b'
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: id = id
 |  other predicates: string_col = 'b'
@@ -99,6 +109,8 @@ from
    cross join (select id, string_col from functional.alltypes) b
 where a.string_col = 'a' and b.string_col = 'b'
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--01:SCAN HDFS [functional.alltypes]
@@ -109,6 +121,8 @@ where a.string_col = 'a' and b.string_col = 'b'
    partitions=24/24 files=24 size=478.45KB
    predicates: functional.alltypes.string_col = 'a'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
@@ -135,6 +149,8 @@ where c1 > 0
 order by 2, 1 desc
 limit 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=3]
 |  order by: c2 ASC, c1 DESC
 |
@@ -163,6 +179,8 @@ where c1 > 0
 order by 2, 1 desc
 limit 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=3]
 |  order by: c2 ASC, c1 DESC
 |
@@ -189,6 +207,8 @@ from functional.alltypes a
 where a.year = 2009 and b.month + 2 <= 4 and b.id = 17
   and cast(sin(c.int_col) as boolean) = true
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = c.id, b.month = c.month, b.year = c.year, b.smallint_col = c.int_col
 |  runtime filters: RF000 <- c.id, RF001 <- c.month, RF002 <- c.year, RF003 <- c.int_col
@@ -221,6 +241,8 @@ NODE 2:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -262,6 +284,8 @@ from (select * from functional.alltypes) a
 where a.year = 2009 and b.month + 2 <= 4 and b.id = 17
   and cast(sin(c.int_col) as boolean) = true
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypessmall.id = functional.alltypestiny.id, functional.alltypessmall.month = functional.alltypestiny.month, functional.alltypessmall.year = functional.alltypestiny.year, functional.alltypessmall.smallint_col = functional.alltypestiny.int_col
 |  runtime filters: RF000 <- functional.alltypestiny.id, RF001 <- functional.alltypestiny.month, RF002 <- functional.alltypestiny.year, RF003 <- functional.alltypestiny.int_col
@@ -294,6 +318,8 @@ NODE 2:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -340,6 +366,8 @@ from functional.alltypes a
    and a.month = b.month and b.month + 1 = 2)
 where a.year = 2009 and a.tinyint_col = 7 and a.id is null and b.id = 17 and b.int_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = b.id, a.month = b.month, a.tinyint_col = b.tinyint_col, a.year = b.year
 |  other predicates: b.int_col IS NULL, b.id = 17
@@ -368,6 +396,8 @@ NODE 0:
 NODE 1:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
@@ -400,6 +430,8 @@ from functional.alltypessmall a
    and a.month = b.month and a.month + 1 = 2)
 where b.year = 2009 and b.tinyint_col = 7 and b.id is null and a.id = 17 and a.int_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a.id = b.id, a.month = b.month, a.tinyint_col = b.tinyint_col, a.year = b.year
 |  other predicates: a.int_col IS NULL, a.id = 17
@@ -414,6 +446,8 @@ where b.year = 2009 and b.tinyint_col = 7 and b.id is null and a.id = 17 and a.i
    predicates: a.id = 17, a.tinyint_col = 7
    runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.tinyint_col, RF003 -> a.year
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -448,6 +482,8 @@ from functional.alltypes a
   on (a.id = b.id and a.tinyint_col = b.int_col and a.year = b.year and a.month = b.month)
 where a.year = 2009 and b.month <= 2 and b.count_col + 1 = 17 and a.tinyint_col != 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
 |  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
@@ -473,6 +509,8 @@ NODE 1:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, BROADCAST]
@@ -518,6 +556,8 @@ where a.id = b.id and
       b.count_col + 1 = 17 and
       a.tinyint_col != 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
 |  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
@@ -536,6 +576,8 @@ where a.id = b.id and
    predicates: a.id > 11, a.tinyint_col != 5
    runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, BROADCAST]
@@ -579,6 +621,8 @@ from functional.alltypes a
   on (a.id = b.id and a.tinyint_col = b.int_col and a.year = b.year and a.month = b.month)
 where a.year = 2009 and b.month <= 2 and b.count_col + 1 = 17 and a.tinyint_col != 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
 |  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
@@ -609,6 +653,8 @@ NODE 1:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -661,6 +707,8 @@ where a.year = 2009 and
       a.year = b.year and
       a.month = b.month
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
 |  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
@@ -682,6 +730,8 @@ where a.year = 2009 and
    predicates: a.id > 11, a.tinyint_col != 5
    runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -724,6 +774,8 @@ on (x.id = z.id)
 where x.year = 2009
 and z.month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = functional.alltypesagg.id
 |  runtime filters: RF000 <- functional.alltypesagg.id
@@ -756,6 +808,8 @@ where x.year = 2009
 and z.month = 1
 and x.id + x.b_id = 17
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = functional.alltypesagg.id
 |  runtime filters: RF000 <- functional.alltypesagg.id
@@ -785,6 +839,8 @@ from functional.alltypes a left outer join
 (select id, int_col from functional.alltypes group by 1, 2) b on (a.id = b.id)
 where a.id is null and isnull(b.id, 0) = 0 and b.int_col = 17
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = id
 |  other predicates: int_col = 17, isnull(id, 0) = 0
@@ -807,6 +863,8 @@ from functional.alltypes a left outer join
 on (a.id = b.id)
 where isnull(a.id, 0) = 0 and b.id is null  and b.int_col = 17
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = id
 |  other predicates: id IS NULL, int_col = 17
@@ -837,6 +895,8 @@ from
 right outer join functional.alltypes a on (a.id = b.id)
 where a.id is null and isnull(b.id, 0) = 0 and b.int_col = 17
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = a.id
 |  other predicates: int_col = 17, isnull(id, 0) = 0
@@ -861,6 +921,8 @@ from
 right outer join functional.alltypes a on (a.id = b.id)
 where isnull(a.id, 0) = 0 and b.id is null  and b.int_col = 17
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = a.id
 |  other predicates: id IS NULL, int_col = 17
@@ -893,6 +955,8 @@ select straight_join a.string_col from functional.alltypes a
 full outer join (select * from functional.alltypessmall where id > 0) b
 ON a.id=b.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.id = functional.alltypessmall.id
 |
@@ -916,6 +980,8 @@ left outer join
    where x.id is null) b
 on a.id=b.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = x.id
 |
@@ -942,6 +1008,8 @@ from functional.alltypes
 group by bool_col, int_col
 having bool_col = false and int_col > 0 and count(bigint_col) > 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(bigint_col)
 |  group by: bool_col, int_col
@@ -968,6 +1036,8 @@ and t2.id + t2.smallint_col + t2.bigint_col > 30
 # assigned in join, TODO: propagate multi-tuple predicates
 and t2.id + t3.int_col > 40
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.bigint_col = functional.alltypestiny.bigint_col, t2.id = functional.alltypestiny.id, t2.smallint_col = functional.alltypestiny.int_col
 |  other predicates: t2.id + functional.alltypestiny.int_col > 40
@@ -1009,6 +1079,8 @@ t1.id + t1.tinyint_col > 20
 # assigned in agg nodes in t2 and t3
 and t2.y + t2.z > 30
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: max(smallint_col) = max(smallint_col), min(int_col) = min(int_col)
 |  runtime filters: RF000 <- max(smallint_col)
@@ -1056,6 +1128,8 @@ and ifnull(t3.tinyint_col + t3.bigint_col, true) = true
 # assigned in scan of t1, t2 and t3
 and t1.id * t1.int_col < 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t2.id = functional.alltypestiny.id, t2.int_col = functional.alltypestiny.int_col
 |  other predicates: functional.alltypestiny.tinyint_col + functional.alltypestiny.smallint_col + functional.alltypestiny.int_col > 10, ifnull(functional.alltypestiny.tinyint_col + functional.alltypestiny.bigint_col, TRUE) = TRUE
@@ -1084,6 +1158,8 @@ functional.alltypes t1 inner join functional.alltypessmall t2
 on (t1.id = t2.month and t1.year = t2.year and t1.month = t2.month)
 where t2.year + t2.month > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.month, t1.year = t2.year
 |  runtime filters: RF000 <- t2.month, RF001 <- t2.year
@@ -1107,6 +1183,8 @@ on (t1.id = t2.id
     and t1.tinyint_col = t2.int_col)
 where t1.id + t1.tinyint_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id, t1.tinyint_col = t2.tinyint_col
 |  runtime filters: RF000 <- t2.id, RF001 <- t2.tinyint_col
@@ -1132,6 +1210,8 @@ inner join (select bigint_col, min(int_col) x, max(int_col) y
 on (t1.id = t2.bigint_col and t1.int_col = t2.x)
 where t1.id + t1.int_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.int_col = min(int_col), t1.id = bigint_col
 |  runtime filters: RF000 <- min(int_col), RF001 <- bigint_col
@@ -1157,6 +1237,8 @@ left anti join
 on (a.id = b.id)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: a.id = id
 |
@@ -1176,6 +1258,8 @@ right anti join functional.alltypestiny b
 on (a.id = b.id)
 where b.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT ANTI JOIN]
 |  hash predicates: id = b.id
 |
@@ -1197,6 +1281,8 @@ select * from
    group by j.int_col) v
 where v.int_col = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(a.tinyint_col)
 |  group by: b.int_col
@@ -1223,6 +1309,8 @@ select * from
     on a.id = b.id) j) v
 where v.int_col = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: b.id = a.id
 |  other predicates: b.int_col = 10
@@ -1245,6 +1333,8 @@ SELECT count(*) FROM
   WHERE n_name = 'BRAZIL' AND n_regionkey = 1 AND c_custkey % 2 = 0) cn
  LEFT OUTER JOIN tpch_parquet.region r ON n_regionkey = r_regionkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
index f1abd1d..446aead 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
@@ -2,6 +2,8 @@
 select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
 where t1.year = t2.int_col and t2.id < 10 and t1.id = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year = t2.int_col
 |  runtime filters: RF000 <- t2.int_col
@@ -15,6 +17,8 @@ where t1.year = t2.int_col and t2.id < 10 and t1.id = 10
    predicates: t1.id = 10
    runtime filters: RF000 -> t1.year
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -38,6 +42,8 @@ select straight_join * from functional.alltypestiny t1, functional.alltypesagg t
 where t1.year = t2.int_col and t3.tinyint_col = t2.id and t3.month = t4.id and
   t2.bool_col = true and t4.bigint_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: t3.month = t4.id
 |  runtime filters: RF000 <- t4.id
@@ -71,6 +77,8 @@ where t1.year = t2.int_col and t3.tinyint_col = t2.id and t3.month = t4.id and
 select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
 where t1.year = t2.int_col and t1.month = t2.bigint_col and t2.id = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year = t2.int_col, t1.month = t2.bigint_col
 |  runtime filters: RF000 <- t2.int_col, RF001 <- t2.bigint_col
@@ -88,6 +96,8 @@ select straight_join * from functional.alltypesagg t1,
   (select * from functional.alltypesnopart t2 where t2.id = 1) v
 where t1.year = v.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year = t2.int_col
 |  runtime filters: RF000 <- t2.int_col
@@ -107,6 +117,8 @@ select straight_join * from functional.alltypesagg t1,
    where t2.bigint_col < 10) v
 where v.id1 = t1.year
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year = id + int_col
 |  runtime filters: RF000 <- id + int_col
@@ -123,6 +135,8 @@ where v.id1 = t1.year
 select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
 where t1.year + 1 = t2.id and t2.int_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year + 1 = t2.id
 |  runtime filters: RF000 <- t2.id
@@ -141,6 +155,8 @@ where t1.id = t2.id and t1.year + t2.int_col = t1.month + t2.tinyint_col
 and t1.year = t1.month + t2.int_col and t1.year + t2.smallint_col = t2.tinyint_col
 and t1.int_col = 1 and 1 = t2.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id
 |  other predicates: t1.year + t2.smallint_col = t2.tinyint_col, t1.year = t1.month + t2.int_col, t1.year + t2.int_col = t1.month + t2.tinyint_col
@@ -161,6 +177,8 @@ select straight_join * from functional.alltypesagg t1, functional.alltypesnopart
 where t1.year + t1.month = t2.id and t1.int_col + 1 - t1.tinyint_col = t2.smallint_col + 10
 and t1.int_col * 100 = t2.bigint_col / 100 and t2.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year + t1.month = t2.id, t1.int_col * 100 = t2.bigint_col / 100, t1.int_col + 1 - t1.tinyint_col = t2.smallint_col + 10
 |  runtime filters: RF000 <- t2.id, RF001 <- t2.bigint_col / 100, RF002 <- t2.smallint_col + 10
@@ -182,6 +200,8 @@ select straight_join * from
   functional.alltypesnopart t3
 where v.year = t3.int_col and t3.bool_col = true
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year + t2.year = t3.int_col
 |
@@ -207,6 +227,8 @@ select straight_join * from functional.alltypesagg t1,
    having count(int_col) < 10) v
 where v.cnt = t1.year and v.id = t1.month
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.month = id, t1.year = count(int_col)
 |  runtime filters: RF000 <- id, RF001 <- count(int_col)
@@ -230,6 +252,8 @@ select straight_join * from functional.alltypesagg t1,
    functional.alltypesnopart t3 where t2.int_col = t3.int_col) v
 where v.id = t1.year and t1.month = v.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.month = t3.tinyint_col, t1.year = t2.id + t3.id
 |  runtime filters: RF000 <- t3.tinyint_col, RF001 <- t2.id + t3.id
@@ -257,6 +281,8 @@ select straight_join * from functional.alltypesagg t1,
    where t2.id = t3.id and t3.int_col = t4.int_col and t4.tinyint_col = t2.tinyint_col) v
 where t1.year = v.int_col and t1.year = v.id and t1.month = v.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year = t2.int_col, t1.month = t4.tinyint_col
 |  runtime filters: RF000 <- t2.int_col, RF001 <- t4.tinyint_col
@@ -291,6 +317,8 @@ select straight_join * from functional.alltypesagg t1, functional.alltypesnopart
 where t1.year = t2.id and t1.year = t3.int_col and t1.year = t4.tinyint_col and
   t2.bool_col = false and t3.bool_col = true and t4.bigint_col in (1,2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.year = t4.tinyint_col
 |  runtime filters: RF000 <- t4.tinyint_col
@@ -327,6 +355,8 @@ select straight_join * from functional.alltypesagg t1, functional.alltypesnopart
 where t1.year = t2.id and t2.int_col = t3.tinyint_col and t3.month = t4.bigint_col
   and t4.smallint_col = t5.smallint_col and t5.id = t1.month
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.month = t5.id, t4.smallint_col = t5.smallint_col
 |  runtime filters: RF000 <- t5.id, RF001 <- t5.smallint_col
@@ -368,6 +398,8 @@ select straight_join * from functional.alltypesagg t1 left outer join functional
   on t1.year = t2.int_col
 where t2.id = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t1.year = t2.int_col
 |  other predicates: t2.id = 1
@@ -385,6 +417,8 @@ select straight_join * from functional.alltypesagg t1 left outer join functional
   on t1.year = t2.int_col
 where t2.id = 2 and t1.month = t2.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t1.year = t2.int_col
 |  other predicates: t2.id = 2, t1.month = t2.tinyint_col
@@ -406,6 +440,8 @@ select straight_join * from functional.alltypesagg t1 left outer join functional
   on t1.year = t5.smallint_col
 where t2.id = 1 and t3.int_col = 1 and t4.bool_col = true and t5.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: t1.year = t5.smallint_col
 |  other predicates: t2.id = 1, t3.int_col = 1, t4.bool_col = TRUE
@@ -449,6 +485,8 @@ from functional.alltypesagg t1 right outer join functional.alltypesnopart t2
   on t1.year = t2.int_col and t1.month = 1 and t2.int_col = 10
 where t2.id = 10 and t1.month = t2.tinyint_col and t1.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: t1.year = t2.int_col
 |  other join predicates: t2.int_col = 10
@@ -470,6 +508,8 @@ select straight_join * from functional.alltypesagg t1 left semi join functional.
   on t1.month = t3.tinyint_col
 where t3.id = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: t1.month = t3.tinyint_col
 |  runtime filters: RF000 <- t3.tinyint_col
@@ -494,6 +534,8 @@ select straight_join * from functional.alltypesagg t1
 where t1.year not in (select id from functional.alltypesnopart where int_col = 10)
 and t1.int_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: t1.year = id
 |
@@ -512,6 +554,8 @@ select straight_join * from
   (select id, int_col from functional.alltypesnopart where tinyint_col < 10) v2
 where v1.year = v2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: year = id
 |  runtime filters: RF000 <- id
@@ -535,6 +579,8 @@ select straight_join * from
   functional.alltypes t2
 where v1.cnt = t2.id and t2.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: count(*) = t2.id
 |
@@ -559,6 +605,8 @@ select straight_join * from
    functional.alltypesnopart t3
 where v2.year = t3.smallint_col and t3.id = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: v1.year = t3.smallint_col
 |  runtime filters: RF000 <- t3.smallint_col
@@ -593,6 +641,8 @@ join functional.alltypestiny b on v.year = b.year
 join functional.alltypestiny c on v.year = c.year
 where b.int_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:HASH JOIN [INNER JOIN]
 |  hash predicates: year = c.year
 |  runtime filters: RF000 <- c.year
@@ -631,6 +681,8 @@ select straight_join * from
   functional.alltypesnopart t2
 where v1.year = t2.id and v1.int_col = t2.int_col and t2.smallint_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = t2.int_col, year = t2.id
 |
@@ -651,6 +703,8 @@ select straight_join * from
   functional.alltypesnopart t3
 where v.year = t3.int_col and t3.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: year = t3.int_col
 |  runtime filters: RF000 <- t3.int_col
@@ -680,6 +734,8 @@ select straight_join count(*) from
   on a.month = b.month
 where b.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -716,6 +772,8 @@ select straight_join count(*) from
   on a.month = b.month
 where b.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -751,6 +809,8 @@ union all
 select straight_join t4.id, t3.year from functional.alltypes t3, functional.alltypesnopart t4
 where t3.month = t4.smallint_col and t4.bool_col = true
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--06:HASH JOIN [INNER JOIN]
@@ -786,6 +846,8 @@ select straight_join count(*) from functional.alltypes a
   on a.id = b.id
 where (b.id - b.id) < 1 AND (b.int_col - b.int_col) < 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -817,6 +879,8 @@ select straight_join * from
    functional.alltypesnopart t3
 where v1.month = t3.tinyint_col and v1.year = t3.id and t3.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: year = t3.id, month = t3.tinyint_col
 |
@@ -852,6 +916,8 @@ select straight_join * from
    from functional.alltypes) v, functional.alltypestiny v1
 where v.year = v1.int_col and v.year = 2009
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: year = v1.int_col
 |
@@ -887,6 +953,8 @@ select straight_join * from
   ) v3
 where v2.month = v3.intcol1
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.month = t4.int_col
 |  runtime filters: RF000 <- t4.int_col
@@ -927,6 +995,8 @@ select straight_join 1 from functional.alltypestiny t1 join functional.alltypest
   join functional.alltypestiny t3 on t2.id = t3.id
 where t3.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.id = t3.id
 |  runtime filters: RF000 <- t3.id
@@ -952,6 +1022,8 @@ select straight_join 1 from functional.alltypestiny t1 join functional.alltypest
   join functional.alltypestiny t3 on t1.id = t3.id
 where t3.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t3.id
 |  runtime filters: RF000 <- t3.id
@@ -979,6 +1051,8 @@ select straight_join 1 from tpch_nested_parquet.customer c,
    on o1.o_orderkey = o2.o_orderkey) v
 where c_custkey = v.o_orderkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--06:HASH JOIN [INNER JOIN]
@@ -1002,6 +1076,8 @@ from functional.alltypestiny t1 join
   (select * from functional.alltypessmall t2 where false) v on t1.id = v.id
 where v.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id
 |  runtime filters: RF000 <- t2.id
@@ -1018,6 +1094,8 @@ select straight_join 1 from
   (select * from functional.alltypestiny where false) v1 join
   (select * from functional.alltypessmall where false) v2 on v1.id = v2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypestiny.id = functional.alltypessmall.id
 |
@@ -1032,6 +1110,8 @@ select straight_join 1 from functional.alltypestiny t1 join functional.alltypest
   join functional.alltypestiny t3 on t1.id = t3.id
   join functional.alltypestiny t4 on t1.id + t2.id = t4.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id + t2.id = t4.id
 |  runtime filters: RF000 <- t4.id
@@ -1066,6 +1146,8 @@ select straight_join 1 from functional.alltypestiny a1
   inner join functional.alltypestiny a3 ON a3.smallint_col = a1.int_col
   inner join functional.alltypes a4 ON a4.smallint_col = a3.smallint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a3.smallint_col = a4.smallint_col
 |  runtime filters: RF000 <- a4.smallint_col
@@ -1096,6 +1178,8 @@ from functional.alltypestiny t1 left join
   on t2.int_col = t1.month
 where t1.month is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  group by: t1.int_col
 |
@@ -1210,6 +1294,8 @@ from big_six
   inner join big_three
   inner join small_four_2
 ---- PLAN
+PLAN-ROOT SINK
+|
 36:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--28:HASH JOIN [INNER JOIN]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/small-query-opt.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/small-query-opt.test b/testdata/workloads/functional-planner/queries/PlannerTest/small-query-opt.test
index 82b8d73..60e9dd9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/small-query-opt.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/small-query-opt.test
@@ -1,9 +1,13 @@
 select * from functional_seq.alltypes t1 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional_seq.alltypes t1]
    partitions=24/24 files=24 size=562.59KB
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional_seq.alltypes t1]
    partitions=24/24 files=24 size=562.59KB
    limit: 5
@@ -11,10 +15,14 @@ select * from functional_seq.alltypes t1 limit 5
 # Query is over the limit of 8 rows to be optimized, will distribute the query
 select * from functional.alltypes t1 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes t1]
    partitions=24/24 files=24 size=478.45KB
    limit: 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -25,10 +33,14 @@ select * from functional.alltypes t1 limit 10
 # Query is optimized, run on coordinator only
 select * from functional.alltypes t1 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes t1]
    partitions=24/24 files=24 size=478.45KB
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes t1]
    partitions=24/24 files=24 size=478.45KB
    limit: 5
@@ -36,11 +48,15 @@ select * from functional.alltypes t1 limit 5
 # If a predicate is applied the optimization is disabled
 select * from functional.alltypes t1 where t1.id < 99 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes t1]
    partitions=24/24 files=24 size=478.45KB
    predicates: t1.id < 99
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |  limit: 5
 |
@@ -52,10 +68,14 @@ select * from functional.alltypes t1 where t1.id < 99 limit 5
 # No optimization for hbase tables
 select * from functional_hbase.alltypes t1 where t1.id < 99 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypes t1]
    predicates: t1.id < 99
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |  limit: 5
 |
@@ -66,9 +86,13 @@ select * from functional_hbase.alltypes t1 where t1.id < 99 limit 5
 # Applies optimization for small queries in hbase
 select * from functional_hbase.alltypes t1 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypes t1]
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypes t1]
    limit: 5
 ====
@@ -122,6 +146,8 @@ select * from functional_hbase.alltypes limit 5
 union all
 select * from functional_hbase.alltypes limit 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HBASE [functional_hbase.alltypes]
@@ -130,6 +156,8 @@ select * from functional_hbase.alltypes limit 2
 01:SCAN HBASE [functional_hbase.alltypes]
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HBASE [functional_hbase.alltypes]
@@ -142,6 +170,8 @@ select * from functional_hbase.alltypes limit 5
 union all
 select * from functional_hbase.alltypes limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HBASE [functional_hbase.alltypes]
@@ -150,6 +180,8 @@ select * from functional_hbase.alltypes limit 5
 01:SCAN HBASE [functional_hbase.alltypes]
    limit: 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--04:EXCHANGE [UNPARTITIONED]
@@ -168,6 +200,8 @@ select * from functional_hbase.alltypes limit 5
 select * from
   functional.testtbl a join functional.testtbl b on a.id = b.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -186,6 +220,8 @@ select * from
 select * from
   functional.testtbl a, functional.testtbl b
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
@@ -202,6 +238,8 @@ select * from
   functional.alltypestiny a
 where a.id in (select id from functional.alltypestiny limit 5) limit 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |  limit: 5
 |
@@ -237,6 +275,8 @@ select id, bool_col
 from functional.alltypestiny c
 where year=2009 and month=2
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:UNION
 |
 |--03:AGGREGATE [FINALIZE]
@@ -256,6 +296,8 @@ where year=2009 and month=2
 # IMPALA-2527: Tests that the small query optimization is disabled for collection types
 select key from functional.allcomplextypes.map_map_col.value limit 5;
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |  limit: 5
 |
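
The comments in the small-query-opt.test hunks above describe the optimization being exercised: a simple scan with a limit at or below a configured row threshold and no predicates is planned to run entirely on the coordinator, while predicates, collection types, or a larger limit keep the query distributed. A hedged sketch of exercising both sides of the threshold from a shell; the EXEC_SINGLE_NODE_ROWS_THRESHOLD option name and the value 8 apparently used by these planner tests are assumptions, not taken from this patch:

-- assumed query option controlling the small-query row threshold
SET EXEC_SINGLE_NODE_ROWS_THRESHOLD=8;
-- at or below the threshold and predicate-free: expected coordinator-only plan
EXPLAIN SELECT * FROM functional.alltypes t1 LIMIT 5;
-- above the threshold: expected to add an UNPARTITIONED exchange above the scan
EXPLAIN SELECT * FROM functional.alltypes t1 LIMIT 10;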

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index 21776ba..060d470 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -4,6 +4,8 @@ from functional.alltypes
 where id in
   (select id from functional.alltypesagg)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
 |  runtime filters: RF000 <- id
@@ -21,6 +23,8 @@ from functional.alltypes
 where id not in
   (select id from functional.alltypesagg)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: id = id
 |
@@ -39,6 +43,8 @@ where a.int_col not in
    where g.id = a.id and g.bigint_col < a.bigint_col)
 and a.int_col < 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: a.int_col = int_col
 |  other join predicates: a.id = g.id, g.bigint_col < a.bigint_col
@@ -56,6 +62,8 @@ select *
 from functional.alltypes a
 where a.id not in (select id from functional.alltypes b where a.id = b.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: a.id = id
 |  other join predicates: a.id = b.id
@@ -73,6 +81,8 @@ where int_col in
   (select int_col from functional.alltypesagg g where a.id = g.id and g.bigint_col < 10)
 and bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -95,6 +105,8 @@ from functional.alltypes t
 where t.int_col + 1 in
   (select int_col + bigint_col from functional.alltypesagg)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t.int_col + 1 = int_col + bigint_col
 |  runtime filters: RF000 <- int_col + bigint_col
@@ -114,6 +126,8 @@ where t.id in
 and t.tinyint_col not in (select tinyint_col from functional.alltypestiny)
 and t.bigint_col < 1000
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: t.tinyint_col = tinyint_col
 |
@@ -139,6 +153,8 @@ from functional.alltypesagg a, functional.alltypes t
 where a.id = t.id and a.int_col in
   (select int_col from functional.alltypestiny where bool_col = false)
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -170,6 +186,8 @@ where a.id in
    where s.int_col = t.int_col and a.bool_col = s.bool_col)
 and a.int_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -201,6 +219,8 @@ where a.id in
   (select id from functional.alltypestiny)
 and t.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -231,6 +251,8 @@ from functional.alltypes a left outer join
 on a.int_col = t.int_col
 where a.bool_col = false and t.bigint_col < 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -262,6 +284,8 @@ where a.int_col in
    from functional.alltypes t, functional.alltypessmall s, functional.alltypestiny n
    where t.id = s.id and s.bigint_col = n.bigint_col and n.bool_col = false)
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:AGGREGATE [FINALIZE]
 |  output: count(id)
 |
@@ -304,6 +328,8 @@ where t.id in
    (select id, count(*) as cnt from functional.alltypessmall group by id) s
    where s.id = a.id and s.cnt = 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t.id = a.id
 |  runtime filters: RF000 <- a.id
@@ -332,6 +358,8 @@ with t as (select a.* from functional.alltypes a where id in
   (select id from functional.alltypestiny))
 select * from t where t.bool_col = false and t.int_col = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
 |  runtime filters: RF000 <- id
@@ -354,6 +382,8 @@ where s.string_col = t.string_col and t.int_col in
   (select int_col from functional.alltypessmall)
 and s.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: a.int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -395,6 +425,8 @@ where id in
   (select id from functional.alltypesagg a where t.int_col = a.int_col)
 and t.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id, t.int_col = a.int_col
 |  runtime filters: RF000 <- id, RF001 <- a.int_col
@@ -416,6 +448,8 @@ where id in
    and bool_col = false)
 and bigint_col < 1000
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
 |  runtime filters: RF000 <- id
@@ -446,6 +480,8 @@ where id in
     (select tinyint_col from functional.alltypestiny s
      where s.bigint_col = a.bigint_col))
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id, t.int_col = a.int_col
 |  runtime filters: RF000 <- id, RF001 <- a.int_col
@@ -472,6 +508,8 @@ where id in
   (select id from functional.alltypesagg a where a.int_col in
     (select int_col from functional.alltypestiny s where a.bigint_col = s.bigint_col))
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
 |  runtime filters: RF000 <- id
@@ -497,6 +535,8 @@ from functional.alltypes
 where id in
   (select id from functional.alltypes where id < 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
 |  runtime filters: RF000 <- id
@@ -516,6 +556,8 @@ from functional.alltypesagg a inner join functional.alltypes t on t.id = a.id
 where t.int_col < 10 and t.int_col in
   (select int_col from functional.alltypessmall s where s.id = t.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t.id = s.id, t.int_col = int_col
 |  runtime filters: RF000 <- s.id, RF001 <- int_col
@@ -543,6 +585,8 @@ from functional.alltypes t
 where exists
   (select * from functional.alltypesagg a where a.id = t.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -567,6 +611,8 @@ where exists
    group by id, int_col, bool_col)
 and tinyint_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: a.tinyint_col = b.tinyint_col
 |  runtime filters: RF000 <- b.tinyint_col
@@ -588,6 +634,8 @@ from functional.alltypes t
 where not exists
   (select id from functional.alltypesagg a where t.int_col = a.int_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -610,6 +658,8 @@ where not exists
    group by b.id, b.int_col, b.bigint_col)
 and bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -640,6 +690,8 @@ select *
 from functional.alltypestiny t
 where exists (select * from functional.alltypessmall s where s.id < 5)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [LEFT SEMI JOIN]
 |
 |--01:SCAN HDFS [functional.alltypessmall s]
@@ -658,6 +710,8 @@ where exists
    from functional.alltypesagg where tinyint_col = 10
    group by id, int_col, bigint_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [RIGHT SEMI JOIN]
 |
 |--00:SCAN HDFS [functional.alltypestiny t]
@@ -676,6 +730,8 @@ select 1
 from functional.alltypestiny t
 where exists (select * from functional.alltypessmall limit 0)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Uncorrelated NOT EXISTS
@@ -683,6 +739,8 @@ select *
 from functional.alltypestiny t
 where not exists (select * from functional.alltypessmall s where s.id < 5)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [LEFT ANTI JOIN]
 |
 |--01:SCAN HDFS [functional.alltypessmall s]
@@ -701,6 +759,8 @@ select *
 from w1 t
 where not exists (select 1 from w2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [LEFT ANTI JOIN]
 |
 |--01:SCAN HDFS [functional.alltypessmall s]
@@ -719,6 +779,8 @@ where not exists
    from functional.alltypesagg where tinyint_col = 10
    group by id, int_col, bigint_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [RIGHT ANTI JOIN]
 |
 |--00:SCAN HDFS [functional.alltypestiny t]
@@ -737,6 +799,8 @@ select 1
 from functional.alltypestiny t
 where not exists (select * from functional.alltypessmall limit 0)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny t]
    partitions=4/4 files=4 size=460B
 ====
@@ -748,6 +812,8 @@ where exists
     (select * from functional.alltypesagg g where g.int_col = t.int_col
      and g.bool_col = false))
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -790,6 +856,8 @@ and g.tinyint_col <
 group by g.int_col
 having count(*) < 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: g.int_col
@@ -847,6 +915,8 @@ where a.int_col in
   group by int_col)
 and a.bigint_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: a.int_col = count(int_col)
 |  runtime filters: RF000 <- count(int_col)
@@ -871,6 +941,8 @@ where a.int_col <
   (select max(int_col) from functional.alltypesagg g where g.bool_col = true)
 and a.bigint_col > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: a.int_col < max(int_col)
 |
@@ -891,6 +963,8 @@ from functional.alltypesagg a
 where (select max(id) from functional.alltypes t where t.bool_col = false) > 10
 and a.int_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--02:AGGREGATE [FINALIZE]
@@ -915,6 +989,8 @@ where a.id =
 and a.bool_col = false
 group by a.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: a.int_col
@@ -947,6 +1023,8 @@ where t.int_col <
 and a.bool_col = false
 group by t.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: t.tinyint_col
@@ -993,6 +1071,8 @@ and a.tinyint_col >
   (select max(tinyint_col) from functional.alltypessmall s where s.id < 10)
 and t.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: a.tinyint_col > max(tinyint_col)
 |
@@ -1041,6 +1121,8 @@ where t.int_col <
       where a.id = g.id
       and a.bool_col = false))
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t.id = g.id
 |  other join predicates: t.int_col < avg(g.int_col) * 2
@@ -1079,6 +1161,8 @@ where a.int_col <
    where s.id = a.id and s.tinyint_col >
      (select count(*) from functional.alltypestiny where bool_col = false))
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = s.id
 |  other predicates: a.int_col < zeroifnull(count(*))
@@ -1115,6 +1199,8 @@ from functional.alltypesagg g
 where 100 < (select count(*) from functional.alltypes where bool_col = false and id < 5)
 and bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  group by: id, bool_col
 |
@@ -1138,6 +1224,8 @@ from functional.alltypesagg g
 where 100 > (select count(distinct id) from functional.alltypestiny where int_col < 5)
 and g.bigint_col < 1000 and g.bigint_col = true
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  group by: g.id, g.bool_col, g.tinyint_col, g.smallint_col, g.int_col, g.bigint_col, g.float_col, g.double_col, g.date_string_col, g.string_col, g.timestamp_col, g.year, g.month, g.day
 |
@@ -1163,6 +1251,8 @@ select *
 from functional.alltypestiny t
 where (select max(int_col) from functional.alltypesagg where int_col is null) is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--02:AGGREGATE [FINALIZE]
@@ -1183,6 +1273,8 @@ where (select count(*) from functional.alltypesagg g where t.id = g.id) is null
 and bool_col = false
 group by int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: int_col
@@ -1214,6 +1306,8 @@ where
    where g.id = t.id and g.int_col is null) is null
 and t.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: g.id = t.id
 |  runtime filters: RF000 <- t.id
@@ -1239,6 +1333,8 @@ where 1 +
   (select count(*) from functional.alltypesagg where bool_col = false) = t.int_col + 2
 and t.bigint_col < 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t.int_col + 2 = 1 + count(*)
 |  runtime filters: RF000 <- 1 + count(*)
@@ -1262,6 +1358,8 @@ where nullifzero((select min(id) from functional.alltypessmall s where s.bool_co
   is null
 and t.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--02:AGGREGATE [FINALIZE]
@@ -1286,6 +1384,8 @@ where t.int_col <
    limit 1)
 group by t.bool_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: min(t.id)
 |  group by: t.bool_col
@@ -1313,6 +1413,8 @@ where int_col between
   (select min(int_col) from functional.alltypessmall where bool_col = false) and
   (select max(int_col) from functional.alltypessmall where bool_col = true)
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: int_col <= max(int_col)
 |
@@ -1344,6 +1446,8 @@ where
    from functional.alltypestiny tt1
    where t1.id = tt1.month) < t1.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: tt1.month = t1.id
 |  other predicates: zeroifnull(count(tt1.smallint_col)) < t1.id
@@ -1369,6 +1473,8 @@ where
   < 10
 group by int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: int_col
@@ -1398,6 +1504,8 @@ where
 and bool_col = false
 group by int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: int_col
@@ -1425,6 +1533,8 @@ select 1
 from functional.alltypestiny t1
 where (select count(*) from functional.alltypessmall) + t1.int_col = t1.bigint_col - 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: count(*) + t1.int_col = t1.bigint_col - 1
 |
@@ -1443,6 +1553,8 @@ select 1
 from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
 where (select count(*) from functional.alltypes) + 1 = t1.int_col + t2.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.int_col + t2.int_col = count(*) + 1
 |
@@ -1470,6 +1582,8 @@ from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.i
 where
   (select count(*) from functional.alltypes) + t2.bigint_col = t1.int_col + t2.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: count(*) + t2.bigint_col = t1.int_col + t2.int_col
 |
@@ -1498,6 +1612,8 @@ where
    from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
    where t1.id + t2.id = t.int_col) = t.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: sum(t1.id) = t.int_col
 |
@@ -1528,6 +1644,8 @@ where
    from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
    where t1.id + t2.id = t.bigint_col) = t.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: t1.id + t2.id = t.bigint_col, sum(t1.id) = t.int_col
 |
@@ -1560,6 +1678,8 @@ where
    on tt1.int_col = tt2.int_col
    where tt1.id + tt2.id = t1.int_col - t2.int_col) = t1.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: sum(tt1.id) = t1.bigint_col, tt1.id + tt2.id = t1.int_col - t2.int_col
 |
@@ -1602,6 +1722,8 @@ and not exists
    from functional.alltypesagg t3
    where t1.id = t3.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
@@ -1625,6 +1747,8 @@ and not exists
    from functional.alltypestiny t5
    where t1.id = t5.id and false)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny t1]
    partitions=4/4 files=4 size=460B
 ====
@@ -1647,6 +1771,8 @@ and not exists
    where t4.int_col = t1.tinyint_col
    having count(id) > 200)
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: t1.tinyint_col = t4.int_col
 |
@@ -1680,6 +1806,8 @@ where t1.id is not distinct from
 (select min(id) from functional.alltypes t2
 where t1.int_col is not distinct from t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.id IS NOT DISTINCT FROM min(id), t1.int_col IS NOT DISTINCT FROM t2.int_col
 |  runtime filters: RF000 <- min(id), RF001 <- t2.int_col
@@ -1700,6 +1828,8 @@ where t1.id is distinct from
 (select min(id) from functional.alltypes t2
 where t1.int_col is not distinct from t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.int_col IS NOT DISTINCT FROM t2.int_col
 |  other join predicates: t1.id IS DISTINCT FROM min(id)
@@ -1721,6 +1851,8 @@ where t1.id =
 (select min(id) from functional.alltypes t2
 where t1.int_col is not distinct from t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.id = min(id), t1.int_col IS NOT DISTINCT FROM t2.int_col
 |  runtime filters: RF000 <- min(id), RF001 <- t2.int_col
@@ -1741,6 +1873,8 @@ where t1.id !=
 (select min(id) from functional.alltypes t2
 where t1.int_col is not distinct from t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.int_col IS NOT DISTINCT FROM t2.int_col
 |  other join predicates: t1.id != min(id)
@@ -1762,6 +1896,8 @@ where t1.id is not distinct from
 (select min(id) from functional.alltypes t2
 where t1.int_col = t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.id IS NOT DISTINCT FROM min(id), t1.int_col = t2.int_col
 |  runtime filters: RF000 <- min(id), RF001 <- t2.int_col
@@ -1782,6 +1918,8 @@ where t1.id is distinct from
 (select min(id) from functional.alltypes t2
 where t1.int_col = t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.int_col = t2.int_col
 |  other join predicates: t1.id IS DISTINCT FROM min(id)
@@ -1803,6 +1941,8 @@ where t1.id =
 (select min(id) from functional.alltypes t2
 where t1.int_col = t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.id = min(id), t1.int_col = t2.int_col
 |  runtime filters: RF000 <- min(id), RF001 <- t2.int_col
@@ -1823,6 +1963,8 @@ where t1.id !=
 (select min(id) from functional.alltypes t2
 where t1.int_col = t2.int_col);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: t1.int_col = t2.int_col
 |  other join predicates: t1.id != min(id)
@@ -1850,6 +1992,8 @@ select 1 from functional.alltypes t where id in
    a.double_col between round(acos(t.float_col), 2)
                 and cast(t.string_col as int))
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
 |  other join predicates: a.tinyint_col >= t.tinyint_col, t.float_col >= a.float_col, a.smallint_col <= t.int_col, a.tinyint_col <= t.smallint_col, t.float_col <= a.double_col, a.double_col <= CAST(t.string_col AS INT), t.string_col >= a.string_col, a.double_col >= round(acos(t.float_col), 2)
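
The subquery-rewrite.test plans above show the rewrites the planner applies before plan generation: an IN subquery becomes a LEFT SEMI JOIN, NOT IN becomes a NULL AWARE LEFT ANTI JOIN, and EXISTS / NOT EXISTS become semi and anti joins, with correlated conjuncts turned into join predicates. A hedged sketch of the equivalence for the IN case, written with explicit join syntax; the rewritten form is an illustration of the plan shape, not output of this patch:

-- original form: IN subquery
SELECT count(*) FROM functional.alltypes
WHERE id IN (SELECT id FROM functional.alltypesagg);

-- roughly the shape the planner rewrites it to: a left semi join on id
SELECT count(*) FROM functional.alltypes t
LEFT SEMI JOIN functional.alltypesagg a ON t.id = a.id;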

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/topn.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/topn.test b/testdata/workloads/functional-planner/queries/PlannerTest/topn.test
index e9dcd43..4252ac7 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/topn.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/topn.test
@@ -3,12 +3,16 @@ from functional.testtbl
 order by name
 limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=1]
 |  order by: name ASC
 |
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: name ASC
 |  limit: 1
@@ -26,6 +30,8 @@ group by 1
 order by 2 desc
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=10]
 |  order by: count(*) DESC
 |
@@ -37,6 +43,8 @@ limit 10
    partitions=1/1 files=0 size=0B
    predicates: name LIKE 'm%'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC
 |  limit: 10
@@ -65,6 +73,8 @@ group by 1
 order by 2
 limit 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=4]
 |  order by: sum(float_col) ASC
 |
@@ -75,6 +85,8 @@ limit 4
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(float_col) ASC
 |  limit: 4
@@ -101,8 +113,12 @@ group by 1
 order by 2,3 desc
 limit 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Test correct identification of the implicit aliasing of int_col in the select
@@ -114,6 +130,8 @@ where t1.id = t2.id and t2.int_col is not null
 order by int_col
 limit 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=2]
 |  order by: int_col ASC
 |
@@ -129,6 +147,8 @@ limit 2
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> t1.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: int_col ASC
 |  limit: 2
@@ -153,12 +173,16 @@ limit 2
 # Test that the top-n is on int_col and not on the id column
 select int_col as id from functional.alltypessmall order by id limit 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=2]
 |  order by: int_col ASC
 |
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: int_col ASC
 |  limit: 2
@@ -172,12 +196,16 @@ select int_col as id from functional.alltypessmall order by id limit 2
 # Test that the top-n is on id and not on int_col
 select int_col as id from functional.alltypessmall order by functional.alltypessmall.id limit 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=2]
 |  order by: id ASC
 |
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC
 |  limit: 2
@@ -196,6 +224,8 @@ where t1.id = t2.id and t2.int_col is not null
 order by int_col
 limit 10 offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=10 OFFSET=5]
 |  order by: int_col ASC
 |
@@ -222,6 +252,8 @@ NODE 1:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: int_col ASC
@@ -251,6 +283,8 @@ select int_col, bigint_col from
    select * from functional.alltypessmall) t
 order by int_col desc limit 10 offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=10 OFFSET=5]
 |  order by: int_col DESC
 |
@@ -262,6 +296,8 @@ order by int_col desc limit 10 offset 5
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: int_col DESC
@@ -285,6 +321,8 @@ select int_col, bigint_col from
    select * from functional.alltypessmall) t
 order by int_col desc limit 10 offset 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=10 OFFSET=5]
 |  order by: int_col DESC
 |
@@ -299,6 +337,8 @@ order by int_col desc limit 10 offset 5
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:MERGING-EXCHANGE [UNPARTITIONED]
 |  offset: 5
 |  order by: int_col DESC
@@ -327,6 +367,8 @@ order by int_col desc limit 10 offset 5
 select * from (select * from functional.alltypes limit 10) t
 order by int_col limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=10]
 |  order by: int_col ASC
 |
@@ -334,6 +376,8 @@ order by int_col limit 10
    partitions=24/24 files=24 size=478.45KB
    limit: 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=10]
 |  order by: int_col ASC
 |
@@ -351,6 +395,8 @@ select * from
    (select * from functional.alltypessmall) limit 10) t
 order by int_col limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=10]
 |  order by: int_col ASC
 |
@@ -363,6 +409,8 @@ order by int_col limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=10]
 |  order by: int_col ASC
 |
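
The topn.test hunks above follow the same pattern for ordered, limited queries: the single-node plan uses a TOP-N operator, and the distributed plan merges the per-node top-N results through a MERGING-EXCHANGE, applying any OFFSET at the merge. A hedged example of a query with that plan shape, for illustration only:

EXPLAIN
SELECT int_col, bigint_col
FROM functional.alltypes
ORDER BY int_col DESC
LIMIT 10 OFFSET 5;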


[08/32] incubator-impala git commit: IMPALA-3644 Make predicate order deterministic

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index 492f7a4..4de9722 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -700,7 +700,7 @@ limit 100
 |
 |--00:SCAN HDFS [tpcds.date_dim]
 |     partitions=1/1 files=1 size=9.84MB
-|     predicates: d_year = 1999, d_moy = 11, tpcds.date_dim.d_date_sk >= 2451484, tpcds.date_dim.d_date_sk <= 2451513
+|     predicates: d_year = 1999, d_moy = 11, tpcds.date_dim.d_date_sk <= 2451513, tpcds.date_dim.d_date_sk >= 2451484
 |
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: ss_item_sk = i_item_sk
@@ -765,7 +765,7 @@ limit 100
 |  |
 |  00:SCAN HDFS [tpcds.date_dim]
 |     partitions=1/1 files=1 size=9.84MB
-|     predicates: d_year = 1999, d_moy = 11, tpcds.date_dim.d_date_sk >= 2451484, tpcds.date_dim.d_date_sk <= 2451513
+|     predicates: d_year = 1999, d_moy = 11, tpcds.date_dim.d_date_sk <= 2451513, tpcds.date_dim.d_date_sk >= 2451484
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: ss_item_sk = i_item_sk
@@ -844,7 +844,7 @@ limit 100
 |  |
 |  00:SCAN HDFS [tpcds.date_dim]
 |     partitions=1/1 files=1 size=9.84MB
-|     predicates: d_year = 1999, d_moy = 11, tpcds.date_dim.d_date_sk >= 2451484, tpcds.date_dim.d_date_sk <= 2451513
+|     predicates: d_year = 1999, d_moy = 11, tpcds.date_dim.d_date_sk <= 2451513, tpcds.date_dim.d_date_sk >= 2451484
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: ss_item_sk = i_item_sk
@@ -1161,7 +1161,7 @@ limit 100000
 |--07:AGGREGATE [FINALIZE]
 |  |  output: count(*)
 |  |  group by: ss_ticket_number, ss_customer_sk
-|  |  having: count(*) >= 15, count(*) <= 20
+|  |  having: count(*) <= 20, count(*) >= 15
 |  |
 |  06:HASH JOIN [INNER JOIN]
 |  |  hash predicates: store_sales.ss_store_sk = store.s_store_sk
@@ -1211,7 +1211,7 @@ limit 100000
 |  15:AGGREGATE [FINALIZE]
 |  |  output: count:merge(*)
 |  |  group by: ss_ticket_number, ss_customer_sk
-|  |  having: count(*) >= 15, count(*) <= 20
+|  |  having: count(*) <= 20, count(*) >= 15
 |  |
 |  14:EXCHANGE [HASH(ss_ticket_number,ss_customer_sk)]
 |  |
@@ -1277,7 +1277,7 @@ limit 100000
 |  15:AGGREGATE [FINALIZE]
 |  |  output: count:merge(*)
 |  |  group by: ss_ticket_number, ss_customer_sk
-|  |  having: count(*) >= 15, count(*) <= 20
+|  |  having: count(*) <= 20, count(*) >= 15
 |  |
 |  14:EXCHANGE [HASH(ss_ticket_number,ss_customer_sk)]
 |  |
@@ -2525,7 +2525,7 @@ limit 100
 |  |
 |  |--13:SCAN HDFS [tpcds.date_dim d]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23
+|  |     predicates: d_month_seq <= 1185 + 23, d_month_seq >= 1185 + 12
 |  |
 |  11:AGGREGATE [FINALIZE]
 |  |  output: sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END)
@@ -2606,7 +2606,7 @@ limit 100
 |  |  |
 |  |  13:SCAN HDFS [tpcds.date_dim d]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23
+|  |     predicates: d_month_seq <= 1185 + 23, d_month_seq >= 1185 + 12
 |  |
 |  27:AGGREGATE [FINALIZE]
 |  |  output: sum:merge(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END)
@@ -2725,7 +2725,7 @@ limit 100
 |  |  |
 |  |  13:SCAN HDFS [tpcds.date_dim d]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23
+|  |     predicates: d_month_seq <= 1185 + 23, d_month_seq >= 1185 + 12
 |  |
 |  27:AGGREGATE [FINALIZE]
 |  |  output: sum:merge(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END)
@@ -2897,7 +2897,7 @@ limit 100
 |
 02:SCAN HDFS [tpcds.date_dim]
    partitions=1/1 files=1 size=9.84MB
-   predicates: tpcds.date_dim.d_date_sk >= 2451911, tpcds.date_dim.d_date_sk <= 2452275, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
+   predicates: tpcds.date_dim.d_date_sk <= 2452275, tpcds.date_dim.d_date_sk >= 2451911, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ---- DISTRIBUTEDPLAN
 17:MERGING-EXCHANGE [UNPARTITIONED]
@@ -2958,7 +2958,7 @@ limit 100
 |
 02:SCAN HDFS [tpcds.date_dim]
    partitions=1/1 files=1 size=9.84MB
-   predicates: tpcds.date_dim.d_date_sk >= 2451911, tpcds.date_dim.d_date_sk <= 2452275, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
+   predicates: tpcds.date_dim.d_date_sk <= 2452275, tpcds.date_dim.d_date_sk >= 2451911, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ---- PARALLELPLANS
 17:MERGING-EXCHANGE [UNPARTITIONED]
@@ -3031,7 +3031,7 @@ limit 100
 |
 02:SCAN HDFS [tpcds.date_dim]
    partitions=1/1 files=1 size=9.84MB
-   predicates: tpcds.date_dim.d_date_sk >= 2451911, tpcds.date_dim.d_date_sk <= 2452275, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
+   predicates: tpcds.date_dim.d_date_sk <= 2452275, tpcds.date_dim.d_date_sk >= 2451911, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ====
 # TPCDS-Q65
@@ -3444,7 +3444,7 @@ limit 100
 |  |
 |  |--01:SCAN HDFS [tpcds.date_dim]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_date >= '1999-01-01', d_date <= '1999-03-31'
+|  |     predicates: d_date <= '1999-03-31', d_date >= '1999-01-01'
 |  |
 |  05:HASH JOIN [INNER JOIN]
 |  |  hash predicates: store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
@@ -3522,7 +3522,7 @@ limit 100
 |  |  |
 |  |  01:SCAN HDFS [tpcds.date_dim]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_date >= '1999-01-01', d_date <= '1999-03-31'
+|  |     predicates: d_date <= '1999-03-31', d_date >= '1999-01-01'
 |  |
 |  05:HASH JOIN [INNER JOIN, BROADCAST]
 |  |  hash predicates: store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
@@ -3622,7 +3622,7 @@ limit 100
 |  |  |
 |  |  01:SCAN HDFS [tpcds.date_dim]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_date >= '1999-01-01', d_date <= '1999-03-31'
+|  |     predicates: d_date <= '1999-03-31', d_date >= '1999-01-01'
 |  |
 |  05:HASH JOIN [INNER JOIN, BROADCAST]
 |  |  hash predicates: store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
@@ -3695,7 +3695,7 @@ limit 1000
 |--07:AGGREGATE [FINALIZE]
 |  |  output: count(*)
 |  |  group by: ss_ticket_number, ss_customer_sk
-|  |  having: count(*) >= 1, count(*) <= 5
+|  |  having: count(*) <= 5, count(*) >= 1
 |  |
 |  06:HASH JOIN [INNER JOIN]
 |  |  hash predicates: store_sales.ss_store_sk = store.s_store_sk
@@ -3744,7 +3744,7 @@ limit 1000
 |  16:AGGREGATE [FINALIZE]
 |  |  output: count:merge(*)
 |  |  group by: ss_ticket_number, ss_customer_sk
-|  |  having: count(*) >= 1, count(*) <= 5
+|  |  having: count(*) <= 5, count(*) >= 1
 |  |
 |  15:EXCHANGE [HASH(ss_ticket_number,ss_customer_sk)]
 |  |
@@ -3811,7 +3811,7 @@ limit 1000
 |  16:AGGREGATE [FINALIZE]
 |  |  output: count:merge(*)
 |  |  group by: ss_ticket_number, ss_customer_sk
-|  |  having: count(*) >= 1, count(*) <= 5
+|  |  having: count(*) <= 5, count(*) >= 1
 |  |
 |  15:EXCHANGE [HASH(ss_ticket_number,ss_customer_sk)]
 |  |
@@ -3931,7 +3931,7 @@ limit 100
 |  |
 |  |--01:SCAN HDFS [tpcds.date_dim]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_date >= '1999-01-01', d_date <= '1999-03-31'
+|  |     predicates: d_date <= '1999-03-31', d_date >= '1999-01-01'
 |  |
 |  05:HASH JOIN [INNER JOIN]
 |  |  hash predicates: store_sales.ss_store_sk = store.s_store_sk
@@ -3939,7 +3939,7 @@ limit 100
 |  |
 |  |--02:SCAN HDFS [tpcds.store]
 |  |     partitions=1/1 files=1 size=3.08KB
-|  |     predicates: store.s_number_employees >= 200, store.s_number_employees <= 295
+|  |     predicates: store.s_number_employees <= 295, store.s_number_employees >= 200
 |  |
 |  04:HASH JOIN [INNER JOIN]
 |  |  hash predicates: store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
@@ -3988,7 +3988,7 @@ limit 100
 |  |  |
 |  |  01:SCAN HDFS [tpcds.date_dim]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_date >= '1999-01-01', d_date <= '1999-03-31'
+|  |     predicates: d_date <= '1999-03-31', d_date >= '1999-01-01'
 |  |
 |  13:EXCHANGE [HASH(store_sales.ss_sold_date_sk)]
 |  |
@@ -4000,7 +4000,7 @@ limit 100
 |  |  |
 |  |  02:SCAN HDFS [tpcds.store]
 |  |     partitions=1/1 files=1 size=3.08KB
-|  |     predicates: store.s_number_employees >= 200, store.s_number_employees <= 295
+|  |     predicates: store.s_number_employees <= 295, store.s_number_employees >= 200
 |  |
 |  04:HASH JOIN [INNER JOIN, BROADCAST]
 |  |  hash predicates: store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
@@ -4059,7 +4059,7 @@ limit 100
 |  |  |
 |  |  01:SCAN HDFS [tpcds.date_dim]
 |  |     partitions=1/1 files=1 size=9.84MB
-|  |     predicates: d_date >= '1999-01-01', d_date <= '1999-03-31'
+|  |     predicates: d_date <= '1999-03-31', d_date >= '1999-01-01'
 |  |
 |  13:EXCHANGE [HASH(store_sales.ss_sold_date_sk)]
 |  |
@@ -4075,7 +4075,7 @@ limit 100
 |  |  |
 |  |  02:SCAN HDFS [tpcds.store]
 |  |     partitions=1/1 files=1 size=3.08KB
-|  |     predicates: store.s_number_employees >= 200, store.s_number_employees <= 295
+|  |     predicates: store.s_number_employees <= 295, store.s_number_employees >= 200
 |  |
 |  04:HASH JOIN [INNER JOIN, BROADCAST]
 |  |  hash predicates: store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
@@ -4159,7 +4159,7 @@ limit 100) tmp2
 |
 |--02:SCAN HDFS [tpcds.date_dim]
 |     partitions=1/1 files=1 size=9.84MB
-|     predicates: d_year IN (2000), tpcds.date_dim.d_date_sk >= 2451545, tpcds.date_dim.d_date_sk <= 2451910
+|     predicates: d_year IN (2000), tpcds.date_dim.d_date_sk <= 2451910, tpcds.date_dim.d_date_sk >= 2451545
 |
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: ss_item_sk = i_item_sk
@@ -4217,7 +4217,7 @@ limit 100) tmp2
 |  |
 |  02:SCAN HDFS [tpcds.date_dim]
 |     partitions=1/1 files=1 size=9.84MB
-|     predicates: d_year IN (2000), tpcds.date_dim.d_date_sk >= 2451545, tpcds.date_dim.d_date_sk <= 2451910
+|     predicates: d_year IN (2000), tpcds.date_dim.d_date_sk <= 2451910, tpcds.date_dim.d_date_sk >= 2451545
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: ss_item_sk = i_item_sk
@@ -4285,7 +4285,7 @@ limit 100) tmp2
 |  |
 |  02:SCAN HDFS [tpcds.date_dim]
 |     partitions=1/1 files=1 size=9.84MB
-|     predicates: d_year IN (2000), tpcds.date_dim.d_date_sk >= 2451545, tpcds.date_dim.d_date_sk <= 2451910
+|     predicates: d_year IN (2000), tpcds.date_dim.d_date_sk <= 2451910, tpcds.date_dim.d_date_sk >= 2451545
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: ss_item_sk = i_item_sk
@@ -4512,7 +4512,7 @@ limit 1000
 |
 02:SCAN HDFS [tpcds.date_dim]
    partitions=1/1 files=1 size=9.84MB
-   predicates: tpcds.date_dim.d_date_sk >= 2451911, tpcds.date_dim.d_date_sk <= 2451941, d_date >= '2001-01-01', d_date <= '2001-01-31'
+   predicates: tpcds.date_dim.d_date_sk <= 2451941, tpcds.date_dim.d_date_sk >= 2451911, d_date <= '2001-01-31', d_date >= '2001-01-01'
    runtime filters: RF000 -> d_date_sk
 ---- DISTRIBUTEDPLAN
 13:MERGING-EXCHANGE [UNPARTITIONED]
@@ -4561,7 +4561,7 @@ limit 1000
 |
 02:SCAN HDFS [tpcds.date_dim]
    partitions=1/1 files=1 size=9.84MB
-   predicates: tpcds.date_dim.d_date_sk >= 2451911, tpcds.date_dim.d_date_sk <= 2451941, d_date >= '2001-01-01', d_date <= '2001-01-31'
+   predicates: tpcds.date_dim.d_date_sk <= 2451941, tpcds.date_dim.d_date_sk >= 2451911, d_date <= '2001-01-31', d_date >= '2001-01-01'
    runtime filters: RF000 -> d_date_sk
 ---- PARALLELPLANS
 13:MERGING-EXCHANGE [UNPARTITIONED]
@@ -4618,7 +4618,7 @@ limit 1000
 |
 02:SCAN HDFS [tpcds.date_dim]
    partitions=1/1 files=1 size=9.84MB
-   predicates: tpcds.date_dim.d_date_sk >= 2451911, tpcds.date_dim.d_date_sk <= 2451941, d_date >= '2001-01-01', d_date <= '2001-01-31'
+   predicates: tpcds.date_dim.d_date_sk <= 2451941, tpcds.date_dim.d_date_sk >= 2451911, d_date <= '2001-01-31', d_date >= '2001-01-01'
    runtime filters: RF000 -> d_date_sk
 ====
 # TPCDS-Q6
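
The tpcds-all.test hunks above (and the tpch-all.test hunks that follow) belong to IMPALA-3644 and only reorder how conjuncts and runtime filters are printed: semantically identical predicates, such as the d_date_sk range below, are now listed in a fixed order (here the <= bound before the >= bound), presumably so that EXPLAIN output is stable across runs. The exact comparator is not visible in this excerpt; the example below only illustrates the observable effect, namely that both spellings of the range should now print the scan's predicates in the same order:

EXPLAIN SELECT count(*) FROM tpcds.date_dim
WHERE d_date_sk >= 2451484 AND d_date_sk <= 2451513;

EXPLAIN SELECT count(*) FROM tpcds.date_dim
WHERE d_date_sk <= 2451513 AND d_date_sk >= 2451484;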

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index 7864153..fc6f4e2 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -124,8 +124,8 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 17:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: min(ps_supplycost) = ps_supplycost, ps_partkey = p_partkey
-|  runtime filters: RF001 <- p_partkey
+|  hash predicates: ps_partkey = p_partkey, min(ps_supplycost) = ps_supplycost
+|  runtime filters: RF000 <- p_partkey
 |
 |--16:HASH JOIN [INNER JOIN]
 |  |  hash predicates: n_regionkey = r_regionkey
@@ -193,7 +193,7 @@ limit 100
 |
 05:SCAN HDFS [tpch.partsupp]
    partitions=1/1 files=1 size=112.71MB
-   runtime filters: RF001 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
+   runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
 ---- DISTRIBUTEDPLAN
 30:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
@@ -203,10 +203,10 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 17:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
-|  hash predicates: min(ps_supplycost) = ps_supplycost, ps_partkey = p_partkey
-|  runtime filters: RF001 <- p_partkey
+|  hash predicates: ps_partkey = p_partkey, min(ps_supplycost) = ps_supplycost
+|  runtime filters: RF000 <- p_partkey
 |
-|--29:EXCHANGE [HASH(ps_supplycost,p_partkey)]
+|--29:EXCHANGE [HASH(p_partkey,ps_supplycost)]
 |  |
 |  16:HASH JOIN [INNER JOIN, BROADCAST]
 |  |  hash predicates: n_regionkey = r_regionkey
@@ -252,7 +252,7 @@ limit 100
 |     partitions=1/1 files=1 size=1.33MB
 |     runtime filters: RF006 -> s_nationkey, RF007 -> s_suppkey
 |
-28:EXCHANGE [HASH(min(ps_supplycost),ps_partkey)]
+28:EXCHANGE [HASH(ps_partkey,min(ps_supplycost))]
 |
 23:AGGREGATE [FINALIZE]
 |  output: min:merge(ps_supplycost)
@@ -296,7 +296,7 @@ limit 100
 |
 05:SCAN HDFS [tpch.partsupp]
    partitions=1/1 files=1 size=112.71MB
-   runtime filters: RF001 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
+   runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
 ---- PARALLELPLANS
 30:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
@@ -306,14 +306,14 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 17:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
-|  hash predicates: min(ps_supplycost) = ps_supplycost, ps_partkey = p_partkey
-|  runtime filters: RF001 <- p_partkey
+|  hash predicates: ps_partkey = p_partkey, min(ps_supplycost) = ps_supplycost
+|  runtime filters: RF000 <- p_partkey
 |
 |--JOIN BUILD
 |  |  join-table-id=00 plan-id=01 cohort-id=01
-|  |  build expressions: ps_supplycost, p_partkey
+|  |  build expressions: p_partkey, ps_supplycost
 |  |
-|  29:EXCHANGE [HASH(ps_supplycost,p_partkey)]
+|  29:EXCHANGE [HASH(p_partkey,ps_supplycost)]
 |  |
 |  16:HASH JOIN [INNER JOIN, BROADCAST]
 |  |  hash predicates: n_regionkey = r_regionkey
@@ -375,7 +375,7 @@ limit 100
 |     partitions=1/1 files=1 size=1.33MB
 |     runtime filters: RF006 -> s_nationkey, RF007 -> s_suppkey
 |
-28:EXCHANGE [HASH(min(ps_supplycost),ps_partkey)]
+28:EXCHANGE [HASH(ps_partkey,min(ps_supplycost))]
 |
 23:AGGREGATE [FINALIZE]
 |  output: min:merge(ps_supplycost)
@@ -431,7 +431,7 @@ limit 100
 |
 05:SCAN HDFS [tpch.partsupp]
    partitions=1/1 files=1 size=112.71MB
-   runtime filters: RF001 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
+   runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
 ====
 # TPCH-Q3
 # Q3 - Shipping Priority Query
@@ -619,7 +619,7 @@ order by
 |
 |--00:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
+|     predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
@@ -650,7 +650,7 @@ order by
 |  |
 |  00:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
+|     predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
 |
 05:EXCHANGE [HASH(l_orderkey)]
 |
@@ -687,7 +687,7 @@ order by
 |  |
 |  00:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
+|     predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
 |
 05:EXCHANGE [HASH(l_orderkey)]
 |
@@ -747,8 +747,8 @@ order by
 |     runtime filters: RF000 -> n_regionkey
 |
 08:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_suppkey, RF003 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF002 <- s_nationkey, RF003 <- s_suppkey
 |
 |--03:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
@@ -760,7 +760,7 @@ order by
 |
 |--00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
-|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF003 -> c_nationkey
+|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF002 -> c_nationkey
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: l_orderkey = o_orderkey
@@ -768,12 +768,12 @@ order by
 |
 |--01:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
+|     predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
 |     runtime filters: RF004 -> o_custkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF002 -> l_suppkey, RF005 -> l_orderkey
+   runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ---- DISTRIBUTEDPLAN
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
@@ -812,8 +812,8 @@ order by
 |     runtime filters: RF000 -> n_regionkey
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_suppkey, RF003 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF002 <- s_nationkey, RF003 <- s_suppkey
 |
 |--15:EXCHANGE [BROADCAST]
 |  |
@@ -829,7 +829,7 @@ order by
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
-|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF003 -> c_nationkey
+|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF002 -> c_nationkey
 |
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: l_orderkey = o_orderkey
@@ -839,12 +839,12 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
+|     predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
 |     runtime filters: RF004 -> o_custkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF002 -> l_suppkey, RF005 -> l_orderkey
+   runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ---- PARALLELPLANS
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
@@ -891,12 +891,12 @@ order by
 |     runtime filters: RF000 -> n_regionkey
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_suppkey, RF003 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF002 <- s_nationkey, RF003 <- s_suppkey
 |
 |--JOIN BUILD
 |  |  join-table-id=02 plan-id=03 cohort-id=01
-|  |  build expressions: s_suppkey, s_nationkey
+|  |  build expressions: s_nationkey, s_suppkey
 |  |
 |  15:EXCHANGE [BROADCAST]
 |  |
@@ -916,7 +916,7 @@ order by
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
-|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF003 -> c_nationkey
+|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF002 -> c_nationkey
 |
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: l_orderkey = o_orderkey
@@ -930,12 +930,12 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
+|     predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
 |     runtime filters: RF004 -> o_custkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF002 -> l_suppkey, RF005 -> l_orderkey
+   runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ====
 # TPCH-Q6
 # Q6 - Forecasting Revenue Change Query
@@ -954,7 +954,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ---- DISTRIBUTEDPLAN
 03:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * l_discount)
@@ -966,7 +966,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ---- PARALLELPLANS
 03:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * l_discount)
@@ -978,7 +978,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ====
 # TPCH-Q7
 # Q7 - Volume Shipping Query
@@ -1069,7 +1069,7 @@ order by
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
+   predicates: l_shipdate <= '1996-12-31', l_shipdate >= '1995-01-01'
    runtime filters: RF003 -> l_suppkey, RF004 -> l_orderkey
 ---- DISTRIBUTEDPLAN
 21:MERGING-EXCHANGE [UNPARTITIONED]
@@ -1141,7 +1141,7 @@ order by
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
+   predicates: l_shipdate <= '1996-12-31', l_shipdate >= '1995-01-01'
    runtime filters: RF003 -> l_suppkey, RF004 -> l_orderkey
 ---- PARALLELPLANS
 21:MERGING-EXCHANGE [UNPARTITIONED]
@@ -1233,7 +1233,7 @@ order by
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
+   predicates: l_shipdate <= '1996-12-31', l_shipdate >= '1995-01-01'
    runtime filters: RF003 -> l_suppkey, RF004 -> l_orderkey
 ====
 # TPCH-Q8
@@ -1336,7 +1336,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
+|     predicates: o_orderdate <= '1996-12-31', o_orderdate >= '1995-01-01'
 |     runtime filters: RF005 -> o_orderkey
 |
 04:SCAN HDFS [tpch.customer]
@@ -1430,7 +1430,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
+|     predicates: o_orderdate <= '1996-12-31', o_orderdate >= '1995-01-01'
 |     runtime filters: RF005 -> o_orderkey
 |
 04:SCAN HDFS [tpch.customer]
@@ -1552,7 +1552,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
+|     predicates: o_orderdate <= '1996-12-31', o_orderdate >= '1995-01-01'
 |     runtime filters: RF005 -> o_orderkey
 |
 04:SCAN HDFS [tpch.customer]
@@ -1608,8 +1608,8 @@ order by
 |     partitions=1/1 files=1 size=2.15KB
 |
 09:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
-|  runtime filters: RF001 <- ps_suppkey, RF002 <- ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
+|  runtime filters: RF001 <- ps_partkey, RF002 <- ps_suppkey
 |
 |--03:SCAN HDFS [tpch.partsupp]
 |     partitions=1/1 files=1 size=112.71MB
@@ -1620,7 +1620,7 @@ order by
 |
 |--01:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
-|     runtime filters: RF000 -> s_nationkey, RF001 -> tpch.supplier.s_suppkey
+|     runtime filters: RF000 -> s_nationkey, RF002 -> tpch.supplier.s_suppkey
 |
 07:HASH JOIN [INNER JOIN]
 |  hash predicates: l_orderkey = o_orderkey
@@ -1636,11 +1636,11 @@ order by
 |--00:SCAN HDFS [tpch.part]
 |     partitions=1/1 files=1 size=22.83MB
 |     predicates: p_name LIKE '%green%'
-|     runtime filters: RF002 -> tpch.part.p_partkey
+|     runtime filters: RF001 -> tpch.part.p_partkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF001 -> l_suppkey, RF002 -> l_partkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
+   runtime filters: RF001 -> l_partkey, RF002 -> l_suppkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
 ---- DISTRIBUTEDPLAN
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: nation ASC, o_year DESC
@@ -1668,8 +1668,8 @@ order by
 |     partitions=1/1 files=1 size=2.15KB
 |
 09:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
-|  runtime filters: RF001 <- ps_suppkey, RF002 <- ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
+|  runtime filters: RF001 <- ps_partkey, RF002 <- ps_suppkey
 |
 |--17:EXCHANGE [BROADCAST]
 |  |
@@ -1684,7 +1684,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
-|     runtime filters: RF000 -> s_nationkey, RF001 -> tpch.supplier.s_suppkey
+|     runtime filters: RF000 -> s_nationkey, RF002 -> tpch.supplier.s_suppkey
 |
 07:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_orderkey = o_orderkey
@@ -1706,11 +1706,11 @@ order by
 |  00:SCAN HDFS [tpch.part]
 |     partitions=1/1 files=1 size=22.83MB
 |     predicates: p_name LIKE '%green%'
-|     runtime filters: RF002 -> tpch.part.p_partkey
+|     runtime filters: RF001 -> tpch.part.p_partkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF001 -> l_suppkey, RF002 -> l_partkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
+   runtime filters: RF001 -> l_partkey, RF002 -> l_suppkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
 ---- PARALLELPLANS
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: nation ASC, o_year DESC
@@ -1742,12 +1742,12 @@ order by
 |     partitions=1/1 files=1 size=2.15KB
 |
 09:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
-|  runtime filters: RF001 <- ps_suppkey, RF002 <- ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
+|  runtime filters: RF001 <- ps_partkey, RF002 <- ps_suppkey
 |
 |--JOIN BUILD
 |  |  join-table-id=01 plan-id=02 cohort-id=01
-|  |  build expressions: ps_suppkey, ps_partkey
+|  |  build expressions: ps_partkey, ps_suppkey
 |  |
 |  17:EXCHANGE [BROADCAST]
 |  |
@@ -1766,7 +1766,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
-|     runtime filters: RF000 -> s_nationkey, RF001 -> tpch.supplier.s_suppkey
+|     runtime filters: RF000 -> s_nationkey, RF002 -> tpch.supplier.s_suppkey
 |
 07:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_orderkey = o_orderkey
@@ -1796,11 +1796,11 @@ order by
 |  00:SCAN HDFS [tpch.part]
 |     partitions=1/1 files=1 size=22.83MB
 |     predicates: p_name LIKE '%green%'
-|     runtime filters: RF002 -> tpch.part.p_partkey
+|     runtime filters: RF001 -> tpch.part.p_partkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF001 -> l_suppkey, RF002 -> l_partkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
+   runtime filters: RF001 -> l_partkey, RF002 -> l_suppkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
 ====
 # TPCH-Q10
 # Q10 - Returned Item Reporting Query
@@ -1862,7 +1862,7 @@ limit 20
 |  |
 |  |--01:SCAN HDFS [tpch.orders]
 |  |     partitions=1/1 files=1 size=162.56MB
-|  |     predicates: o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
+|  |     predicates: o_orderdate < '1994-01-01', o_orderdate >= '1993-10-01'
 |  |
 |  02:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
@@ -1913,7 +1913,7 @@ limit 20
 |  |  |
 |  |  01:SCAN HDFS [tpch.orders]
 |  |     partitions=1/1 files=1 size=162.56MB
-|  |     predicates: o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
+|  |     predicates: o_orderdate < '1994-01-01', o_orderdate >= '1993-10-01'
 |  |
 |  02:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
@@ -1976,7 +1976,7 @@ limit 20
 |  |  |
 |  |  01:SCAN HDFS [tpch.orders]
 |  |     partitions=1/1 files=1 size=162.56MB
-|  |     predicates: o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
+|  |     predicates: o_orderdate < '1994-01-01', o_orderdate >= '1993-10-01'
 |  |
 |  02:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
@@ -2294,7 +2294,7 @@ order by
 |
 |--01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
+|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_receiptdate < '1995-01-01', l_receiptdate >= '1994-01-01', l_shipdate < l_commitdate
 |
 00:SCAN HDFS [tpch.orders]
    partitions=1/1 files=1 size=162.56MB
@@ -2324,7 +2324,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
+|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_receiptdate < '1995-01-01', l_receiptdate >= '1994-01-01', l_shipdate < l_commitdate
 |
 05:EXCHANGE [HASH(o_orderkey)]
 |
@@ -2360,7 +2360,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
+|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_receiptdate < '1995-01-01', l_receiptdate >= '1994-01-01', l_shipdate < l_commitdate
 |
 05:EXCHANGE [HASH(o_orderkey)]
 |
@@ -2518,7 +2518,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
+   predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ---- DISTRIBUTEDPLAN
 07:AGGREGATE [FINALIZE]
@@ -2542,7 +2542,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
+   predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ---- PARALLELPLANS
 07:AGGREGATE [FINALIZE]
@@ -2570,7 +2570,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
+   predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ====
 # TPCH-Q15
@@ -2621,7 +2621,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: s_suppkey = l_suppkey
@@ -2633,7 +2633,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 00:SCAN HDFS [tpch.supplier]
    partitions=1/1 files=1 size=1.33MB
@@ -2670,7 +2670,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: s_suppkey = l_suppkey
@@ -2690,7 +2690,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 00:SCAN HDFS [tpch.supplier]
    partitions=1/1 files=1 size=1.33MB
@@ -2731,7 +2731,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: s_suppkey = l_suppkey
@@ -2755,7 +2755,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 00:SCAN HDFS [tpch.supplier]
    partitions=1/1 files=1 size=1.33MB
@@ -3423,9 +3423,9 @@ order by
 |     runtime filters: RF004 -> s_nationkey
 |
 07:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
 |  other join predicates: ps_availqty > 0.5 * sum(l_quantity)
-|  runtime filters: RF001 <- ps_suppkey, RF002 <- ps_partkey
+|  runtime filters: RF001 <- ps_partkey, RF002 <- ps_suppkey
 |
 |--06:HASH JOIN [LEFT SEMI JOIN]
 |  |  hash predicates: ps_partkey = p_partkey
@@ -3445,8 +3445,8 @@ order by
 |
 04:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
-   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey
+   predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
+   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey
 ---- DISTRIBUTEDPLAN
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_name ASC
@@ -3477,9 +3477,9 @@ order by
 16:EXCHANGE [HASH(ps_suppkey)]
 |
 07:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
 |  other join predicates: ps_availqty > 0.5 * sum(l_quantity)
-|  runtime filters: RF001 <- ps_suppkey, RF002 <- ps_partkey
+|  runtime filters: RF001 <- ps_partkey, RF002 <- ps_suppkey
 |
 |--14:EXCHANGE [HASH(ps_partkey,ps_suppkey)]
 |  |
@@ -3509,8 +3509,8 @@ order by
 |
 04:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
-   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey
+   predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
+   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey
 ---- PARALLELPLANS
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_name ASC
@@ -3549,13 +3549,13 @@ order by
 16:EXCHANGE [HASH(ps_suppkey)]
 |
 07:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
 |  other join predicates: ps_availqty > 0.5 * sum(l_quantity)
-|  runtime filters: RF001 <- ps_suppkey, RF002 <- ps_partkey
+|  runtime filters: RF001 <- ps_partkey, RF002 <- ps_suppkey
 |
 |--JOIN BUILD
 |  |  join-table-id=02 plan-id=03 cohort-id=01
-|  |  build expressions: ps_suppkey, ps_partkey
+|  |  build expressions: ps_partkey, ps_suppkey
 |  |
 |  14:EXCHANGE [HASH(ps_partkey,ps_suppkey)]
 |  |
@@ -3589,8 +3589,8 @@ order by
 |
 04:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
-   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey
+   predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
+   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey
 ====
 # TPCH-Q21
 # Q21 - Suppliers Who Kept Orders Waiting Query

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index 5659b4b..6c70c35 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -81,7 +81,7 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 17:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: min(ps_supplycost) = ps_supplycost, ps_partkey = p_partkey
+|  hash predicates: ps_partkey = p_partkey, min(ps_supplycost) = ps_supplycost
 |
 |--16:HASH JOIN [INNER JOIN]
 |  |  hash predicates: n_regionkey = r_regionkey
@@ -261,7 +261,7 @@ order by
 |--04:SCAN KUDU [tpch_kudu.nation]
 |
 08:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
 |
 |--03:SCAN KUDU [tpch_kudu.supplier]
 |
@@ -274,7 +274,7 @@ order by
 |  hash predicates: l_orderkey = o_orderkey
 |
 |--01:SCAN KUDU [tpch_kudu.orders]
-|     kudu predicates: o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
+|     kudu predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
 |
 02:SCAN KUDU [tpch_kudu.lineitem]
 ====
@@ -293,7 +293,7 @@ where
 |  output: sum(l_extendedprice * l_discount)
 |
 00:SCAN KUDU [tpch_kudu.lineitem]
-   kudu predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   kudu predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ====
 # Q7 - Volume Shipping Query
 select
@@ -369,7 +369,7 @@ order by
 |--02:SCAN KUDU [tpch_kudu.orders]
 |
 01:SCAN KUDU [tpch_kudu.lineitem]
-   kudu predicates: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
+   kudu predicates: l_shipdate <= '1996-12-31', l_shipdate >= '1995-01-01'
 ====
 # Q8 - National Market Share Query
 select
@@ -453,7 +453,7 @@ order by
 |  |  02:SCAN KUDU [tpch_kudu.lineitem]
 |  |
 |  03:SCAN KUDU [tpch_kudu.orders]
-|     kudu predicates: o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
+|     kudu predicates: o_orderdate <= '1996-12-31', o_orderdate >= '1995-01-01'
 |
 04:SCAN KUDU [tpch_kudu.customer]
 ====
@@ -503,7 +503,7 @@ order by
 |--05:SCAN KUDU [tpch_kudu.nation]
 |
 09:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
 |
 |--03:SCAN KUDU [tpch_kudu.partsupp]
 |
@@ -579,7 +579,7 @@ limit 20
 |  |  hash predicates: l_orderkey = o_orderkey
 |  |
 |  |--01:SCAN KUDU [tpch_kudu.orders]
-|  |     kudu predicates: o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
+|  |     kudu predicates: o_orderdate < '1994-01-01', o_orderdate >= '1993-10-01'
 |  |
 |  02:SCAN KUDU [tpch_kudu.lineitem]
 |     kudu predicates: l_returnflag = 'R'
@@ -702,7 +702,7 @@ order by
 |
 |--01:SCAN KUDU [tpch_kudu.lineitem]
 |     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate
-|     kudu predicates: l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
+|     kudu predicates: l_receiptdate < '1995-01-01', l_receiptdate >= '1994-01-01'
 |
 00:SCAN KUDU [tpch_kudu.orders]
 ====
@@ -771,7 +771,7 @@ where
 |--01:SCAN KUDU [tpch_kudu.part]
 |
 00:SCAN KUDU [tpch_kudu.lineitem]
-   kudu predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
+   kudu predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
 ====
 # Q15 - Top Supplier Query
 with revenue_view as (
@@ -819,7 +819,7 @@ order by
 |  |  group by: l_suppkey
 |  |
 |  03:SCAN KUDU [tpch_kudu.lineitem]
-|     kudu predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     kudu predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: s_suppkey = l_suppkey
@@ -829,7 +829,7 @@ order by
 |  |  group by: l_suppkey
 |  |
 |  01:SCAN KUDU [tpch_kudu.lineitem]
-|     kudu predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     kudu predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 00:SCAN KUDU [tpch_kudu.supplier]
 ====
@@ -1095,7 +1095,7 @@ order by
 |  00:SCAN KUDU [tpch_kudu.supplier]
 |
 07:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: l_suppkey = ps_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey
 |  other join predicates: ps_availqty > 0.5 * sum(l_quantity)
 |
 |--06:HASH JOIN [LEFT SEMI JOIN]
@@ -1111,7 +1111,7 @@ order by
 |  group by: l_partkey, l_suppkey
 |
 04:SCAN KUDU [tpch_kudu.lineitem]
-   kudu predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   kudu predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ====
 # Q21 - Suppliers Who Kept Orders Waiting Query
 select

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index 4354814..caa3420 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -100,8 +100,8 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 25:HASH JOIN [LEFT SEMI JOIN]
-|  hash predicates: ps_supplycost = min(ps_supplycost), p_partkey = ps_partkey
-|  runtime filters: RF001 <- ps_partkey
+|  hash predicates: p_partkey = ps_partkey, ps_supplycost = min(ps_supplycost)
+|  runtime filters: RF000 <- ps_partkey
 |
 |--22:AGGREGATE [FINALIZE]
 |  |  output: min(ps_supplycost)
@@ -158,7 +158,7 @@ limit 100
 |--05:SCAN HDFS [tpch_nested_parquet.part p]
 |     partitions=1/1 files=1 size=6.20MB
 |     predicates: p_size = 15, p_type LIKE '%BRASS'
-|     runtime filters: RF001 -> p_partkey
+|     runtime filters: RF000 -> p_partkey
 |
 01:SUBPLAN
 |
@@ -181,8 +181,8 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 25:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
-|  hash predicates: ps_supplycost = min(ps_supplycost), p_partkey = ps_partkey
-|  runtime filters: RF001 <- ps_partkey
+|  hash predicates: p_partkey = ps_partkey, ps_supplycost = min(ps_supplycost)
+|  runtime filters: RF000 <- ps_partkey
 |
 |--32:EXCHANGE [BROADCAST]
 |  |
@@ -253,7 +253,7 @@ limit 100
 |  05:SCAN HDFS [tpch_nested_parquet.part p]
 |     partitions=1/1 files=1 size=6.20MB
 |     predicates: p_size = 15, p_type LIKE '%BRASS'
-|     runtime filters: RF001 -> p_partkey
+|     runtime filters: RF000 -> p_partkey
 |
 01:SUBPLAN
 |
@@ -499,8 +499,8 @@ order by
 |     predicates: r_name = 'ASIA', !empty(r.r_nations)
 |
 15:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF001 <- s_nationkey
 |
 |--09:SCAN HDFS [tpch_nested_parquet.supplier s]
 |     partitions=1/1 files=1 size=111.08MB
@@ -526,7 +526,7 @@ order by
    partitions=1/1 files=4 size=577.87MB
    predicates: !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
-   runtime filters: RF000 -> c.c_nationkey, RF002 -> c_nationkey
+   runtime filters: RF000 -> c.c_nationkey, RF001 -> c_nationkey
 ---- DISTRIBUTEDPLAN
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
@@ -563,8 +563,8 @@ order by
 |     predicates: r_name = 'ASIA', !empty(r.r_nations)
 |
 15:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF001 <- s_nationkey
 |
 |--19:EXCHANGE [BROADCAST]
 |  |
@@ -592,7 +592,7 @@ order by
    partitions=1/1 files=4 size=577.87MB
    predicates: !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
-   runtime filters: RF000 -> c.c_nationkey, RF002 -> c_nationkey
+   runtime filters: RF000 -> c.c_nationkey, RF001 -> c_nationkey
 ====
 # TPCH-Q6
 # Q6 - Forecasting Revenue Change Query
@@ -610,8 +610,8 @@ where
 |  output: sum(l_extendedprice * l_discount)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
-   partitions=1/1 files=4 size=577.87MB
-   predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   partitions=1/1 files=4 size=292.35MB
+   predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ---- DISTRIBUTEDPLAN
 03:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * l_discount)
@@ -622,8 +622,8 @@ where
 |  output: sum(l_extendedprice * l_discount)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
-   partitions=1/1 files=4 size=577.87MB
-   predicates: l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24, l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
+   partitions=1/1 files=4 size=292.35MB
+   predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ====
 # TPCH-Q7
 # Q7 - Volume Shipping Query
@@ -1023,7 +1023,7 @@ order by
 |     predicates: p_name LIKE '%green%'
 |
 12:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = s_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
 |
 |--06:SUBPLAN
 |  |
@@ -1085,7 +1085,7 @@ order by
 |     predicates: p_name LIKE '%green%'
 |
 12:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = s_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
 |
 |--17:EXCHANGE [BROADCAST]
 |  |
@@ -1588,7 +1588,7 @@ where
 |
 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
    partitions=1/1 files=4 size=577.87MB
-   predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
+   predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ---- DISTRIBUTEDPLAN
 06:AGGREGATE [FINALIZE]
@@ -1610,7 +1610,7 @@ where
 |
 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
    partitions=1/1 files=4 size=577.87MB
-   predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
+   predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ====
 # TPCH-Q15
@@ -1661,7 +1661,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
 |     partitions=1/1 files=4 size=577.87MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: l_suppkey = s_suppkey
@@ -1676,7 +1676,7 @@ order by
 |
 01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
    partitions=1/1 files=4 size=577.87MB
-   predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+   predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
    runtime filters: RF000 -> l.l_suppkey
 ---- DISTRIBUTEDPLAN
 17:MERGING-EXCHANGE [UNPARTITIONED]
@@ -1710,7 +1710,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
 |     partitions=1/1 files=4 size=577.87MB
-|     predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+|     predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_suppkey = s_suppkey
@@ -1733,7 +1733,7 @@ order by
 |
 01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
    partitions=1/1 files=4 size=577.87MB
-   predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
+   predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
    runtime filters: RF000 -> l.l_suppkey
 ====
 # TPCH-Q16
@@ -2117,9 +2117,9 @@ order by
 |  group by: s_name, s_address
 |
 11:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: l_suppkey = s_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
 |  other join predicates: ps_availqty > 0.5 * sum(l_quantity)
-|  runtime filters: RF000 <- s_suppkey, RF001 <- ps_partkey
+|  runtime filters: RF000 <- ps_partkey, RF001 <- s_suppkey
 |
 |--10:HASH JOIN [LEFT SEMI JOIN]
 |  |  hash predicates: ps_partkey = p_partkey
@@ -2155,8 +2155,8 @@ order by
 |
 07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
    partitions=1/1 files=4 size=577.87MB
-   predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
-   runtime filters: RF000 -> l.l_suppkey, RF001 -> l.l_partkey
+   predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
+   runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey
 ---- DISTRIBUTEDPLAN
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_name ASC
@@ -2173,9 +2173,9 @@ order by
 |  group by: s_name, s_address
 |
 11:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
-|  hash predicates: l_suppkey = s_suppkey, l_partkey = ps_partkey
+|  hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
 |  other join predicates: ps_availqty > 0.5 * sum(l_quantity)
-|  runtime filters: RF000 <- s_suppkey, RF001 <- ps_partkey
+|  runtime filters: RF000 <- ps_partkey, RF001 <- s_suppkey
 |
 |--18:EXCHANGE [HASH(ps_partkey,s_suppkey)]
 |  |
@@ -2223,8 +2223,8 @@ order by
 |
 07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
    partitions=1/1 files=4 size=577.87MB
-   predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
-   runtime filters: RF000 -> l.l_suppkey, RF001 -> l.l_partkey
+   predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
+   runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey
 ====
 # TPCH-Q21
 # Q21 - Suppliers Who Kept Orders Waiting Query

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index 69e5ecc..96409e2 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -84,8 +84,8 @@ limit 100
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
 17:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: min(ps_supplycost) = tpch.partsupp.ps_supplycost, ps_partkey = tpch.part.p_partkey
-|  runtime filters: RF001 <- tpch.part.p_partkey
+|  hash predicates: ps_partkey = tpch.part.p_partkey, min(ps_supplycost) = tpch.partsupp.ps_supplycost
+|  runtime filters: RF000 <- tpch.part.p_partkey
 |
 |--16:HASH JOIN [INNER JOIN]
 |  |  hash predicates: tpch.nation.n_regionkey = tpch.region.r_regionkey
@@ -153,7 +153,7 @@ limit 100
 |
 05:SCAN HDFS [tpch.partsupp]
    partitions=1/1 files=1 size=112.71MB
-   runtime filters: RF001 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
+   runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
 ====
 # TPCH-Q3
 # Q3 - Shipping Priority Query
@@ -247,7 +247,7 @@ order by
 |
 |--00:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: tpch.orders.o_orderdate >= '1993-07-01', tpch.orders.o_orderdate < '1993-10-01'
+|     predicates: tpch.orders.o_orderdate < '1993-10-01', tpch.orders.o_orderdate >= '1993-07-01'
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
@@ -305,8 +305,8 @@ order by
 |     runtime filters: RF000 -> tpch.nation.n_regionkey
 |
 08:HASH JOIN [INNER JOIN]
-|  hash predicates: tpch.lineitem.l_suppkey = tpch.supplier.s_suppkey, tpch.customer.c_nationkey = tpch.supplier.s_nationkey
-|  runtime filters: RF002 <- tpch.supplier.s_suppkey, RF003 <- tpch.supplier.s_nationkey
+|  hash predicates: tpch.customer.c_nationkey = tpch.supplier.s_nationkey, tpch.lineitem.l_suppkey = tpch.supplier.s_suppkey
+|  runtime filters: RF002 <- tpch.supplier.s_nationkey, RF003 <- tpch.supplier.s_suppkey
 |
 |--03:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
@@ -318,7 +318,7 @@ order by
 |
 |--00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
-|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF003 -> tpch.customer.c_nationkey
+|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF002 -> tpch.customer.c_nationkey
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: tpch.lineitem.l_orderkey = tpch.orders.o_orderkey
@@ -326,12 +326,12 @@ order by
 |
 |--01:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: tpch.orders.o_orderdate >= '1994-01-01', tpch.orders.o_orderdate < '1995-01-01'
+|     predicates: tpch.orders.o_orderdate < '1995-01-01', tpch.orders.o_orderdate >= '1994-01-01'
 |     runtime filters: RF004 -> tpch.orders.o_custkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF002 -> tpch.lineitem.l_suppkey, RF005 -> tpch.lineitem.l_orderkey
+   runtime filters: RF003 -> tpch.lineitem.l_suppkey, RF005 -> tpch.lineitem.l_orderkey
 ====
 # TPCH-Q6
 # Q6 - Forecasting Revenue Change Query
@@ -350,7 +350,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: tpch.lineitem.l_discount >= 0.05, tpch.lineitem.l_discount <= 0.07, tpch.lineitem.l_quantity < 24, tpch.lineitem.l_shipdate >= '1994-01-01', tpch.lineitem.l_shipdate < '1995-01-01'
+   predicates: tpch.lineitem.l_discount <= 0.07, tpch.lineitem.l_discount >= 0.05, tpch.lineitem.l_quantity < 24, tpch.lineitem.l_shipdate < '1995-01-01', tpch.lineitem.l_shipdate >= '1994-01-01'
 ====
 # TPCH-Q7
 # Q7 - Volume Shipping Query
@@ -441,7 +441,7 @@ order by
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: tpch.lineitem.l_shipdate >= '1995-01-01', tpch.lineitem.l_shipdate <= '1996-12-31'
+   predicates: tpch.lineitem.l_shipdate <= '1996-12-31', tpch.lineitem.l_shipdate >= '1995-01-01'
    runtime filters: RF003 -> tpch.lineitem.l_suppkey, RF004 -> tpch.lineitem.l_orderkey
 ====
 # TPCH-Q8
@@ -544,7 +544,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: tpch.orders.o_orderdate >= '1995-01-01', tpch.orders.o_orderdate <= '1996-12-31'
+|     predicates: tpch.orders.o_orderdate <= '1996-12-31', tpch.orders.o_orderdate >= '1995-01-01'
 |     runtime filters: RF005 -> tpch.orders.o_orderkey
 |
 04:SCAN HDFS [tpch.customer]
@@ -600,8 +600,8 @@ order by
 |     partitions=1/1 files=1 size=2.15KB
 |
 09:HASH JOIN [INNER JOIN]
-|  hash predicates: tpch.lineitem.l_suppkey = tpch.partsupp.ps_suppkey, tpch.lineitem.l_partkey = tpch.partsupp.ps_partkey
-|  runtime filters: RF001 <- tpch.partsupp.ps_suppkey, RF002 <- tpch.partsupp.ps_partkey
+|  hash predicates: tpch.lineitem.l_partkey = tpch.partsupp.ps_partkey, tpch.lineitem.l_suppkey = tpch.partsupp.ps_suppkey
+|  runtime filters: RF001 <- tpch.partsupp.ps_partkey, RF002 <- tpch.partsupp.ps_suppkey
 |
 |--03:SCAN HDFS [tpch.partsupp]
 |     partitions=1/1 files=1 size=112.71MB
@@ -612,7 +612,7 @@ order by
 |
 |--01:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
-|     runtime filters: RF000 -> tpch.supplier.s_nationkey, RF001 -> tpch.supplier.s_suppkey
+|     runtime filters: RF000 -> tpch.supplier.s_nationkey, RF002 -> tpch.supplier.s_suppkey
 |
 07:HASH JOIN [INNER JOIN]
 |  hash predicates: tpch.lineitem.l_orderkey = tpch.orders.o_orderkey
@@ -628,11 +628,11 @@ order by
 |--00:SCAN HDFS [tpch.part]
 |     partitions=1/1 files=1 size=22.83MB
 |     predicates: tpch.part.p_name LIKE '%green%'
-|     runtime filters: RF002 -> tpch.part.p_partkey
+|     runtime filters: RF001 -> tpch.part.p_partkey
 |
 02:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF001 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey, RF003 -> tpch.lineitem.l_suppkey, RF004 -> tpch.lineitem.l_orderkey, RF005 -> tpch.lineitem.l_partkey
+   runtime filters: RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey, RF003 -> tpch.lineitem.l_suppkey, RF004 -> tpch.lineitem.l_orderkey, RF005 -> tpch.lineitem.l_partkey
 ====
 # TPCH-Q10
 # Q10 - Returned Item Reporting Query
@@ -694,7 +694,7 @@ limit 20
 |  |
 |  |--01:SCAN HDFS [tpch.orders]
 |  |     partitions=1/1 files=1 size=162.56MB
-|  |     predicates: tpch.orders.o_orderdate >= '1993-10-01', tpch.orders.o_orderdate < '1994-01-01'
+|  |     predicates: tpch.orders.o_orderdate < '1994-01-01', tpch.orders.o_orderdate >= '1993-10-01'
 |  |
 |  02:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
@@ -838,7 +838,7 @@ order by
 |
 |--01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: tpch.lineitem.l_shipmode IN ('MAIL', 'SHIP'), tpch.lineitem.l_commitdate < tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipdate < tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate >= '1994-01-01', tpch.lineitem.l_receiptdate < '1995-01-01'
+|     predicates: tpch.lineitem.l_shipmode IN ('MAIL', 'SHIP'), tpch.lineitem.l_commitdate < tpch.lineitem.l_receiptdate, tpch.lineitem.l_receiptdate < '1995-01-01', tpch.lineitem.l_receiptdate >= '1994-01-01', tpch.lineitem.l_shipdate < tpch.lineitem.l_commitdate
 |
 00:SCAN HDFS [tpch.orders]
    partitions=1/1 files=1 size=162.56MB
@@ -918,7 +918,7 @@ where
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: tpch.lineitem.l_shipdate >= '1995-09-01', tpch.lineitem.l_shipdate < '1995-10-01'
+   predicates: tpch.lineitem.l_shipdate < '1995-10-01', tpch.lineitem.l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> tpch.lineitem.l_partkey
 ====
 # TPCH-Q15
@@ -969,7 +969,7 @@ order by
 |  |
 |  03:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: tpch.lineitem.l_shipdate >= '1996-01-01', tpch.lineitem.l_shipdate < '1996-04-01'
+|     predicates: tpch.lineitem.l_shipdate < '1996-04-01', tpch.lineitem.l_shipdate >= '1996-01-01'
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: tpch.supplier.s_suppkey = l_suppkey
@@ -981,7 +981,7 @@ order by
 |  |
 |  01:SCAN HDFS [tpch.lineitem]
 |     partitions=1/1 files=1 size=718.94MB
-|     predicates: tpch.lineitem.l_shipdate >= '1996-01-01', tpch.lineitem.l_shipdate < '1996-04-01'
+|     predicates: tpch.lineitem.l_shipdate < '1996-04-01', tpch.lineitem.l_shipdate >= '1996-01-01'
 |
 00:SCAN HDFS [tpch.supplier]
    partitions=1/1 files=1 size=1.33MB
@@ -1285,9 +1285,9 @@ order by
 |     runtime filters: RF004 -> tpch.supplier.s_nationkey
 |
 07:HASH JOIN [RIGHT SEMI JOIN]
-|  hash predicates: l_suppkey = tpch.partsupp.ps_suppkey, l_partkey = tpch.partsupp.ps_partkey
+|  hash predicates: l_partkey = tpch.partsupp.ps_partkey, l_suppkey = tpch.partsupp.ps_suppkey
 |  other join predicates: tpch.partsupp.ps_availqty > 0.5 * sum(l_quantity)
-|  runtime filters: RF001 <- tpch.partsupp.ps_suppkey, RF002 <- tpch.partsupp.ps_partkey
+|  runtime filters: RF001 <- tpch.partsupp.ps_partkey, RF002 <- tpch.partsupp.ps_suppkey
 |
 |--06:HASH JOIN [LEFT SEMI JOIN]
 |  |  hash predicates: tpch.partsupp.ps_partkey = tpch.part.p_partkey
@@ -1307,8 +1307,8 @@ order by
 |
 04:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
-   predicates: tpch.lineitem.l_shipdate >= '1994-01-01', tpch.lineitem.l_shipdate < '1995-01-01'
-   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey
+   predicates: tpch.lineitem.l_shipdate < '1995-01-01', tpch.lineitem.l_shipdate >= '1994-01-01'
+   runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey
 ====
 # TPCH-Q21
 # Q21 - Suppliers Who Kept Orders Waiting Query

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/union.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/union.test b/testdata/workloads/functional-planner/queries/PlannerTest/union.test
index 7e9549a..84053a9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/union.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/union.test
@@ -2413,11 +2413,11 @@ where a + b < 100 and c > 20 and d > 50
 |
 |--02:SCAN HDFS [functional.alltypes]
 |     partitions=24/24 files=24 size=478.45KB
-|     predicates: functional.alltypes.smallint_col > 20, functional.alltypes.bigint_col > 50, functional.alltypes.tinyint_col + functional.alltypes.int_col < 100
+|     predicates: functional.alltypes.bigint_col > 50, functional.alltypes.smallint_col > 20, functional.alltypes.tinyint_col + functional.alltypes.int_col < 100
 |
 01:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
-   predicates: functional.alltypestiny.int_col > 20, functional.alltypestiny.bigint_col > 50
+   predicates: functional.alltypestiny.bigint_col > 50, functional.alltypestiny.int_col > 20
 ====
 # Test union where all operands are dropped because of constant conjuncts.
 select * from


[21/32] incubator-impala git commit: IMPALA-2789: More compact mem layout with null bits at the end.

Posted by ta...@apache.org.
IMPALA-2789: More compact mem layout with null bits at the end.

There are two motivations for this change:
1. Reduce memory consumption.
2. Pave the way for full memory layout compatibility between
   Impala and Kudu to eventually enable zero-copy scans. This
   patch is a only first step towards that goal.

New Memory Layout
Slots are placed in descending order by size with trailing bytes to
store null flags. Null flags are omitted for non-nullable slots. There
is no padding between tuples when stored back-to-back in a row batch.

Example: select bool_col, int_col, string_col, smallint_col
         from functional.alltypes
Slots:   string_col|int_col|smallint_col|bool_col|null_byte
Offsets: 0          16      20           22       23

The main change is to move the null indicators to the end of tuples.
The new memory layout is fully packed with no padding in between
slots or tuples.
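
As an illustration only (not code from this patch), the sketch below
computes the packed offsets for the example above. It assumes a
16-byte string slot and one null bit per nullable slot, rounded up to
whole trailing bytes; slot names and sizes are taken from the example.

// Illustration only: packed tuple layout with trailing null bytes.
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct Slot { std::string name; int size; bool nullable; };

int main() {
  // select bool_col, int_col, string_col, smallint_col from functional.alltypes
  std::vector<Slot> slots = {
      {"bool_col", 1, true}, {"int_col", 4, true},
      {"string_col", 16, true}, {"smallint_col", 2, true}};
  // Descending size order gives a fully packed layout with no padding.
  std::sort(slots.begin(), slots.end(),
            [](const Slot& a, const Slot& b) { return a.size > b.size; });
  int offset = 0;
  int num_nullable = 0;
  for (const Slot& s : slots) {
    std::printf("%-12s offset=%d\n", s.name.c_str(), offset);
    offset += s.size;
    if (s.nullable) ++num_nullable;
  }
  // Null flags live in trailing bytes, one bit per nullable slot.
  int null_bytes = (num_nullable + 7) / 8;
  std::printf("null bytes   offset=%d count=%d tuple size=%d\n",
              offset, null_bytes, offset + null_bytes);
  return 0;
}

Running this prints offsets 0, 16, 20, 22 for the four slots and 23
for the null byte, matching the layout shown above.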

Performance:
Our standard cluster perf tests showed no significant difference in
query response times or consumed cycles, and a slight reduction in
peak memory consumption.

Testing:
An exhaustive test run passed. Ran a few select tests like TPC-H/DS
with ASAN locally.

These follow-on changes are planned:
1. Planner needs to mark slots non-nullable if they correspond
   to a non-nullable Kudu column.
2. Update Kudu scan node to copy tuples with memcpy.
3. Kudu client needs to support transferring ownership of the
   tuple memory (maybe do direct and indirect buffers separately).
4. Update Kudu scan node to use memory transfer instead of copy

Change-Id: Ib6510c75d841bddafa6638f1bd2ac6731a7053f6
Reviewed-on: http://gerrit.cloudera.org:8080/4673
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b0e87c68
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b0e87c68
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b0e87c68

Branch: refs/heads/hadoop-next
Commit: b0e87c685d96e4e55a0fda3b23c10fc069f7551a
Parents: 9f61397
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Dec 22 13:56:32 2015 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Sun Oct 16 23:36:10 2016 +0000

----------------------------------------------------------------------
 .../benchmarks/row-batch-serialize-benchmark.cc |  13 ++-
 be/src/codegen/llvm-codegen.cc                  |  11 ++
 be/src/codegen/llvm-codegen.h                   |   5 +
 be/src/exec/hdfs-scanner.cc                     |  73 +++++++------
 be/src/exec/hdfs-scanner.h                      |  13 +--
 be/src/exec/kudu-scanner.cc                     |   7 +-
 be/src/exec/kudu-scanner.h                      |   3 -
 be/src/exec/row-batch-list-test.cc              |  20 +++-
 be/src/exec/text-converter.cc                   |   2 +-
 be/src/runtime/buffered-tuple-stream-test.cc    |  35 +++----
 be/src/runtime/collection-value-builder-test.cc |  15 ++-
 be/src/runtime/descriptors.cc                   |  56 +++++-----
 be/src/runtime/descriptors.h                    |   4 +-
 be/src/runtime/row-batch-serialize-test.cc      |  32 ++++--
 be/src/runtime/row-batch-test.cc                |  16 ++-
 be/src/runtime/tuple.cc                         |   6 +-
 be/src/runtime/tuple.h                          |   7 +-
 be/src/service/frontend.cc                      |   7 ++
 be/src/service/frontend.h                       |   8 ++
 be/src/testutil/desc-tbl-builder.cc             |  96 ++++-------------
 be/src/testutil/desc-tbl-builder.h              |  20 ++--
 common/thrift/Frontend.thrift                   |   7 ++
 .../apache/impala/analysis/DescriptorTable.java |  72 +++++++++++--
 .../apache/impala/analysis/TupleDescriptor.java |  43 ++++----
 .../org/apache/impala/service/JniFrontend.java  |  41 ++++++--
 .../apache/impala/analysis/AnalyzerTest.java    | 102 ++++++++-----------
 26 files changed, 402 insertions(+), 312 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/benchmarks/row-batch-serialize-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/row-batch-serialize-benchmark.cc b/be/src/benchmarks/row-batch-serialize-benchmark.cc
index 3f04906..5a8a104 100644
--- a/be/src/benchmarks/row-batch-serialize-benchmark.cc
+++ b/be/src/benchmarks/row-batch-serialize-benchmark.cc
@@ -17,11 +17,15 @@
 
 #include <iostream>
 #include <sstream>
+#include <boost/scoped_ptr.hpp>
 
+#include "common/init.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/raw-value.h"
 #include "runtime/row-batch.h"
 #include "runtime/tuple-row.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
 #include "util/benchmark.h"
 #include "util/compress.h"
@@ -88,6 +92,10 @@ const int NUM_ROWS = 1024;
 const int MAX_STRING_LEN = 10;
 
 namespace impala {
+
+// For computing tuple mem layouts.
+static scoped_ptr<Frontend> fe;
+
 // Friend class with access to RowBatch internals
 class RowBatchSerializeBaseline {
  public:
@@ -318,7 +326,7 @@ class RowBatchSerializeBenchmark {
     MemTracker tracker;
     MemPool mem_pool(&tracker);
     ObjectPool obj_pool;
-    DescriptorTblBuilder builder(&obj_pool);
+    DescriptorTblBuilder builder(fe.get(), &obj_pool);
     builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
     DescriptorTbl* desc_tbl = builder.Build();
 
@@ -398,6 +406,9 @@ class RowBatchSerializeBenchmark {
 }
 
 int main(int argc, char** argv) {
+  impala::InitCommonRuntime(argc, argv, true);
+  InitFeSupport();
+  fe.reset(new Frontend());
   RowBatchSerializeBenchmark::Run();
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index cc8b46c..d43ad6e 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -59,6 +59,7 @@
 #include "codegen/instruction-counter.h"
 #include "codegen/mcjit-mem-mgr.h"
 #include "impala-ir/impala-ir-names.h"
+#include "runtime/descriptors.h"
 #include "runtime/hdfs-fs-cache.h"
 #include "runtime/lib-cache.h"
 #include "runtime/mem-pool.h"
@@ -1210,6 +1211,16 @@ void LlvmCodeGen::CodegenMemset(LlvmBuilder* builder, Value* dst, int value, int
   builder->CreateMemSet(dst, value_const, size, /* no alignment */ 0);
 }
 
+void LlvmCodeGen::CodegenClearNullBits(LlvmBuilder* builder, Value* tuple_ptr,
+    const TupleDescriptor& tuple_desc) {
+  Value* int8_ptr = builder->CreateBitCast(tuple_ptr, ptr_type(), "int8_ptr");
+  Value* null_bytes_offset =
+      ConstantInt::get(int_type(), tuple_desc.null_bytes_offset());
+  Value* null_bytes_ptr =
+      builder->CreateInBoundsGEP(int8_ptr, null_bytes_offset, "null_bytes_ptr");
+  CodegenMemset(builder, null_bytes_ptr, 0, tuple_desc.num_null_bytes());
+}
+
 Value* LlvmCodeGen::CodegenAllocate(LlvmBuilder* builder, MemPool* pool, Value* size,
     const char* name) {
   DCHECK(pool != NULL);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 2ef936f..0b81701 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -74,6 +74,7 @@ namespace impala {
 class CodegenAnyVal;
 class CodegenSymbolEmitter;
 class SubExprElimination;
+class TupleDescriptor;
 
 /// LLVM code generator.  This is the top level object to generate jitted code.
 //
@@ -419,6 +420,10 @@ class LlvmCodeGen {
   /// be a pointer. No-op if size is zero.
   void CodegenMemset(LlvmBuilder* builder, llvm::Value* dst, int value, int size);
 
+  /// Codegen to set all null bytes of the given tuple to 0.
+  void CodegenClearNullBits(LlvmBuilder* builder, llvm::Value* tuple_ptr,
+      const TupleDescriptor& tuple_desc);
+
   /// Codegen to call pool->Allocate(size).
   llvm::Value* CodegenAllocate(LlvmBuilder* builder, MemPool* pool, llvm::Value* size,
       const char* name = "");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index 28eb606..81542ec 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -67,7 +67,6 @@ HdfsScanner::HdfsScanner(HdfsScanNodeBase* scan_node, RuntimeState* state)
       template_tuple_pool_(new MemPool(scan_node->mem_tracker())),
       template_tuple_(NULL),
       tuple_byte_size_(scan_node->tuple_desc()->byte_size()),
-      num_null_bytes_(scan_node->tuple_desc()->num_null_bytes()),
       tuple_(NULL),
       batch_(NULL),
       tuple_mem_(NULL),
@@ -88,7 +87,6 @@ HdfsScanner::HdfsScanner()
       template_tuple_pool_(NULL),
       template_tuple_(NULL),
       tuple_byte_size_(-1),
-      num_null_bytes_(-1),
       tuple_(NULL),
       batch_(NULL),
       tuple_mem_(NULL),
@@ -302,50 +300,54 @@ bool HdfsScanner::WriteCompleteTuple(MemPool* pool, FieldLocation* fields,
   return EvalConjuncts(tuple_row);
 }
 
-// Codegen for WriteTuple(above).  The signature matches WriteTuple (except for the
-// this* first argument).  For writing out and evaluating a single string slot:
+// Codegen for WriteTuple(above) for writing out single nullable string slot and
+// evaluating a <slot> = <constantexpr> conjunct. The signature matches WriteTuple()
+// except for the first this* argument.
 // define i1 @WriteCompleteTuple(%"class.impala::HdfsScanner"* %this,
 //                               %"class.impala::MemPool"* %pool,
 //                               %"struct.impala::FieldLocation"* %fields,
 //                               %"class.impala::Tuple"* %tuple,
 //                               %"class.impala::TupleRow"* %tuple_row,
 //                               %"class.impala::Tuple"* %template,
-//                               i8* %error_fields, i8* %error_in_row) #20 {
+//                               i8* %error_fields, i8* %error_in_row) {
 // entry:
 //   %tuple_ptr = bitcast %"class.impala::Tuple"* %tuple
-//                to { i8, %"struct.impala::StringValue" }*
+//                to <{ %"struct.impala::StringValue", i8 }>*
 //   %tuple_ptr1 = bitcast %"class.impala::Tuple"* %template
-//                 to { i8, %"struct.impala::StringValue" }*
-//   %null_byte = getelementptr inbounds
-//                { i8, %"struct.impala::StringValue" }* %tuple_ptr, i32 0, i32 0
-//   store i8 0, i8* %null_byte
+//                 to <{ %"struct.impala::StringValue", i8 }>*
+//   %int8_ptr = bitcast <{ %"struct.impala::StringValue", i8 }>* %tuple_ptr to i8*
+//   %null_bytes_ptr = getelementptr i8, i8* %int8_ptr, i32 16
+//   call void @llvm.memset.p0i8.i64(i8* %null_bytes_ptr, i8 0, i64 1, i32 0, i1 false)
 //   %0 = bitcast %"class.impala::TupleRow"* %tuple_row
-//        to { i8, %"struct.impala::StringValue" }**
-//   %1 = getelementptr { i8, %"struct.impala::StringValue" }** %0, i32 0
-//   store { i8, %"struct.impala::StringValue" }* %tuple_ptr,
-//         { i8, %"struct.impala::StringValue" }** %1
+//        to <{ %"struct.impala::StringValue", i8 }>**
+//   %1 = getelementptr <{ %"struct.impala::StringValue", i8 }>*,
+//                      <{ %"struct.impala::StringValue", i8 }>** %0, i32 0
+//   store <{ %"struct.impala::StringValue", i8 }>* %tuple_ptr,
+//         <{ %"struct.impala::StringValue", i8 }>** %1
 //   br label %parse
 //
 // parse:                                            ; preds = %entry
-//   %data_ptr = getelementptr %"struct.impala::FieldLocation"* %fields, i32 0, i32 0
-//   %len_ptr = getelementptr %"struct.impala::FieldLocation"* %fields, i32 0, i32 1
-//   %slot_error_ptr = getelementptr i8* %error_fields, i32 0
-//   %data = load i8** %data_ptr
-//   %len = load i32* %len_ptr
-//   %2 = call i1 @WriteSlot({ i8, %"struct.impala::StringValue" }* %tuple_ptr,
-//                           i8* %data, i32 %len)
-//   %slot_parse_error = xor i1 %2, true
-//   %error_in_row2 = or i1 false, %slot_parse_error
-//   %3 = zext i1 %slot_parse_error to i8
-//   store i8 %3, i8* %slot_error_ptr
-//   %4 = call %"class.impala::ExprContext"* @GetConjunctCtx(
-//       %"class.impala::HdfsScanner"* %this, i32 0)
-//   %conjunct_eval = call i16 @Eq_StringVal_StringValWrapper1(
-//       %"class.impala::ExprContext"* %4, %"class.impala::TupleRow"* %tuple_row)
-//   %5 = ashr i16 %conjunct_eval, 8
-//   %6 = trunc i16 %5 to i8
-//   %val = trunc i8 %6 to i1
-//   br i1 %val, label %parse3, label %eval_fail
+//  %data_ptr = getelementptr %"struct.impala::FieldLocation",
+//                            %"struct.impala::FieldLocation"* %fields, i32 0, i32 0
+//  %len_ptr = getelementptr %"struct.impala::FieldLocation",
+//                           %"struct.impala::FieldLocation"* %fields, i32 0, i32 1
+//  %slot_error_ptr = getelementptr i8, i8* %error_fields, i32 0
+//  %data = load i8*, i8** %data_ptr
+//  %len = load i32, i32* %len_ptr
+//  %2 = call i1 @WriteSlot(<{ %"struct.impala::StringValue", i8 }>* %tuple_ptr,
+//                          i8* %data, i32 %len)
+//  %slot_parse_error = xor i1 %2, true
+//  %error_in_row2 = or i1 false, %slot_parse_error
+//  %3 = zext i1 %slot_parse_error to i8
+//  store i8 %3, i8* %slot_error_ptr
+//  %4 = call %"class.impala::ExprContext"* @GetConjunctCtx(
+//    %"class.impala::HdfsScanner"* %this, i32 0)
+//  %conjunct_eval = call i16 @"impala::Operators::Eq_StringVal_StringValWrapper"(
+//    %"class.impala::ExprContext"* %4, %"class.impala::TupleRow"* %tuple_row)
+//  %5 = ashr i16 %conjunct_eval, 8
+//  %6 = trunc i16 %5 to i8
+//  %val = trunc i8 %6 to i1
+//  br i1 %val, label %parse3, label %eval_fail
 //
 // parse3:                                           ; preds = %parse
 //   %7 = zext i1 %error_in_row2 to i8
@@ -451,10 +453,7 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node,
   // Initialize tuple
   if (node->num_materialized_partition_keys() == 0) {
     // No partition key slots, just zero the NULL bytes.
-    for (int i = 0; i < tuple_desc->num_null_bytes(); ++i) {
-      Value* null_byte = builder.CreateStructGEP(NULL, tuple_arg, i, "null_byte");
-      builder.CreateStore(codegen->GetIntConstant(TYPE_TINYINT, 0), null_byte);
-    }
+    codegen->CodegenClearNullBits(&builder, tuple_arg, *tuple_desc);
   } else {
     // Copy template tuple.
     // TODO: only copy what's necessary from the template tuple.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/hdfs-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h
index d9fa424..4a4d366 100644
--- a/be/src/exec/hdfs-scanner.h
+++ b/be/src/exec/hdfs-scanner.h
@@ -32,6 +32,7 @@
 #include "exec/scanner-context.h"
 #include "runtime/disk-io-mgr.h"
 #include "runtime/row-batch.h"
+#include "runtime/tuple.h"
 
 namespace impala {
 
@@ -44,7 +45,6 @@ class MemPool;
 class SlotDescriptor;
 class Status;
 class TextConverter;
-class Tuple;
 class TupleDescriptor;
 class TPlanNode;
 class TScanRange;
@@ -234,9 +234,6 @@ class HdfsScanner {
   /// Fixed size of each top-level tuple, in bytes
   const int32_t tuple_byte_size_;
 
-  /// Number of null bytes in the top-level tuple.
-  const int32_t num_null_bytes_;
-
   /// Current tuple pointer into tuple_mem_.
   Tuple* tuple_;
 
@@ -443,18 +440,14 @@ class HdfsScanner {
     if (template_tuple != NULL) {
       memcpy(tuple, template_tuple, desc->byte_size());
     } else {
-      memset(tuple, 0, sizeof(uint8_t) * desc->num_null_bytes());
+      tuple->ClearNullBits(*desc);
     }
   }
 
   // TODO: replace this function with above once we can inline constants from
   // scan_node_->tuple_desc() via codegen
   void InitTuple(Tuple* template_tuple, Tuple* tuple) {
-    if (template_tuple != NULL) {
-      memcpy(tuple, template_tuple, tuple_byte_size_);
-    } else {
-      memset(tuple, 0, sizeof(uint8_t) * num_null_bytes_);
-    }
+    InitTuple(scan_node_->tuple_desc(), template_tuple, tuple);
   }
 
   inline Tuple* next_tuple(int tuple_byte_size, Tuple* t) const {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index 4fcb40a..a6affbf 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -59,7 +59,6 @@ KuduScanner::KuduScanner(KuduScanNode* scan_node, RuntimeState* state)
     state_(state),
     cur_kudu_batch_num_read_(0),
     last_alive_time_micros_(0),
-    tuple_num_null_bytes_(scan_node_->tuple_desc()->num_null_bytes()),
     num_string_slots_(0) {
 }
 
@@ -185,14 +184,14 @@ Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch,
   // that happens inside the loop.
   int idx = row_batch->AddRow();
   TupleRow* row = row_batch->GetRow(idx);
-  (*tuple_mem)->Init(scan_node_->tuple_desc()->num_null_bytes());
+  (*tuple_mem)->ClearNullBits(*scan_node_->tuple_desc());
   row->SetTuple(tuple_idx(), *tuple_mem);
 
   int num_rows = cur_kudu_batch_.NumRows();
   // Now iterate through the Kudu rows.
   for (int krow_idx = cur_kudu_batch_num_read_; krow_idx < num_rows; ++krow_idx) {
     // Clear any NULL indicators set by a previous iteration.
-    (*tuple_mem)->Init(tuple_num_null_bytes_);
+    (*tuple_mem)->ClearNullBits(*scan_node_->tuple_desc());
 
     // Transform a Kudu row into an Impala row.
     KuduScanBatch::RowPtr krow = cur_kudu_batch_.Row(krow_idx);
@@ -216,7 +215,7 @@ Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch,
 
       // Move to the next tuple in the tuple buffer.
       *tuple_mem = next_tuple(*tuple_mem);
-      (*tuple_mem)->Init(tuple_num_null_bytes_);
+      (*tuple_mem)->ClearNullBits(*scan_node_->tuple_desc());
       // Make 'row' point to the new row.
       row = row_batch->GetRow(idx);
       row->SetTuple(tuple_idx(), *tuple_mem);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/kudu-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.h b/be/src/exec/kudu-scanner.h
index d868b05..0ed5221 100644
--- a/be/src/exec/kudu-scanner.h
+++ b/be/src/exec/kudu-scanner.h
@@ -121,9 +121,6 @@ class KuduScanner {
   /// The scanner's cloned copy of the conjuncts to apply.
   vector<ExprContext*> conjunct_ctxs_;
 
-  /// Number of bytes needed to represent the null bits in the tuple.
-  int tuple_num_null_bytes_;
-
   /// List of string slots that need relocation for their auxiliary memory.
   std::vector<SlotDescriptor*> string_slots_;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/row-batch-list-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/row-batch-list-test.cc b/be/src/exec/row-batch-list-test.cc
index 38ce177..ad3317b 100644
--- a/be/src/exec/row-batch-list-test.cc
+++ b/be/src/exec/row-batch-list-test.cc
@@ -19,21 +19,30 @@
 #include <cstdio>
 #include <iostream>
 #include <vector>
+#include <boost/scoped_ptr.hpp>
 
+#include "common/init.h"
 #include "exec/row-batch-list.h"
 #include "runtime/descriptors.h"
 #include "runtime/mem-pool.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/string-value.h"
 #include "runtime/tuple-row.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "util/runtime-profile-counters.h"
 #include "testutil/desc-tbl-builder.h"
 #include "testutil/gtest-util.h"
 
 #include "common/names.h"
 
+using namespace impala;
+
 namespace impala {
 
+// For computing tuple mem layouts.
+scoped_ptr<Frontend> fe;
+
 class RowBatchListTest : public testing::Test {
  public:
   RowBatchListTest() {}
@@ -44,7 +53,7 @@ class RowBatchListTest : public testing::Test {
   RowDescriptor* desc_;
 
   virtual void SetUp() {
-    DescriptorTblBuilder builder(&pool_);
+    DescriptorTblBuilder builder(fe.get(), &pool_);
     builder.DeclareTuple() << TYPE_INT;
     DescriptorTbl* desc_tbl = builder.Build();
     vector<bool> nullable_tuples(1, false);
@@ -139,5 +148,10 @@ TEST_F(RowBatchListTest, MultipleRowBatchesTest) {
 
 }
 
-IMPALA_TEST_MAIN();
-
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
+  fe.reset(new Frontend());
+  return RUN_ALL_TESTS();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/text-converter.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 2271b4e..d38d662 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -126,7 +126,7 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
 
   StructType* tuple_type = tuple_desc->GetLlvmStruct(codegen);
   if (tuple_type == NULL) return NULL;
-  PointerType* tuple_ptr_type = PointerType::get(tuple_type, 0);
+  PointerType* tuple_ptr_type = tuple_type->getPointerTo();
 
   Function* set_null_fn = slot_desc->GetUpdateNullFn(codegen, true);
   if (set_null_fn == NULL) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/buffered-tuple-stream-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream-test.cc b/be/src/runtime/buffered-tuple-stream-test.cc
index 20af23e..76b1bff 100644
--- a/be/src/runtime/buffered-tuple-stream-test.cc
+++ b/be/src/runtime/buffered-tuple-stream-test.cc
@@ -79,12 +79,12 @@ class SimpleTupleStreamTest : public testing::Test {
     vector<bool> nullable_tuples(1, false);
     vector<TTupleId> tuple_ids(1, static_cast<TTupleId>(0));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_desc_ = pool_.Add(new RowDescriptor(
         *string_builder.Build(), tuple_ids, nullable_tuples));
@@ -145,23 +145,21 @@ class SimpleTupleStreamTest : public testing::Test {
       for (int tuple_idx = 0; tuple_idx < num_tuples; ++tuple_idx) {
         TupleDescriptor* tuple_desc = row_desc.tuple_descriptors()[tuple_idx];
         Tuple* tuple = Tuple::Create(tuple_desc->byte_size(), batch->tuple_data_pool());
-        // Skip over the null indicators at the beginning of the tuple.
-        uint8_t* ptr = reinterpret_cast<uint8_t*>(tuple) + tuple_desc->num_null_bytes();
         bool is_null = gen_null && !GenBoolValue(idx);
         for (int slot_idx = 0; slot_idx < tuple_desc->slots().size(); ++slot_idx, ++idx) {
           SlotDescriptor* slot_desc = tuple_desc->slots()[slot_idx];
+          void* slot = tuple->GetSlot(slot_desc->tuple_offset());
           switch (slot_desc->type().type) {
             case TYPE_INT:
-              *reinterpret_cast<int*>(ptr) = GenIntValue(idx);
+              *reinterpret_cast<int*>(slot) = GenIntValue(idx);
               break;
             case TYPE_STRING:
-              *reinterpret_cast<StringValue*>(ptr) = STRINGS[idx % NUM_STRINGS];
+              *reinterpret_cast<StringValue*>(slot) = STRINGS[idx % NUM_STRINGS];
               break;
             default:
               // The memory has been zero'ed out already by Tuple::Create().
               break;
           }
-          ptr += slot_desc->slot_size();
         }
         if (is_null) {
           row->SetTuple(tuple_idx, NULL);
@@ -212,14 +210,13 @@ class SimpleTupleStreamTest : public testing::Test {
       TupleDescriptor* tuple_desc = row_desc->tuple_descriptors()[tuple_idx];
       Tuple* tuple = row->GetTuple(tuple_idx);
       const int num_slots = tuple_desc->slots().size();
-      uint8_t* ptr = reinterpret_cast<uint8_t*>(tuple) + tuple_desc->num_null_bytes();
       for (int slot_idx = 0; slot_idx < num_slots; ++slot_idx) {
         SlotDescriptor* slot_desc = tuple_desc->slots()[slot_idx];
         if (tuple == NULL) {
           AppendValue(NULL, results);
         } else {
-          AppendValue(ptr, results);
-          ptr += slot_desc->slot_size();
+          void* slot = tuple->GetSlot(slot_desc->tuple_offset());
+          AppendValue(reinterpret_cast<uint8_t*>(slot), results);
         }
       }
     }
@@ -270,7 +267,7 @@ class SimpleTupleStreamTest : public testing::Test {
         for (int slot_idx = 0; slot_idx < num_slots; ++slot_idx, ++idx) {
           T expected_val;
           GetExpectedValue(idx, is_null, &expected_val);
-          ASSERT_TRUE(results[idx] == expected_val)
+          ASSERT_EQ(results[idx], expected_val)
               << "results[" << idx << "] " << results[idx] << " != "
               << expected_val << " row_idx=" << row_idx
               << " tuple_idx=" << tuple_idx << " slot_idx=" << slot_idx
@@ -400,12 +397,12 @@ class SimpleNullStreamTest : public SimpleTupleStreamTest {
     vector<bool> nullable_tuples(1, true);
     vector<TTupleId> tuple_ids(1, static_cast<TTupleId>(0));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_desc_ = pool_.Add(new RowDescriptor(
         *string_builder.Build(), tuple_ids, nullable_tuples));
@@ -426,14 +423,14 @@ class MultiTupleStreamTest : public SimpleTupleStreamTest {
     tuple_ids.push_back(static_cast<TTupleId>(1));
     tuple_ids.push_back(static_cast<TTupleId>(2));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
@@ -456,14 +453,14 @@ class MultiNullableTupleStreamTest : public SimpleTupleStreamTest {
     tuple_ids.push_back(static_cast<TTupleId>(1));
     tuple_ids.push_back(static_cast<TTupleId>(2));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
@@ -495,7 +492,7 @@ class ArrayTupleStreamTest : public SimpleTupleStreamTest {
     nested_array_type.type = TYPE_ARRAY;
     nested_array_type.children.push_back(int_array_type);
 
-    DescriptorTblBuilder builder(&pool_);
+    DescriptorTblBuilder builder(test_env_->exec_env()->frontend(), &pool_);
     builder.DeclareTuple() << string_array_type << nested_array_type;
     builder.DeclareTuple() << int_array_type;
     array_desc_ = pool_.Add(new RowDescriptor(
@@ -735,7 +732,7 @@ TEST_F(SimpleTupleStreamTest, BigRow) {
   vector<bool> nullable_tuples;
   vector<bool> non_nullable_tuples;
 
-  DescriptorTblBuilder big_row_builder(&pool_);
+  DescriptorTblBuilder big_row_builder(test_env_->exec_env()->frontend(), &pool_);
   // Each tuple contains 8 slots of TYPE_INT and a single byte for null indicator.
   const int num_tuples = IO_BLOCK_SIZE / (8 * sizeof(int) + 1);
   for (int tuple_idx = 0; tuple_idx < num_tuples; ++tuple_idx) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/collection-value-builder-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/collection-value-builder-test.cc b/be/src/runtime/collection-value-builder-test.cc
index b8f4b65..613132a 100644
--- a/be/src/runtime/collection-value-builder-test.cc
+++ b/be/src/runtime/collection-value-builder-test.cc
@@ -17,6 +17,8 @@
 
 #include "runtime/collection-value-builder.h"
 #include "runtime/mem-tracker.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
 #include "testutil/gtest-util.h"
 
@@ -24,9 +26,12 @@
 
 using namespace impala;
 
+// For computing tuple mem layouts.
+static scoped_ptr<Frontend> fe;
+
 TEST(CollectionValueBuilderTest, MaxBufferSize) {
   ObjectPool obj_pool;
-  DescriptorTblBuilder builder(&obj_pool);
+  DescriptorTblBuilder builder(fe.get(), &obj_pool);
   builder.DeclareTuple() << TYPE_TINYINT << TYPE_TINYINT << TYPE_TINYINT;
   DescriptorTbl* desc_tbl = builder.Build();
   vector<TupleDescriptor*> descs;
@@ -66,4 +71,10 @@ TEST(CollectionValueBuilderTest, MaxBufferSize) {
   pool.FreeAll();
 }
 
-IMPALA_TEST_MAIN();
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
+  fe.reset(new Frontend());
+  return RUN_ALL_TESTS();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/descriptors.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc
index 47ca791..d106ed4 100644
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -286,6 +286,7 @@ TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc)
     table_desc_(NULL),
     byte_size_(tdesc.byteSize),
     num_null_bytes_(tdesc.numNullBytes),
+    null_bytes_offset_(tdesc.byteSize - tdesc.numNullBytes),
     slots_(),
     has_varlen_slots_(false),
     tuple_path_(tdesc.tuplePath),
@@ -591,14 +592,18 @@ Function* SlotDescriptor::GetUpdateNullFn(LlvmCodeGen* codegen, bool set_null) c
   prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type));
 
   LlvmCodeGen::LlvmBuilder builder(codegen->context());
-  Value* tuple_ptr;
-  Function* fn = prototype.GeneratePrototype(&builder, &tuple_ptr);
-
-  Value* null_byte_ptr = builder.CreateStructGEP(NULL,
-          tuple_ptr, null_indicator_offset_.byte_offset, "null_byte_ptr");
+  Value* tuple_arg;
+  Function* fn = prototype.GeneratePrototype(&builder, &tuple_arg);
+
+  Value* tuple_int8_ptr =
+      builder.CreateBitCast(tuple_arg, codegen->ptr_type(), "tuple_int8_ptr");
+  Value* null_byte_offset =
+      ConstantInt::get(codegen->int_type(), null_indicator_offset_.byte_offset);
+  Value* null_byte_ptr =
+      builder.CreateInBoundsGEP(tuple_int8_ptr, null_byte_offset, "null_byte_ptr");
   Value* null_byte = builder.CreateLoad(null_byte_ptr, "null_byte");
-  Value* result = NULL;
 
+  Value* result = NULL;
   if (set_null) {
     Value* null_set = codegen->GetIntConstant(
         TYPE_TINYINT, null_indicator_offset_.bit_mask);
@@ -627,45 +632,38 @@ StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const {
 
   // Sort slots in the order they will appear in LLVM struct.
   vector<SlotDescriptor*> sorted_slots(slots_.size());
-  for (SlotDescriptor* slot: slots_) {
-    sorted_slots[slot->slot_idx_] = slot;
-  }
-
-  // For each null byte, add a byte to the struct
-  vector<Type*> struct_fields;
-  for (int i = 0; i < num_null_bytes_; ++i) {
-    struct_fields.push_back(codegen->GetType(TYPE_TINYINT));
-  }
-  int curr_struct_offset = num_null_bytes_;
+  for (SlotDescriptor* slot: slots_) sorted_slots[slot->slot_idx_] = slot;
 
   // Add the slot types to the struct description.
+  vector<Type*> struct_fields;
+  int curr_struct_offset = 0;
   for (SlotDescriptor* slot: sorted_slots) {
     // IMPALA-3207: Codegen for CHAR is not yet implemented: bail out of codegen here.
     if (slot->type().type == TYPE_CHAR) return NULL;
-    DCHECK_LE(curr_struct_offset, slot->tuple_offset());
-    if (curr_struct_offset < slot->tuple_offset()) {
-      // Need to add padding to ensure slots are aligned correctly. Clang likes to
-      // sometimes pad structs in its own way. When it does this, it sets the 'packed'
-      // flag, which means that at the LLVM level the struct type has no alignment
-      // requirements, even if it does at the C language level.
-      struct_fields.push_back(ArrayType::get(codegen->GetType(TYPE_TINYINT),
-          slot->tuple_offset() - curr_struct_offset));
-    }
+    DCHECK_EQ(curr_struct_offset, slot->tuple_offset());
     slot->llvm_field_idx_ = struct_fields.size();
     struct_fields.push_back(codegen->GetType(slot->type()));
     curr_struct_offset = slot->tuple_offset() + slot->slot_size();
   }
+  // For each null byte, add a byte to the struct
+  for (int i = 0; i < num_null_bytes_; ++i) {
+    struct_fields.push_back(codegen->GetType(TYPE_TINYINT));
+    ++curr_struct_offset;
+  }
+
   DCHECK_LE(curr_struct_offset, byte_size_);
   if (curr_struct_offset < byte_size_) {
     struct_fields.push_back(ArrayType::get(codegen->GetType(TYPE_TINYINT),
         byte_size_ - curr_struct_offset));
   }
 
-  // Construct the struct type.
-  // We don't mark the struct as packed but it shouldn't matter either way: LLVM should
-  // not insert any additional padding since the contents are already aligned.
+  // Construct the struct type. Use the packed layout although not strictly necessary
+  // because the fields are already aligned, so LLVM should not add any padding. The
+  // fields are already aligned because we order the slots by descending size and only
+  // have powers-of-two slot sizes. Note that STRING and TIMESTAMP slots both occupy
+  // 16 bytes although their useful payload is only 12 bytes.
   StructType* tuple_struct = StructType::get(codegen->context(),
-      ArrayRef<Type*>(struct_fields));
+      ArrayRef<Type*>(struct_fields), true);
   const DataLayout& data_layout = codegen->execution_engine()->getDataLayout();
   const StructLayout* layout = data_layout.getStructLayout(tuple_struct);
   for (SlotDescriptor* slot: slots()) {
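
To make the packed layout concrete, a hedged, standalone C++ illustration (not generated code) of the example tuple documented in TupleDescriptor.java further down -- bool_col, int_col, string_col, smallint_col -- laid out as GetLlvmStruct() now expects: slots by descending size, null byte trailing, no padding:

#include <cstddef>
#include <cstdint>

#pragma pack(push, 1)
struct ExampleTuple {
  uint8_t string_col[16];  // 16-byte STRING slot (12 bytes of useful payload)
  int32_t int_col;         // offset 16
  int16_t smallint_col;    // offset 20
  int8_t  bool_col;        // offset 22
  uint8_t null_byte;       // offset 23, one bit per nullable slot
};
#pragma pack(pop)

static_assert(offsetof(ExampleTuple, int_col) == 16, "slots in descending-size order");
static_assert(offsetof(ExampleTuple, null_byte) == 23, "null bytes trail the slots");
static_assert(sizeof(ExampleTuple) == 24, "packed, no padding");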

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/descriptors.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index 874ad69..02193df 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -160,7 +160,7 @@ class SlotDescriptor {
 
   /// The idx of the slot in the llvm codegen'd tuple struct
   /// This is set by TupleDescriptor during codegen and takes into account
-  /// leading null bytes and any padding bytes.
+  /// any padding bytes.
   int llvm_field_idx_;
 
   /// Cached codegen'd functions
@@ -363,6 +363,7 @@ class TupleDescriptor {
  public:
   int byte_size() const { return byte_size_; }
   int num_null_bytes() const { return num_null_bytes_; }
+  int null_bytes_offset() const { return null_bytes_offset_; }
   const std::vector<SlotDescriptor*>& slots() const { return slots_; }
   const std::vector<SlotDescriptor*>& string_slots() const { return string_slots_; }
   const std::vector<SlotDescriptor*>& collection_slots() const {
@@ -401,6 +402,7 @@ class TupleDescriptor {
   TableDescriptor* table_desc_;
   const int byte_size_;
   const int num_null_bytes_;
+  const int null_bytes_offset_;
 
   /// Contains all slots.
   std::vector<SlotDescriptor*> slots_;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/row-batch-serialize-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/row-batch-serialize-test.cc b/be/src/runtime/row-batch-serialize-test.cc
index 0f5e519..f4c4f0b 100644
--- a/be/src/runtime/row-batch-serialize-test.cc
+++ b/be/src/runtime/row-batch-serialize-test.cc
@@ -24,11 +24,15 @@
 #include "runtime/raw-value.inline.h"
 #include "runtime/row-batch.h"
 #include "runtime/tuple-row.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "util/stopwatch.h"
 #include "testutil/desc-tbl-builder.h"
 
 #include "common/names.h"
 
+using namespace impala;
+
 namespace impala {
 
 const int NUM_ROWS = 20;
@@ -41,13 +45,18 @@ class RowBatchSerializeTest : public testing::Test {
   ObjectPool pool_;
   scoped_ptr<MemTracker> tracker_;
 
+  // For computing tuple mem layouts.
+  scoped_ptr<Frontend> fe_;
+
   virtual void SetUp() {
+    fe_.reset(new Frontend());
     tracker_.reset(new MemTracker());
   }
 
   virtual void TearDown() {
     pool_.Clear();
     tracker_.reset();
+    fe_.reset();
   }
 
   // Serializes and deserializes 'batch', then checks that the deserialized batch is valid
@@ -291,7 +300,7 @@ class RowBatchSerializeTest : public testing::Test {
 
 TEST_F(RowBatchSerializeTest, Basic) {
   // tuple: (int)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -306,7 +315,7 @@ TEST_F(RowBatchSerializeTest, Basic) {
 
 TEST_F(RowBatchSerializeTest, String) {
   // tuple: (int, string)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -325,7 +334,7 @@ TEST_F(RowBatchSerializeTest, BasicArray) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(TYPE_INT);
 
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -353,7 +362,7 @@ TEST_F(RowBatchSerializeTest, StringArray) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(struct_type);
 
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -394,7 +403,7 @@ TEST_F(RowBatchSerializeTest, NestedArrays) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(struct_type);
 
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -418,7 +427,7 @@ TEST_F(RowBatchSerializeTest, DupCorrectnessFull) {
 
 void RowBatchSerializeTest::TestDupCorrectness(bool full_dedup) {
   // tuples: (int), (string)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
@@ -459,7 +468,7 @@ TEST_F(RowBatchSerializeTest, DupRemovalFull) {
 // Test that tuple deduplication results in the expected reduction in serialized size.
 void RowBatchSerializeTest::TestDupRemoval(bool full_dedup) {
   // tuples: (int, string)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -498,7 +507,7 @@ TEST_F(RowBatchSerializeTest, ConsecutiveNullsFull) {
 // Test that deduplication handles NULL tuples correctly.
 void RowBatchSerializeTest::TestConsecutiveNulls(bool full_dedup) {
   // tuples: (int)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
   vector<bool> nullable_tuples(1, true);
@@ -526,7 +535,7 @@ TEST_F(RowBatchSerializeTest, ZeroLengthTuplesDedup) {
 
 void RowBatchSerializeTest::TestZeroLengthTuple(bool full_dedup) {
   // tuples: (int), (string), ()
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_STRING;
   builder.DeclareTuple();
@@ -553,7 +562,7 @@ TEST_F(RowBatchSerializeTest, DedupPathologicalFull) {
   ColumnType array_type;
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(TYPE_STRING);
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << array_type;
@@ -647,7 +656,8 @@ TEST_F(RowBatchSerializeTest, DedupPathologicalFull) {
 
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
-  impala::InitCommonRuntime(argc, argv, false, impala::TestInfo::BE_TEST);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
   uint32_t seed = time(NULL);
   cout << "seed = " << seed << endl;
   srand(seed);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/row-batch-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/row-batch-test.cc b/be/src/runtime/row-batch-test.cc
index 2a8304a..041c3c2 100644
--- a/be/src/runtime/row-batch-test.cc
+++ b/be/src/runtime/row-batch-test.cc
@@ -15,23 +15,30 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <boost/scoped_ptr.hpp>
+
 #include "testutil/death-test-util.h"
 #include "testutil/gtest-util.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/row-batch.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
 
 #include <gtest/gtest.h>
 
 #include "common/names.h"
 
-namespace impala {
+using namespace impala;
+
+// For computing tuple mem layouts.
+static scoped_ptr<Frontend> fe;
 
 TEST(RowBatchTest, AcquireStateWithMarkAtCapacity) {
   // Test that AcquireState() can be correctly called with MarkAtCapacity() on the
   // source batch.
   ObjectPool pool;
-  DescriptorTblBuilder builder(&pool);
+  DescriptorTblBuilder builder(fe.get(), &pool);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -61,9 +68,10 @@ TEST(RowBatchTest, AcquireStateWithMarkAtCapacity) {
   }
 }
 
-}
-
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
+  fe.reset(new Frontend());
   return RUN_ALL_TESTS();
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index df0066f..656621d 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -207,7 +207,7 @@ void Tuple::MaterializeExprs(
     TupleRow* row, const TupleDescriptor& desc, ExprContext* const* materialize_expr_ctxs,
     MemPool* pool, StringValue** non_null_string_values, int* total_string_lengths,
     int* num_non_null_string_values) {
-  memset(this, 0, desc.num_null_bytes());
+  ClearNullBits(desc);
   // Evaluate the materialize_expr_ctxs and place the results in the tuple.
   for (int i = 0; i < desc.slots().size(); ++i) {
     SlotDescriptor* slot_desc = desc.slots()[i];
@@ -368,8 +368,8 @@ Status Tuple::CodegenMaterializeExprs(RuntimeState* state, bool collect_string_v
   PointerType* tuple_type = codegen->GetPtrType(tuple_struct_type);
   Value* tuple = builder.CreateBitCast(opaque_tuple_arg, tuple_type, "tuple");
 
-  // Memset tuple's null bytes
-  codegen->CodegenMemset(&builder, tuple, 0, desc.num_null_bytes());
+  // Clear tuple's null bytes
+  codegen->CodegenClearNullBits(&builder, tuple, desc);
 
   // Evaluate the materialize_expr_ctxs and place the results in the tuple.
   for (int i = 0; i < desc.slots().size(); ++i) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/tuple.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h
index 3a3e399..b95492c 100644
--- a/be/src/runtime/tuple.h
+++ b/be/src/runtime/tuple.h
@@ -71,6 +71,11 @@ class Tuple {
 
   void Init(int size) { memset(this, 0, size); }
 
+  void ClearNullBits(const TupleDescriptor& tuple_desc) {
+    memset(reinterpret_cast<uint8_t*>(this) + tuple_desc.null_bytes_offset(),
+        0, tuple_desc.num_null_bytes());
+  }
+
   /// The total size of all data represented in this tuple (tuple data and referenced
   /// string and collection data).
   int64_t TotalByteSize(const TupleDescriptor& desc) const;
@@ -167,7 +172,7 @@ class Tuple {
       MemPool* pool, llvm::Function** fn);
 
   /// Turn null indicator bit on. For non-nullable slots, the mask will be 0 and
-  /// this is a no-op (but we don't have to branch to check is slots are nulalble).
+  /// this is a no-op (but we don't have to branch to check if slots are nullable).
   void SetNull(const NullIndicatorOffset& offset) {
     char* null_indicator_byte = reinterpret_cast<char*>(this) + offset.byte_offset;
     *null_indicator_byte |= offset.bit_mask;
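
A small standalone sketch (illustrative only) of how the tail-resident null bytes are manipulated; NullOffset mirrors NullIndicatorOffset:

#include <cassert>
#include <cstdint>
#include <cstring>

struct NullOffset { int byte_offset; uint8_t bit_mask; };

int main() {
  // A tuple with 23 bytes of slot data followed by one null-indicator byte.
  uint8_t tuple[24];
  const int null_bytes_offset = 23;
  const int num_null_bytes = 1;

  memset(tuple + null_bytes_offset, 0, num_null_bytes);  // ClearNullBits()
  NullOffset off = {23, 1 << 2};                          // third nullable slot
  tuple[off.byte_offset] |= off.bit_mask;                 // SetNull()
  assert((tuple[off.byte_offset] & off.bit_mask) != 0);   // IsNull()
  return 0;
}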

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/service/frontend.cc
----------------------------------------------------------------------
diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc
index 107354b..855924f 100644
--- a/be/src/service/frontend.cc
+++ b/be/src/service/frontend.cc
@@ -58,6 +58,7 @@ DEFINE_string(authorized_proxy_user_config, "",
     "users. For example: hue=user1,user2;admin=*");
 DEFINE_string(authorized_proxy_user_config_delimiter, ",",
     "Specifies the delimiter used in authorized_proxy_user_config. ");
+
 Frontend::Frontend() {
   JniMethodDescriptor methods[] = {
     {"<init>", "(ZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;"
@@ -85,6 +86,7 @@ Frontend::Frontend() {
     {"loadTableData", "([B)[B", &load_table_data_id_},
     {"getTableFiles", "([B)[B", &get_table_files_id_},
     {"showCreateFunction", "([B)Ljava/lang/String;", &show_create_function_id_},
+    {"buildTestDescriptorTable", "([B)[B", &build_test_descriptor_table_id_},
 };
 
   JNIEnv* jni_env = getJNIEnv();
@@ -264,3 +266,8 @@ Status Frontend::SetCatalogInitialized() {
 Status Frontend::GetTableFiles(const TShowFilesParams& params, TResultSet* result) {
   return JniUtil::CallJniMethod(fe_, get_table_files_id_, params, result);
 }
+
+Status Frontend::BuildTestDescriptorTable(const TBuildTestDescriptorTableParams& params,
+    TDescriptorTable* result) {
+  return JniUtil::CallJniMethod(fe_, build_test_descriptor_table_id_, params, result);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/service/frontend.h
----------------------------------------------------------------------
diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h
index 29dec68..c5c4895 100644
--- a/be/src/service/frontend.h
+++ b/be/src/service/frontend.h
@@ -168,6 +168,10 @@ class Frontend {
   /// Call FE to get files info for a table or partition.
   Status GetTableFiles(const TShowFilesParams& params, TResultSet* result);
 
+  /// Creates a thrift descriptor table for testing.
+  Status BuildTestDescriptorTable(const TBuildTestDescriptorTableParams& params,
+      TDescriptorTable* result);
+
  private:
   /// Descriptor of Java Frontend class itself, used to create a new instance.
   jclass fe_class_;
@@ -196,6 +200,10 @@ class Frontend {
   jmethodID set_catalog_initialized_id_; // JniFrontend.setCatalogInitialized
   jmethodID get_table_files_id_; // JniFrontend.getTableFiles
   jmethodID show_create_function_id_; // JniFrontend.showCreateFunction
+
+  // Only used for testing.
+  jmethodID build_test_descriptor_table_id_; // JniFrontend.buildTestDescriptorTable()
+
   jmethodID fe_ctor_;
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/testutil/desc-tbl-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/testutil/desc-tbl-builder.cc b/be/src/testutil/desc-tbl-builder.cc
index 7793f06..77be724 100644
--- a/be/src/testutil/desc-tbl-builder.cc
+++ b/be/src/testutil/desc-tbl-builder.cc
@@ -19,13 +19,17 @@
 #include "util/bit-util.h"
 
 #include "common/object-pool.h"
+#include "service/frontend.h"
 #include "runtime/descriptors.h"
 
 #include "common/names.h"
 
 namespace impala {
 
-DescriptorTblBuilder::DescriptorTblBuilder(ObjectPool* obj_pool) : obj_pool_(obj_pool) {
+DescriptorTblBuilder::DescriptorTblBuilder(Frontend* fe, ObjectPool* obj_pool)
+  : fe_(fe), obj_pool_(obj_pool) {
+  DCHECK(fe != NULL);
+  DCHECK(obj_pool_ != NULL);
 }
 
 TupleDescBuilder& DescriptorTblBuilder::DeclareTuple() {
@@ -34,34 +38,6 @@ TupleDescBuilder& DescriptorTblBuilder::DeclareTuple() {
   return *tuple_builder;
 }
 
-// item_id of -1 indicates no itemTupleId
-static TSlotDescriptor MakeSlotDescriptor(int id, int parent_id, const ColumnType& type,
-    int slot_idx, int byte_offset, int item_id) {
-  int null_byte = slot_idx / 8;
-  int null_bit = slot_idx % 8;
-  TSlotDescriptor slot_desc;
-  slot_desc.__set_id(id);
-  slot_desc.__set_parent(parent_id);
-  slot_desc.__set_slotType(type.ToThrift());
-  slot_desc.__set_materializedPath(vector<int>(1, slot_idx));
-  slot_desc.__set_byteOffset(byte_offset);
-  slot_desc.__set_nullIndicatorByte(null_byte);
-  slot_desc.__set_nullIndicatorBit(null_bit);
-  slot_desc.__set_slotIdx(slot_idx);
-  if (item_id != -1) slot_desc.__set_itemTupleId(item_id);
-  return slot_desc;
-}
-
-static TTupleDescriptor MakeTupleDescriptor(int id, int byte_size, int num_null_bytes,
-    int table_id = -1) {
-  TTupleDescriptor tuple_desc;
-  tuple_desc.__set_id(id);
-  tuple_desc.__set_byteSize(byte_size);
-  tuple_desc.__set_numNullBytes(num_null_bytes);
-  if (table_id != -1) tuple_desc.__set_tableId(table_id);
-  return tuple_desc;
-}
-
 void DescriptorTblBuilder::SetTableDescriptor(const TTableDescriptor& table_desc) {
   DCHECK(thrift_desc_tbl_.tableDescriptors.empty())
       << "Only one TableDescriptor can be set.";
@@ -69,61 +45,25 @@ void DescriptorTblBuilder::SetTableDescriptor(const TTableDescriptor& table_desc
 }
 
 DescriptorTbl* DescriptorTblBuilder::Build() {
-  DescriptorTbl* desc_tbl;
-  int tuple_id = 0;
-  int slot_id = tuples_descs_.size(); // First ids reserved for TupleDescriptors
+  DCHECK(!tuples_descs_.empty());
 
+  TBuildTestDescriptorTableParams params;
   for (int i = 0; i < tuples_descs_.size(); ++i) {
-    BuildTuple(tuples_descs_[i]->slot_types(), &thrift_desc_tbl_, &tuple_id, &slot_id);
-  }
-
-  Status status = DescriptorTbl::Create(obj_pool_, thrift_desc_tbl_, &desc_tbl);
-  DCHECK(status.ok());
-  return desc_tbl;
-}
-
-TTupleDescriptor DescriptorTblBuilder::BuildTuple(
-    const vector<ColumnType>& slot_types, TDescriptorTable* thrift_desc_tbl,
-    int* next_tuple_id, int* slot_id) {
-  // We never materialize struct slots (there's no in-memory representation of structs,
-  // instead the materialized fields appear directly in the tuple), but array types can
-  // still have a struct item type. In this case, the array item tuple contains the
-  // "inlined" struct fields.
-  if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) {
-    return BuildTuple(slot_types[0].children, thrift_desc_tbl, next_tuple_id, slot_id);
-  }
-
-  int num_null_bytes = BitUtil::Ceil(slot_types.size(), 8);
-  int byte_offset = num_null_bytes;
-  int tuple_id = *next_tuple_id;
-  ++(*next_tuple_id);
-
-  for (int i = 0; i < slot_types.size(); ++i) {
-    DCHECK_NE(slot_types[i].type, TYPE_STRUCT);
-    int item_id = -1;
-    if (slot_types[i].IsCollectionType()) {
-      TTupleDescriptor item_desc =
-          BuildTuple(slot_types[i].children, thrift_desc_tbl, next_tuple_id, slot_id);
-      item_id = item_desc.id;
+    params.slot_types.push_back(vector<TColumnType>());
+    vector<TColumnType>& tslot_types = params.slot_types.back();
+    const vector<ColumnType>& slot_types = tuples_descs_[i]->slot_types();
+    for (const ColumnType& slot_type : slot_types) {
+      tslot_types.push_back(slot_type.ToThrift());
     }
-
-    thrift_desc_tbl->slotDescriptors.push_back(
-        MakeSlotDescriptor(*slot_id, tuple_id, slot_types[i], i, byte_offset, item_id));
-    byte_offset += slot_types[i].GetSlotSize();
-    ++(*slot_id);
   }
 
-  TTupleDescriptor result;
+  Status buildDescTblStatus = fe_->BuildTestDescriptorTable(params, &thrift_desc_tbl_);
+  DCHECK(buildDescTblStatus.ok()) << buildDescTblStatus.GetDetail();
 
-  // If someone set a table descriptor pass that id along to the tuple descriptor.
-  if (thrift_desc_tbl_.tableDescriptors.empty()) {
-    result = MakeTupleDescriptor(tuple_id, byte_offset, num_null_bytes);
-  } else {
-    result = MakeTupleDescriptor(tuple_id, byte_offset, num_null_bytes,
-                                 thrift_desc_tbl_.tableDescriptors[0].id);
-  }
-  thrift_desc_tbl->tupleDescriptors.push_back(result);
-  return result;
+  DescriptorTbl* desc_tbl;
+  Status status = DescriptorTbl::Create(obj_pool_, thrift_desc_tbl_, &desc_tbl);
+  DCHECK(status.ok()) << status.GetDetail();
+  return desc_tbl;
 }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/testutil/desc-tbl-builder.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/desc-tbl-builder.h b/be/src/testutil/desc-tbl-builder.h
index 8dc8a55..9ad92b1 100644
--- a/be/src/testutil/desc-tbl-builder.h
+++ b/be/src/testutil/desc-tbl-builder.h
@@ -28,18 +28,17 @@ class TupleDescBuilder;
 
 /// Aids in the construction of a DescriptorTbl by declaring tuples and slots
 /// associated with those tuples.
-/// TupleIds are monotonically increasing from 0 for each DeclareTuple, and
-/// SlotIds increase similarly, but are always greater than all TupleIds.
-/// Unlike FE, slots are not reordered based on size, and padding is not addded.
-//
+/// The descriptor table is constructed by calling into the FE via JNI, such that
+/// the tuple mem layouts mimic real queries. All id assignments happen in the FE.
+///
 /// Example usage:
 /// DescriptorTblBuilder builder;
-/// builder.DeclareTuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0
-/// builder.DeclareTuple() << TYPE_FLOAT; // gets TupleId 1
+/// builder.DeclareTuple() << TYPE_TINYINT << TYPE_TIMESTAMP;
+/// builder.DeclareTuple() << TYPE_FLOAT;
 /// DescriptorTbl desc_tbl = builder.Build();
 class DescriptorTblBuilder {
  public:
-  DescriptorTblBuilder(ObjectPool* object_pool);
+  DescriptorTblBuilder(Frontend* fe, ObjectPool* object_pool);
 
   TupleDescBuilder& DeclareTuple();
 
@@ -50,15 +49,12 @@ class DescriptorTblBuilder {
   DescriptorTbl* Build();
 
  private:
-  /// Owned by caller.
+  /// Both owned by caller.
+  Frontend* fe_;
   ObjectPool* obj_pool_;
 
   std::vector<TupleDescBuilder*> tuples_descs_;
   TDescriptorTable thrift_desc_tbl_;
-
-  TTupleDescriptor BuildTuple(
-      const std::vector<ColumnType>& slot_types, TDescriptorTable* thrift_desc_tbl,
-      int* tuple_id, int* slot_id);
 };
 
 class TupleDescBuilder {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/common/thrift/Frontend.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift
index 91322b2..fbbf7be 100644
--- a/common/thrift/Frontend.thrift
+++ b/common/thrift/Frontend.thrift
@@ -749,3 +749,10 @@ struct TGetAllHadoopConfigsResponse {
 struct TStartupOptions {
   1: optional bool compute_lineage
 }
+
+// For creating a test descriptor table. The tuples and their memory layout are computed
+// in the FE.
+struct TBuildTestDescriptorTableParams {
+  // Every entry describes the slot types of one tuple.
+  1: required list<list<Types.TColumnType>> slot_types
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
index 0f59fdb..22764ea 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
@@ -17,17 +17,22 @@
 
 package org.apache.impala.analysis;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 
-import org.apache.commons.lang.StringUtils;
-
+import org.apache.impala.catalog.ArrayType;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Table;
+import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.View;
 import org.apache.impala.common.IdGenerator;
+import org.apache.impala.thrift.TColumnType;
 import org.apache.impala.thrift.TDescriptorTable;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -142,13 +147,13 @@ public class DescriptorTable {
     return result;
   }
 
-  // Computes physical layout parameters of all descriptors.
-  // Call this only after the last descriptor was added.
-  // Test-only.
+  /**
+   * Computes physical layout parameters of all descriptors.
+   * Call this only after the last descriptor was added.
+   * Test-only.
+   */
   public void computeMemLayout() {
-    for (TupleDescriptor d: tupleDescs_.values()) {
-      d.computeMemLayout();
-    }
+    for (TupleDescriptor d: tupleDescs_.values()) d.computeMemLayout();
   }
 
   public TDescriptorTable toThrift() {
@@ -195,4 +200,55 @@ public class DescriptorTable {
     }
     return out.toString();
   }
+
+  /**
+   * Creates a thrift descriptor table for testing. Each entry in 'slotTypes' is a list
+   * of slot types for one tuple.
+   */
+  public static TDescriptorTable buildTestDescriptorTable(
+      List<List<TColumnType>> slotTypes) {
+    DescriptorTable descTbl = new DescriptorTable();
+    for (List<TColumnType> ttupleSlots: slotTypes) {
+      ArrayList<StructField> fields = Lists.newArrayListWithCapacity(ttupleSlots.size());
+      for (TColumnType ttype: ttupleSlots) {
+        fields.add(new StructField("testField", Type.fromThrift(ttype)));
+      }
+      StructType tupleType = new StructType(fields);
+      createTupleDesc(tupleType, descTbl);
+    }
+    descTbl.computeMemLayout();
+    return descTbl.toThrift();
+  }
+
+  /**
+   * Recursive helper for buildTestDescriptorTable(). Returns a TupleDescriptor
+   * corresponding to the given struct. The struct may contain scalar and array fields.
+   */
+  private static TupleDescriptor createTupleDesc(StructType tupleType,
+      DescriptorTable descTbl) {
+    TupleDescriptor tupleDesc = descTbl.createTupleDescriptor("testDescTbl");
+    for (StructField field: tupleType.getFields()) {
+      Type type = field.getType();
+      SlotDescriptor slotDesc = descTbl.addSlotDescriptor(tupleDesc);
+      slotDesc.setIsMaterialized(true);
+      slotDesc.setType(type);
+      if (!type.isCollectionType()) continue;
+
+      // Set item tuple descriptor for the collection.
+      Preconditions.checkState(type.isArrayType());
+      ArrayType arrayType = (ArrayType) type;
+      Type itemType = arrayType.getItemType();
+      StructType itemStruct = null;
+      if (itemType.isStructType()) {
+        itemStruct = (StructType) itemType;
+      } else {
+        ArrayList<StructField> itemFields = Lists.newArrayListWithCapacity(1);
+        itemFields.add(new StructField("item", itemType));
+        itemStruct = new StructType(itemFields);
+      }
+      TupleDescriptor itemTuple = createTupleDesc(itemStruct, descTbl);
+      slotDesc.setItemTupleDesc(itemTuple);
+    }
+    return tupleDesc;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index cbfdaca..e5462fd 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -24,13 +24,13 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.lang.StringUtils;
-
 import org.apache.impala.catalog.ColumnStats;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.catalog.View;
 import org.apache.impala.thrift.TTupleDescriptor;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
@@ -56,6 +56,15 @@ import com.google.common.collect.Lists;
  * A tuple descriptor may be materialized or non-materialized. A non-materialized tuple
  * descriptor acts as a placeholder for 'virtual' table references such as inline views,
  * and must not be materialized at runtime.
+ *
+ * Memory Layout
+ * Slots are placed in descending order by size with trailing bytes to store null flags.
+ * Null flags are omitted for non-nullable slots. There is no padding between tuples when
+ * stored back-to-back in a row batch.
+ *
+ * Example: select bool_col, int_col, string_col, smallint_col from functional.alltypes
+ * Slots:   string_col|int_col|smallint_col|bool_col|null_byte
+ * Offsets: 0          16      20           22       23
  */
 public class TupleDescriptor {
   private final TupleId id_;
@@ -211,12 +220,13 @@ public class TupleDescriptor {
     if (hasMemLayout_) return;
     hasMemLayout_ = true;
 
-    // sort slots by size
+    // maps from slot size to slot descriptors with that size
     Map<Integer, List<SlotDescriptor>> slotsBySize =
         new HashMap<Integer, List<SlotDescriptor>>();
 
-    // populate slotsBySize; also compute avgSerializedSize
+    // populate slotsBySize
     int numNullableSlots = 0;
+    int totalSlotSize = 0;
     for (SlotDescriptor d: slots_) {
       if (!d.isMaterialized()) continue;
       ColumnStats stats = d.getStats();
@@ -229,6 +239,7 @@ public class TupleDescriptor {
       if (!slotsBySize.containsKey(d.getType().getSlotSize())) {
         slotsBySize.put(d.getType().getSlotSize(), new ArrayList<SlotDescriptor>());
       }
+      totalSlotSize += d.getType().getSlotSize();
       slotsBySize.get(d.getType().getSlotSize()).add(d);
       if (d.getIsNullable()) ++numNullableSlots;
     }
@@ -236,30 +247,25 @@ public class TupleDescriptor {
     Preconditions.checkState(!slotsBySize.containsKey(0));
     Preconditions.checkState(!slotsBySize.containsKey(-1));
 
-    // assign offsets to slots in order of ascending size
+    // assign offsets to slots in order of descending size
     numNullBytes_ = (numNullableSlots + 7) / 8;
-    int offset = numNullBytes_;
-    int nullIndicatorByte = 0;
+    int slotOffset = 0;
+    int nullIndicatorByte = totalSlotSize;
     int nullIndicatorBit = 0;
-    // slotIdx is the index into the resulting tuple struct.  The first (smallest) field
+    // slotIdx is the index into the resulting tuple struct.  The first (largest) field
     // is 0, next is 1, etc.
     int slotIdx = 0;
+    // sort slots in descending order of size
     List<Integer> sortedSizes = new ArrayList<Integer>(slotsBySize.keySet());
-    Collections.sort(sortedSizes);
+    Collections.sort(sortedSizes, Collections.reverseOrder());
     for (int slotSize: sortedSizes) {
       if (slotsBySize.get(slotSize).isEmpty()) continue;
-      if (slotSize > 1) {
-        // insert padding
-        int alignTo = Math.min(slotSize, 8);
-        offset = (offset + alignTo - 1) / alignTo * alignTo;
-      }
-
       for (SlotDescriptor d: slotsBySize.get(slotSize)) {
         Preconditions.checkState(d.isMaterialized());
         d.setByteSize(slotSize);
-        d.setByteOffset(offset);
+        d.setByteOffset(slotOffset);
         d.setSlotIdx(slotIdx++);
-        offset += slotSize;
+        slotOffset += slotSize;
 
         // assign null indicator
         if (d.getIsNullable()) {
@@ -268,14 +274,15 @@ public class TupleDescriptor {
           nullIndicatorBit = (nullIndicatorBit + 1) % 8;
           if (nullIndicatorBit == 0) ++nullIndicatorByte;
         } else {
-          // Non-nullable slots will have 0 for the byte offset and -1 for the bit mask
+          // non-nullable slots will have 0 for the byte offset and -1 for the bit mask
           d.setNullIndicatorBit(-1);
           d.setNullIndicatorByte(0);
         }
       }
     }
+    Preconditions.checkState(slotOffset == totalSlotSize);
 
-    this.byteSize_ = offset;
+    byteSize_ = totalSlotSize + numNullBytes_;
   }
 
   /**
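
For readers of the new computeMemLayout(), a standalone C++ sketch (illustrative; the FE code above is authoritative) of the layout rule it applies: slot offsets assigned in descending size order starting at 0, null-indicator bytes appended after the last slot, no padding:

#include <algorithm>
#include <vector>

struct SlotLayout { int size; int offset; };

// Returns the total tuple byte size; *null_bytes_offset is where the null bytes begin.
int ComputeLayoutSketch(std::vector<SlotLayout>* slots, int num_nullable_slots,
    int* null_bytes_offset) {
  std::stable_sort(slots->begin(), slots->end(),
      [](const SlotLayout& a, const SlotLayout& b) { return a.size > b.size; });
  int offset = 0;
  for (SlotLayout& s : *slots) {
    s.offset = offset;
    offset += s.size;
  }
  *null_bytes_offset = offset;                      // null bytes trail the slots
  const int num_null_bytes = (num_nullable_slots + 7) / 8;
  return offset + num_null_bytes;                   // total tuple byte size
}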

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/main/java/org/apache/impala/service/JniFrontend.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index 0b4ce8f..07d6ec6 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -30,17 +30,10 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.log4j.Appender;
-import org.apache.hadoop.fs.s3a.S3AFileSystem;
-import org.apache.log4j.FileAppender;
-import org.apache.thrift.TException;
-import org.apache.thrift.TSerializer;
-import org.apache.thrift.protocol.TBinaryProtocol;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import org.apache.impala.analysis.DescriptorTable;
 import org.apache.impala.analysis.ToSqlUtils;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.authorization.ImpalaInternalAdminUser;
@@ -53,12 +46,13 @@ import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.JniUtil;
-import org.apache.impala.service.BackendConfig;
+import org.apache.impala.thrift.TBuildTestDescriptorTableParams;
 import org.apache.impala.thrift.TCatalogObject;
 import org.apache.impala.thrift.TDatabase;
 import org.apache.impala.thrift.TDescribeDbParams;
 import org.apache.impala.thrift.TDescribeResult;
 import org.apache.impala.thrift.TDescribeTableParams;
+import org.apache.impala.thrift.TDescriptorTable;
 import org.apache.impala.thrift.TExecRequest;
 import org.apache.impala.thrift.TFunctionCategory;
 import org.apache.impala.thrift.TGetAllHadoopConfigsResponse;
@@ -90,6 +84,14 @@ import org.apache.impala.thrift.TUpdateMembershipRequest;
 import org.apache.impala.util.GlogAppender;
 import org.apache.impala.util.PatternMatcher;
 import org.apache.impala.util.TSessionStateUtil;
+import org.apache.log4j.Appender;
+import org.apache.log4j.FileAppender;
+import org.apache.thrift.TException;
+import org.apache.thrift.TSerializer;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -477,6 +479,25 @@ public class JniFrontend {
   }
 
   /**
+   * Creates a thrift descriptor table for testing.
+   */
+  public byte[] buildTestDescriptorTable(byte[] buildTestDescTblParams)
+      throws ImpalaException {
+    TBuildTestDescriptorTableParams params = new TBuildTestDescriptorTableParams();
+    JniUtil.deserializeThrift(protocolFactory_, params, buildTestDescTblParams);
+    Preconditions.checkNotNull(params.slot_types);
+    TDescriptorTable result =
+        DescriptorTable.buildTestDescriptorTable(params.slot_types);
+    TSerializer serializer = new TSerializer(protocolFactory_);
+    try {
+      byte[] ret = serializer.serialize(result);
+      return ret;
+    } catch (TException e) {
+      throw new InternalException(e.getMessage());
+    }
+  }
+
+  /**
    * Gets all roles
    */
   public byte[] getRoles(byte[] showRolesParams) throws ImpalaException {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
index d763deb..993f489 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
@@ -22,18 +22,17 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.catalog.Function;
-import org.apache.impala.catalog.PrimitiveType;
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.thrift.TExpr;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 
 public class AnalyzerTest extends FrontendTestBase {
@@ -173,27 +172,24 @@ public class AnalyzerTest extends FrontendTestBase {
     SelectStmt stmt = (SelectStmt) AnalyzesOk("select * from functional.AllTypes");
     Analyzer analyzer = stmt.getAnalyzer();
     DescriptorTable descTbl = analyzer.getDescTbl();
-    TupleDescriptor tupleD = descTbl.getTupleDesc(new TupleId(0));
-    for (SlotDescriptor slotD: tupleD.getSlots()) {
-      slotD.setIsMaterialized(true);
-    }
+    TupleDescriptor tupleDesc = descTbl.getTupleDesc(new TupleId(0));
+    tupleDesc.materializeSlots();
     descTbl.computeMemLayout();
-    Assert.assertEquals(97.0f, tupleD.getAvgSerializedSize(), 0.0);
-    checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0, analyzer);
-    checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1, analyzer);
-    checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2, analyzer);
-    checkLayoutParams("functional.alltypes.id", 4, 8, 0, 3, analyzer);
-    checkLayoutParams("functional.alltypes.int_col", 4, 12, 0, 4, analyzer);
-    checkLayoutParams("functional.alltypes.float_col", 4, 16, 0, 5, analyzer);
-    checkLayoutParams("functional.alltypes.year", 4, 20, 0, 6, analyzer);
-    checkLayoutParams("functional.alltypes.month", 4, 24, 0, 7, analyzer);
-    checkLayoutParams("functional.alltypes.bigint_col", 8, 32, 1, 0, analyzer);
-    checkLayoutParams("functional.alltypes.double_col", 8, 40, 1, 1, analyzer);
-    int strSlotSize = PrimitiveType.STRING.getSlotSize();
-    checkLayoutParams("functional.alltypes.date_string_col",
-        strSlotSize, 48, 1, 2, analyzer);
-    checkLayoutParams("functional.alltypes.string_col",
-        strSlotSize, 48 + strSlotSize, 1, 3, analyzer);
+
+    Assert.assertEquals(97.0f, tupleDesc.getAvgSerializedSize(), 0.0);
+    checkLayoutParams("functional.alltypes.date_string_col", 16, 0, 88, 0, analyzer);
+    checkLayoutParams("functional.alltypes.string_col", 16, 16, 88, 1, analyzer);
+    checkLayoutParams("functional.alltypes.timestamp_col", 16, 32, 88, 2, analyzer);
+    checkLayoutParams("functional.alltypes.bigint_col", 8, 48, 88, 3, analyzer);
+    checkLayoutParams("functional.alltypes.double_col", 8, 56, 88, 4, analyzer);
+    checkLayoutParams("functional.alltypes.id", 4, 64, 88, 5, analyzer);
+    checkLayoutParams("functional.alltypes.int_col", 4, 68, 88, 6, analyzer);
+    checkLayoutParams("functional.alltypes.float_col", 4, 72, 88, 7, analyzer);
+    checkLayoutParams("functional.alltypes.year", 4, 76, 89, 0, analyzer);
+    checkLayoutParams("functional.alltypes.month", 4, 80, 89, 1, analyzer);
+    checkLayoutParams("functional.alltypes.smallint_col", 2, 84, 89, 2, analyzer);
+    checkLayoutParams("functional.alltypes.bool_col", 1, 86, 89, 3, analyzer);
+    checkLayoutParams("functional.alltypes.tinyint_col", 1, 87, 89, 4, analyzer);
   }
 
   private void testNonNullable() throws AnalysisException {
@@ -205,9 +201,7 @@ public class AnalyzerTest extends FrontendTestBase {
         "select count(int_col), count(*) from functional.AllTypes");
     DescriptorTable descTbl = stmt.getAnalyzer().getDescTbl();
     TupleDescriptor aggDesc = descTbl.getTupleDesc(new TupleId(1));
-    for (SlotDescriptor slotD: aggDesc.getSlots()) {
-      slotD.setIsMaterialized(true);
-    }
+    aggDesc.materializeSlots();
     descTbl.computeMemLayout();
     Assert.assertEquals(16.0f, aggDesc.getAvgSerializedSize(), 0.0);
     Assert.assertEquals(16, aggDesc.getByteSize());
@@ -218,22 +212,19 @@ public class AnalyzerTest extends FrontendTestBase {
   private void testMixedNullable() throws AnalysisException {
     // one slot is nullable, one is not. The layout should look like:
     // (byte range : data)
-    // 0 : 1 nullable-byte (only 1 bit used)
-    // 1 - 7: padded bytes
-    // 8 - 15: sum(int_col)
-    // 16 - 23: count(*)
+    // 0 - 7: sum(int_col)
+    // 8 - 15: count(*)
+    // 16 - 17: nullable-byte (only 1 bit used)
     SelectStmt stmt = (SelectStmt) AnalyzesOk(
         "select sum(int_col), count(*) from functional.AllTypes");
     DescriptorTable descTbl = stmt.getAnalyzer().getDescTbl();
     TupleDescriptor aggDesc = descTbl.getTupleDesc(new TupleId(1));
-    for (SlotDescriptor slotD: aggDesc.getSlots()) {
-      slotD.setIsMaterialized(true);
-    }
+    aggDesc.materializeSlots();
     descTbl.computeMemLayout();
     Assert.assertEquals(16.0f, aggDesc.getAvgSerializedSize(), 0.0);
-    Assert.assertEquals(24, aggDesc.getByteSize());
-    checkLayoutParams(aggDesc.getSlots().get(0), 8, 8, 0, 0);
-    checkLayoutParams(aggDesc.getSlots().get(1), 8, 16, 0, -1);
+    Assert.assertEquals(17, aggDesc.getByteSize());
+    checkLayoutParams(aggDesc.getSlots().get(0), 8, 0, 16, 0);
+    checkLayoutParams(aggDesc.getSlots().get(1), 8, 8, 0, -1);
   }
 
   /**
@@ -243,34 +234,31 @@ public class AnalyzerTest extends FrontendTestBase {
     SelectStmt stmt = (SelectStmt) AnalyzesOk("select * from functional.alltypes");
     Analyzer analyzer = stmt.getAnalyzer();
     DescriptorTable descTbl = analyzer.getDescTbl();
-    TupleDescriptor tupleD = descTbl.getTupleDesc(new TupleId(0));
-    ArrayList<SlotDescriptor> slots = tupleD.getSlots();
-    for (SlotDescriptor slotD: slots) {
-      slotD.setIsMaterialized(true);
-    }
+    TupleDescriptor tupleDesc = descTbl.getTupleDesc(new TupleId(0));
+    tupleDesc.materializeSlots();
     // Mark slots 0 (id), 7 (double_col), 9 (string_col) as non-materialized.
+    ArrayList<SlotDescriptor> slots = tupleDesc.getSlots();
     slots.get(0).setIsMaterialized(false);
     slots.get(7).setIsMaterialized(false);
     slots.get(9).setIsMaterialized(false);
-
     descTbl.computeMemLayout();
-    Assert.assertEquals(68.0f, tupleD.getAvgSerializedSize(), 0.0);
+
+    Assert.assertEquals(68.0f, tupleDesc.getAvgSerializedSize(), 0.0);
     // Check non-materialized slots.
     checkLayoutParams("functional.alltypes.id", 0, -1, 0, 0, analyzer);
     checkLayoutParams("functional.alltypes.double_col", 0, -1, 0, 0, analyzer);
     checkLayoutParams("functional.alltypes.string_col", 0, -1, 0, 0, analyzer);
     // Check materialized slots.
-    checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0, analyzer);
-    checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1, analyzer);
-    checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2, analyzer);
-    checkLayoutParams("functional.alltypes.int_col", 4, 8, 0, 3, analyzer);
-    checkLayoutParams("functional.alltypes.float_col", 4, 12, 0, 4, analyzer);
-    checkLayoutParams("functional.alltypes.year", 4, 16, 0, 5, analyzer);
-    checkLayoutParams("functional.alltypes.month", 4, 20, 0, 6, analyzer);
-    checkLayoutParams("functional.alltypes.bigint_col", 8, 24, 0, 7, analyzer);
-    int strSlotSize = PrimitiveType.STRING.getSlotSize();
-    checkLayoutParams("functional.alltypes.date_string_col",
-        strSlotSize, 32, 1, 0, analyzer);
+    checkLayoutParams("functional.alltypes.date_string_col", 16, 0, 60, 0, analyzer);
+    checkLayoutParams("functional.alltypes.timestamp_col", 16, 16, 60, 1, analyzer);
+    checkLayoutParams("functional.alltypes.bigint_col", 8, 32, 60, 2, analyzer);
+    checkLayoutParams("functional.alltypes.int_col", 4, 40, 60, 3, analyzer);
+    checkLayoutParams("functional.alltypes.float_col", 4, 44, 60, 4, analyzer);
+    checkLayoutParams("functional.alltypes.year", 4, 48, 60, 5, analyzer);
+    checkLayoutParams("functional.alltypes.month", 4, 52, 60, 6, analyzer);
+    checkLayoutParams("functional.alltypes.smallint_col", 2, 56, 60, 7, analyzer);
+    checkLayoutParams("functional.alltypes.bool_col", 1, 58, 61, 0, analyzer);
+    checkLayoutParams("functional.alltypes.tinyint_col", 1, 59, 61, 1, analyzer);
   }
 
   private void checkLayoutParams(SlotDescriptor d, int byteSize, int byteOffset,


[20/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
IMPALA-2905: Handle coordinator fragment lifecycle like all others

The plan-root fragment instance that runs on the coordinator should be
handled like all others: started via RPC and run asynchronously. Without
this, the fragment requires special-case code throughout the
coordinator, and does not show up in system metrics etc.

This patch adds a new sink type, PlanRootSink, to the root fragment
instance so that the coordinator can pull row batches that are pushed by
the root instance. The coordinator signals completion to the fragment
instance by closing the consumer side of the sink, whereupon the
instance is free to complete.
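
To make the handoff concrete, here is a minimal, self-contained sketch of the
Send()/GetNext() protocol. It is illustrative only: a toy batch of ints stands
in for RowBatch/QueryResultSet, and expr evaluation, partial batches and
cancellation are elided. It is not the code added by this patch.

// Minimal sketch of the producer/consumer handoff used by PlanRootSink.
#include <condition_variable>
#include <mutex>
#include <vector>

class HandoffSketch {
 public:
  // Producer (the root fragment instance) offers a batch and blocks until the
  // consumer has taken it or has closed the sink.
  void Send(const std::vector<int>& batch) {
    std::unique_lock<std::mutex> l(lock_);
    sender_cv_.wait(l, [this] { return request_ != nullptr || consumer_done_; });
    if (consumer_done_) return;
    *request_ = batch;      // satisfy the pending GetNext()
    request_ = nullptr;
    consumer_cv_.notify_all();
  }

  // Producer signals that no more batches are coming.
  void FlushFinal() {
    std::lock_guard<std::mutex> l(lock_);
    sender_done_ = true;
    consumer_cv_.notify_all();
  }

  // Consumer (the coordinator) asks for the next batch; returns false at eos.
  bool GetNext(std::vector<int>* out) {
    std::unique_lock<std::mutex> l(lock_);
    if (sender_done_) return false;
    request_ = out;
    sender_cv_.notify_all();
    consumer_cv_.wait(l, [this] { return request_ == nullptr || sender_done_; });
    return request_ == nullptr;
  }

  // Consumer signals completion so the fragment instance is free to finish.
  void CloseConsumer() {
    std::lock_guard<std::mutex> l(lock_);
    consumer_done_ = true;
    sender_cv_.notify_all();
  }

 private:
  std::mutex lock_;
  std::condition_variable sender_cv_;
  std::condition_variable consumer_cv_;
  std::vector<int>* request_ = nullptr;  // set by GetNext(), cleared by Send()
  bool consumer_done_ = false;
  bool sender_done_ = false;
};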

Since the root instance now runs asynchronously with respect to the
coordinator, we add several coordination methods that allow the coordinator
to wait until the instance reaches a given point in its execution - e.g.
until the instance has been opened.
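
Roughly, the coordinator-side ordering becomes: wait for the root instance to
finish Prepare(), wait for it to open, pull rows from its PlanRootSink, then
close the consumer side. The sketch below uses hypothetical stand-in types
(FakeExecutor, FakeRootSink); the real methods in this patch are
PlanFragmentExecutor::WaitForPrepare()/WaitForOpen() and
PlanRootSink::GetNext()/CloseConsumer().

#include <iostream>

struct FakeExecutor {
  bool WaitForPrepare() { return true; }  // real version returns Status, blocks until Prepare() finishes
  bool WaitForOpen() { return true; }     // real version returns Status, blocks until Open() finishes
};

struct FakeRootSink {
  int rows_left = 3;
  // Real signature: GetNext(RuntimeState*, QueryResultSet*, int num_rows, bool* eos).
  void GetNext(int num_rows, bool* eos) {
    rows_left -= num_rows;
    *eos = rows_left <= 0;
  }
  void CloseConsumer() { std::cout << "consumer closed, instance may finish\n"; }
};

int main() {
  FakeExecutor executor;
  FakeRootSink root_sink;

  // Exec(): block until the root instance has finished Prepare(), then grab its sink.
  if (!executor.WaitForPrepare()) return 1;

  // Wait(): block until the root instance has been opened.
  if (!executor.WaitForOpen()) return 1;

  // GetNext(): pull rows from the sink until eos, then close the consumer side so
  // the root instance is free to complete.
  bool eos = false;
  while (!eos) root_sink.GetNext(1, &eos);
  root_sink.CloseConsumer();
  return 0;
}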

Done in this patch:

* Add PlanRootSink
* Add coordination to PFE to allow coordinator to observe lifecycle
* Make FragmentMgr a singleton
* Removed dead code from Coordinator::Wait() and elsewhere.
* Moved result output exprs out of QES and into PlanRootSink.
* Remove special-case limit-based teardown of coordinator fragment, and
  supporting functions in PlanFragmentExecutor.
* Simplified lifecycle of PlanFragmentExecutor by separating Open() into
  Open() and Exec(), the latter of which drives the sink by reading
  rows from the plan tree (see the sketch after this list).
* Add child profile to PlanFragmentExecutor to measure time spent in
  each lifecycle phase.
* Removed dependency between InitExecProfiles() and starting root
  fragment.
* Removed mostly dead-code handling of LIMIT 0 queries.
* Ensured that SET returns a result set in all cases.
* Fix test_get_log() HS2 test. Errors are only guaranteed to be visible
  after fetch calls return EOS, but the test assumed this would happen
  after the first fetch.
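
The following sketch illustrates the Open()/Exec() split mentioned above. The
types are hypothetical stand-ins; only the Prepare()/Open()/Exec() ordering and
the fact that Exec() reads from the plan tree and writes to the sink mirror the
patch.

#include <iostream>
#include <vector>

struct FakePlanTree {
  int batches_left = 2;
  void GetNext(std::vector<int>* batch, bool* eos) {
    *batch = {1, 2, 3};
    *eos = (--batches_left == 0);
  }
};

struct FakeSink {
  void Send(const std::vector<int>& batch) {
    std::cout << "sent " << batch.size() << " rows\n";
  }
  void FlushFinal() { std::cout << "no more batches\n"; }
};

struct FakeExecutor {
  FakePlanTree plan;
  FakeSink sink;
  void Prepare() {}  // build plan tree and sink; WaitForPrepare() unblocks after this
  void Open() {}     // open plan tree; WaitForOpen() unblocks after this
  // Exec() drives the sink by reading row batches from the plan tree.
  void Exec() {
    bool eos = false;
    while (!eos) {
      std::vector<int> batch;
      plan.GetNext(&batch, &eos);
      sink.Send(batch);
    }
    sink.FlushFinal();
  }
};

int main() {
  FakeExecutor executor;
  executor.Prepare();
  executor.Open();
  executor.Exec();
  return 0;
}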

Change-Id: Ibb0064ec2f085fa3a5598ea80894fb489a01e4df
Reviewed-on: http://gerrit.cloudera.org:8080/4402
Tested-by: Internal Jenkins
Reviewed-by: Henry Robinson <he...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9f61397f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9f61397f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9f61397f

Branch: refs/heads/hadoop-next
Commit: 9f61397fc4d638aa78b37db2cd5b9c35b6deed94
Parents: 05b91a9
Author: Henry Robinson <he...@cloudera.com>
Authored: Wed Oct 5 11:48:01 2016 -0700
Committer: Henry Robinson <he...@cloudera.com>
Committed: Sun Oct 16 15:55:29 2016 +0000

----------------------------------------------------------------------
 be/src/exec/CMakeLists.txt                      |   1 +
 be/src/exec/data-sink.cc                        |   4 +
 be/src/exec/plan-root-sink.cc                   | 171 ++++++
 be/src/exec/plan-root-sink.h                    | 133 +++++
 be/src/runtime/coordinator.cc                   | 560 +++++++------------
 be/src/runtime/coordinator.h                    | 142 ++---
 be/src/runtime/exec-env.cc                      |  19 +-
 be/src/runtime/exec-env.h                       |   2 +
 be/src/runtime/plan-fragment-executor.cc        | 264 ++++-----
 be/src/runtime/plan-fragment-executor.h         | 149 ++---
 be/src/scheduling/query-schedule.cc             |  32 +-
 be/src/scheduling/query-schedule.h              |  27 +-
 be/src/scheduling/simple-scheduler.cc           |   5 -
 be/src/service/fragment-exec-state.cc           |   6 +-
 be/src/service/fragment-exec-state.h            |   8 +-
 be/src/service/fragment-mgr.cc                  |   8 +-
 be/src/service/impala-beeswax-server.cc         |  52 +-
 be/src/service/impala-hs2-server.cc             |  36 +-
 be/src/service/impala-internal-service.h        |  15 +-
 be/src/service/impala-server.cc                 |   5 +-
 be/src/service/impala-server.h                  |  47 --
 be/src/service/query-exec-state.cc              | 107 +---
 be/src/service/query-exec-state.h               |  11 +-
 be/src/service/query-result-set.h               |  64 +++
 be/src/testutil/in-process-servers.cc           |   4 +
 common/thrift/DataSinks.thrift                  |   7 +-
 .../org/apache/impala/analysis/QueryStmt.java   |   6 +
 .../org/apache/impala/planner/PlanRootSink.java |  39 ++
 .../java/org/apache/impala/planner/Planner.java |   2 +
 .../apache/impala/planner/PlannerContext.java   |   1 +
 .../queries/PlannerTest/aggregation.test        | 104 ++++
 .../queries/PlannerTest/analytic-fns.test       | 118 ++++
 .../PlannerTest/complex-types-file-formats.test |  14 +
 .../queries/PlannerTest/conjunct-ordering.test  |  30 +
 .../queries/PlannerTest/constant.test           |   4 +
 .../queries/PlannerTest/data-source-tables.test |  10 +
 .../PlannerTest/disable-preaggregations.test    |   4 +
 .../queries/PlannerTest/distinct-estimate.test  |   8 +
 .../queries/PlannerTest/distinct.test           |  54 ++
 .../queries/PlannerTest/empty.test              |  58 ++
 .../queries/PlannerTest/hbase.test              | 118 ++++
 .../queries/PlannerTest/hdfs.test               | 226 ++++++++
 .../queries/PlannerTest/implicit-joins.test     |  28 +
 .../queries/PlannerTest/inline-view-limit.test  |  58 ++
 .../queries/PlannerTest/inline-view.test        | 116 ++++
 .../queries/PlannerTest/join-order.test         |  72 +++
 .../queries/PlannerTest/joins.test              | 178 ++++++
 .../queries/PlannerTest/kudu-selectivity.test   |  16 +
 .../queries/PlannerTest/kudu.test               |  32 ++
 .../PlannerTest/mem-limit-broadcast-join.test   |   2 +
 .../queries/PlannerTest/nested-collections.test | 144 +++++
 .../queries/PlannerTest/nested-loop-join.test   |  12 +
 .../queries/PlannerTest/order.test              | 104 ++++
 .../queries/PlannerTest/outer-joins.test        |  54 ++
 .../PlannerTest/partition-key-scans.test        |  38 ++
 .../PlannerTest/predicate-propagation.test      |  90 +++
 .../PlannerTest/runtime-filter-propagation.test |  86 +++
 .../queries/PlannerTest/small-query-opt.test    |  42 ++
 .../queries/PlannerTest/subquery-rewrite.test   | 144 +++++
 .../queries/PlannerTest/topn.test               |  48 ++
 .../queries/PlannerTest/tpcds-all.test          | 146 +++++
 .../queries/PlannerTest/tpch-all.test           | 132 +++++
 .../queries/PlannerTest/tpch-kudu.test          |  44 ++
 .../queries/PlannerTest/tpch-nested.test        |  88 +++
 .../queries/PlannerTest/tpch-views.test         |  44 ++
 .../queries/PlannerTest/union.test              | 198 +++++++
 .../queries/PlannerTest/values.test             |  16 +
 .../queries/PlannerTest/views.test              |  48 ++
 .../queries/PlannerTest/with-clause.test        |  58 ++
 tests/custom_cluster/test_client_ssl.py         |   1 +
 tests/failure/test_failpoints.py                |   2 +-
 tests/hs2/test_hs2.py                           |  16 +-
 tests/hs2/test_json_endpoints.py                |   4 +
 tests/shell/util.py                             |   5 +-
 74 files changed, 3831 insertions(+), 910 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/exec/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt
index 571198f..fce5c81 100644
--- a/be/src/exec/CMakeLists.txt
+++ b/be/src/exec/CMakeLists.txt
@@ -77,6 +77,7 @@ add_library(Exec
   partitioned-hash-join-builder-ir.cc
   partitioned-hash-join-node.cc
   partitioned-hash-join-node-ir.cc
+  plan-root-sink.cc
   kudu-scanner.cc
   kudu-scan-node.cc
   kudu-table-sink.cc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/exec/data-sink.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/data-sink.cc b/be/src/exec/data-sink.cc
index b6ec0ee..c95b854 100644
--- a/be/src/exec/data-sink.cc
+++ b/be/src/exec/data-sink.cc
@@ -26,6 +26,7 @@
 #include "exec/hdfs-table-sink.h"
 #include "exec/kudu-table-sink.h"
 #include "exec/kudu-util.h"
+#include "exec/plan-root-sink.h"
 #include "exprs/expr.h"
 #include "gen-cpp/ImpalaInternalService_constants.h"
 #include "gen-cpp/ImpalaInternalService_types.h"
@@ -94,6 +95,9 @@ Status DataSink::CreateDataSink(ObjectPool* pool,
       }
 
       break;
+    case TDataSinkType::PLAN_ROOT_SINK:
+      sink->reset(new PlanRootSink(row_desc, output_exprs, thrift_sink));
+      break;
     default:
       stringstream error_msg;
       map<int, const char*>::const_iterator i =

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/exec/plan-root-sink.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/plan-root-sink.cc b/be/src/exec/plan-root-sink.cc
new file mode 100644
index 0000000..bd73953
--- /dev/null
+++ b/be/src/exec/plan-root-sink.cc
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/plan-root-sink.h"
+
+#include "exprs/expr-context.h"
+#include "exprs/expr.h"
+#include "runtime/row-batch.h"
+#include "runtime/tuple-row.h"
+#include "service/query-result-set.h"
+
+#include <memory>
+#include <boost/thread/mutex.hpp>
+
+using namespace std;
+using boost::unique_lock;
+using boost::mutex;
+
+namespace impala {
+
+const string PlanRootSink::NAME = "PLAN_ROOT_SINK";
+
+PlanRootSink::PlanRootSink(const RowDescriptor& row_desc,
+    const std::vector<TExpr>& output_exprs, const TDataSink& thrift_sink)
+  : DataSink(row_desc), thrift_output_exprs_(output_exprs) {}
+
+Status PlanRootSink::Prepare(RuntimeState* state, MemTracker* mem_tracker) {
+  RETURN_IF_ERROR(DataSink::Prepare(state, mem_tracker));
+  RETURN_IF_ERROR(
+      Expr::CreateExprTrees(state->obj_pool(), thrift_output_exprs_, &output_expr_ctxs_));
+  RETURN_IF_ERROR(
+      Expr::Prepare(output_expr_ctxs_, state, row_desc_, expr_mem_tracker_.get()));
+
+  return Status::OK();
+}
+
+Status PlanRootSink::Open(RuntimeState* state) {
+  RETURN_IF_ERROR(Expr::Open(output_expr_ctxs_, state));
+  return Status::OK();
+}
+
+namespace {
+
+/// Validates that all collection-typed slots in the given batch are set to NULL.
+/// See SubplanNode for details on when collection-typed slots are set to NULL.
+/// TODO: This validation will become obsolete when we can return collection values.
+/// We will then need a different mechanism to assert the correct behavior of the
+/// SubplanNode with respect to setting collection-slots to NULL.
+void ValidateCollectionSlots(const RowDescriptor& row_desc, RowBatch* batch) {
+#ifndef NDEBUG
+  if (!row_desc.HasVarlenSlots()) return;
+  for (int i = 0; i < batch->num_rows(); ++i) {
+    TupleRow* row = batch->GetRow(i);
+    for (int j = 0; j < row_desc.tuple_descriptors().size(); ++j) {
+      const TupleDescriptor* tuple_desc = row_desc.tuple_descriptors()[j];
+      if (tuple_desc->collection_slots().empty()) continue;
+      for (int k = 0; k < tuple_desc->collection_slots().size(); ++k) {
+        const SlotDescriptor* slot_desc = tuple_desc->collection_slots()[k];
+        int tuple_idx = row_desc.GetTupleIdx(slot_desc->parent()->id());
+        const Tuple* tuple = row->GetTuple(tuple_idx);
+        if (tuple == NULL) continue;
+        DCHECK(tuple->IsNull(slot_desc->null_indicator_offset()));
+      }
+    }
+  }
+#endif
+}
+}
+
+Status PlanRootSink::Send(RuntimeState* state, RowBatch* batch) {
+  ValidateCollectionSlots(row_desc_, batch);
+  int current_batch_row = 0;
+  do {
+    unique_lock<mutex> l(lock_);
+    while (results_ == nullptr && !consumer_done_) sender_cv_.wait(l);
+    if (consumer_done_ || batch == nullptr) {
+      eos_ = true;
+      return Status::OK();
+    }
+
+    // Otherwise the consumer is ready. Fill out the rows.
+    DCHECK(results_ != nullptr);
+    // List of expr values to hold evaluated rows from the query
+    vector<void*> result_row;
+    result_row.resize(output_expr_ctxs_.size());
+
+    // List of scales for floating point values in result_row
+    vector<int> scales;
+    scales.resize(result_row.size());
+
+    int num_to_fetch = batch->num_rows() - current_batch_row;
+    if (num_rows_requested_ > 0) num_to_fetch = min(num_to_fetch, num_rows_requested_);
+    for (int i = 0; i < num_to_fetch; ++i) {
+      TupleRow* row = batch->GetRow(current_batch_row);
+      GetRowValue(row, &result_row, &scales);
+      RETURN_IF_ERROR(results_->AddOneRow(result_row, scales));
+      ++current_batch_row;
+    }
+    // Signal the consumer.
+    results_ = nullptr;
+    ExprContext::FreeLocalAllocations(output_expr_ctxs_);
+    consumer_cv_.notify_all();
+  } while (current_batch_row < batch->num_rows());
+  return Status::OK();
+}
+
+Status PlanRootSink::FlushFinal(RuntimeState* state) {
+  unique_lock<mutex> l(lock_);
+  sender_done_ = true;
+  eos_ = true;
+  consumer_cv_.notify_all();
+  return Status::OK();
+}
+
+void PlanRootSink::Close(RuntimeState* state) {
+  unique_lock<mutex> l(lock_);
+  // No guarantee that FlushFinal() has been called, so need to mark sender_done_ here as
+  // well.
+  sender_done_ = true;
+  consumer_cv_.notify_all();
+  // Wait for consumer to be done, in case sender tries to tear-down this sink while the
+  // consumer is still reading from it.
+  while (!consumer_done_) sender_cv_.wait(l);
+  Expr::Close(output_expr_ctxs_, state);
+  DataSink::Close(state);
+}
+
+void PlanRootSink::CloseConsumer() {
+  unique_lock<mutex> l(lock_);
+  consumer_done_ = true;
+  sender_cv_.notify_all();
+}
+
+Status PlanRootSink::GetNext(
+    RuntimeState* state, QueryResultSet* results, int num_results, bool* eos) {
+  unique_lock<mutex> l(lock_);
+  DCHECK(!consumer_done_);
+
+  results_ = results;
+  num_rows_requested_ = num_results;
+  sender_cv_.notify_all();
+
+  while (!eos_ && results_ != nullptr && !sender_done_) consumer_cv_.wait(l);
+  *eos = eos_;
+  RETURN_IF_ERROR(state->CheckQueryState());
+  return Status::OK();
+}
+
+void PlanRootSink::GetRowValue(
+    TupleRow* row, vector<void*>* result, vector<int>* scales) {
+  DCHECK(result->size() >= output_expr_ctxs_.size());
+  for (int i = 0; i < output_expr_ctxs_.size(); ++i) {
+    (*result)[i] = output_expr_ctxs_[i]->GetValue(row);
+    (*scales)[i] = output_expr_ctxs_[i]->root()->output_scale();
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/exec/plan-root-sink.h
----------------------------------------------------------------------
diff --git a/be/src/exec/plan-root-sink.h b/be/src/exec/plan-root-sink.h
new file mode 100644
index 0000000..cc7c045
--- /dev/null
+++ b/be/src/exec/plan-root-sink.h
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_EXEC_PLAN_ROOT_SINK_H
+#define IMPALA_EXEC_PLAN_ROOT_SINK_H
+
+#include "exec/data-sink.h"
+
+#include <boost/thread/condition_variable.hpp>
+
+namespace impala {
+
+class TupleRow;
+class RowBatch;
+class QueryResultSet;
+class ExprContext;
+
+/// Sink which manages the handoff between a 'sender' (a fragment instance) that produces
+/// batches by calling Send(), and a 'consumer' (e.g. the coordinator) which consumes rows
+/// formed by computing a set of output expressions over the input batches, by calling
+/// GetNext(). Send() and GetNext() are called concurrently.
+///
+/// The consumer calls GetNext() with a QueryResultSet and a requested fetch
+/// size. GetNext() shares these fields with Send(), and then signals Send() to begin
+/// populating the result set. GetNext() returns when either the sender has sent all of
+/// its rows, or the requested fetch size has been satisfied.
+///
+/// Send() fills in as many rows as are requested from the current batch. When the batch
+/// is exhausted - which may take several calls to GetNext() - control is returned to the
+/// sender to produce another row batch.
+///
+/// Consumers must call CloseConsumer() when finished to allow the fragment to shut
+/// down. Senders must call Close() to signal to the consumer that no more batches will be
+/// produced.
+///
+/// The sink is thread safe up to a single producer and single consumer.
+///
+/// TODO: The consumer drives the sender in lock-step with GetNext() calls, forcing a
+/// context-switch on every invocation. Measure the impact of this, and consider moving to
+/// a fully asynchronous implementation with a queue to manage buffering between sender
+/// and consumer. See IMPALA-4268.
+class PlanRootSink : public DataSink {
+ public:
+  PlanRootSink(const RowDescriptor& row_desc, const std::vector<TExpr>& output_exprs,
+      const TDataSink& thrift_sink);
+
+  virtual std::string GetName() { return NAME; }
+
+  virtual Status Prepare(RuntimeState* state, MemTracker* tracker);
+
+  virtual Status Open(RuntimeState* state);
+
+  /// Sends a new batch. Ownership of 'batch' remains with the sender. Blocks until the
+  /// consumer has consumed 'batch' by calling GetNext().
+  virtual Status Send(RuntimeState* state, RowBatch* batch);
+
+  /// Sets eos and notifies consumer.
+  virtual Status FlushFinal(RuntimeState* state);
+
+  /// To be called by sender only. Signals to the consumer that no more batches will be
+  /// produced, then blocks until the consumer calls CloseConsumer().
+  virtual void Close(RuntimeState* state);
+
+  /// Populates 'result_set' with up to 'num_rows' rows produced by the fragment instance
+  /// that calls Send(). *eos is set to 'true' when there are no more rows to consume.
+  Status GetNext(
+      RuntimeState* state, QueryResultSet* result_set, int num_rows, bool* eos);
+
+  /// Signals to the producer that the sink will no longer be used. It's an error to call
+  /// GetNext() after this returns. May be called more than once; only the first call has
+  /// any effect.
+  void CloseConsumer();
+
+  static const std::string NAME;
+
+ private:
+  /// Protects all members, including the condition variables.
+  boost::mutex lock_;
+
+  /// Waited on by the sender only. Signalled when the consumer has written results_ and
+  /// num_rows_requested_, and so the sender may begin satisfying that request for rows
+  /// from its current batch. Also signalled when CloseConsumer() is called, to unblock
+  /// the sender.
+  boost::condition_variable sender_cv_;
+
+  /// Waited on by the consumer only. Signalled when the sender has finished serving a
+  /// request for rows. Also signalled by Close() and FlushFinal() to signal to the
+  /// consumer that no more rows are coming.
+  boost::condition_variable consumer_cv_;
+
+  /// Signals to producer that the consumer is done, and the sink may be torn down.
+  bool consumer_done_ = false;
+
+  /// Signals to consumer that the sender is done, and that there are no more row batches
+  /// to consume.
+  bool sender_done_ = false;
+
+  /// The current result set passed to GetNext(), to fill in Send(). Not owned by this
+  /// sink. Reset to nullptr after Send() completes the request to signal to the consumer
+  /// that it can return.
+  QueryResultSet* results_ = nullptr;
+
+  /// Set by GetNext() to indicate to Send() how many rows it should write to results_.
+  int num_rows_requested_ = 0;
+
+  /// Set to true in Send() and FlushFinal() when the Sink() has finished producing rows.
+  bool eos_ = false;
+
+  /// Output expressions to map plan row batches onto result set rows.
+  std::vector<TExpr> thrift_output_exprs_;
+  std::vector<ExprContext*> output_expr_ctxs_;
+
+  /// Writes a single row into 'result' and 'scales' by evaluating output_expr_ctxs_ over
+  /// 'row'.
+  void GetRowValue(TupleRow* row, std::vector<void*>* result, std::vector<int>* scales);
+};
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index df4ad7b..4214d4d 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -39,22 +39,28 @@
 #include <errno.h>
 
 #include "common/logging.h"
-#include "exprs/expr.h"
 #include "exec/data-sink.h"
+#include "exec/plan-root-sink.h"
+#include "exec/scan-node.h"
+#include "gen-cpp/Frontend_types.h"
+#include "gen-cpp/ImpalaInternalService.h"
+#include "gen-cpp/ImpalaInternalService_constants.h"
+#include "gen-cpp/ImpalaInternalService_types.h"
+#include "gen-cpp/Partitions_types.h"
+#include "gen-cpp/PlanNodes_types.h"
+#include "runtime/backend-client.h"
 #include "runtime/client-cache.h"
-#include "runtime/data-stream-sender.h"
 #include "runtime/data-stream-mgr.h"
+#include "runtime/data-stream-sender.h"
 #include "runtime/exec-env.h"
 #include "runtime/hdfs-fs-cache.h"
 #include "runtime/mem-tracker.h"
+#include "runtime/parallel-executor.h"
 #include "runtime/plan-fragment-executor.h"
 #include "runtime/row-batch.h"
-#include "runtime/backend-client.h"
-#include "runtime/parallel-executor.h"
 #include "runtime/tuple-row.h"
 #include "scheduling/scheduler.h"
-#include "exec/data-sink.h"
-#include "exec/scan-node.h"
+#include "service/fragment-exec-state.h"
 #include "util/bloom-filter.h"
 #include "util/container-util.h"
 #include "util/counting-barrier.h"
@@ -67,12 +73,6 @@
 #include "util/summary-util.h"
 #include "util/table-printer.h"
 #include "util/uid-util.h"
-#include "gen-cpp/ImpalaInternalService.h"
-#include "gen-cpp/ImpalaInternalService_types.h"
-#include "gen-cpp/Frontend_types.h"
-#include "gen-cpp/PlanNodes_types.h"
-#include "gen-cpp/Partitions_types.h"
-#include "gen-cpp/ImpalaInternalService_constants.h"
 
 #include "common/names.h"
 
@@ -240,8 +240,8 @@ class Coordinator::FragmentInstanceState {
   mutex lock_;
 
   /// If the status indicates an error status, execution of this fragment has either been
-  /// aborted by the remote impalad (which then reported the error) or cancellation has
-  /// been initiated; either way, execution must not be cancelled
+  /// aborted by the executing impalad (which then reported the error) or cancellation has
+  /// been initiated; either way, execution must not be cancelled.
   Status status_;
 
   /// If true, ExecPlanFragment() rpc has been sent.
@@ -377,15 +377,13 @@ Coordinator::Coordinator(const QuerySchedule& schedule, ExecEnv* exec_env,
     exec_env_(exec_env),
     has_called_wait_(false),
     returned_all_results_(false),
-    executor_(NULL), // Set in Prepare()
     query_mem_tracker_(), // Set in Exec()
     num_remaining_fragment_instances_(0),
     obj_pool_(new ObjectPool()),
     query_events_(events),
     filter_routing_table_complete_(false),
-    filter_mode_(schedule.query_options().runtime_filter_mode),
-    torn_down_(false) {
-}
+    filter_mode_(schedule_.query_options().runtime_filter_mode),
+    torn_down_(false) {}
 
 Coordinator::~Coordinator() {
   DCHECK(torn_down_) << "TearDown() must be called before Coordinator is destroyed";
@@ -448,14 +446,15 @@ static void ProcessQueryOptions(
       << "because nodes cannot be cancelled in Close()";
 }
 
-Status Coordinator::Exec(vector<ExprContext*>* output_expr_ctxs) {
+Status Coordinator::Exec() {
   const TQueryExecRequest& request = schedule_.request();
   DCHECK(request.fragments.size() > 0 || request.mt_plan_exec_info.size() > 0);
+
   needs_finalization_ = request.__isset.finalize_params;
   if (needs_finalization_) finalize_params_ = request.finalize_params;
 
   VLOG_QUERY << "Exec() query_id=" << schedule_.query_id()
-      << " stmt=" << request.query_ctx.request.stmt;
+             << " stmt=" << request.query_ctx.request.stmt;
   stmt_type_ = request.stmt_type;
   query_id_ = schedule_.query_id();
   desc_tbl_ = request.desc_tbl;
@@ -468,18 +467,6 @@ Status Coordinator::Exec(vector<ExprContext*>* output_expr_ctxs) {
 
   SCOPED_TIMER(query_profile_->total_time_counter());
 
-  // After the coordinator fragment is started, it may call UpdateFilter() asynchronously,
-  // which waits on this barrier for completion.
-  // TODO: remove special treatment of coord fragment
-  int num_remote_instances = schedule_.GetNumRemoteFInstances();
-  if (num_remote_instances > 0) {
-    exec_complete_barrier_.reset(new CountingBarrier(num_remote_instances));
-  }
-  num_remaining_fragment_instances_ = num_remote_instances;
-
-  // TODO: move initial setup into a separate function; right now part of it
-  // (InitExecProfile()) depends on the coordinator fragment having been started
-
   // initialize progress updater
   const string& str = Substitute("Query $0", PrintId(query_id_));
   progress_.Init(str, schedule_.num_scan_ranges());
@@ -489,64 +476,51 @@ Status Coordinator::Exec(vector<ExprContext*>* output_expr_ctxs) {
   // execution at Impala daemons where it hasn't even started
   lock_guard<mutex> l(lock_);
 
-  bool has_coordinator_fragment = schedule_.GetCoordFragment() != NULL;
-  if (has_coordinator_fragment) {
-    // Start this before starting any more plan
-    // fragments, otherwise they start sending data before the local exchange node had a
-    // chance to register with the stream mgr.
-    // TODO: This is no longer necessary (see IMPALA-1599). Consider starting all
-    // fragments in the same way with no coordinator special case.
-    RETURN_IF_ERROR(PrepareCoordFragment(output_expr_ctxs));
-  } else {
-    // The coordinator instance may require a query mem tracker even if there is no
-    // coordinator fragment. For example, result-caching tracks memory via the query mem
-    // tracker.
-    // If there is a fragment, the fragment executor created above initializes the query
-    // mem tracker. If not, the query mem tracker is created here.
-    int64_t query_limit = -1;
-    if (query_ctx_.request.query_options.__isset.mem_limit &&
-        query_ctx_.request.query_options.mem_limit > 0) {
-      query_limit = query_ctx_.request.query_options.mem_limit;
-    }
-    MemTracker* pool_tracker = MemTracker::GetRequestPoolMemTracker(
-        schedule_.request_pool(), exec_env_->process_mem_tracker());
-    query_mem_tracker_ =
-        MemTracker::GetQueryMemTracker(query_id_, query_limit, pool_tracker);
-
-    executor_.reset(NULL);
-  }
+  // The coordinator may require a query mem tracker for result-caching, which tracks
+  // memory via the query mem tracker.
+  int64_t query_limit = -1;
+  if (query_ctx_.request.query_options.__isset.mem_limit
+      && query_ctx_.request.query_options.mem_limit > 0) {
+    query_limit = query_ctx_.request.query_options.mem_limit;
+  }
+  MemTracker* pool_tracker = MemTracker::GetRequestPoolMemTracker(
+      schedule_.request_pool(), exec_env_->process_mem_tracker());
+  query_mem_tracker_ =
+      MemTracker::GetQueryMemTracker(query_id_, query_limit, pool_tracker);
+  DCHECK(query_mem_tracker() != nullptr);
   filter_mem_tracker_.reset(
       new MemTracker(-1, "Runtime Filter (Coordinator)", query_mem_tracker(), false));
 
-  // initialize execution profile structures
+  // Initialize the execution profile structures.
   bool is_mt_execution = request.query_ctx.request.query_options.mt_dop > 0;
   if (is_mt_execution) {
     MtInitExecProfiles();
     MtInitExecSummary();
+    MtStartFInstances();
   } else {
     InitExecProfile(request);
+    StartFragments();
   }
 
-  if (num_remote_instances > 0) {
-    // pre-size fragment_instance_states_ in order to directly address by instance idx
-    // when creating FragmentInstanceStates (instead of push_back())
-    int num_fragment_instances = schedule_.GetTotalFInstances();
-    DCHECK_GT(num_fragment_instances, 0);
-    fragment_instance_states_.resize(num_fragment_instances);
+  RETURN_IF_ERROR(FinishInstanceStartup());
 
-    if (is_mt_execution) {
-      MtStartRemoteFInstances();
-    } else {
-      StartRemoteFragments();
-    }
-    RETURN_IF_ERROR(FinishRemoteInstanceStartup());
+  // Grab executor and wait until Prepare() has finished so that runtime state etc. will
+  // be set up.
+  if (schedule_.GetCoordFragment() != nullptr) {
+    // Coordinator fragment instance has same ID as query.
+    shared_ptr<FragmentMgr::FragmentExecState> root_fragment_instance =
+        ExecEnv::GetInstance()->fragment_mgr()->GetFragmentExecState(query_id_);
+    DCHECK(root_fragment_instance.get() != nullptr);
+    executor_ = root_fragment_instance->executor();
 
-    // If we have a coordinator fragment and remote fragments (the common case), release
-    // the thread token on the coordinator fragment. This fragment spends most of the time
-    // waiting and doing very little work. Holding on to the token causes underutilization
-    // of the machine. If there are 12 queries on this node, that's 12 tokens reserved for
-    // no reason.
-    if (has_coordinator_fragment) executor_->ReleaseThreadToken();
+    // When WaitForPrepare() returns OK(), the executor's root sink will be set up. At
+    // that point, the coordinator must be sure to call root_sink()->CloseConsumer(); the
+    // fragment instance's executor will not complete until that point.
+    // TODO: Consider moving this to Wait().
+    Status prepare_status = executor_->WaitForPrepare();
+    root_sink_ = executor_->root_sink();
+    RETURN_IF_ERROR(prepare_status);
+    DCHECK(root_sink_ != nullptr);
   }
 
   PrintFragmentInstanceInfo();
@@ -610,60 +584,14 @@ void Coordinator::UpdateFilterRoutingTable(const vector<TPlanNode>& plan_nodes,
   }
 }
 
-Status Coordinator::PrepareCoordFragment(vector<ExprContext*>* output_expr_ctxs) {
-  const TQueryExecRequest& request = schedule_.request();
-  bool is_mt_execution = request.query_ctx.request.query_options.mt_dop > 0;
-  if (!is_mt_execution && filter_mode_ != TRuntimeFilterMode::OFF) {
-    UpdateFilterRoutingTable(schedule_.GetCoordFragment()->plan.nodes, 1, 0);
-    if (schedule_.GetNumFragmentInstances() == 0) MarkFilterRoutingTableComplete();
-  }
+void Coordinator::StartFragments() {
+  int num_fragment_instances = schedule_.GetNumFragmentInstances();
+  DCHECK_GT(num_fragment_instances, 0);
 
-  // create rpc params and FragmentInstanceState for the coordinator fragment
-  TExecPlanFragmentParams rpc_params;
-  FragmentInstanceState* coord_state = nullptr;
-  if (is_mt_execution) {
-    const FInstanceExecParams& coord_params = schedule_.GetCoordInstanceExecParams();
-    MtSetExecPlanFragmentParams(coord_params, &rpc_params);
-    coord_state = obj_pool()->Add(
-        new FragmentInstanceState(coord_params, obj_pool()));
-  } else {
-    const TPlanFragment& coord_fragment = *schedule_.GetCoordFragment();
-    SetExecPlanFragmentParams(
-        coord_fragment, schedule_.exec_params()[0], 0, &rpc_params);
-    coord_state = obj_pool()->Add(
-        new FragmentInstanceState(
-          coord_fragment.idx, schedule_.exec_params()[0], 0, obj_pool()));
-    // apparently this was never called for the coordinator fragment
-    // TODO: fix this
-    //exec_state->ComputeTotalSplitSize(
-        //rpc_params.fragment_instance_ctx.per_node_scan_ranges);
-  }
-  // register state before calling Prepare(), in case it fails
-  DCHECK_EQ(GetInstanceIdx(coord_state->fragment_instance_id()), 0);
-  fragment_instance_states_.push_back(coord_state);
-  DCHECK(coord_state != nullptr);
-  DCHECK_EQ(fragment_instance_states_.size(), 1);
-  executor_.reset(new PlanFragmentExecutor(
-      exec_env_, PlanFragmentExecutor::ReportStatusCallback()));
-  RETURN_IF_ERROR(executor_->Prepare(rpc_params));
-  coord_state->set_profile(executor_->profile());
-
-  // Prepare output_expr_ctxs before optimizing the LLVM module. The other exprs of this
-  // coordinator fragment have been prepared in executor_->Prepare().
-  DCHECK(output_expr_ctxs != NULL);
-  RETURN_IF_ERROR(Expr::CreateExprTrees(
-      runtime_state()->obj_pool(), schedule_.GetCoordFragment()->output_exprs,
-      output_expr_ctxs));
-  MemTracker* output_expr_tracker = runtime_state()->obj_pool()->Add(new MemTracker(
-      -1, "Output exprs", runtime_state()->instance_mem_tracker(), false));
-  RETURN_IF_ERROR(Expr::Prepare(
-      *output_expr_ctxs, runtime_state(), row_desc(), output_expr_tracker));
+  fragment_instance_states_.resize(num_fragment_instances);
+  exec_complete_barrier_.reset(new CountingBarrier(num_fragment_instances));
+  num_remaining_fragment_instances_ = num_fragment_instances;
 
-  return Status::OK();
-}
-
-void Coordinator::StartRemoteFragments() {
-  int num_fragment_instances = schedule_.GetNumFragmentInstances();
   DebugOptions debug_options;
   ProcessQueryOptions(schedule_.query_options(), &debug_options);
   const TQueryExecRequest& request = schedule_.request();
@@ -671,19 +599,14 @@ void Coordinator::StartRemoteFragments() {
   VLOG_QUERY << "starting " << num_fragment_instances << " fragment instances for query "
              << query_id_;
   query_events_->MarkEvent(
-      Substitute("Ready to start $0 remote fragment instances", num_fragment_instances));
+      Substitute("Ready to start $0 fragments", num_fragment_instances));
 
-  bool has_coordinator_fragment =
-      request.fragments[0].partition.type == TPartitionType::UNPARTITIONED;
-  int instance_state_idx = has_coordinator_fragment ? 1 : 0;
-  int first_remote_fragment_idx = has_coordinator_fragment ? 1 : 0;
+  int instance_state_idx = 0;
   if (filter_mode_ != TRuntimeFilterMode::OFF) {
-    // Populate the runtime filter routing table. This should happen before starting
-    // the remote fragments.
-    // This code anticipates the indices of the instance states created later on in
-    // ExecRemoteFragment()
-    for (int fragment_idx = first_remote_fragment_idx;
-         fragment_idx < request.fragments.size(); ++fragment_idx) {
+    // Populate the runtime filter routing table. This should happen before starting the
+    // fragment instances. This code anticipates the indices of the instance states
+    // created later on in ExecRemoteFragment()
+    for (int fragment_idx = 0; fragment_idx < request.fragments.size(); ++fragment_idx) {
       const FragmentExecParams& params = schedule_.exec_params()[fragment_idx];
       int num_hosts = params.hosts.size();
       DCHECK_GT(num_hosts, 0);
@@ -697,8 +620,7 @@ void Coordinator::StartRemoteFragments() {
   int num_instances = 0;
   // Start one fragment instance per fragment per host (number of hosts running each
   // fragment may not be constant).
-  for (int fragment_idx = first_remote_fragment_idx;
-       fragment_idx < request.fragments.size(); ++fragment_idx) {
+  for (int fragment_idx = 0; fragment_idx < request.fragments.size(); ++fragment_idx) {
     const FragmentExecParams& params = schedule_.exec_params()[fragment_idx];
     int num_hosts = params.hosts.size();
     DCHECK_GT(num_hosts, 0);
@@ -719,11 +641,17 @@ void Coordinator::StartRemoteFragments() {
   }
   exec_complete_barrier_->Wait();
   query_events_->MarkEvent(
-      Substitute("All $0 remote fragments instances started", num_instances));
+      Substitute("All $0 fragments instances started", num_instances));
 }
 
-void Coordinator::MtStartRemoteFInstances() {
+void Coordinator::MtStartFInstances() {
   int num_fragment_instances = schedule_.GetNumFragmentInstances();
+  DCHECK_GT(num_fragment_instances, 0);
+
+  fragment_instance_states_.resize(num_fragment_instances);
+  exec_complete_barrier_.reset(new CountingBarrier(num_fragment_instances));
+  num_remaining_fragment_instances_ = num_fragment_instances;
+
   DebugOptions debug_options;
   ProcessQueryOptions(schedule_.query_options(), &debug_options);
   const TQueryExecRequest& request = schedule_.request();
@@ -731,7 +659,7 @@ void Coordinator::MtStartRemoteFInstances() {
   VLOG_QUERY << "starting " << num_fragment_instances << " fragment instances for query "
              << query_id_;
   query_events_->MarkEvent(
-      Substitute("Ready to start $0 remote fragment instances", num_fragment_instances));
+      Substitute("Ready to start $0 fragment instances", num_fragment_instances));
 
   // TODO: populate the runtime filter routing table
   // this requires local aggregation of filters prior to sending
@@ -739,7 +667,6 @@ void Coordinator::MtStartRemoteFInstances() {
 
   int num_instances = 0;
   for (const MtFragmentExecParams& fragment_params: schedule_.mt_fragment_exec_params()) {
-    if (fragment_params.is_coord_fragment) continue;
     for (int i = 0; i < fragment_params.instance_exec_params.size();
         ++i, ++num_instances) {
       const FInstanceExecParams& instance_params =
@@ -760,10 +687,10 @@ void Coordinator::MtStartRemoteFInstances() {
   VLOG_QUERY << "started " << num_fragment_instances << " fragment instances for query "
       << query_id_;
   query_events_->MarkEvent(
-      Substitute("All $0 remote fragment instances started", num_instances));
+      Substitute("All $0 fragment instances started", num_instances));
 }
 
-Status Coordinator::FinishRemoteInstanceStartup() {
+Status Coordinator::FinishInstanceStartup() {
   Status status = Status::OK();
   const TMetricDef& def =
       MakeTMetricDef("fragment-latencies", TMetricKind::HISTOGRAM, TUnit::TIME_MS);
@@ -868,7 +795,7 @@ Status Coordinator::UpdateStatus(const Status& status, const TUniqueId& instance
   {
     lock_guard<mutex> l(lock_);
 
-    // The query is done and we are just waiting for remote fragments to clean up.
+    // The query is done and we are just waiting for fragment instances to clean up.
     // Ignore their cancelled updates.
     if (returned_all_results_ && status.IsCancelled()) return query_status_;
 
@@ -1187,7 +1114,8 @@ Status Coordinator::WaitForAllInstances() {
   if (query_status_.ok()) {
     VLOG_QUERY << "All fragment instances finished successfully.";
   } else {
-    VLOG_QUERY << "All fragment instances finished due to one or more errors.";
+    VLOG_QUERY << "All fragment instances finished due to one or more errors. "
+               << query_status_.GetDetail();
   }
 
   return query_status_;
@@ -1198,143 +1126,84 @@ Status Coordinator::Wait() {
   SCOPED_TIMER(query_profile_->total_time_counter());
   if (has_called_wait_) return Status::OK();
   has_called_wait_ = true;
-  Status return_status = Status::OK();
-  if (executor_.get() != NULL) {
-    // Open() may block
-    return_status = UpdateStatus(executor_->Open(),
-        runtime_state()->fragment_instance_id(), FLAGS_hostname);
-
-    if (return_status.ok()) {
-      // If the coordinator fragment has a sink, it will have finished executing at this
-      // point.  It's safe therefore to copy the set of files to move and updated
-      // partitions into the query-wide set.
-      RuntimeState* state = runtime_state();
-      DCHECK(state != NULL);
-
-      // No other instances should have updated these structures if the coordinator has a
-      // fragment. (Instances have a sink only if the coordinator does not)
-      DCHECK_EQ(files_to_move_.size(), 0);
-      DCHECK_EQ(per_partition_status_.size(), 0);
-
-      // Because there are no other updates, safe to copy the maps rather than merge them.
-      files_to_move_ = *state->hdfs_files_to_move();
-      per_partition_status_ = *state->per_partition_status();
-    }
-  } else {
-    // Query finalization can only happen when all instances have reported
-    // relevant state. They only have relevant state to report in the parallel
-    // INSERT case, otherwise all the relevant state is from the coordinator
-    // fragment which will be available after Open() returns.
-    // Ignore the returned status if finalization is required., since FinalizeQuery() will
-    // pick it up and needs to execute regardless.
-    Status status = WaitForAllInstances();
-    if (!needs_finalization_ && !status.ok()) return status;
-  }
 
-  // Query finalization is required only for HDFS table sinks
-  if (needs_finalization_) {
-    RETURN_IF_ERROR(FinalizeQuery());
+  if (stmt_type_ == TStmtType::QUERY) {
+    DCHECK(executor_ != nullptr);
+    return UpdateStatus(executor_->WaitForOpen(), runtime_state()->fragment_instance_id(),
+        FLAGS_hostname);
   }
 
-  if (stmt_type_ == TStmtType::DML) {
-    query_profile_->AddInfoString("Insert Stats",
-        DataSink::OutputInsertStats(per_partition_status_, "\n"));
-    // For DML queries, when Wait is done, the query is complete.  Report aggregate
-    // query profiles at this point.
-    // TODO: make sure ReportQuerySummary gets called on error
-    ReportQuerySummary();
-  }
+  DCHECK_EQ(stmt_type_, TStmtType::DML);
+  // Query finalization can only happen when all backends have reported
+  // relevant state. They only have relevant state to report in the parallel
+  // INSERT case, otherwise all the relevant state is from the coordinator
+  // fragment which will be available after Open() returns.
+  // Ignore the returned status if finalization is required, since FinalizeQuery() will
+  // pick it up and needs to execute regardless.
+  Status status = WaitForAllInstances();
+  if (!needs_finalization_ && !status.ok()) return status;
 
-  if (filter_routing_table_.size() > 0) {
-    query_profile_->AddInfoString("Final filter table", FilterDebugString());
-  }
+  // Query finalization is required only for HDFS table sinks
+  if (needs_finalization_) RETURN_IF_ERROR(FinalizeQuery());
 
-  return return_status;
+  query_profile_->AddInfoString(
+      "Insert Stats", DataSink::OutputInsertStats(per_partition_status_, "\n"));
+  // For DML queries, when Wait is done, the query is complete.  Report aggregate
+  // query profiles at this point.
+  // TODO: make sure ReportQuerySummary gets called on error
+  ReportQuerySummary();
+
+  return status;
 }
 
-Status Coordinator::GetNext(RowBatch** batch, RuntimeState* state) {
+Status Coordinator::GetNext(QueryResultSet* results, int max_rows, bool* eos) {
   VLOG_ROW << "GetNext() query_id=" << query_id_;
   DCHECK(has_called_wait_);
   SCOPED_TIMER(query_profile_->total_time_counter());
 
-  if (executor_.get() == NULL) {
-    // If there is no local fragment, we produce no output, and execution will
-    // have finished after Wait.
-    *batch = NULL;
-    return GetStatus();
-  }
-
-  // do not acquire lock_ here, otherwise we could block and prevent an async
-  // Cancel() from proceeding
-  Status status = executor_->GetNext(batch);
+  DCHECK(root_sink_ != nullptr)
+      << "GetNext() called without result sink. Perhaps Prepare() failed and was not "
+      << "checked?";
+  Status status = root_sink_->GetNext(runtime_state(), results, max_rows, eos);
 
   // if there was an error, we need to return the query's error status rather than
   // the status we just got back from the local executor (which may well be CANCELLED
   // in that case).  Coordinator fragment failed in this case so we log the query_id.
-  RETURN_IF_ERROR(UpdateStatus(status, runtime_state()->fragment_instance_id(),
-      FLAGS_hostname));
+  RETURN_IF_ERROR(
+      UpdateStatus(status, runtime_state()->fragment_instance_id(), FLAGS_hostname));
 
-  if (*batch == NULL) {
+  if (*eos) {
     returned_all_results_ = true;
-    if (executor_->ReachedLimit()) {
-      // We've reached the query limit, cancel the remote fragments.  The
-      // Exchange node on our fragment is no longer receiving rows so the
-      // remote fragments must be explicitly cancelled.
-      CancelRemoteFragments();
-      RuntimeState* state = runtime_state();
-      if (state != NULL) {
-        // Cancel the streams receiving batches.  The exchange nodes that would
-        // normally read from the streams are done.
-        state->stream_mgr()->Cancel(state->fragment_instance_id());
-      }
-    }
-
-    // Don't return final NULL until all instances have completed.
-    // GetNext must wait for all instances to complete before
-    // ultimately signalling the end of execution via a NULL
-    // batch. After NULL is returned, the coordinator may tear down
-    // query state, and perform post-query finalization which might
-    // depend on the reports from all instances.
+    // Trigger tear-down of coordinator fragment by closing the consumer. Must do before
+    // WaitForAllInstances().
+    root_sink_->CloseConsumer();
+    root_sink_ = nullptr;
+
+    // Don't return final NULL until all instances have completed.  GetNext must wait for
+    // all instances to complete before ultimately signalling the end of execution via a
+    // NULL batch. After NULL is returned, the coordinator may tear down query state, and
+    // perform post-query finalization which might depend on the reports from all
+    // instances.
+    //
+    // TODO: Waiting should happen in TearDown() (and then we wouldn't need to call
+    // CloseConsumer() here). See IMPALA-4275 for details.
     RETURN_IF_ERROR(WaitForAllInstances());
     if (query_status_.ok()) {
       // If the query completed successfully, report aggregate query profiles.
       ReportQuerySummary();
     }
-  } else {
-#ifndef NDEBUG
-    ValidateCollectionSlots(*batch);
-#endif
   }
 
   return Status::OK();
 }
 
-void Coordinator::ValidateCollectionSlots(RowBatch* batch) {
-  const RowDescriptor& row_desc = executor_->row_desc();
-  if (!row_desc.HasVarlenSlots()) return;
-  for (int i = 0; i < batch->num_rows(); ++i) {
-    TupleRow* row = batch->GetRow(i);
-    for (int j = 0; j < row_desc.tuple_descriptors().size(); ++j) {
-      const TupleDescriptor* tuple_desc = row_desc.tuple_descriptors()[j];
-      if (tuple_desc->collection_slots().empty()) continue;
-      for (int k = 0; k < tuple_desc->collection_slots().size(); ++k) {
-        const SlotDescriptor* slot_desc = tuple_desc->collection_slots()[k];
-        int tuple_idx = row_desc.GetTupleIdx(slot_desc->parent()->id());
-        const Tuple* tuple = row->GetTuple(tuple_idx);
-        if (tuple == NULL) continue;
-        DCHECK(tuple->IsNull(slot_desc->null_indicator_offset()));
-      }
-    }
-  }
-}
-
 void Coordinator::PrintFragmentInstanceInfo() {
   for (FragmentInstanceState* state: fragment_instance_states_) {
     SummaryStats& acc = fragment_profiles_[state->fragment_idx()].bytes_assigned;
     acc(state->total_split_size());
   }
 
-  for (int id = (executor_.get() == NULL ? 0 : 1); id < fragment_profiles_.size(); ++id) {
+  for (int id = (executor_ == NULL ? 0 : 1); id < fragment_profiles_.size(); ++id) {
     SummaryStats& acc = fragment_profiles_[id].bytes_assigned;
     double min = accumulators::min(acc);
     double max = accumulators::max(acc);
@@ -1360,6 +1229,7 @@ void Coordinator::PrintFragmentInstanceInfo() {
 
 void Coordinator::InitExecProfile(const TQueryExecRequest& request) {
   // Initialize the structure to collect execution summary of every plan node.
+  fragment_profiles_.resize(request.fragments.size());
   exec_summary_.__isset.nodes = true;
   for (int i = 0; i < request.fragments.size(); ++i) {
     if (!request.fragments[i].__isset.plan) continue;
@@ -1394,46 +1264,30 @@ void Coordinator::InitExecProfile(const TQueryExecRequest& request) {
     }
   }
 
-  if (executor_.get() != NULL) {
-    // register coordinator's fragment profile now, before those of the backends,
-    // so it shows up at the top
-    query_profile_->AddChild(executor_->profile());
-    executor_->profile()->set_name(Substitute("Coordinator Fragment $0",
-        request.fragments[0].display_name));
-    CollectScanNodeCounters(executor_->profile(), &coordinator_counters_);
-  }
-
   // Initialize the runtime profile structure. This adds the per fragment average
   // profiles followed by the per fragment instance profiles.
-  bool has_coordinator_fragment =
-      request.fragments[0].partition.type == TPartitionType::UNPARTITIONED;
-  fragment_profiles_.resize(request.fragments.size());
   for (int i = 0; i < request.fragments.size(); ++i) {
-    fragment_profiles_[i].num_instances = 0;
-
-    // Special case fragment idx 0 if there is a coordinator. There is only one
-    // instance of this profile so the average is just the coordinator profile.
-    if (i == 0 && has_coordinator_fragment) {
-      fragment_profiles_[i].averaged_profile = executor_->profile();
-      fragment_profiles_[i].num_instances = 1;
-      continue;
-    }
-    fragment_profiles_[i].averaged_profile =
-        obj_pool()->Add(new RuntimeProfile(obj_pool(),
-            Substitute("Averaged Fragment $0", request.fragments[i].display_name), true));
-    // Insert the avg profiles in ascending fragment number order. If
-    // there is a coordinator fragment, it's been placed in
-    // fragment_profiles_[0].averaged_profile, ensuring that this code
-    // will put the first averaged profile immediately after it. If
-    // there is no coordinator fragment, the first averaged profile
-    // will be inserted as the first child of query_profile_, and then
-    // all other averaged fragments will follow.
-    query_profile_->AddChild(fragment_profiles_[i].averaged_profile, true,
-        (i > 0) ? fragment_profiles_[i-1].averaged_profile : NULL);
-
+    // Insert the avg profiles in ascending fragment number order. If there is a
+    // coordinator fragment, it's been placed in fragment_profiles_[0].averaged_profile,
+    // ensuring that this code will put the first averaged profile immediately after
+    // it. If there is no coordinator fragment, the first averaged profile will be
+    // inserted as the first child of query_profile_, and then all other averaged
+    // fragments will follow.
+    bool is_coordinator_fragment = (i == 0 && schedule_.GetCoordFragment() != nullptr);
+    string profile_name =
+        Substitute(is_coordinator_fragment ? "Coordinator Fragment $0" : "Fragment $0",
+            request.fragments[i].display_name);
     fragment_profiles_[i].root_profile =
-        obj_pool()->Add(new RuntimeProfile(obj_pool(),
-            Substitute("Fragment $0", request.fragments[i].display_name)));
+        obj_pool()->Add(new RuntimeProfile(obj_pool(), profile_name));
+    if (is_coordinator_fragment) {
+      fragment_profiles_[i].averaged_profile = nullptr;
+    } else {
+      fragment_profiles_[i].averaged_profile = obj_pool()->Add(new RuntimeProfile(
+          obj_pool(),
+          Substitute("Averaged Fragment $0", request.fragments[i].display_name), true));
+      query_profile_->AddChild(fragment_profiles_[i].averaged_profile, true,
+          (i > 0) ? fragment_profiles_[i - 1].averaged_profile : NULL);
+    }
     // Note: we don't start the wall timer here for the fragment
     // profile; it's uninteresting and misleading.
     query_profile_->AddChild(fragment_profiles_[i].root_profile);
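To make the resulting layout concrete, here is a minimal standalone C++ sketch of the profile tree this loop builds (toy Profile type, not Impala's RuntimeProfile): the query profile gets one averaged profile per non-coordinator fragment, followed by one root profile per fragment under which the per-instance profiles are attached later.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Profile {
  std::string name;
  std::vector<std::unique_ptr<Profile>> children;
  Profile* AddChild(std::string n) {
    children.push_back(std::make_unique<Profile>(Profile{std::move(n), {}}));
    return children.back().get();
  }
  void Print(int indent = 0) const {
    std::cout << std::string(indent, ' ') << name << "\n";
    for (const auto& c : children) c->Print(indent + 2);
  }
};

int main() {
  const int num_fragments = 3;
  const bool has_coord_fragment = true;  // fragment 0 runs on the coordinator
  Profile query{"Execution Profile", {}};
  // Averaged profiles first: one per fragment, except the coordinator fragment,
  // which only ever has a single instance and therefore no averaged profile.
  for (int i = 0; i < num_fragments; ++i) {
    if (i == 0 && has_coord_fragment) continue;
    query.AddChild("Averaged Fragment F0" + std::to_string(i));
  }
  // Then one root profile per fragment; per-instance profiles attach here as
  // execution status reports arrive.
  for (int i = 0; i < num_fragments; ++i) {
    const bool is_coord = (i == 0 && has_coord_fragment);
    Profile* root = query.AddChild(
        std::string(is_coord ? "Coordinator Fragment F0" : "Fragment F0") +
        std::to_string(i));
    root->AddChild("Instance <instance-id> (host=<address>)");
  }
  query.Print();
  return 0;
}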
@@ -1490,40 +1344,23 @@ void Coordinator::MtInitExecProfiles() {
   schedule_.GetTPlanFragments(&fragments);
   fragment_profiles_.resize(fragments.size());
 
-  // start with coordinator fragment, if there is one
   const TPlanFragment* coord_fragment = schedule_.GetCoordFragment();
-  if (coord_fragment != NULL) {
-    DCHECK(executor_.get() != NULL);
-    PerFragmentProfileData* data = &fragment_profiles_[coord_fragment->idx];
-    data->num_instances = 1;
-    // TODO: fix this; this is not an averaged profile; we should follow the exact
-    // same structure we have for all other profiles (average + root + single
-    // instance profile)
-    data->averaged_profile = executor_->profile();
-
-    // register coordinator's fragment profile in the query profile now, before those
-    // of the fragment instances, so it shows up at the top
-    query_profile_->AddChild(executor_->profile());
-    executor_->profile()->set_name(Substitute("Coordinator Fragment $0",
-        coord_fragment->display_name));
-    CollectScanNodeCounters(executor_->profile(), &coordinator_counters_);
-  }
 
   // Initialize the runtime profile structure. This adds the per fragment average
   // profiles followed by the per fragment instance profiles.
   for (const TPlanFragment* fragment: fragments) {
-    if (fragment == coord_fragment) continue;
+    string profile_name =
+        (fragment == coord_fragment) ? "Coordinator Fragment $0" : "Fragment $0";
     PerFragmentProfileData* data = &fragment_profiles_[fragment->idx];
     data->num_instances =
         schedule_.GetFragmentExecParams(fragment->idx).instance_exec_params.size();
-
-    data->averaged_profile =
-        obj_pool()->Add(new RuntimeProfile(obj_pool(),
-          Substitute("Averaged Fragment $0", fragment->display_name), true));
-    query_profile_->AddChild(data->averaged_profile, true);
-    data->root_profile =
-        obj_pool()->Add(new RuntimeProfile(obj_pool(),
-          Substitute("Fragment $0", fragment->display_name)));
+    if (fragment != coord_fragment) {
+      data->averaged_profile = obj_pool()->Add(new RuntimeProfile(
+          obj_pool(), Substitute("Averaged Fragment $0", fragment->display_name), true));
+      query_profile_->AddChild(data->averaged_profile, true);
+    }
+    data->root_profile = obj_pool()->Add(
+        new RuntimeProfile(obj_pool(), Substitute(profile_name, fragment->display_name)));
     // Note: we don't start the wall timer here for the fragment profile;
     // it's uninteresting and misleading.
     query_profile_->AddChild(data->root_profile);
@@ -1637,8 +1474,6 @@ void Coordinator::ExecRemoteFragment(const FragmentExecParams& fragment_exec_par
             << " instance_id=" << PrintId(exec_state->fragment_instance_id())
             << " host=" << exec_state->impalad_address();
 
-  // Guard against concurrent UpdateExecStatus() that may arrive after RPC returns.
-  lock_guard<mutex> l(*exec_state->lock());
   int64_t start = MonotonicMillis();
 
   Status client_connect_status;
@@ -1685,32 +1520,27 @@ void Coordinator::Cancel(const Status* cause) {
   // if the query status indicates an error, cancellation has already been initiated
   if (!query_status_.ok()) return;
   // prevent others from cancelling a second time
+
+  // TODO: This should default to OK(), not CANCELLED if there is no cause (or callers
+  // should explicitly pass Status::OK()). Fragment instances may be cancelled at the end
+  // of a successful query. Need to clean up relationship between query_status_ here and
+  // in QueryExecState. See IMPALA-4279.
   query_status_ = (cause != NULL && !cause->ok()) ? *cause : Status::CANCELLED;
   CancelInternal();
 }
 
 void Coordinator::CancelInternal() {
   VLOG_QUERY << "Cancel() query_id=" << query_id_;
-  DCHECK(!query_status_.ok());
-
-  // cancel local fragment
-  if (executor_.get() != NULL) executor_->Cancel();
-
-  CancelRemoteFragments();
+  CancelFragmentInstances();
 
   // Report the summary with whatever progress the query made before being cancelled.
   ReportQuerySummary();
 }
 
-void Coordinator::CancelRemoteFragments() {
+void Coordinator::CancelFragmentInstances() {
+  int num_cancelled = 0;
   for (FragmentInstanceState* exec_state: fragment_instance_states_) {
     DCHECK(exec_state != nullptr);
-    if (exec_state->fragment_idx() == 0) continue;  // the coord fragment
-
-    // If a fragment failed before we finished issuing all remote fragments,
-    // this function will have been called before we finished populating
-    // fragment_instance_states_. Skip any such uninitialized exec states.
-    if (exec_state == NULL) continue;
 
     // lock each exec_state individually to synchronize correctly with
     // UpdateFragmentExecStatus() (which doesn't get the global lock_
@@ -1735,7 +1565,7 @@ void Coordinator::CancelRemoteFragments() {
     ImpalaBackendConnection backend_client(
         exec_env_->impalad_client_cache(), exec_state->impalad_address(), &status);
     if (!status.ok()) continue;
-
+    ++num_cancelled;
     TCancelPlanFragmentParams params;
     params.protocol_version = ImpalaInternalServiceVersion::V1;
     params.__set_fragment_instance_id(exec_state->fragment_instance_id());
@@ -1765,8 +1595,11 @@ void Coordinator::CancelRemoteFragments() {
       exec_state->status()->AddDetail(join(res.status.error_msgs, "; "));
     }
   }
+  VLOG_QUERY << Substitute(
+      "CancelFragmentInstances() query_id=$0, tried to cancel $1 fragment instances",
+      PrintId(query_id_), num_cancelled);
 
-  // notify that we completed with an error
+  // Notify that we completed with an error.
   instance_completion_cv_.notify_all();
 }
 
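For reference, a minimal standalone sketch of the fan-out pattern CancelFragmentInstances() follows: lock each instance's state individually, skip instances with nothing left to cancel, count delivery attempts, and wake any waiters at the end. InstanceState and TryRpcCancel are illustrative stand-ins, not Impala classes.

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <vector>

struct InstanceState {
  std::mutex lock;
  bool done = false;       // instance already reported completion
  bool cancelled = false;  // cancel already delivered
};

bool TryRpcCancel(InstanceState& s) {
  // Stand-in for the CancelPlanFragment RPC; pretend it always succeeds.
  s.cancelled = true;
  return true;
}

int main() {
  std::vector<InstanceState> instances(4);
  instances[1].done = true;  // e.g. a short fragment that already finished
  std::condition_variable completion_cv;

  int num_cancelled = 0;
  for (auto& inst : instances) {
    std::lock_guard<std::mutex> l(inst.lock);   // per-instance lock, not a global one
    if (inst.done || inst.cancelled) continue;  // nothing left to cancel
    if (TryRpcCancel(inst)) ++num_cancelled;
  }
  std::cout << "tried to cancel " << num_cancelled << " fragment instances\n";
  completion_cv.notify_all();  // unblock waiters such as Wait()/GetNext()
  return 0;
}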
@@ -1799,11 +1632,11 @@ Status Coordinator::UpdateFragmentExecStatus(const TReportExecStatusParams& para
       // We can't update this backend's profile if ReportQuerySummary() is running,
       // because it depends on all profiles not changing during its execution (when it
       // calls SortChildren()). ReportQuerySummary() only gets called after
-      // WaitForAllInstances() returns or at the end of CancelRemoteFragments().
+      // WaitForAllInstances() returns or at the end of CancelFragmentInstances().
       // WaitForAllInstances() only returns after all backends have completed (in which
       // case we wouldn't be in this function), or when there's an error, in which case
-      // CancelRemoteFragments() is called. CancelRemoteFragments sets all exec_state's
-      // statuses to cancelled.
+      // CancelFragmentInstances() is called. CancelFragmentInstances sets all
+      // exec_state's statuses to cancelled.
       // TODO: We're losing this profile information. Call ReportQuerySummary only after
       // all backends have completed.
       exec_state->profile()->Update(cumulative_profile);
@@ -1899,18 +1732,12 @@ Status Coordinator::UpdateFragmentExecStatus(const TReportExecStatusParams& para
   return Status::OK();
 }
 
-const RowDescriptor& Coordinator::row_desc() const {
-  DCHECK(executor_.get() != NULL);
-  return executor_->row_desc();
-}
-
 RuntimeState* Coordinator::runtime_state() {
-  return executor_.get() == NULL ? NULL : executor_->runtime_state();
+  return executor_ == NULL ? NULL : executor_->runtime_state();
 }
 
 MemTracker* Coordinator::query_mem_tracker() {
-  return executor_.get() == NULL ? query_mem_tracker_.get() :
-      executor_->runtime_state()->query_mem_tracker();
+  return query_mem_tracker_.get();
 }
 
 bool Coordinator::PrepareCatalogUpdate(TUpdateCatalogRequest* catalog_update) {
@@ -1941,7 +1768,9 @@ void Coordinator::UpdateAverageProfile(FragmentInstanceState* instance_state) {
   PerFragmentProfileData* data = &fragment_profiles_[fragment_idx];
 
   // No locks are taken since UpdateAverage() and AddChild() take their own locks
-  data->averaged_profile->UpdateAverage(instance_state->profile());
+  if (data->averaged_profile != nullptr) {
+    data->averaged_profile->UpdateAverage(instance_state->profile());
+  }
   data->root_profile->AddChild(instance_state->profile());
 }
 
@@ -2000,18 +1829,18 @@ void Coordinator::UpdateExecSummary(const FragmentInstanceState& instance_state)
 
 // This function appends summary information to the query_profile_ before
 // outputting it to VLOG.  It adds:
-//   1. Averaged remote fragment profiles (TODO: add outliers)
-//   2. Summary of remote fragment durations (min, max, mean, stddev)
-//   3. Summary of remote fragment rates (min, max, mean, stddev)
+//   1. Averaged fragment instance profiles (TODO: add outliers)
+//   2. Summary of fragment instance durations (min, max, mean, stddev)
+//   3. Summary of fragment instance rates (min, max, mean, stddev)
 // TODO: add histogram/percentile
 void Coordinator::ReportQuerySummary() {
-  // In this case, the query did not even get to start on all the remote nodes,
-  // some of the state that is used below might be uninitialized.  In this case,
+  // If Wait() was never called, the query did not even get to start all fragment
+  // instances, and some of the state used below might be uninitialized. In that case
   // the query has made so little progress, reporting a summary is not very useful.
   if (!has_called_wait_) return;
 
   if (!fragment_instance_states_.empty()) {
-    // Average all remote fragments for each fragment.
+    // Average all fragment instances for each fragment.
     for (FragmentInstanceState* state: fragment_instance_states_) {
       // TODO: make profiles uniform across all fragments so we don't have
       // to keep special-casing the coord fragment
@@ -2028,7 +1857,7 @@ void Coordinator::ReportQuerySummary() {
 
     InstanceComparator comparator;
     // Per fragment instances have been collected, output summaries
-    for (int i = (executor_.get() != NULL ? 1 : 0); i < fragment_profiles_.size(); ++i) {
+    for (int i = (executor_ != NULL ? 1 : 0); i < fragment_profiles_.size(); ++i) {
       fragment_profiles_[i].root_profile->SortChildren(comparator);
       SummaryStats& completion_times = fragment_profiles_[i].completion_times;
       SummaryStats& rates = fragment_profiles_[i].rates;
@@ -2088,16 +1917,6 @@ void Coordinator::ReportQuerySummary() {
 
 string Coordinator::GetErrorLog() {
   ErrorLogMap merged;
-  {
-    lock_guard<mutex> l(lock_);
-    // TODO-MT: use FragmentInstanceState::error_log_ instead
-    // as part of getting rid of the special-casing of the coordinator instance
-    if (executor_.get() != NULL && executor_->runtime_state() != NULL) {
-      ErrorLogMap runtime_error_log;
-      executor_->runtime_state()->GetErrors(&runtime_error_log);
-      MergeErrorMaps(&merged, runtime_error_log);
-    }
-  }
   for (FragmentInstanceState* state: fragment_instance_states_) {
     lock_guard<mutex> l(*state->lock());
     if (state->error_log()->size() > 0)  MergeErrorMaps(&merged, *state->error_log());
@@ -2285,11 +2104,20 @@ void DistributeFilters(shared_ptr<TPublishFilterParams> params,
 void Coordinator::TearDown() {
   DCHECK(!torn_down_) << "Coordinator::TearDown() may not be called twice";
   torn_down_ = true;
-  lock_guard<SpinLock> l(filter_lock_);
-  for (auto& filter: filter_routing_table_) {
-    FilterState* state = &filter.second;
-    state->Disable(filter_mem_tracker_.get());
+  if (filter_routing_table_.size() > 0) {
+    query_profile_->AddInfoString("Final filter table", FilterDebugString());
   }
+
+  {
+    lock_guard<SpinLock> l(filter_lock_);
+    for (auto& filter : filter_routing_table_) {
+      FilterState* state = &filter.second;
+      state->Disable(filter_mem_tracker_.get());
+    }
+  }
+
+  // Need to protect against failed Prepare(), where root_sink() would not be set.
+  if (root_sink_ != nullptr) root_sink_->CloseConsumer();
 }
 
 void Coordinator::UpdateFilter(const TUpdateFilterParams& params) {

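A small self-contained sketch of the tear-down ordering introduced above, using stand-in types (FilterState, RootSink, a plain map for the routing table) rather than the real ones: record the final filter table, disable every filter under the filter lock, then close the root sink's consumer only if Prepare() ever produced one.

#include <iostream>
#include <map>
#include <mutex>

struct FilterState { bool enabled = true; void Disable() { enabled = false; } };
struct RootSink { void CloseConsumer() { std::cout << "consumer closed\n"; } };

int main() {
  std::map<int, FilterState> filter_routing_table{{1, {}}, {2, {}}};
  std::mutex filter_lock;
  RootSink* root_sink = nullptr;  // stays null if Prepare() failed

  if (!filter_routing_table.empty()) {
    // Stands in for query_profile_->AddInfoString("Final filter table", ...).
    std::cout << "Final filter table: " << filter_routing_table.size() << " filters\n";
  }
  {
    std::lock_guard<std::mutex> l(filter_lock);
    for (auto& entry : filter_routing_table) entry.second.Disable();
  }
  // Guard against a failed Prepare(), where the sink was never created.
  if (root_sink != nullptr) root_sink->CloseConsumer();
  return 0;
}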
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/runtime/coordinator.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index bb67377..f73cf42 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -34,16 +34,18 @@
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition_variable.hpp>
 
+#include "common/global-types.h"
 #include "common/hdfs.h"
 #include "common/status.h"
-#include "common/global-types.h"
-#include "util/progress-updater.h"
-#include "util/histogram-metric.h"
-#include "util/runtime-profile.h"
+#include "gen-cpp/Frontend_types.h"
+#include "gen-cpp/Types_types.h"
 #include "runtime/runtime-state.h"
 #include "scheduling/simple-scheduler.h"
-#include "gen-cpp/Types_types.h"
-#include "gen-cpp/Frontend_types.h"
+#include "service/fragment-exec-state.h"
+#include "service/fragment-mgr.h"
+#include "util/histogram-metric.h"
+#include "util/progress-updater.h"
+#include "util/runtime-profile.h"
 
 namespace impala {
 
@@ -67,28 +69,33 @@ class TRuntimeProfileTree;
 class RuntimeProfile;
 class TablePrinter;
 class TPlanFragment;
+class QueryResultSet;
 
 struct DebugOptions;
 
-/// Query coordinator: handles execution of plan fragments on remote nodes, given
-/// a TQueryExecRequest. As part of that, it handles all interactions with the
-/// executing backends; it is also responsible for implementing all client requests
-/// regarding the query, including cancellation.
-/// The coordinator fragment is executed locally in the calling thread, all other
-/// fragments are sent to remote nodes. The coordinator also monitors
-/// the execution status of the remote fragments and aborts the entire query if an error
-/// occurs, either in any of the remote fragments or in the local fragment.
+/// Query coordinator: handles execution of fragment instances on remote nodes, given a
+/// TQueryExecRequest. As part of that, it handles all interactions with the executing
+/// backends; it is also responsible for implementing all client requests regarding the
+/// query, including cancellation.
+///
+/// The coordinator monitors the execution status of fragment instances and aborts the
+/// entire query if an error is reported by any of them.
+///
+/// Queries that have results have those results fetched by calling GetNext(). Results
+/// rows are produced by a fragment instance that always executes on the same machine as
+/// the coordinator.
+///
 /// Once a query has finished executing and all results have been returned either to the
 /// caller of GetNext() or a data sink, execution_completed() will return true. If the
-/// query is aborted, execution_completed should also be set to true.
-/// Coordinator is thread-safe, with the exception of GetNext().
+/// query is aborted, execution_completed() should also return true. Coordinator is
+/// thread-safe, with the exception of GetNext().
 //
 /// A typical sequence of calls for a single query (calls under the same numbered
 /// item can happen concurrently):
 /// 1. client: Exec()
 /// 2. client: Wait()/client: Cancel()/backend: UpdateFragmentExecStatus()
 /// 3. client: GetNext()*/client: Cancel()/backend: UpdateFragmentExecStatus()
-//
+///
 /// The implementation ensures that setting an overall error status and initiating
 /// cancellation of local and all remote fragments is atomic.
 ///
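As a concrete illustration of that call sequence, here is a hypothetical driver against a stubbed-out Coordinator with the same interface shape; the stub is not the real class, and QueryResultSet is reduced to a row counter.

#include <iostream>

struct Status {
  bool ok_ = true;
  bool ok() const { return ok_; }
};

struct QueryResultSet { int num_rows = 0; };

struct Coordinator {
  Status Exec() { return {}; }   // start all fragment instances
  Status Wait() { return {}; }   // block until rows are ready (or the query finishes)
  Status GetNext(QueryResultSet* results, int max_rows, bool* eos) {
    results->num_rows = max_rows < 10 ? max_rows : 10;  // pretend we fetched a batch
    *eos = true;                                        // ... and that it was the last one
    return {};
  }
  void Cancel() {}               // idempotent
};

int main() {
  Coordinator coord;
  if (!coord.Exec().ok() || !coord.Wait().ok()) { coord.Cancel(); return 1; }
  bool eos = false;
  while (!eos) {
    QueryResultSet results;
    if (!coord.GetNext(&results, /*max_rows=*/1024, &eos).ok()) {
      coord.Cancel();
      return 1;
    }
    std::cout << "fetched " << results.num_rows << " rows\n";
  }
  return 0;
}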
@@ -104,14 +111,10 @@ class Coordinator {
       RuntimeProfile::EventSequence* events);
   ~Coordinator();
 
-  /// Initiate asynchronous execution of a query with the given schedule. Returns as soon
-  /// as all plan fragments have started executing at their respective backends.
-  /// 'schedule' must contain at least a coordinator plan fragment (ie, can't
-  /// be for a query like 'SELECT 1').
-  /// Populates and prepares output_expr_ctxs from the coordinator's fragment if there is
-  /// one, and LLVM optimizes them together with the fragment's other exprs.
+  /// Initiate asynchronous execution of a query with the given schedule. When it returns,
+  /// all fragment instances have started executing at their respective backends.
   /// A call to Exec() must precede all other member function calls.
-  Status Exec(std::vector<ExprContext*>* output_expr_ctxs);
+  Status Exec();
 
   /// Blocks until result rows are ready to be retrieved via GetNext(), or, if the
   /// query doesn't return rows, until the query finishes or is cancelled.
@@ -120,25 +123,25 @@ class Coordinator {
   /// Wait() calls concurrently.
   Status Wait();
 
-  /// Returns tuples from the coordinator fragment. Any returned tuples are valid until
-  /// the next GetNext() call. If *batch is NULL, execution has completed and GetNext()
-  /// must not be called again.
-  /// GetNext() will not set *batch=NULL until all fragment instances have
-  /// either completed or have failed.
-  /// It is safe to call GetNext() even in the case where there is no coordinator fragment
-  /// (distributed INSERT).
-  /// '*batch' is owned by the underlying PlanFragmentExecutor and must not be deleted.
-  /// *state is owned by the caller, and must not be deleted.
-  /// Returns an error status if an error was encountered either locally or by
-  /// any of the remote fragments or if the query was cancelled.
-  /// GetNext() is not thread-safe: multiple threads must not make concurrent
-  /// GetNext() calls (but may call any of the other member functions concurrently
-  /// with GetNext()).
-  Status GetNext(RowBatch** batch, RuntimeState* state);
+  /// Fills 'results' with up to 'max_rows' rows. May return fewer than 'max_rows'
+  /// rows, but will not return more.
+  ///
+  /// If *eos is true, execution has completed and GetNext() must not be called
+  /// again.
+  ///
+  /// GetNext() will not set *eos=true until all fragment instances have either completed
+  /// or have failed.
+  ///
+  /// Returns an error status if an error was encountered either locally or by any of the
+  /// remote fragments or if the query was cancelled.
+  ///
+  /// GetNext() is not thread-safe: multiple threads must not make concurrent GetNext()
+  /// calls (but may call any of the other member functions concurrently with GetNext()).
+  Status GetNext(QueryResultSet* results, int max_rows, bool* eos);
 
   /// Cancel execution of query. This includes the execution of the local plan fragment,
-  /// if any, as well as all plan fragments on remote nodes. Sets query_status_ to
-  /// the given cause if non-NULL. Otherwise, sets query_status_ to Status::CANCELLED.
+  /// if any, as well as all plan fragments on remote nodes. Sets query_status_ to the
+  /// given cause if non-NULL. Otherwise, sets query_status_ to Status::CANCELLED.
   /// Idempotent.
   void Cancel(const Status* cause = NULL);
 
@@ -151,12 +154,18 @@ class Coordinator {
   /// to CancelInternal().
   Status UpdateFragmentExecStatus(const TReportExecStatusParams& params);
 
-  /// only valid *after* calling Exec(), and may return NULL if there is no executor
+  /// Only valid *after* calling Exec(). Returns nullptr if the running query does not
+  /// produce any rows.
+  ///
+  /// TODO: The only dependency on this is QueryExecState, used to track memory for the
+  /// result cache. Remove this dependency, possibly by moving result caching inside this
+  /// class.
   RuntimeState* runtime_state();
-  const RowDescriptor& row_desc() const;
 
   /// Only valid after Exec(). Returns runtime_state()->query_mem_tracker() if there
   /// is a coordinator fragment, or query_mem_tracker_ (initialized in Exec()) otherwise.
+  ///
+  /// TODO: Remove, see runtime_state().
   MemTracker* query_mem_tracker();
 
   /// Get cumulative profile aggregated over all fragments of the query.
@@ -275,8 +284,18 @@ class Coordinator {
   /// Once this is set to true, errors from remote fragments are ignored.
   bool returned_all_results_;
 
-  /// execution state of coordinator fragment
-  boost::scoped_ptr<PlanFragmentExecutor> executor_;
+  /// Non-null if and only if the query produces results for the client; i.e. is of
+  /// TStmtType::QUERY. Coordinator uses these to pull results from plan tree and return
+  /// them to the client in GetNext(), and also to access the fragment instance's runtime
+  /// state.
+  ///
+  /// Result rows are materialized by this fragment instance in its own thread. They are
+  /// materialized into a QueryResultSet provided to the coordinator during GetNext().
+  ///
+  /// Not owned by this class, created during fragment instance start-up by
+  /// FragmentExecState and set here in Exec().
+  PlanFragmentExecutor* executor_ = nullptr;
+  PlanRootSink* root_sink_ = nullptr;
 
   /// Query mem tracker for this coordinator initialized in Exec(). Only valid if there
   /// is no coordinator fragment (i.e. executor_ == NULL). If executor_ is not NULL,
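Conceptually, root_sink_ acts as a rendezvous between the fragment instance's thread (producer) and the coordinator's GetNext() thread (consumer). The following is a simplified standalone sketch of that handoff, not the real PlanRootSink implementation; RowHandoff and its int-vector "rows" are illustrative only.

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

class RowHandoff {
 public:
  // Called by the coordinator: hand over a buffer and wait for the producer to fill it.
  void GetNext(std::vector<int>* results, bool* eos) {
    std::unique_lock<std::mutex> l(mu_);
    results_ = results;
    cv_.notify_all();                                // wake the producer
    cv_.wait(l, [this] { return results_ == nullptr; });
    *eos = done_;
  }
  // Called by the fragment instance's thread: fill the consumer's buffer.
  void Send(const std::vector<int>& batch, bool last) {
    std::unique_lock<std::mutex> l(mu_);
    cv_.wait(l, [this] { return results_ != nullptr; });
    *results_ = batch;
    done_ = last;
    results_ = nullptr;
    cv_.notify_all();                                // wake the consumer
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::vector<int>* results_ = nullptr;
  bool done_ = false;
};

int main() {
  RowHandoff sink;
  std::thread producer([&] { sink.Send({1, 2, 3}, /*last=*/true); });
  std::vector<int> rows;
  bool eos = false;
  sink.GetNext(&rows, &eos);
  std::cout << rows.size() << " rows, eos=" << eos << "\n";
  producer.join();
  return 0;
}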
@@ -383,7 +402,7 @@ class Coordinator {
   RuntimeProfile::Counter* finalization_timer_;
 
   /// Barrier that is released when all calls to ExecRemoteFragment() have
-  /// returned, successfully or not. Initialised during StartRemoteFragments().
+  /// returned, successfully or not. Initialised during Exec().
   boost::scoped_ptr<CountingBarrier> exec_complete_barrier_;
 
   /// Represents a runtime filter target.
@@ -465,10 +484,9 @@ class Coordinator {
   /// Runs cancel logic. Assumes that lock_ is held.
   void CancelInternal();
 
-  /// Cancels remote fragments. Assumes that lock_ is held.  This can be called when
-  /// the query is not being cancelled in the case where the query limit is
-  /// reached.
-  void CancelRemoteFragments();
+  /// Cancels all fragment instances. Assumes that lock_ is held. This may be called when
+  /// the query is not being cancelled in the case where the query limit is reached.
+  void CancelFragmentInstances();
 
   /// Acquires lock_ and updates query_status_ with 'status' if it's not already
   /// an error status, and returns the current query_status_.
@@ -531,30 +549,18 @@ class Coordinator {
   void PopulatePathPermissionCache(hdfsFS fs, const std::string& path_str,
       PermissionCache* permissions_cache);
 
-  /// Validates that all collection-typed slots in the given batch are set to NULL.
-  /// See SubplanNode for details on when collection-typed slots are set to NULL.
-  /// TODO: This validation will become obsolete when we can return collection values.
-  /// We will then need a different mechanism to assert the correct behavior of the
-  /// SubplanNode with respect to setting collection-slots to NULL.
-  void ValidateCollectionSlots(RowBatch* batch);
-
-  /// Prepare coordinator fragment for execution (update filter routing table,
-  /// prepare executor, set up output exprs) and create its FragmentInstanceState.
-  Status PrepareCoordFragment(std::vector<ExprContext*>* output_expr_ctxs);
-
-  /// Starts all remote fragments contained in the schedule by issuing RPCs in parallel,
+  /// Starts all fragment instances contained in the schedule by issuing RPCs in parallel,
   /// and then waiting for all of the RPCs to complete.
-  void StartRemoteFragments();
+  void StartFragments();
 
-  /// Starts all remote fragment instances contained in the schedule by issuing RPCs in
-  /// parallel and then waiting for all of the RPCs to complete. Also sets up and
-  /// registers the state for all non-coordinator fragment instance.
-  void MtStartRemoteFInstances();
+  /// Starts all fragment instances contained in the schedule by issuing RPCs in parallel
+  /// and then waiting for all of the RPCs to complete.
+  void MtStartFInstances();
 
   /// Calls CancelInternal() and returns an error if there was any error starting the
   /// fragments.
   /// Also updates query_profile_ with the startup latency histogram.
-  Status FinishRemoteInstanceStartup();
+  Status FinishInstanceStartup();
 
   /// Build the filter routing table by iterating over all plan nodes and collecting the
   /// filters that they either produce or consume. The source and target fragment

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/runtime/exec-env.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/exec-env.cc b/be/src/runtime/exec-env.cc
index 1b3fa14..db0d242 100644
--- a/be/src/runtime/exec-env.cc
+++ b/be/src/runtime/exec-env.cc
@@ -24,6 +24,8 @@
 #include <gutil/strings/substitute.h>
 
 #include "common/logging.h"
+#include "gen-cpp/CatalogService.h"
+#include "gen-cpp/ImpalaInternalService.h"
 #include "runtime/backend-client.h"
 #include "runtime/client-cache.h"
 #include "runtime/coordinator.h"
@@ -36,25 +38,24 @@
 #include "runtime/thread-resource-mgr.h"
 #include "runtime/tmp-file-mgr.h"
 #include "scheduling/request-pool-service.h"
-#include "service/frontend.h"
 #include "scheduling/simple-scheduler.h"
+#include "service/fragment-mgr.h"
+#include "service/frontend.h"
 #include "statestore/statestore-subscriber.h"
 #include "util/debug-util.h"
+#include "util/debug-util.h"
 #include "util/default-path-handlers.h"
 #include "util/hdfs-bulk-ops.h"
 #include "util/mem-info.h"
+#include "util/mem-info.h"
+#include "util/memory-metrics.h"
+#include "util/memory-metrics.h"
 #include "util/metrics.h"
 #include "util/network-util.h"
 #include "util/parse-util.h"
-#include "util/memory-metrics.h"
-#include "util/webserver.h"
-#include "util/mem-info.h"
-#include "util/debug-util.h"
-#include "util/memory-metrics.h"
 #include "util/pretty-printer.h"
 #include "util/thread-pool.h"
-#include "gen-cpp/ImpalaInternalService.h"
-#include "gen-cpp/CatalogService.h"
+#include "util/webserver.h"
 
 #include "common/names.h"
 
@@ -145,6 +146,7 @@ ExecEnv::ExecEnv()
     fragment_exec_thread_pool_(new CallableThreadPool("coordinator-fragment-rpc",
         "worker", FLAGS_coordinator_rpc_threads, numeric_limits<int32_t>::max())),
     async_rpc_pool_(new CallableThreadPool("rpc-pool", "async-rpc-sender", 8, 10000)),
+    fragment_mgr_(new FragmentMgr()),
     enable_webserver_(FLAGS_enable_webserver),
     is_fe_tests_(false),
     backend_address_(MakeNetworkAddress(FLAGS_hostname, FLAGS_be_port)) {
@@ -197,6 +199,7 @@ ExecEnv::ExecEnv(const string& hostname, int backend_port, int subscriber_port,
     fragment_exec_thread_pool_(new CallableThreadPool("coordinator-fragment-rpc",
         "worker", FLAGS_coordinator_rpc_threads, numeric_limits<int32_t>::max())),
     async_rpc_pool_(new CallableThreadPool("rpc-pool", "async-rpc-sender", 8, 10000)),
+    fragment_mgr_(new FragmentMgr()),
     enable_webserver_(FLAGS_enable_webserver && webserver_port > 0),
     is_fe_tests_(false),
     backend_address_(MakeNetworkAddress(FLAGS_hostname, FLAGS_be_port)) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/runtime/exec-env.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/exec-env.h b/be/src/runtime/exec-env.h
index 303876f..718c6d0 100644
--- a/be/src/runtime/exec-env.h
+++ b/be/src/runtime/exec-env.h
@@ -96,6 +96,7 @@ class ExecEnv {
   Frontend* frontend() { return frontend_.get(); };
   RequestPoolService* request_pool_service() { return request_pool_service_.get(); }
   CallableThreadPool* rpc_pool() { return async_rpc_pool_.get(); }
+  FragmentMgr* fragment_mgr() { return fragment_mgr_.get(); }
 
   void set_enable_webserver(bool enable) { enable_webserver_ = enable; }
 
@@ -137,6 +138,7 @@ class ExecEnv {
   boost::scoped_ptr<Frontend> frontend_;
   boost::scoped_ptr<CallableThreadPool> fragment_exec_thread_pool_;
   boost::scoped_ptr<CallableThreadPool> async_rpc_pool_;
+  boost::scoped_ptr<FragmentMgr> fragment_mgr_;
 
   /// Not owned by this class
   ImpalaServer* impala_server_;


[26/32] incubator-impala git commit: Add search / sort to HTML tables for metrics and threads

Posted by ta...@apache.org.
Add search / sort to HTML tables for metrics and threads

Change-Id: If069ce6a9eae00bacaa30605d23bea72f29e5c4f
Reviewed-on: http://gerrit.cloudera.org:8080/4743
Tested-by: Internal Jenkins
Reviewed-by: Henry Robinson <he...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d0a2d1d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d0a2d1d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d0a2d1d4

Branch: refs/heads/hadoop-next
Commit: d0a2d1d43da5ab8d4c1c06db3524458cb23a76bb
Parents: e3a0891
Author: Henry Robinson <he...@cloudera.com>
Authored: Mon Oct 17 17:05:05 2016 -0700
Committer: Henry Robinson <he...@cloudera.com>
Committed: Tue Oct 18 05:08:21 2016 +0000

----------------------------------------------------------------------
 www/metric_group.tmpl | 62 ++++++++++++++++++++++++++++------------------
 www/thread-group.tmpl | 44 +++++++++++++++++++++-----------
 2 files changed, 67 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0a2d1d4/www/metric_group.tmpl
----------------------------------------------------------------------
diff --git a/www/metric_group.tmpl b/www/metric_group.tmpl
index 82f5408..0cbfefe 100644
--- a/www/metric_group.tmpl
+++ b/www/metric_group.tmpl
@@ -19,33 +19,47 @@ under the License.
 {{!Renders a metric group and all its children, one table each}}
 <a id="{{name}}"><h3>{{name}}</h3></a>
 
-<table class='table table-bordered table-hover'>
-  <tr>
-    <th>Name</th>
-    <th>Value</th>
-    <th>Description</th>
-  </tr>
+<table id ="{{name}}-tbl" class='table table-bordered table-hover'>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Value</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
     {{#metrics}}
-  <tr>
-    <td><tt>{{name}}</tt></td>
-    {{! Is this a stats metric? }}
-    {{?mean}}
-    <td>
-      Last (of {{count}}): <strong>{{last}}</strong>.
-      Min: {{min}}, max: {{max}}, avg: {{mean}}</td>
-    {{/mean}}
-    {{^mean}}
-    <td>
-      {{human_readable}}
-    </td>
-    {{/mean}}
-    <td>
-      {{description}}
-    </td>
-  </tr>
-  {{/metrics}}
+    <tr>
+      <td><tt>{{name}}</tt></td>
+      {{! Is this a stats metric? }}
+      {{?mean}}
+      <td>
+        Last (of {{count}}): <strong>{{last}}</strong>.
+        Min: {{min}}, max: {{max}}, avg: {{mean}}</td>
+      {{/mean}}
+      {{^mean}}
+      <td>
+        {{human_readable}}
+      </td>
+      {{/mean}}
+      <td>
+        {{description}}
+      </td>
+    </tr>
+    {{/metrics}}
+  </tbody>
 </table>
 
+<script>
+    $(document).ready(function() {
+        $('#{{name}}-tbl').DataTable({
+            "order": [[ 1, "desc" ]],
+            "pageLength": 100
+        });
+    });
+</script>
+
+
 {{! Recurse into all child groups }}
 {{#child_groups}}
 {{>www/metric_group.tmpl}}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0a2d1d4/www/thread-group.tmpl
----------------------------------------------------------------------
diff --git a/www/thread-group.tmpl b/www/thread-group.tmpl
index 90f6cd4..19730c6 100644
--- a/www/thread-group.tmpl
+++ b/www/thread-group.tmpl
@@ -20,21 +20,35 @@ under the License.
 
 <h2>Thread Group: {{thread-group.category}}</h2>
 
-<table class='table table-hover table-border'>
-  <tr>
-    <th>Thread name</th>
-    <th>Cumulative User CPU(s)</th>
-    <th>Cumulative Kernel CPU(s)</th>
-    <th>Cumulative IO-wait(s)</th>
-  </tr>
-  {{#threads}}
-  <tr>
-    <td>{{name}}</td>
-    <td>{{user_ns}}</td>
-    <td>{{kernel_ns}}</td>
-    <td>{{iowait_ns}}</td>
-  </tr>
-  {{/threads}}
+<table id="{{thread-group.category}}-tbl" class='table table-hover table-bordered'
+       style='table-layout:fixed; word-wrap: break-word'>
+  <thead>
+    <tr>
+      <th>Thread name</th>
+      <th>Cumulative User CPU(s)</th>
+      <th>Cumulative Kernel CPU(s)</th>
+      <th>Cumulative IO-wait(s)</th>
+    </tr>
+  </thead>
+  <tbody>
+    {{#threads}}
+    <tr>
+      <td>{{name}}</td>
+      <td>{{user_ns}}</td>
+      <td>{{kernel_ns}}</td>
+      <td>{{iowait_ns}}</td>
+    </tr>
+    {{/threads}}
+  </tbody>
 </table>
 
+<script>
+    $(document).ready(function() {
+        $('#{{thread-group.category}}-tbl').DataTable({
+            "order": [[ 1, "desc" ]],
+            "pageLength": 100
+        });
+    });
+</script>
+
 {{> www/common-footer.tmpl}}


[17/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
index 79f75b6..556ba65 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
@@ -1,10 +1,14 @@
 # predicate pushdown
 select * from (select * from functional.alltypessmall) a where id < 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
    predicates: functional.alltypessmall.id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypessmall]
@@ -14,6 +18,8 @@ select * from (select * from functional.alltypessmall) a where id < 5
 # predicate pushdown is prevented in presence of limit clause
 select * from (select * from functional.alltypessmall limit 10) a where id < 5 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SELECT
 |  predicates: functional.alltypessmall.id < 5
 |  limit: 5
@@ -22,6 +28,8 @@ select * from (select * from functional.alltypessmall limit 10) a where id < 5 l
    partitions=4/4 files=4 size=6.32KB
    limit: 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:SELECT
 |  predicates: functional.alltypessmall.id < 5
 |  limit: 5
@@ -39,6 +47,8 @@ select *
 from (select * from functional.alltypessmall order by id limit 10) a
 where id < 5 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SELECT
 |  predicates: id < 5
 |  limit: 5
@@ -49,6 +59,8 @@ where id < 5 limit 5
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:SELECT
 |  predicates: id < 5
 |  limit: 5
@@ -72,6 +84,8 @@ from functional.alltypes
     group by 1 order by 2 limit 5) a using (id)
 where a.id < 5 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = id
 |  runtime filters: RF000 <- id
@@ -92,6 +106,8 @@ where a.id < 5 limit 5
    predicates: functional.alltypes.id < 5
    runtime filters: RF000 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |  limit: 5
 |
@@ -135,6 +151,8 @@ from (
   limit 10) a
 where id < 5 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SELECT
 |  predicates: a.id < 5
 |  limit: 5
@@ -151,6 +169,8 @@ where id < 5 limit 5
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> a.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:SELECT
 |  predicates: a.id < 5
 |  limit: 5
@@ -179,6 +199,8 @@ where id < 5
 order by id
 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=5]
 |  order by: id ASC
 |
@@ -189,6 +211,8 @@ limit 5
    partitions=4/4 files=4 size=6.32KB
    limit: 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=5]
 |  order by: id ASC
 |
@@ -213,6 +237,8 @@ where id < 5
 order by id
 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=5]
 |  order by: id ASC
 |
@@ -231,6 +257,8 @@ limit 5
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> a.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=5]
 |  order by: id ASC
 |
@@ -259,6 +287,8 @@ select *
 from functional.alltypes
   join (select id from functional.alltypessmall limit 10) a using (id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = id
 |  runtime filters: RF000 <- id
@@ -271,6 +301,8 @@ from functional.alltypes
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -296,6 +328,8 @@ select *
 from functional.alltypes
   join (select id from functional.alltypessmall order by id limit 10) a using (id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = id
 |  runtime filters: RF000 <- id
@@ -310,6 +344,8 @@ from functional.alltypes
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, BROADCAST]
@@ -344,6 +380,8 @@ where a.id < 5
 order by a.id
 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:TOP-N [LIMIT=5]
 |  order by: id ASC
 |
@@ -368,6 +406,8 @@ limit 5
    predicates: functional.alltypes.id < 5
    runtime filters: RF000 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC
 |  limit: 5
@@ -416,6 +456,8 @@ where a.id < 5
 order by a.id
 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=5]
 |  order by: id ASC
 |
@@ -442,6 +484,8 @@ limit 5
    predicates: functional.alltypes.id < 5
    runtime filters: RF000 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: id ASC
 |  limit: 5
@@ -486,6 +530,8 @@ select x.id from (
 order by x.id
 limit 100 offset 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=100 OFFSET=4]
 |  order by: id ASC
 |
@@ -495,6 +541,8 @@ limit 100 offset 4
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=100 OFFSET=4]
 |  order by: id ASC
 |
@@ -519,6 +567,8 @@ left outer join
 on (a.id = b.id)
 where a.id > 10 and b.id > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: id = id
 |  other predicates: id > 20
@@ -542,6 +592,8 @@ left outer join
 on (a.id = b.id)
 where a.id > 10 and b.id > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = id
 |  other predicates: id > 20
@@ -570,6 +622,8 @@ right outer join
 on (a.id = b.id)
 where a.id > 10 and b.id > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = id
 |  other predicates: id > 10
@@ -595,6 +649,8 @@ right outer join
 on (a.id = b.id)
 where a.id > 10 and b.id > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = id
 |  other predicates: id > 10
@@ -618,6 +674,8 @@ where a.id > 10 and b.id > 20
 # have explain_level=1
 select * from (select * from functional.alltypes limit 100) v where id < 10 limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SELECT
 |  predicates: functional.alltypes.id < 10
 |  limit: 1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
index fe6ade8..13f6326 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
@@ -12,6 +12,8 @@ from (
 join functional.alltypes t2 on (t1.int_col = t2.int_col)
 where month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -31,6 +33,8 @@ where month = 1
    partitions=2/24 files=2 size=40.32KB
    runtime filters: RF000 -> t2.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -67,8 +71,12 @@ where month = 1
 # simple full scan subquery
 select * from (select y x from (select id y from functional_hbase.alltypessmall) a) b
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -78,6 +86,8 @@ select * from (select t2.*
 from functional.testtbl t1 join functional.testtbl t2 using(id)
 where t1.zip = 94611) x
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id
 |  runtime filters: RF000 <- t2.id
@@ -90,6 +100,8 @@ where t1.zip = 94611) x
    predicates: t1.zip = 94611
    runtime filters: RF000 -> t1.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -121,6 +133,8 @@ from
      and b.string_col = '15'
      and a.tinyint_col + b.tinyint_col < 15) x
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a.id = b.id, a.int_col = b.int_col
 |  other predicates: a.tinyint_col = 15, a.day >= 6, a.tinyint_col + b.tinyint_col < 15
@@ -145,6 +159,8 @@ NODE 1:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -168,6 +184,8 @@ NODE 1:
 # predicate pushdown
 select * from (select * from functional_hbase.alltypessmall) a where id < 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: functional_hbase.alltypessmall.id < 5
 ====
@@ -188,6 +206,8 @@ and b.string_col = '15'
 and a.tinyint_col + b.tinyint_col < 15
 and b.id + 15 = 27
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = id, int_col = int_col
 |  other predicates: tinyint_col = 15, day >= 6, tinyint_col + tinyint_col < 15
@@ -202,6 +222,8 @@ and b.id + 15 = 27
    predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
    runtime filters: RF000 -> id, RF001 -> int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -240,6 +262,8 @@ and a.tinyint_col = 15
 and b.string_col = '15'
 and a.tinyint_col + b.tinyint_col < 15
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: id = id, int_col = int_col
 |  other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
@@ -264,6 +288,8 @@ NODE 1:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -301,6 +327,8 @@ and x.float_col > 4.5
 and c.string_col < '7'
 and x.int_col + x.float_col + cast(c.string_col as float) < 1000
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: c.id = a.tinyint_col
 |  other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
@@ -337,6 +365,8 @@ NODE 2:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -374,6 +404,8 @@ avg(tinyint_col)
 from (select * from functional.alltypesagg) a
 group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), avg(functional.alltypesagg.tinyint_col)
 |  group by: functional.alltypesagg.tinyint_col
@@ -394,6 +426,8 @@ NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=__HIVE_DEFAULT_PARTITION__/000000_0 0:72759
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -417,6 +451,8 @@ from functional.alltypesagg
 group by 1
 ) a
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
 |  group by: tinyint_col
@@ -424,6 +460,8 @@ group by 1
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -450,6 +488,8 @@ from
     limit 5
 ) y
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=5]
 |  order by: c2 ASC, c3 DESC
 |
@@ -459,6 +499,8 @@ from
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c2 ASC, c3 DESC
 |  limit: 5
@@ -496,14 +538,20 @@ from (
 order by 2,1 desc
 limit 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # distinct *
 select distinct *
 from (select distinct * from functional.testtbl) x
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
 |
@@ -513,6 +561,8 @@ from (select distinct * from functional.testtbl) x
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -533,6 +583,8 @@ from (select distinct * from functional.testtbl) x
 select distinct id, zip
 from (select distinct * from functional.testtbl) x
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  group by: functional.testtbl.id, functional.testtbl.zip
 |
@@ -542,6 +594,8 @@ from (select distinct * from functional.testtbl) x
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
@@ -576,6 +630,8 @@ from (
 where c1 is not null
 and   c2 > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*), avg(functional.alltypesagg.int_col)
 |  group by: functional.alltypesagg.int_col % 7
@@ -584,6 +640,8 @@ and   c2 > 10
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -614,6 +672,8 @@ from (
      on (j.test_name = d.name)
 where j.test_id <= 1006
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: b.name = a.test_name
 |  runtime filters: RF000 <- a.test_name
@@ -626,6 +686,8 @@ where j.test_id <= 1006
    partitions=1/1 files=1 size=171B
    runtime filters: RF000 -> b.name
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -663,6 +725,8 @@ from functional.alltypessmall c
      on (x.tinyint_col = c.id)
 group by x.smallint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(b.id)
 |  group by: a.smallint_col
@@ -685,6 +749,8 @@ group by x.smallint_col
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:EXCHANGE [UNPARTITIONED]
 |
 10:AGGREGATE [FINALIZE]
@@ -746,6 +812,8 @@ and x.float_col > 4.5
 and c.string_col < '7'
 and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: c.id = a.tinyint_col
 |  other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
@@ -769,6 +837,8 @@ and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
    predicates: c.string_col < '7'
    runtime filters: RF000 -> c.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -812,6 +882,8 @@ from functional.alltypessmall c
   ) x on (x.smallint_col = c.id)
 group by x.smallint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum(count(a.id))
 |  group by: b.smallint_col
@@ -839,6 +911,8 @@ group by x.smallint_col
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> c.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 13:EXCHANGE [UNPARTITIONED]
 |
 12:AGGREGATE [FINALIZE]
@@ -887,9 +961,13 @@ group by x.smallint_col
 # Values statement in subqueries with predicate
 select * from (select y from (values((1 as y),(11))) a where y < 10) b
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ====
@@ -902,6 +980,8 @@ select * from
      (select tinyint_col from functional.alltypes)) a
    where y < 10) b
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=1
 |
@@ -909,6 +989,8 @@ select * from
    partitions=24/24 files=24 size=478.45KB
    predicates: functional.alltypes.tinyint_col < 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -922,9 +1004,13 @@ select * from
 select * from (select 1 as y union all select 2 union all select * from (select 11) a) b
 where y < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=2
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=2
 ====
@@ -933,11 +1019,15 @@ where y < 10
 select * from (values(1 as y) union all values(2) union all select * from (values(11)) a) b
 where y < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=2
 |
 01:UNION
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=2
 |
@@ -951,6 +1041,8 @@ inner join
 inner join
 (select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: b = b
 |
@@ -966,6 +1058,8 @@ inner join
 00:UNION
    constant-operands=2
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: b = b
 |
@@ -992,6 +1086,8 @@ left semi join
 inner join
 (select 1 a, 3 b union all select 1 a, 3 b) z on z.b = x.id + 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: x.id + 2 = b
 |  runtime filters: RF000 <- b
@@ -1010,6 +1106,8 @@ inner join
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> x.id + 2, RF001 -> x.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -1039,6 +1137,8 @@ select b.* from functional.decimal_tbl a left outer join
   (select d1, d1 + NULL IS NULL x from functional.decimal_tbl) b
 on (a.d1 = b.d1)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.d1 = d1
 |
@@ -1056,6 +1156,8 @@ where foo = 10
 group by foo
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: sum(foo)
 |  group by: foo
@@ -1075,6 +1177,8 @@ select * from
    from functional.alltypestiny t1 left outer join functional.alltypes t2
    on t1.int_col = t2.int_col and t1.tinyint_col = t2.tinyint_col) t
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: t2.int_col = t1.int_col, t2.tinyint_col = t1.tinyint_col
 |  runtime filters: RF000 <- t1.int_col, RF001 <- t1.tinyint_col
@@ -1096,6 +1200,8 @@ select 1 from
 inner join functional.alltypestiny c
 on (aid < bid and aid = c.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = c.id
 |  runtime filters: RF000 <- c.id
@@ -1131,6 +1237,8 @@ inner join
    on a.id = b.int_col) v
 on (t1.id = v.id and v.int_col is null and v.int_col < 10 and v.id < 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = t1.id
 |  runtime filters: RF000 <- t1.id
@@ -1161,6 +1269,8 @@ select * from
      from functional.alltypestiny) iv
   ) ivv
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ====
@@ -1171,6 +1281,8 @@ select * from
      from functional.alltypestiny) iv
   ) ivv where bigint_col = bigint_col2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
    predicates: bigint_col = bigint_col
@@ -1183,6 +1295,8 @@ select * from
      from functional.alltypestiny) iv
   ) ivv
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(bigint_col)
 |
@@ -1196,6 +1310,8 @@ select * from
      from functional.alltypestiny) iv
   ) ivv where s1 = s2
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(bigint_col)
 |  having: sum(bigint_col) = sum(bigint_col)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
index e1951e0..49a0b87 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
@@ -23,6 +23,8 @@ order by
   o_orderdate
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=10]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
 |
@@ -52,6 +54,8 @@ limit 10
    predicates: l_shipdate > '1995-03-15'
    runtime filters: RF001 -> l.l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
 |  limit: 10
@@ -120,6 +124,8 @@ order by
   o_orderdate
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=10]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
 |
@@ -149,6 +155,8 @@ limit 10
    predicates: c.c_mktsegment = 'BUILDING'
    runtime filters: RF001 -> c.c_custkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
 |  limit: 10
@@ -219,6 +227,8 @@ order by
   revenue desc
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:TOP-N [LIMIT=100]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
 |
@@ -271,6 +281,8 @@ limit 100
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
 |  limit: 100
@@ -369,6 +381,8 @@ where
   and s.s_nationkey = n.n_nationkey
   and n.n_regionkey = r.r_regionkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: n.n_regionkey = r.r_regionkey
 |  runtime filters: RF000 <- r.r_regionkey
@@ -405,6 +419,8 @@ where
    partitions=1/1 files=1 size=1.33MB
    runtime filters: RF001 -> s.s_nationkey, RF002 -> s.s_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 13:EXCHANGE [UNPARTITIONED]
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
@@ -469,6 +485,8 @@ order by
   o_orderpriority
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=10]
 |  order by: o_orderpriority ASC
 |
@@ -489,6 +507,8 @@ limit 10
    predicates: l_commitdate < l_receiptdate
    runtime filters: RF000 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_orderpriority ASC
 |  limit: 10
@@ -531,6 +551,8 @@ from tpch.orders
 group by o_orderpriority
 order by o_orderpriority limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=10]
 |  order by: o_orderpriority ASC
 |
@@ -547,6 +569,8 @@ order by o_orderpriority limit 10
 00:SCAN HDFS [tpch.orders]
    partitions=1/1 files=1 size=162.56MB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_orderpriority ASC
 |  limit: 10
@@ -585,6 +609,8 @@ from tpch.orders
 group by o_orderpriority
 order by o_orderpriority limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=10]
 |  order by: o_orderpriority ASC
 |
@@ -603,6 +629,8 @@ order by o_orderpriority limit 10
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> o_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_orderpriority ASC
 |  limit: 10
@@ -643,6 +671,8 @@ from tpch.customer
   join tpch.nation on (c_nationkey = n_nationkey)
 where n_name = 'x'
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -666,6 +696,8 @@ where n_name = 'x'
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF001 -> o_custkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -708,6 +740,8 @@ from tpch.customer
   join tpch.nation on (c_nationkey = n_nationkey)
 where n_name = 'x'
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -728,6 +762,8 @@ where n_name = 'x'
    partitions=1/1 files=1 size=23.08MB
    runtime filters: RF000 -> c_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -765,6 +801,8 @@ select c.int_col from functional.alltypestiny a
 cross join functional.alltypestiny b
 cross join functional.alltypes c
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--03:NESTED LOOP JOIN [CROSS JOIN]
@@ -788,6 +826,8 @@ right join functional.alltypesagg t4 on (t3.id = t4.id)
 inner join functional.alltypes t5 on (t4.id = t5.id)
 inner join functional.alltypestiny t6 on (t5.id = t6.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -843,6 +883,8 @@ left semi join functional.alltypesagg t4 on (t3.id = t4.id)
 inner join functional.alltypes t5 on (t3.id = t5.id)
 right join functional.alltypestiny t6 on (t5.id = t6.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -897,6 +939,8 @@ inner join functional.alltypessmall t4 on (t3.id = t4.id)
 left semi join functional.alltypes t5 on (t4.id = t5.id)
 inner join functional.alltypestiny t6 on (t3.id = t6.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -952,6 +996,8 @@ inner join functional.alltypessmall t4 on (t3.id = t4.id)
 left anti join functional.alltypes t5 on (t4.id = t5.id)
 inner join functional.alltypestiny t6 on (t3.id = t6.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -1012,6 +1058,8 @@ on (t2.id = t3.id)
 inner join functional.alltypestiny t4
 on (t3.id = t4.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:HASH JOIN [INNER JOIN]
 |  hash predicates: t3.id = t4.id
 |  runtime filters: RF000 <- t4.id
@@ -1075,6 +1123,8 @@ on (t2.id = t3.id)
 inner join functional.alltypestiny t4
 on (t3.id = t4.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:HASH JOIN [INNER JOIN]
 |  hash predicates: t3.id = t4.id
 |  runtime filters: RF000 <- t4.id
@@ -1137,6 +1187,8 @@ inner join functional.alltypestiny t4
 on (t2.id = t4.id)
 where t2.month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = t4.id
 |  runtime filters: RF000 <- t4.id
@@ -1201,6 +1253,8 @@ inner join functional.alltypestiny t4
 on (t2.id = t4.id)
 where t2.month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = t4.id
 |  runtime filters: RF000 <- t4.id
@@ -1264,6 +1318,8 @@ LEFT SEMI JOIN
 ON t4.bigint_col = `$a$3`.`$c$1`
 WHERE `$a$2`.`$c$1` > t4.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:AGGREGATE [FINALIZE]
 |  output: sum(t4.tinyint_col)
 |
@@ -1310,6 +1366,8 @@ left outer join functional.alltypessmall b
 # both predicates should appear in the 'other predicates'
 where a.int_col = b.int_col and b.bigint_col < a.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: b.id = a.id
 |  other predicates: a.int_col = b.int_col, b.bigint_col < a.tinyint_col
@@ -1333,6 +1391,8 @@ right outer join functional.alltypes c
 where a.int_col = b.int_col and b.bigint_col < a.tinyint_col
   and b.tinyint_col = c.tinyint_col and b.bool_col != c.bool_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = b.id
 |  other predicates: a.int_col = b.int_col, b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col, b.bigint_col < a.tinyint_col
@@ -1363,6 +1423,8 @@ right outer join functional.alltypes c
 # all predicates should appear in the 'other predicates'
 where b.tinyint_col = c.tinyint_col and b.bool_col != c.bool_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = b.id
 |  other predicates: b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col
@@ -1393,6 +1455,8 @@ select count(1) from
 left outer join functional.alltypestiny t3
 on (t3.string_col = t1.string_col_1 and t3.date_string_col = t1.string_col_2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(1)
 |
@@ -1427,6 +1491,8 @@ where timestamp_col = now()) b
 on (a.id = b.id)
 and a.date_string_col = ''
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: a.id = functional.alltypes.id
 |  runtime filters: RF000 <- functional.alltypes.id
@@ -1447,6 +1513,8 @@ where date_string_col = '') b
 on (a.id = b.id)
 and a.timestamp_col = now()
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: functional.alltypes.id = a.id
 |  runtime filters: RF000 <- a.id
@@ -1468,6 +1536,8 @@ where timestamp_col <=> now()) b
 on (a.id = b.id)
 and a.date_string_col <=> ''
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: a.id = functional.alltypes.id
 |  runtime filters: RF000 <- functional.alltypes.id
@@ -1488,6 +1558,8 @@ where date_string_col <=> '') b
 on (a.id = b.id)
 and a.timestamp_col <=> now()
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: functional.alltypes.id = a.id
 |  runtime filters: RF000 <- a.id

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index 260ba21..ba1395a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -2,6 +2,8 @@ select *
 from functional.testtbl t1 join functional.testtbl t2 using(id)
 where t1.zip = 94611
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id
 |  runtime filters: RF000 <- t2.id
@@ -14,6 +16,8 @@ where t1.zip = 94611
    predicates: t1.zip = 94611
    runtime filters: RF000 -> t1.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -36,6 +40,8 @@ from functional.testtbl t1 left outer join functional.testtbl t2
 on (t1.id - 1 = t2.id + 1)
 where t1.zip = 94611
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t1.id - 1 = t2.id + 1
 |
@@ -46,6 +52,8 @@ where t1.zip = 94611
    partitions=1/1 files=0 size=0B
    predicates: t1.zip = 94611
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
@@ -67,6 +75,8 @@ from (select * from functional.alltypestiny) t1
   join (select * from functional.alltypestiny) t2 on (t1.id = t2.id)
   join functional.alltypestiny t3 on (coalesce(t1.id, t2.id) = t3.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: coalesce(functional.alltypestiny.id, functional.alltypestiny.id) = t3.id
 |  runtime filters: RF000 <- t3.id
@@ -86,6 +96,8 @@ from (select * from functional.alltypestiny) t1
    partitions=4/4 files=4 size=460B
    runtime filters: RF000 -> coalesce(functional.alltypestiny.id, functional.alltypestiny.id), RF001 -> functional.alltypestiny.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -122,6 +134,8 @@ and a.tinyint_col = 15
 and b.string_col = '15'
 and a.tinyint_col + b.tinyint_col < 15
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a.id = b.id, a.int_col = b.int_col
 |  other predicates: a.tinyint_col = 15, a.day >= 6, a.tinyint_col + b.tinyint_col < 15
@@ -136,6 +150,8 @@ and a.tinyint_col + b.tinyint_col < 15
    predicates: a.tinyint_col = 15
    runtime filters: RF000 -> a.id, RF001 -> a.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -171,6 +187,8 @@ and a.tinyint_col + b.tinyint_col < 15
 and a.float_col - c.double_col < 0
 and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = a.id, c.string_col = b.string_col
 |  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
@@ -189,6 +207,8 @@ and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 02:SCAN HDFS [functional.alltypesaggnonulls c]
    partitions=2/10 files=2 size=148.10KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
@@ -224,6 +244,8 @@ inner join
 (select 1 as x, id from functional.alltypessmall) b
 on a.x = b.x
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: 1 = 1
 |
@@ -237,6 +259,8 @@ on a.x = b.x
 select a.int_col, b.x from functional.alltypessmall a inner join
 (values(1 as int_col, 'a' as x), (1, 'b'), (2, 'c')) b on a.int_col = b.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -248,6 +272,8 @@ select a.int_col, b.x from functional.alltypessmall a inner join
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> a.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -267,6 +293,8 @@ select a.int_col, b.x from functional.alltypessmall a inner join
 select *
 from functional.alltypesagg join functional_hbase.alltypessmall using (id, int_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypesagg.id = functional_hbase.alltypessmall.id, functional.alltypesagg.int_col = functional_hbase.alltypessmall.int_col
 |  runtime filters: RF000 <- functional_hbase.alltypessmall.id, RF001 <- functional_hbase.alltypessmall.int_col
@@ -277,6 +305,8 @@ from functional.alltypesagg join functional_hbase.alltypessmall using (id, int_c
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> functional.alltypesagg.id, RF001 -> functional.alltypesagg.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -302,6 +332,8 @@ and b.tinyint_col = 5
 and b.tinyint_col > 123
 and a.tinyint_col + b.tinyint_col < 15
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.int_col = b.int_col, a.id = CAST(b.id AS INT)
 |  other predicates: a.tinyint_col + b.tinyint_col < 15
@@ -326,6 +358,8 @@ NODE 0:
 NODE 1:
   HBASE KEYRANGE port=16202 5:5\0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -361,6 +395,8 @@ group by x.tinyint_col
 order by 2
 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=5]
 |  order by: count(x.day) ASC
 |
@@ -401,6 +437,8 @@ limit 5
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> d.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=5]
 |  order by: count(x.day) ASC
 |
@@ -456,6 +494,8 @@ limit 5
 # join without "other join conjuncts"
 select * from functional.alltypessmall a, functional.alltypessmall b where a.id = b.id limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
@@ -468,6 +508,8 @@ select * from functional.alltypessmall a, functional.alltypessmall b where a.id
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> a.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |  limit: 1
 |
@@ -493,6 +535,8 @@ select *
 from functional.testtbl t1, functional.testtbl t2, functional.testtbl t3
 where t1.id = t3.id and t2.id = t3.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t3.id
 |  runtime filters: RF000 <- t3.id
@@ -512,6 +556,8 @@ where t1.id = t3.id and t2.id = t3.id
    partitions=1/1 files=0 size=0B
    runtime filters: RF000 -> t1.id, RF001 -> t1.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -542,6 +588,8 @@ where t1.id = t3.id and t2.id = t3.id
 select * from functional.emptytable a inner join
 functional.alltypes b on a.f2 = b.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: b.int_col = a.f2
 |  runtime filters: RF000 <- a.f2
@@ -553,6 +601,8 @@ functional.alltypes b on a.f2 = b.int_col
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> b.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
@@ -572,6 +622,8 @@ functional.alltypes b on a.f2 = b.int_col
 select *
 from functional.testtbl t1 cross join functional.testtbl
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--01:SCAN HDFS [functional.testtbl]
@@ -580,6 +632,8 @@ from functional.testtbl t1 cross join functional.testtbl
 00:SCAN HDFS [functional.testtbl t1]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
@@ -596,6 +650,8 @@ from functional.testtbl t1 cross join functional.testtbl
 select *
 from functional.testtbl t1 cross join functional.testtbl t2 where t1.id < t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: t1.id < t2.id
 |
@@ -605,6 +661,8 @@ from functional.testtbl t1 cross join functional.testtbl t2 where t1.id < t2.id
 00:SCAN HDFS [functional.testtbl t1]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
@@ -626,6 +684,8 @@ on (a.id = b.id and b.int_col = a.int_col)
 inner join [shuffle] functional.alltypes c
 on (b.id = c.id and c.int_col = b.int_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = c.id, b.int_col = c.int_col
 |  runtime filters: RF000 <- c.id, RF001 <- c.int_col
@@ -645,6 +705,8 @@ on (b.id = c.id and c.int_col = b.int_col)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.id, RF001 -> a.int_col, RF002 -> a.id, RF003 -> a.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -680,6 +742,8 @@ inner join [shuffle]
    from functional.alltypes group by int_col, bool_col) b
 on (a.int_col = b.int_col and b.bool_col = a.bool_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: a.bool_col = bool_col, a.int_col = int_col
 |  runtime filters: RF000 <- bool_col, RF001 <- int_col
@@ -695,6 +759,8 @@ on (a.int_col = b.int_col and b.bool_col = a.bool_col)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -730,6 +796,8 @@ inner join [shuffle]
    from functional.alltypes group by int_col, bool_col) c
 on (b.int_col = c.int_col and c.bool_col = b.bool_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col
 |  runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col
@@ -753,6 +821,8 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:EXCHANGE [UNPARTITIONED]
 |
 05:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -801,6 +871,8 @@ and b.string_col = a.string_col and b.date_string_col = a.string_col
 # redundant predicates to test minimal spanning tree of equivalent slots at a
 where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = a.id, b.string_col = a.string_col
 |  runtime filters: RF000 <- a.id, RF001 <- a.string_col
@@ -826,6 +898,8 @@ and b.string_col = a.string_col and b.date_string_col = a.string_col
 # redundant predicates to test minimal spanning tree of equivalent slots at a
 where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: b.id = a.id, b.int_col = a.id, b.id = a.int_col, b.id = a.bigint_col, b.bigint_col = a.id, b.id = a.smallint_col, b.string_col = a.string_col, b.id = a.tinyint_col, b.date_string_col = a.string_col
 |  runtime filters: RF000 <- a.id, RF001 <- a.id, RF002 <- a.int_col, RF003 <- a.bigint_col, RF004 <- a.id, RF005 <- a.smallint_col, RF006 <- a.string_col, RF007 <- a.tinyint_col, RF008 <- a.string_col
@@ -848,6 +922,8 @@ inner join functional.alltypessmall c
 # redundant join predicates
 where a.id = c.id and a.int_col = c.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: c.id = b.id, c.int_col = b.int_col
 |  runtime filters: RF000 <- b.id, RF001 <- b.int_col
@@ -877,6 +953,8 @@ functional.alltypes b,
 functional.alltypessmall c
 where a.id = c.id and b.int_col = c.int_col and b.int_col = c.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: c.id = a.id
 |  runtime filters: RF000 <- a.id
@@ -907,6 +985,8 @@ select a.* from
    group by 1, 2) b
 on a.int_col = b.int_col and a.int_col = b.smallint_col and a.int_col = b.c
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -946,6 +1026,8 @@ and t3.int_col = t2.bigint_col
 # already been established by 't3.int_col = t2.bigint_col'
 and t3.bigint_col = t2.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.smallint_col = t3.int_col, t2.bigint_col = t3.int_col
 |  runtime filters: RF000 <- t3.int_col, RF001 <- t3.int_col
@@ -966,6 +1048,8 @@ and t3.bigint_col = t2.bigint_col
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> t2.smallint_col, RF001 -> t2.bigint_col, RF002 -> t2.smallint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1006,6 +1090,8 @@ inner join
 on a.id = b.x and a.id = b.tinyint_col and
    a.int_col = b.y and a.int_col = b.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id + id, a.int_col = int_col * int_col
 |  runtime filters: RF000 <- id + id, RF001 <- int_col * int_col
@@ -1031,6 +1117,8 @@ inner join
 on a.id = b.x and a.id = b.tinyint_col and
    a.int_col = b.y and a.int_col = b.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id + id, a.int_col = int_col * int_col
 |  runtime filters: RF000 <- id + id, RF001 <- int_col * int_col
@@ -1061,6 +1149,8 @@ inner join
 on a.id = b.x and a.id = b.tinyint_col and
    a.int_col = b.y and a.int_col = b.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = id + id, a.int_col = int_col * int_col
 |  runtime filters: RF000 <- id + id, RF001 <- int_col * int_col
@@ -1098,6 +1188,8 @@ inner join
 on a.id = b.x and a.id = b.tinyint_col and
    a.int_col = b.y and a.int_col = b.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = tinyint_col, a.id = x, a.int_col = bigint_col, a.int_col = y
 |  runtime filters: RF000 <- tinyint_col, RF001 <- x, RF002 <- bigint_col, RF003 <- y
@@ -1123,6 +1215,8 @@ on t3.smallint_col = t1.tinyint_col
 inner join functional.alltypes t2
 on t2.string_col = t1.string_col and t3.date_string_col = t2.string_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.string_col = t2.string_col
 |  runtime filters: RF000 <- t2.string_col
@@ -1149,6 +1243,8 @@ full outer join
 (select distinct bigint_col from functional.alltypestiny) b
 on (a.int_col = b.bigint_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: int_col = bigint_col
 |
@@ -1164,6 +1260,8 @@ on (a.int_col = b.bigint_col)
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
@@ -1206,6 +1304,8 @@ functional.alltypestiny b
 on a.id = b.id
 where b.id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -1246,6 +1346,8 @@ left outer join
 on a.id = b.id
 where b.id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
@@ -1294,6 +1396,8 @@ full outer join
 on a.id = b.id
 where b.id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 06:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
@@ -1329,6 +1433,8 @@ where b.id < 5
 select straight_join count(*)
 from functional.decimal_tbl a join functional.decimal_tbl b on a.d1 = b.d5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -1348,6 +1454,8 @@ select j.* from functional.jointbl j left semi join functional.dimtbl d
   on (j.test_id = d.id and j.test_zip < d.zip and d.name = 'Name2')
 where j.test_id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: j.test_id = d.id
 |  other join predicates: j.test_zip < d.zip
@@ -1367,6 +1475,8 @@ select b.* from functional.alltypes a right semi join functional.alltypestiny b
   on (a.id = b.id and a.int_col < b.int_col and a.bool_col = false)
 where b.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: a.id = b.id
 |  other join predicates: a.int_col < b.int_col
@@ -1386,6 +1496,8 @@ select j.* from functional.jointbl j left anti join functional.dimtbl d
   on (j.test_id = d.id and j.test_zip < d.zip and d.name = 'Name2')
 where j.test_id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: j.test_id = d.id
 |  other join predicates: j.test_zip < d.zip
@@ -1403,6 +1515,8 @@ select count(*) from functional.JoinTbl j
 left anti join functional.DimTbl d on j.test_id = d.id
 inner join functional.JoinTbl k on j.test_id = k.test_id and j.alltypes_id = 5000
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -1429,6 +1543,8 @@ select straight_join * from functional.alltypestiny a
 inner join [broadcast] functional.alltypes b on a.id = b.id
 inner join [shuffle] functional.alltypessmall c on b.id = c.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1461,6 +1577,8 @@ select /* +straight_join */ * from functional.alltypestiny a
 inner join /* +broadcast */ functional.alltypes b on a.id = b.id
 inner join /* +shuffle */ functional.alltypessmall c on b.id = c.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1499,6 +1617,8 @@ inner join
 -- +shuffle
 functional.alltypessmall c on b.id = c.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1536,6 +1656,8 @@ cross join functional.alltypes c
 # assigned to inverted cross join
 where c.id != b.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: c.id != b.id
 |
@@ -1570,6 +1692,8 @@ on (v1.tinyint_col = v2.tinyint_col and
     v1.tinyint_col = v2.int_col and
     v1.tinyint_col = v2.bigint_col)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1620,6 +1744,8 @@ on (v1.tinyint_col = v2.tinyint_col and
     v1.tinyint_col = v2.int_col and
     v1.tinyint_col = v2.bigint_col)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
@@ -1666,6 +1792,8 @@ inner join
 on (v1.string_col = v2.string_col and
     v1.bigint_col = v2.bigint_col)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1706,6 +1834,8 @@ left semi join functional.alltypes b
 # predicates are in reverse order of compatible group by exprs
 on (a.string_col = b.string_col and a.int_col = b.int_col)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]
@@ -1738,6 +1868,8 @@ right semi join
 # predicates are in reverse order of compatible group by exprs
 on (a.string_col = b.string_col and a.int_col = b.int_col)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
@@ -1777,6 +1909,8 @@ inner join
     where bigint_col = smallint_col and smallint_col = tinyint_col) c
 on (b.int_col = c.smallint_col and b.string_col = c.string_col)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 16:EXCHANGE [UNPARTITIONED]
 |
 07:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1842,6 +1976,8 @@ where not exists (select *
 # Predicate on c (invisible side of anti join) is assigned to the scan node
                   and c.int_col < 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: a.id = c.id
 |  other join predicates: a.tinyint_col = 10, a.int_col = b.int_col
@@ -1875,6 +2011,8 @@ inner join functional.alltypesagg d
   on (a.tinyint_col = d.tinyint_col and a.int_col < 10)
 where a.float_col < b.float_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: d.tinyint_col = a.tinyint_col
 |  runtime filters: RF000 <- a.tinyint_col
@@ -1914,6 +2052,8 @@ inner join functional.alltypesagg d
   on b.tinyint_col > d.int_col or b.id != d.id
 where a.int_col = 10 and c.bigint_col = d.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: a.id < b.id, b.tinyint_col > d.int_col OR b.id != d.id
 |
@@ -1938,6 +2078,8 @@ where a.int_col = 10 and c.bigint_col = d.bigint_col
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> d.bigint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:EXCHANGE [UNPARTITIONED]
 |
 06:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
@@ -1985,6 +2127,8 @@ inner join
 on vv1.bigint_col > vv2.bigint_col
 where vv1.tinyint_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: a.bigint_col > c.bigint_col
 |
@@ -2021,6 +2165,8 @@ left anti join functional.alltypesagg e
   on c.string_col != e.string_col
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:NESTED LOOP JOIN [RIGHT ANTI JOIN]
 |  join predicates: c.string_col != e.string_col
 |
@@ -2055,6 +2201,8 @@ left join (select coalesce(1, 10) as int_col
 from functional.alltypessmall) t2 on t1.id = t2.int_col
 where t2.int_col in (t2.int_col, 10);
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(id)
 |
@@ -2074,6 +2222,8 @@ select *
 from functional.testtbl t1 join functional.testtbl t2
 where t1.id <=> t2.id and t1.zip = 94611
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id IS NOT DISTINCT FROM t2.id
 |  runtime filters: RF000 <- t2.id
@@ -2090,6 +2240,8 @@ select *
 from functional.testtbl t1 join functional.testtbl t2
 where t1.id is not distinct from t2.id and t1.zip = 94611
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id IS NOT DISTINCT FROM t2.id
 |  runtime filters: RF000 <- t2.id
@@ -2106,6 +2258,8 @@ select *
 from functional.testtbl t1 join functional.testtbl t2
 where (t1.id IS DISTINCT FROM t2.id) and t1.zip = 94611
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: (t1.id IS DISTINCT FROM t2.id)
 |
@@ -2122,6 +2276,8 @@ from (select * from functional.alltypestiny) t1
   join (select * from functional.alltypestiny) t2 on (t1.id is not distinct from t2.id)
   join functional.alltypestiny t3 on (coalesce(t1.id, t2.id) is not distinct from t3.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: coalesce(functional.alltypestiny.id, functional.alltypestiny.id) IS NOT DISTINCT FROM t3.id
 |
@@ -2152,6 +2308,8 @@ and a.tinyint_col + b.tinyint_col < 15
 and a.float_col - c.double_col < 0
 and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = a.id, c.string_col IS NOT DISTINCT FROM b.string_col
 |  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
@@ -2173,6 +2331,8 @@ and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 select t1.d, t2.d from functional.nulltable t1, functional.nulltable t2
 where not(t1.d IS DISTINCT FROM t2.d)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: NOT (t1.d IS DISTINCT FROM t2.d)
 |
@@ -2187,6 +2347,8 @@ from functional.nulltable t1, functional.nulltable t2, functional.nulltable t3
 where t1.d IS DISTINCT FROM t2.d
 and t3.a != t2.g
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: t3.a != t2.g
 |
@@ -2207,6 +2369,8 @@ and t3.a != t2.g
 # have explain_level=1
 select a.c_custkey as c_custkey from tpch.customer a, tpch.customer b limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [CROSS JOIN]
 |  limit: 1
 |
@@ -2219,6 +2383,8 @@ select a.c_custkey as c_custkey from tpch.customer a, tpch.customer b limit 1
 select a.c_custkey as c_custkey from tpch.customer a left semi join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  runtime filters: RF000 <- b.c_custkey
@@ -2234,6 +2400,8 @@ using (c_custkey) limit 1
 select b.c_custkey as c_custkey from tpch.customer a right semi join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  runtime filters: RF000 <- b.c_custkey
@@ -2249,6 +2417,8 @@ using (c_custkey) limit 1
 select a.c_custkey as c_custkey from tpch.customer a left outer join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  limit: 1
@@ -2262,6 +2432,8 @@ using (c_custkey) limit 1
 select b.c_custkey as c_custkey from tpch.customer a right outer join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  runtime filters: RF000 <- b.c_custkey
@@ -2277,6 +2449,8 @@ using (c_custkey) limit 1
 select a.c_custkey as c_custkey from tpch.customer a full outer join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  limit: 1
@@ -2290,6 +2464,8 @@ using (c_custkey) limit 1
 select a.c_custkey as c_custkey from tpch.customer a left anti join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  limit: 1
@@ -2303,6 +2479,8 @@ using (c_custkey) limit 1
 select b.c_custkey as c_custkey from tpch.customer a right anti join tpch.customer b
 using (c_custkey) limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT ANTI JOIN]
 |  hash predicates: a.c_custkey = b.c_custkey
 |  limit: 1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index 73acf7f..a187ed8 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -1,12 +1,16 @@
 select * from functional_kudu.zipcode_incomes where id = '8600000US00601'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
      kudu predicates: id = '8600000US00601'
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=1
 ---- DISTRIBUTEDPLAN
 F01:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   01:EXCHANGE [UNPARTITIONED]
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=1
@@ -22,6 +26,8 @@ F00:PLAN FRAGMENT [RANDOM]
 select * from functional_kudu.zipcode_incomes where id != '1' and zip = '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
      predicates: id != '1'
      kudu predicates: zip = '2'
@@ -29,6 +35,8 @@ F00:PLAN FRAGMENT [UNPARTITIONED]
      tuple-ids=0 row-size=124B cardinality=1
 ---- DISTRIBUTEDPLAN
 F01:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   01:EXCHANGE [UNPARTITIONED]
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=1
@@ -44,12 +52,16 @@ F00:PLAN FRAGMENT [RANDOM]
 select * from functional_kudu.zipcode_incomes where id > '1' and zip > '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
      kudu predicates: zip > '2', id > '1'
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=3317
 ---- DISTRIBUTEDPLAN
 F01:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   01:EXCHANGE [UNPARTITIONED]
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=3317
@@ -64,12 +76,16 @@ F00:PLAN FRAGMENT [RANDOM]
 select * from functional_kudu.zipcode_incomes where id = '1' or id = '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
      predicates: id = '1' OR id = '2'
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=2
 ---- DISTRIBUTEDPLAN
 F01:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
   01:EXCHANGE [UNPARTITIONED]
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=124B cardinality=2

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 565f3a3..06ce157 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -1,5 +1,7 @@
 select * from functional_kudu.testtbl
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
 ---- SCANRANGELOCATIONS
 NODE 0:
@@ -7,12 +9,16 @@ NODE 0:
   ScanToken{table=testtbl, range-partition: [(int64 id=1007), <end>)}
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
 ====
 select * from functional_kudu.testtbl where name = '10'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
    kudu predicates: name = '10'
 ---- SCANRANGELOCATIONS
@@ -21,6 +27,8 @@ NODE 0:
   ScanToken{table=testtbl, range-partition: [(int64 id=1007), <end>)}
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
@@ -28,6 +36,8 @@ NODE 0:
 ====
 select * from functional_kudu.testtbl where name = NULL
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
    predicates: name = NULL
 ====
@@ -95,12 +105,16 @@ select * from functional_kudu.testtbl
 where id >= 10 and zip <= 5 and 20 >= id and 'foo' = name and zip >= 0 and 30 >= zip
 and zip > 1 and zip < 50
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
    kudu predicates: id <= 20, zip <= 30, id >= 10, zip < 50, zip <= 5, zip > 1, zip >= 0, name = 'foo'
 ---- SCANRANGELOCATIONS
 NODE 0:
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
@@ -110,6 +124,8 @@ NODE 0:
 select * from functional_kudu.testtbl
 where id < 10 + 30  and cast(sin(id) as boolean) = true and 20 * 3 >= id and 10 * 10 + 3 > id
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
    predicates: CAST(sin(id) AS BOOLEAN) = TRUE
    kudu predicates: id <= 60, id < 40, id < 103
@@ -117,6 +133,8 @@ where id < 10 + 30  and cast(sin(id) as boolean) = true and 20 * 3 >= id and 10
 NODE 0:
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
@@ -127,6 +145,8 @@ NODE 0:
 select * from functional_kudu.testtbl
 where cast(sin(id) as boolean) = true and name = 'a'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
    predicates: CAST(sin(id) AS BOOLEAN) = TRUE
    kudu predicates: name = 'a'
@@ -136,6 +156,8 @@ NODE 0:
   ScanToken{table=testtbl, range-partition: [(int64 id=1007), <end>)}
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
@@ -147,6 +169,8 @@ NODE 0:
 select * from functional_kudu.testtbl
 where cast(sin(id) as boolean) = true and name is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN KUDU [functional_kudu.testtbl]
    predicates: name IS NULL, CAST(sin(id) AS BOOLEAN) = TRUE
 ---- SCANRANGELOCATIONS
@@ -155,6 +179,8 @@ NODE 0:
   ScanToken{table=testtbl, range-partition: [(int64 id=1007), <end>)}
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
@@ -163,6 +189,8 @@ NODE 0:
 # IMPALA-3856: KuduScanNode crash when pushing predicates including a cast
 select o_orderkey from tpch_kudu.orders where o_orderkey < 10.0 order by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: o_orderkey ASC
 |
@@ -174,6 +202,8 @@ select t.c from
   (select cast(o_orderdate as timestamp) c from tpch_kudu.orders where o_orderkey < 10) t
 where t.c <= cast('1995-01-01 00:00:00' as timestamp) order by c
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SORT
 |  order by: c ASC
 |
@@ -186,6 +216,8 @@ select count(*) from functional_kudu.alltypes
 where id < 1475059765 + 10
 and 1475059765 + 100 < id
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/mem-limit-broadcast-join.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mem-limit-broadcast-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/mem-limit-broadcast-join.test
index 2e73767..689e78a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/mem-limit-broadcast-join.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mem-limit-broadcast-join.test
@@ -2,6 +2,8 @@ select * from tpch.nation n1
 join[broadcast]
 tpch.nation n2 on n1.n_regionkey=n2.n_regionkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
index 6270c11..da2e17f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
@@ -2,6 +2,8 @@
 select * from functional.allcomplextypes.int_array_col
 where item > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.allcomplextypes.int_array_col]
    partitions=0/0 files=0 size=0B
    predicates: item > 10
@@ -10,6 +12,8 @@ where item > 10
 select * from functional.allcomplextypes.int_map_col
 where key = 'test' and value < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.allcomplextypes.int_map_col]
    partitions=0/0 files=0 size=0B
    predicates: value < 10, key = 'test'
@@ -18,6 +22,8 @@ where key = 'test' and value < 10
 select count(f21) from functional.allcomplextypes.complex_nested_struct_col.f2.f12
 where key = 'test'
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(f21)
 |
@@ -32,6 +38,8 @@ inner join functional.allcomplextypes.struct_map_col b
 on (a.f1 = b.f1)
 where a.f2 = 'test_a' and b.f2 = 'test_b'
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -58,6 +66,8 @@ select 1 from
 where c_nationkey = n_nationkey and s_nationkey = n_nationkey
   and c_comment = s_comment and n_comment = s_comment
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: c_nationkey = n_nationkey, s_comment = n_comment
 |  runtime filters: RF000 <- n_nationkey, RF001 <- n_comment
@@ -90,6 +100,8 @@ where c_nationkey = n_nationkey and s_nationkey = n_nationkey
 select a.id, b.item from functional.allcomplextypes a cross join a.int_array_col b
 where a.id < 10 and b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [CROSS JOIN]
@@ -107,6 +119,8 @@ where a.id < 10 and b.item % 2 = 0
 select a.id from functional.allcomplextypes a left semi join a.int_array_col b
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -123,6 +137,8 @@ where a.id < 10
 select b.item from functional.allcomplextypes a right semi join a.int_array_col b
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT SEMI JOIN]
@@ -140,6 +156,8 @@ where b.item % 2 = 0
 select a.id from functional.allcomplextypes a left anti join a.int_array_col b
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
@@ -157,6 +175,8 @@ select a.id from functional.allcomplextypes a
 left anti join (select * from a.int_array_col) v
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
@@ -173,6 +193,8 @@ where a.id < 10
 select b.item from functional.allcomplextypes a right anti join a.int_array_col b
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT ANTI JOIN]
@@ -189,6 +211,8 @@ where b.item % 2 = 0
 select a.id, b.item from functional.allcomplextypes a left outer join a.int_array_col b
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
@@ -206,6 +230,8 @@ select a.id, v.item from functional.allcomplextypes a
 left outer join (select * from a.int_array_col) v
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
@@ -222,6 +248,8 @@ where a.id < 10
 select a.id, b.item from functional.allcomplextypes a right outer join a.int_array_col b
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT OUTER JOIN]
@@ -238,6 +266,8 @@ where b.item % 2 = 0
 select a.id, b.item from functional.allcomplextypes a full outer join a.int_array_col b
 where b.item % 2 = 0 and a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [FULL OUTER JOIN]
@@ -256,6 +286,8 @@ where b.item % 2 = 0 and a.id < 10
 select a.id, b.item from functional.allcomplextypes a, a.int_array_col b
 where a.id < 10 and b.item % 2 = 0 and a.id < b.item
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [INNER JOIN]
@@ -275,6 +307,8 @@ select a.id from functional.allcomplextypes a
   left semi join a.int_array_col b on (a.id < b.item and b.item % 2 = 0)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -294,6 +328,8 @@ select b.item from functional.allcomplextypes a
   right semi join a.int_array_col b on (a.id < b.item and a.id < 10)
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT SEMI JOIN]
@@ -313,6 +349,8 @@ select a.id from functional.allcomplextypes a
   left anti join a.int_array_col b on (a.id < b.item and b.item % 2 = 0)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
@@ -332,6 +370,8 @@ select b.item from functional.allcomplextypes a
   right anti join a.int_array_col b on (a.id < b.item and a.id < 10)
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT ANTI JOIN]
@@ -351,6 +391,8 @@ select a.id, b.item from functional.allcomplextypes a
   left outer join a.int_array_col b on (a.id < b.item and b.item % 2 = 0)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
@@ -370,6 +412,8 @@ select a.id, b.item from functional.allcomplextypes a
   right outer join a.int_array_col b on (a.id < b.item and a.id < 10)
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT OUTER JOIN]
@@ -389,6 +433,8 @@ select a.id, b.item from functional.allcomplextypes a
   full outer join a.int_array_col b on (a.id < b.item and a.id < 10)
 where b.item % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [FULL OUTER JOIN]
@@ -408,6 +454,8 @@ select a.id, b.f1, b.f2 from functional.allcomplextypes a
   inner join a.struct_array_col b
 where a.id < 10 and b.f1 % 2 = 0 and b.f1 = a.id and b.f1 < a.year
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [INNER JOIN]
@@ -429,6 +477,8 @@ select a.id from functional.allcomplextypes a
     on (b.f1 % 2 = 0 and b.f1 = a.id and b.f1 < a.year)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -449,6 +499,8 @@ select b.f1, b.f2 from functional.allcomplextypes a
     on (a.id < 10 and b.f1 = a.id and b.f1 < a.year)
 where b.f1 % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT SEMI JOIN]
@@ -469,6 +521,8 @@ select a.id from functional.allcomplextypes a
     on (b.f1 % 2 = 0 and b.f1 = a.id and b.f1 < a.year)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
@@ -489,6 +543,8 @@ select b.f1, b.f2 from functional.allcomplextypes a
     on (a.id < 10 and b.f1 = a.id and b.f1 < a.year)
 where b.f1 % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT ANTI JOIN]
@@ -509,6 +565,8 @@ select a.id from functional.allcomplextypes a
     on (b.f1 % 2 = 0 and b.f1 = a.id and b.f1 < a.year)
 where a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
@@ -529,6 +587,8 @@ select b.f1, b.f2 from functional.allcomplextypes a
     on (a.id < 10 and b.f1 = a.id and b.f1 < a.year)
 where b.f1 % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [LEFT OUTER JOIN]
@@ -549,6 +609,8 @@ select b.f1, b.f2 from functional.allcomplextypes a
     on (b.f1 = a.id and b.f1 < a.year)
 where a.id < 10 and b.f1 % 2 = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [FULL OUTER JOIN]
@@ -572,6 +634,8 @@ select 1 from functional.allcomplextypes a
   left outer join a.struct_array_col d on (a.month < 4 or d.f1 > 5)
   inner join a.struct_map_col e on (e.f1 = a.id and e.f2 = 'test')
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--10:HASH JOIN [INNER JOIN]
@@ -612,6 +676,8 @@ inner join functional.alltypes d on (b.id = d.id)
 inner join a.struct_array_col e
 where e.f1 < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = b.id
 |  runtime filters: RF000 <- b.id
@@ -656,6 +722,8 @@ inner join a.struct_array_col e
 right anti join functional.alltypessmall c on (b.int_col = c.int_col and e.f1 < 10)
 inner join functional.alltypes d on (b.id = d.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = b.id
 |
@@ -697,6 +765,8 @@ inner join a.int_map_col d
 left semi join functional.alltypes e on (d.value = e.id)
 where b.item < 10 and c.int_col > 30
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:HASH JOIN [RIGHT SEMI JOIN]
 |  hash predicates: e.id = d.value
 |  runtime filters: RF000 <- d.value
@@ -745,6 +815,8 @@ inner join a.int_map_col d
 right anti join functional.alltypestiny e on (d.value = e.id)
 where b.item < 10 and c.int_col > 30
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:HASH JOIN [RIGHT ANTI JOIN]
 |  hash predicates: d.value = e.id
 |
@@ -785,6 +857,8 @@ from functional.allcomplextypes a,
   (select count(*) cnt from a.int_array_col) v
 where v.cnt < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--05:NESTED LOOP JOIN [CROSS JOIN]
@@ -806,6 +880,8 @@ from functional.allcomplextypes a,
   (select f1, count(*) cnt from a.struct_array_col group by f1) v
 where v.cnt < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--05:NESTED LOOP JOIN [CROSS JOIN]
@@ -828,6 +904,8 @@ from functional.allcomplextypes a,
   (select * from a.struct_array_col order by f1 limit 10) v
 where v.f2 = 'test'
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--06:NESTED LOOP JOIN [CROSS JOIN]
@@ -851,6 +929,8 @@ from functional.allcomplextypes a,
   (select key, row_number() over (order by key) rnum from a.int_map_col) v
 where v.key != 'bad'
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--07:NESTED LOOP JOIN [CROSS JOIN]
@@ -877,6 +957,8 @@ where v.key != 'bad'
 select a.id from functional.allcomplextypes a
 where id < (select avg(item) from a.int_array_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--05:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -896,6 +978,8 @@ where id < (select avg(item) from a.int_array_col)
 select a.id from functional.allcomplextypes a
 where exists (select item from a.int_array_col where item > 100)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -912,6 +996,8 @@ where exists (select item from a.int_array_col where item > 100)
 select a.id from functional.allcomplextypes a
 where not exists (select item from a.int_array_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
@@ -928,6 +1014,8 @@ where not exists (select item from a.int_array_col)
 select a.id from functional.allcomplextypes a
 where exists (select m.key from a.struct_map_col m where a.id < m.f1)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -945,6 +1033,8 @@ where exists (select m.key from a.struct_map_col m where a.id < m.f1)
 select a.id from functional.allcomplextypes a
 where not exists (select c.f2 from a.struct_array_col c where a.id < c.f1)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT ANTI JOIN]
@@ -964,6 +1054,8 @@ select a.id
 from functional.allcomplextypes a
 where id in (select b.item from a.int_array_col b where a.year < b.item)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
@@ -984,6 +1076,8 @@ select a.id
 from functional.allcomplextypes a
 where id not in (select b.item from a.int_array_col b)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
@@ -1004,6 +1098,8 @@ select a.id
 from functional.allcomplextypes a
 where id not in (select b.item from a.int_array_col b where a.year < b.item)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
@@ -1028,6 +1124,8 @@ cross join
    from a.struct_array_col x inner join b.struct_map_col y
    on x.f1 = y.f1) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 17:SUBPLAN
 |
 |--15:NESTED LOOP JOIN [CROSS JOIN]
@@ -1085,6 +1183,8 @@ cross join
    on x.f1 = y.f1) v
 where b.id = d.value
 ---- PLAN
+PLAN-ROOT SINK
+|
 17:SUBPLAN
 |
 |--15:NESTED LOOP JOIN [CROSS JOIN]
@@ -1145,6 +1245,8 @@ cross join
    from a.struct_array_col x inner join b.struct_map_col y
    on x.f1 = y.f1) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 15:SUBPLAN
 |
 |--13:NESTED LOOP JOIN [CROSS JOIN]
@@ -1192,6 +1294,8 @@ from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
 where c_custkey < 10 and o_orderkey < 5 and l_linenumber < 3
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |  limit: 10
 |
@@ -1222,6 +1326,8 @@ cross join
   (select m1.key from a.map_map_col m1,
    (select m2.key from m1.value m2) v1) v2
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
@@ -1249,6 +1355,8 @@ inner join functional.allcomplextypes b on (a.id = b.id)
 cross join (select count(*) c from a.int_map_col) v1
 cross join (select avg(item) a from b.int_array_col) v2
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:SUBPLAN
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
@@ -1288,6 +1396,8 @@ where c.c_custkey = o.o_orderkey and c.c_custkey = o.o_shippriority
 # redundant predicates
   and o.o_orderkey = l.l_partkey and o.o_shippriority = l.l_suppkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--08:NESTED LOOP JOIN [INNER JOIN]
@@ -1327,6 +1437,8 @@ where a.item between 10 and 20
   and v2.key = 'test2'
   and v2.x = 'test3'
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--12:NESTED LOOP JOIN [INNER JOIN]
@@ -1376,6 +1488,8 @@ where s.s_suppkey not in
  inner join s.s_partsupps ps3
    on ps2.ps_comment = ps3.ps_comment)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--08:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
@@ -1404,6 +1518,8 @@ left outer join functional.allcomplextypes t2 ON (t1.id = t2.id)
 # The subplan for this table ref must come after the outer join of t1 and t2.
 inner join t2.int_array_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [CROSS JOIN]
@@ -1426,6 +1542,8 @@ select a from functional.allcomplextypes t,
   (select count(*) over(partition by key) a
    from t.int_map_col group by key) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--07:NESTED LOOP JOIN [CROSS JOIN]
@@ -1454,6 +1572,8 @@ left outer join functional.allcomplextypes t2 on (t1.id = t2.id)
 inner join t1.map_map_col m1
 inner join m1.value m2
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:SUBPLAN
 |
 |--08:SUBPLAN
@@ -1494,6 +1614,8 @@ where c.c_custkey in
  left outer join c.c_orders o3 on o3.pos = o2.pos
  where c.c_custkey = o2.pos)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--12:HASH JOIN [RIGHT SEMI JOIN]
@@ -1538,6 +1660,8 @@ where c.c_custkey in
  inner join c.c_orders o3 on o3.pos = o2.pos
  where c.c_custkey = o2.pos)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--12:HASH JOIN [RIGHT SEMI JOIN]
@@ -1580,6 +1704,8 @@ where c.c_custkey in
  left outer join o2.o_lineitems l
  where c.c_custkey = o2.pos)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--12:HASH JOIN [RIGHT SEMI JOIN]
@@ -1616,6 +1742,8 @@ where c.c_custkey in
 select straight_join id from functional.allcomplextypes t1 left outer join t1.int_array_col t2
 where t1.id = t2.pos and t1.int_struct_col.f1 = 10 and t2.item = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:HASH JOIN [LEFT OUTER JOIN]
@@ -1637,6 +1765,8 @@ where t1.id = t2.pos and t1.int_struct_col.f1 = 10 and t2.item = 1
 select straight_join id from functional.allcomplextypes t1 right outer join t1.int_array_col t2
 where t1.id = t2.pos and t1.int_struct_col.f1 = 10 and t2.item = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:HASH JOIN [RIGHT OUTER JOIN]
@@ -1658,6 +1788,8 @@ where t1.id = t2.pos and t1.int_struct_col.f1 = 10 and t2.item = 1
 select id from functional.allcomplextypes t1 full outer join t1.int_array_col t2
 where t1.id = t2.pos and t1.int_struct_col.f1 = 10 and t2.item = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [FULL OUTER JOIN]
@@ -1679,6 +1811,8 @@ select id from functional.allcomplextypes t1 left outer join
   (select pos, item from t1.int_array_col t2) v
  where t1.id > v.pos and v.item = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
@@ -1698,6 +1832,8 @@ left outer join c.c_orders o
 # Has an ordering dependency on c and o
 inner join o.o_lineitems
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--08:SUBPLAN
@@ -1724,6 +1860,8 @@ inner join c1.c_orders
 right outer join tpch_nested_parquet.customer c2
   on c1.c_custkey = c2.c_custkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: c1.c_custkey = c2.c_custkey
 |  runtime filters: RF000 <- c2.c_custkey
@@ -1751,6 +1889,8 @@ full outer join tpch_nested_parquet.customer c2
 inner join c1.c_orders o1
 left semi join c2.c_orders o2
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:SUBPLAN
 |
 |--06:NESTED LOOP JOIN [LEFT SEMI JOIN]
@@ -1778,6 +1918,8 @@ select * from tpch_nested_parquet.customer c
 left outer join c.c_orders o
 where o.o_orderkey is null and o.o_orderstatus <=> o_orderpriority
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
@@ -1799,6 +1941,8 @@ left join t2.c_orders t4
 inner join tpch_nested_parquet.region t5 on t5.r_regionkey = t2.c_custkey
 left join t4.item.o_lineitems t6 on t6.item.l_returnflag = t4.item.o_orderpriority
 ---- PLAN
+PLAN-ROOT SINK
+|
 14:SUBPLAN
 |
 |--12:SUBPLAN

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
index c7f9830..fecec7f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
@@ -6,6 +6,8 @@ right outer join functional.alltypesagg c
   on a.smallint_col >= c.smallint_col
 where a.id < 10 and c.bigint_col = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
 |  join predicates: a.smallint_col >= c.smallint_col
 |  predicates: a.id < 10
@@ -34,6 +36,8 @@ right semi join functional.alltypesagg d
   on c.tinyint_col < d.bigint_col
 where d.bigint_col < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
 |  join predicates: c.tinyint_col < d.bigint_col
 |
@@ -62,6 +66,8 @@ full outer join functional.alltypes d
   on c.int_col > d.int_col
 where a.bigint_col != c.bigint_col and a.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:NESTED LOOP JOIN [FULL OUTER JOIN]
 |  join predicates: c.int_col > d.int_col
 |  predicates: a.bigint_col != c.bigint_col, a.id < 10
@@ -93,6 +99,8 @@ from functional.alltypestiny a right anti join functional.alltypessmall b
   on a.id < b.id
 where b.int_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -116,6 +124,8 @@ right semi join functional.alltypes d on c.tinyint_col < d.tinyint_col
 right anti join functional.alltypesnopart e on d.tinyint_col > e.tinyint_col
 where e.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -158,6 +168,8 @@ right semi join functional.alltypes d on c.tinyint_col < d.tinyint_col
 right anti join functional.alltypesnopart e on d.tinyint_col > e.tinyint_col
 where e.id < 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 15:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |


[22/32] incubator-impala git commit: IMPALA-4270: Gracefully fail unsupported queries with mt_dop > 0.

Posted by ta...@apache.org.
IMPALA-4270: Gracefully fail unsupported queries with mt_dop > 0.

MT_DOP > 0 is only supported for plans without distributed joins
or table sinks. Adds validation to fail unsupported queries
gracefully in planning.

For scans in queries that are executable with MT_DOP > 0 we either
use the optimized MT scan node BE implementation (only Parquet), or
we use the conventional scan node with num_scanner_threads=1.

TODO: Still need to add end-to-end tests.

Change-Id: I91a60ea7b6e3ae4ee44be856615ddd3cd0af476d
Reviewed-on: http://gerrit.cloudera.org:8080/4677
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/04802535
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/04802535
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/04802535

Branch: refs/heads/hadoop-next
Commit: 04802535661979c50e5d06ef04e62eee677b901e
Parents: b0e87c6
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Oct 10 11:03:43 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Oct 17 09:22:57 2016 +0000

----------------------------------------------------------------------
 be/src/exec/exec-node.cc                        |   5 +-
 common/thrift/PlanNodes.thrift                  |   5 +
 .../org/apache/impala/analysis/Analyzer.java    |  10 +-
 .../org/apache/impala/planner/HdfsScanNode.java |  28 +-
 .../java/org/apache/impala/planner/Planner.java |  13 +-
 .../apache/impala/planner/PlannerContext.java   |  10 +-
 .../impala/planner/SingleNodePlanner.java       |  24 +-
 .../org/apache/impala/planner/PlannerTest.java  |  21 +-
 .../apache/impala/planner/PlannerTestBase.java  |  18 +-
 .../queries/PlannerTest/mt-dop-validation.test  | 350 +++++++++++++++++++
 10 files changed, 450 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/be/src/exec/exec-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc
index 837fc09..df491dd 100644
--- a/be/src/exec/exec-node.cc
+++ b/be/src/exec/exec-node.cc
@@ -264,9 +264,12 @@ Status ExecNode::CreateNode(ObjectPool* pool, const TPlanNode& tnode,
   switch (tnode.node_type) {
     case TPlanNodeType::HDFS_SCAN_NODE:
       *node = pool->Add(new HdfsScanNode(pool, tnode, descs));
-      if (state->query_options().mt_dop > 0) {
+      if (tnode.hdfs_scan_node.use_mt_scan_node) {
+        DCHECK_GT(state->query_options().mt_dop, 0);
         *node = pool->Add(new HdfsScanNodeMt(pool, tnode, descs));
       } else {
+        DCHECK(state->query_options().mt_dop == 0
+            || state->query_options().num_scanner_threads == 1);
         *node = pool->Add(new HdfsScanNode(pool, tnode, descs));
       }
       // If true, this node requests codegen over interpretation for conjuncts

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/common/thrift/PlanNodes.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift
index 4cf1357..49fcfbb 100644
--- a/common/thrift/PlanNodes.thrift
+++ b/common/thrift/PlanNodes.thrift
@@ -202,6 +202,11 @@ struct THdfsScanNode {
   // Number of header lines to skip at the beginning of each file of this table. Only set
   // for hdfs text files.
   6: optional i32 skip_header_line_count
+
+  // Indicates whether the MT scan node implementation should be used.
+  // If this is true then the MT_DOP query option must be > 0.
+  // TODO: Remove this option when the MT scan node supports all file formats.
+  7: optional bool use_mt_scan_node
 }
 
 struct TDataSourceScanNode {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 3edddf2..f9909b1 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -30,9 +30,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.Path.PathType;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.authorization.Privilege;
@@ -66,10 +63,14 @@ import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.TLineageGraph;
 import org.apache.impala.thrift.TNetworkAddress;
 import org.apache.impala.thrift.TQueryCtx;
+import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.DisjointSet;
 import org.apache.impala.util.EventSequence;
 import org.apache.impala.util.ListMap;
 import org.apache.impala.util.TSessionStateUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
@@ -2246,6 +2247,9 @@ public class Analyzer {
   public String getDefaultDb() { return globalState_.queryCtx.session.database; }
   public User getUser() { return user_; }
   public TQueryCtx getQueryCtx() { return globalState_.queryCtx; }
+  public TQueryOptions getQueryOptions() {
+    return globalState_.queryCtx.getRequest().getQuery_options();
+  }
   public AuthorizationConfig getAuthzConfig() { return globalState_.authzConfig; }
   public ListMap<TNetworkAddress> getHostIndex() { return globalState_.hostIndex; }
   public ColumnLineageGraph getColumnLineageGraph() { return globalState_.lineageGraph; }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 4052867..3d52aa4 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -21,9 +21,7 @@ import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.Set;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.Expr;
@@ -55,6 +53,9 @@ import org.apache.impala.thrift.TScanRange;
 import org.apache.impala.thrift.TScanRangeLocation;
 import org.apache.impala.thrift.TScanRangeLocations;
 import org.apache.impala.util.MembershipSnapshot;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.base.Objects.ToStringHelper;
@@ -107,6 +108,9 @@ public class HdfsScanNode extends ScanNode {
   // True if this scan node should use codegen for evaluting conjuncts.
   private boolean codegenConjuncts_;
 
+  // True if this scan node should use the MT implementation in the backend.
+  private boolean useMtScanNode_;
+
   // Conjuncts that can be evaluated while materializing the items (tuples) of
   // collection-typed slots. Maps from tuple descriptor to the conjuncts bound by that
   // tuple. Uses a linked hash map for consistent display in explain.
@@ -168,7 +172,16 @@ public class HdfsScanNode extends ScanNode {
     computeMemLayout(analyzer);
 
     // compute scan range locations
-    computeScanRangeLocations(analyzer);
+    Set<HdfsFileFormat> fileFormats = computeScanRangeLocations(analyzer);
+
+    // Determine backend scan node implementation to use. The optimized MT implementation
+    // is currently only supported for Parquet.
+    if (analyzer.getQueryOptions().mt_dop > 0 &&
+        fileFormats.size() == 1 && fileFormats.contains(HdfsFileFormat.PARQUET)) {
+      useMtScanNode_ = true;
+    } else {
+      useMtScanNode_ = false;
+    }
 
     // do this at the end so it can take all conjuncts and scan ranges into account
     computeStats(analyzer);
@@ -298,12 +311,15 @@ public class HdfsScanNode extends ScanNode {
   /**
    * Computes scan ranges (hdfs splits) plus their storage locations, including volume
    * ids, based on the given maximum number of bytes each scan range should scan.
+   * Returns the set of file formats being scanned.
    */
-  private void computeScanRangeLocations(Analyzer analyzer) {
+  private Set<HdfsFileFormat> computeScanRangeLocations(Analyzer analyzer) {
     long maxScanRangeLength = analyzer.getQueryCtx().getRequest().getQuery_options()
         .getMax_scan_range_length();
     scanRanges_ = Lists.newArrayList();
+    Set<HdfsFileFormat> fileFormats = Sets.newHashSet();
     for (HdfsPartition partition: partitions_) {
+      fileFormats.add(partition.getFileFormat());
       Preconditions.checkState(partition.getId() >= 0);
       for (HdfsPartition.FileDescriptor fileDesc: partition.getFileDescriptors()) {
         for (THdfsFileBlock thriftBlock: fileDesc.getFileBlocks()) {
@@ -353,6 +369,7 @@ public class HdfsScanNode extends ScanNode {
         }
       }
     }
+    return fileFormats;
   }
 
   /**
@@ -542,6 +559,7 @@ public class HdfsScanNode extends ScanNode {
     if (skipHeaderLineCount_ > 0) {
       msg.hdfs_scan_node.setSkip_header_line_count(skipHeaderLineCount_);
     }
+    msg.hdfs_scan_node.setUse_mt_scan_node(useMtScanNode_);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/Planner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index ed4c677..8abb901 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -21,9 +21,6 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.ColumnLineageGraph;
@@ -43,6 +40,9 @@ import org.apache.impala.thrift.TQueryExecRequest;
 import org.apache.impala.thrift.TRuntimeFilterMode;
 import org.apache.impala.thrift.TTableName;
 import org.apache.impala.util.MaxRowsProcessedVisitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -117,12 +117,13 @@ public class Planner {
           "Runtime filters computed");
     }
 
+    singleNodePlanner.validatePlan(singleNodePlan);
+
     if (ctx_.isSingleNodeExec()) {
       // create one fragment containing the entire single-node plan tree
       fragments = Lists.newArrayList(new PlanFragment(
           ctx_.getNextFragmentId(), singleNodePlan, DataPartition.UNPARTITIONED));
     } else {
-      singleNodePlanner.validatePlan(singleNodePlan);
       // create distributed plan
       fragments = distributedPlanner.createPlanFragments(singleNodePlan);
     }
@@ -200,10 +201,14 @@ public class Planner {
    * TODO: roll into createPlan()
    */
   public List<PlanFragment> createParallelPlans() throws ImpalaException {
+    Preconditions.checkState(ctx_.getQueryOptions().mt_dop > 0);
     ArrayList<PlanFragment> distrPlan = createPlan();
     Preconditions.checkNotNull(distrPlan);
     ParallelPlanner planner = new ParallelPlanner(ctx_);
     List<PlanFragment> parallelPlans = planner.createPlans(distrPlan.get(0));
+    // Only use one scanner thread per scan-node instance since intra-node
+    // parallelism is achieved via multiple fragment instances.
+    ctx_.getQueryOptions().setNum_scanner_threads(1);
     ctx_.getRootAnalyzer().getTimeline().markEvent("Parallel plans created");
     return parallelPlans;
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/PlannerContext.java b/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
index 3275a7a..721acf9 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
@@ -25,6 +25,7 @@ import org.apache.impala.analysis.QueryStmt;
 import org.apache.impala.common.IdGenerator;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
+
 import com.google.common.collect.Lists;
 
 /**
@@ -79,9 +80,7 @@ public class PlannerContext {
 
   public QueryStmt getQueryStmt() { return queryStmt_; }
   public TQueryCtx getQueryCtx() { return queryCtx_; }
-  public TQueryOptions getQueryOptions() {
-    return queryCtx_.getRequest().getQuery_options();
-  }
+  public TQueryOptions getQueryOptions() { return getRootAnalyzer().getQueryOptions(); }
   public AnalysisContext.AnalysisResult getAnalysisResult() { return analysisResult_; }
   public Analyzer getRootAnalyzer() { return analysisResult_.getAnalyzer(); }
   public boolean isSingleNodeExec() { return getQueryOptions().num_nodes == 1; }
@@ -91,7 +90,10 @@ public class PlannerContext {
     return analysisResult_.isInsertStmt() || analysisResult_.isCreateTableAsSelectStmt();
   }
   public boolean isQuery() { return analysisResult_.isQueryStmt(); }
-
+  public boolean hasTableSink() {
+    return isInsertOrCtas() || analysisResult_.isUpdateStmt()
+        || analysisResult_.isDeleteStmt();
+  }
   public boolean hasSubplan() { return !subplans_.isEmpty(); }
   public SubplanNode getSubplan() { return subplans_.getFirst(); }
   public boolean pushSubplan(SubplanNode n) { return subplans_.offerFirst(n); }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index b686fe6..434e36d 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -27,9 +27,6 @@ import java.util.ListIterator;
 import java.util.Map;
 import java.util.Set;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.AggregateInfo;
 import org.apache.impala.analysis.AnalyticInfo;
 import org.apache.impala.analysis.Analyzer;
@@ -67,6 +64,10 @@ import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.NotImplementedException;
 import org.apache.impala.common.Pair;
+import org.apache.impala.common.RuntimeEnv;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
@@ -148,11 +149,22 @@ public class SingleNodePlanner {
   }
 
   /**
-   * Validates a single-node plan by checking that it does not contain right or
-   * full outer joins with no equi-join conjuncts that are not inside the right child
-   * of a SubplanNode. Throws a NotImplementedException if plan validation fails.
+   * Checks that the given single-node plan is executable:
+   * - It may not contain right or full outer joins with no equi-join conjuncts that
+   *   are not inside the right child of a SubplanNode.
+   * - MT_DOP > 0 is not supported for plans with base table joins or table sinks.
+   * Throws a NotImplementedException if plan validation fails.
    */
   public void validatePlan(PlanNode planNode) throws NotImplementedException {
+    if (ctx_.getQueryOptions().mt_dop > 0 && !RuntimeEnv.INSTANCE.isTestEnv()
+        && (planNode instanceof JoinNode || ctx_.hasTableSink())) {
+      throw new NotImplementedException(
+          "MT_DOP not supported for plans with base table joins or table sinks.");
+    }
+
+    // As long as MT_DOP == 0 any join can run in a single-node plan.
+    if (ctx_.isSingleNodeExec() && ctx_.getQueryOptions().mt_dop == 0) return;
+
     if (planNode instanceof NestedLoopJoinNode) {
       JoinNode joinNode = (JoinNode) planNode;
       JoinOperator joinOp = joinNode.getJoinOp();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 88a8631..6250969 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -17,14 +17,13 @@
 
 package org.apache.impala.planner;
 
-import org.junit.Assume;
-import org.junit.Test;
-
 import org.apache.impala.catalog.Db;
 import org.apache.impala.common.RuntimeEnv;
 import org.apache.impala.thrift.TExplainLevel;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.thrift.TRuntimeFilterMode;
+import org.junit.Assume;
+import org.junit.Test;
 
 // All planner tests, except for S3 specific tests should go here.
 public class PlannerTest extends PlannerTestBase {
@@ -279,4 +278,20 @@ public class PlannerTest extends PlannerTestBase {
     Assume.assumeTrue(RuntimeEnv.INSTANCE.isKuduSupported());
     runPlannerTestFile("tpch-kudu");
   }
+
+  @Test
+  public void testMtDopValidation() {
+    // Tests that queries supported with mt_dop > 0 produce a parallel plan, or
+    // throw a NotImplementedException otherwise (e.g. plan has a distributed join).
+    TQueryOptions options = defaultQueryOptions();
+    options.setMt_dop(3);
+    try {
+      // Temporarily unset the test env such that unsupported queries with mt_dop > 0
+      // throw an exception. Those are otherwise allowed for testing parallel plans.
+      RuntimeEnv.INSTANCE.setTestEnv(false);
+      runPlannerTestFile("mt-dop-validation", options);
+    } finally {
+      RuntimeEnv.INSTANCE.setTestEnv(true);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index 284d7e5..9c12b89 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -33,13 +33,6 @@ import java.util.regex.Pattern;
 
 import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.hadoop.fs.Path;
-import org.apache.kudu.client.KuduClient;
-import org.apache.kudu.client.KuduScanToken;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.ColumnLineageGraph;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.common.FrontendTestBase;
@@ -72,6 +65,13 @@ import org.apache.impala.thrift.TTableDescriptor;
 import org.apache.impala.thrift.TTupleDescriptor;
 import org.apache.impala.thrift.TUpdateMembershipRequest;
 import org.apache.impala.util.MembershipSnapshot;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduScanToken;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -503,6 +503,7 @@ public class PlannerTestBase extends FrontendTestBase {
     // Query exec request may not be set for DDL, e.g., CTAS.
     String locationsStr = null;
     if (execRequest != null && execRequest.isSetQuery_exec_request()) {
+      if (execRequest.query_exec_request.fragments == null) return;
       buildMaps(execRequest.query_exec_request);
       // If we optimize the partition key scans, we may get all the partition key values
       // from the metadata and don't reference any table. Skip the check in this case.
@@ -563,7 +564,8 @@ public class PlannerTestBase extends FrontendTestBase {
       String query, TExecRequest execRequest, StringBuilder errorLog) {
     if (execRequest == null) return;
     if (!execRequest.isSetQuery_exec_request()
-        || execRequest.query_exec_request == null) {
+        || execRequest.query_exec_request == null
+        || execRequest.query_exec_request.fragments == null) {
       return;
     }
     for (TPlanFragment planFragment : execRequest.query_exec_request.fragments) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
new file mode 100644
index 0000000..fe25599
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
@@ -0,0 +1,350 @@
+# Distributed nested-loop join not allowed.
+select count(*) from
+functional_parquet.alltypestiny a,
+functional_parquet.alltypestiny b
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# Distributed hash-join not allowed.
+select count(*) from
+functional_parquet.alltypestiny a,
+functional_parquet.alltypestiny b
+where a.id = b.id
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# Insert not allowed.
+insert into functional_parquet.alltypes partition(year,month)
+select * from functional_parquet.alltypessmall
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# CTAS not allowed.
+create table ctas_mt_dop_test as select * from functional_parquet.alltypes
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# Single-table scan/filter/agg/topn should work.
+select count(int_col) cnt from functional_parquet.alltypes
+where id < 10
+group by bigint_col
+order by cnt, bigint_col
+limit 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:TOP-N [LIMIT=10]
+|  order by: count(int_col) ASC, bigint_col ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2 row-size=16B cardinality=10
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(int_col)
+|  group by: bigint_col
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=16B cardinality=unavailable
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+05:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: count(int_col) ASC, bigint_col ASC
+|  limit: 10
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2 row-size=16B cardinality=10
+|
+02:TOP-N [LIMIT=10]
+|  order by: count(int_col) ASC, bigint_col ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2 row-size=16B cardinality=10
+|
+04:AGGREGATE [FINALIZE]
+|  output: count:merge(int_col)
+|  group by: bigint_col
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+03:EXCHANGE [HASH(bigint_col)]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+01:AGGREGATE [STREAMING]
+|  output: count(int_col)
+|  group by: bigint_col
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=16B cardinality=unavailable
+====
+# Single-table scan/filter/analytic should work.
+select row_number() over(partition by int_col order by id)
+from functional_parquet.alltypes
+where id < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:ANALYTIC
+|  functions: row_number()
+|  partition by: int_col
+|  order by: id ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3,2 row-size=16B cardinality=unavailable
+|
+01:SORT
+|  order by: int_col ASC NULLS FIRST, id ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3 row-size=8B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=8B cardinality=unavailable
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+04:EXCHANGE [UNPARTITIONED]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3,2 row-size=16B cardinality=unavailable
+|
+02:ANALYTIC
+|  functions: row_number()
+|  partition by: int_col
+|  order by: id ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3,2 row-size=16B cardinality=unavailable
+|
+01:SORT
+|  order by: int_col ASC NULLS FIRST, id ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3 row-size=8B cardinality=unavailable
+|
+03:EXCHANGE [HASH(int_col)]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=0 row-size=8B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=8B cardinality=unavailable
+====
+# Nested-loop join in a subplan should work.
+select *
+from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
+where c_custkey < 10 and o_orderkey < 5 and l_linenumber < 3
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2,1,0 row-size=562B cardinality=1500000
+|
+|--08:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1,0 row-size=562B cardinality=100
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=254B cardinality=1
+|  |
+|  04:SUBPLAN
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1 row-size=308B cardinality=100
+|  |
+|  |--07:NESTED LOOP JOIN [CROSS JOIN]
+|  |  |  hosts=3 per-host-mem=unavailable
+|  |  |  tuple-ids=2,1 row-size=308B cardinality=10
+|  |  |
+|  |  |--05:SINGULAR ROW SRC
+|  |  |     parent-subplan=04
+|  |  |     hosts=3 per-host-mem=unavailable
+|  |  |     tuple-ids=1 row-size=124B cardinality=1
+|  |  |
+|  |  06:UNNEST [o.o_lineitems]
+|  |     parent-subplan=04
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  03:UNNEST [c.c_orders o]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: c_custkey < 10, !empty(c.c_orders)
+   predicates on o: !empty(o.o_lineitems), o_orderkey < 5
+   predicates on o_lineitems: l_linenumber < 3
+   table stats: 150000 rows total
+   columns missing stats: c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=254B cardinality=15000
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+09:EXCHANGE [UNPARTITIONED]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2,1,0 row-size=562B cardinality=1500000
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2,1,0 row-size=562B cardinality=1500000
+|
+|--08:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1,0 row-size=562B cardinality=100
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=254B cardinality=1
+|  |
+|  04:SUBPLAN
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1 row-size=308B cardinality=100
+|  |
+|  |--07:NESTED LOOP JOIN [CROSS JOIN]
+|  |  |  hosts=3 per-host-mem=unavailable
+|  |  |  tuple-ids=2,1 row-size=308B cardinality=10
+|  |  |
+|  |  |--05:SINGULAR ROW SRC
+|  |  |     parent-subplan=04
+|  |  |     hosts=3 per-host-mem=unavailable
+|  |  |     tuple-ids=1 row-size=124B cardinality=1
+|  |  |
+|  |  06:UNNEST [o.o_lineitems]
+|  |     parent-subplan=04
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  03:UNNEST [c.c_orders o]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: c_custkey < 10, !empty(c.c_orders)
+   predicates on o: !empty(o.o_lineitems), o_orderkey < 5
+   predicates on o_lineitems: l_linenumber < 3
+   table stats: 150000 rows total
+   columns missing stats: c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=254B cardinality=15000
+====
+# Hash-join in a subplan should work.
+select c.*
+from tpch_nested_parquet.customer c, c.c_orders o1, c.c_orders o2
+where o1.o_orderkey = o2.o_orderkey + 2 and o1.o_orderkey < 5
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1,0,2 row-size=286B cardinality=1500000
+|
+|--06:HASH JOIN [INNER JOIN]
+|  |  hash predicates: o1.o_orderkey = o2.o_orderkey + 2
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0,2 row-size=286B cardinality=10
+|  |
+|  |--04:UNNEST [c.c_orders o2]
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  05:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0 row-size=278B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=270B cardinality=1
+|  |
+|  03:UNNEST [c.c_orders o1]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: !empty(c.c_orders), !empty(c.c_orders)
+   predicates on o1: o1.o_orderkey < 5
+   table stats: 150000 rows total
+   columns missing stats: c_orders, c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=270B cardinality=150000
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+07:EXCHANGE [UNPARTITIONED]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1,0,2 row-size=286B cardinality=1500000
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1,0,2 row-size=286B cardinality=1500000
+|
+|--06:HASH JOIN [INNER JOIN]
+|  |  hash predicates: o1.o_orderkey = o2.o_orderkey + 2
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0,2 row-size=286B cardinality=10
+|  |
+|  |--04:UNNEST [c.c_orders o2]
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  05:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0 row-size=278B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=270B cardinality=1
+|  |
+|  03:UNNEST [c.c_orders o1]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: !empty(c.c_orders), !empty(c.c_orders)
+   predicates on o1: o1.o_orderkey < 5
+   table stats: 150000 rows total
+   columns missing stats: c_orders, c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=270B cardinality=150000
+====


[06/32] incubator-impala git commit: IMPALA-3971, IMPALA-3229: Bootstrap an Impala dev environment

Posted by ta...@apache.org.
IMPALA-3971, IMPALA-3229: Bootstrap an Impala dev environment

This script bootstraps an Impala dev environment on Ubuntu 14.04. It
is not hermetic -- it changes some config files for the user and for
the OS.

It is green on Jenkins, and it runs in about 6.5 hours. The intention
is to have this script run in a CI tool for post-commit testing, with
the hope that this will make it easier for new developers to get a
working development environment. Previously, the new developer
workflow lived on wiki pages and tended to bit-rot.

Still left to do: migrating the install script into the official
Impala repo.
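
For reference, a hedged sketch of how a new contributor might drive the
script on a fresh Ubuntu 14.04 machine; the clone URL and JAVA_HOME path
below are illustrative assumptions, while the prerequisites (JAVA_HOME set,
passwordless sudo) and the script location come from this change:

    # Assumed JDK path and clone URL, for illustration only.
    export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
    git clone https://git-wip-us.apache.org/repos/asf/incubator-impala.git
    cd incubator-impala
    ./bin/bootstrap_development.sh   # roughly 6.5 hours: build, data load, core tests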

Change-Id: If166a8a286d7559af547da39f6cc09e723f34c7e
Reviewed-on: http://gerrit.cloudera.org:8080/4674
Reviewed-by: Jim Apple <jb...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/784716f7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/784716f7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/784716f7

Branch: refs/heads/hadoop-next
Commit: 784716f776492ed648833f2717b3784900b6f090
Parents: 2a04b0e
Author: Jim Apple <jb...@cloudera.com>
Authored: Fri Oct 7 20:34:28 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 14 06:07:46 2016 +0000

----------------------------------------------------------------------
 bin/bootstrap_development.sh | 80 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/784716f7/bin/bootstrap_development.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_development.sh b/bin/bootstrap_development.sh
new file mode 100755
index 0000000..8c4f742
--- /dev/null
+++ b/bin/bootstrap_development.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script bootstraps a development environment from almost nothing; it is known to
+# work on Ubuntu 14.04, and it definitely clobbers some local environment, so it's best to
+# run this in a sandbox first, like a VM or docker.
+#
+# The intended user is a person who wants to start contributing code to Impala. This
+# script serves as an executable reference point for how to get started.
+#
+# At this time, it completes in about 6.5 hours. It generates and loads the test data and
+# metadata without using a snapshot (which takes about 3 hours) and it then runs the full
+# testsuite (frontend, backend, end-to-end, JDBC, and custom cluster) in "core"
+# exploration mode.
+
+set -eux -o pipefail
+
+HOMEDIR="/home/$(whoami)/"
+
+if [[ ! -d "${HOMEDIR}" ]]
+then
+    echo "${HOMEDIR} is needed for installing Impala dependencies"
+    exit 1
+fi
+
+if [[ -z "${JAVA_HOME}" ]]
+then
+    echo "JAVA_HOME must be set to install Impala dependencies"
+    exit 1
+fi
+
+if ! sudo true
+then
+    echo "Passwordless sudo is needed for this script"
+    exit 1
+fi
+
+IMPALA_SETUP_REPO_URL="https://github.com/awleblang/impala-setup"
+
+# Place to download setup scripts
+TMPDIR=$(mktemp -d)
+function cleanup {
+    rm -rf "${TMPDIR}"
+}
+trap cleanup EXIT
+
+# Install build and test pre-reqs
+pushd "${TMPDIR}"
+git clone "${IMPALA_SETUP_REPO_URL}" impala-setup
+cd impala-setup
+chmod +x ./install.sh
+sudo ./install.sh
+popd
+
+# HDFS bug workaround
+echo "127.0.0.1 $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
+echo "NoHostAuthenticationForLocalhost yes" > ~/.ssh/config
+
+pushd "$(dirname $0)/.."
+export IMPALA_HOME="$(pwd)"
+export MAX_PYTEST_FAILURES=0
+source bin/impala-config.sh
+./buildall.sh -noclean -format -testdata
+popd


[13/32] incubator-impala git commit: IMPALA-4294: Make check-schema-diff.sh executable from anywhere.

Posted by ta...@apache.org.
IMPALA-4294: Make check-schema-diff.sh executable from anywhere.

Fixes a regression in the data load process that had been introduced
by commit 75a857c. To make check-schema-diff.sh work from anywhere,
we need to specify the git-dir and work-tree arguments everywhere we
call git.
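
As a minimal sketch of the pattern (IMPALA_HOME, GIT_HASH and DATASET are
variables the script already uses; nothing here is new):

    # Works from any current directory, because git is pointed at the
    # Impala checkout explicitly instead of relying on $PWD.
    git --git-dir "${IMPALA_HOME}/.git" --work-tree "${IMPALA_HOME}" fetch --all
    git --git-dir "${IMPALA_HOME}/.git" --work-tree "${IMPALA_HOME}" \
      diff --exit-code "${GIT_HASH}..HEAD" "${IMPALA_HOME}/testdata/datasets/${DATASET}"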

Change-Id: I32e0dce2c10c443763a038aa3b64b1c123ed62ad
Reviewed-on: http://gerrit.cloudera.org:8080/4726
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/05b91a97
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/05b91a97
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/05b91a97

Branch: refs/heads/hadoop-next
Commit: 05b91a973c3aa23ef049761ef6d240482bb86c11
Parents: ca3fd40
Author: David Knupp <dk...@cloudera.com>
Authored: Thu Oct 13 15:58:45 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Sat Oct 15 04:05:04 2016 +0000

----------------------------------------------------------------------
 testdata/bin/check-schema-diff.sh | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/05b91a97/testdata/bin/check-schema-diff.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/check-schema-diff.sh b/testdata/bin/check-schema-diff.sh
index f2cabc5..4eb8172 100755
--- a/testdata/bin/check-schema-diff.sh
+++ b/testdata/bin/check-schema-diff.sh
@@ -31,11 +31,17 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
 DATASET=${1-}
 hdfs dfs -test -e  ${WAREHOUSE_LOCATION_PREFIX}/test-warehouse/githash.txt || { exit 0; }
 GIT_HASH=$(echo $(hdfs dfs -cat ${WAREHOUSE_LOCATION_PREFIX}/test-warehouse/githash.txt))
-if ! git show $GIT_HASH &>/dev/null; then
+
+# To ensure this script works when executed from anywhere, even outside the Impala repo,
+# we specify the git-dir and work-tree for all git commands.
+if ! git --git-dir ${IMPALA_HOME}/.git --work-tree=${IMPALA_HOME} \
+  show $GIT_HASH &>/dev/null; then
   echo The git commit used to create the test warehouse snapshot is not available \
       locally. Fetching the latest commits from remotes.
-  git fetch --all &>/dev/null
+  git --git-dir ${IMPALA_HOME}/.git --work-tree=${IMPALA_HOME} fetch --all &>/dev/null
 fi
 # Check whether a non-empty diff exists.
-git diff --exit-code ${GIT_HASH}..HEAD ${IMPALA_HOME}/testdata/datasets/$DATASET
-git diff --exit-code ${GIT_HASH}..HEAD ${IMPALA_HOME}/testdata/avro_schema_resolution
+git --git-dir ${IMPALA_HOME}/.git --work-tree=${IMPALA_HOME} \
+  diff --exit-code ${GIT_HASH}..HEAD ${IMPALA_HOME}/testdata/datasets/$DATASET
+git --git-dir ${IMPALA_HOME}/.git --work-tree=${IMPALA_HOME} \
+  diff --exit-code ${GIT_HASH}..HEAD ${IMPALA_HOME}/testdata/avro_schema_resolution


[27/32] incubator-impala git commit: IMPALA-4277: allow overriding of Hive/Hadoop versions/locations

Posted by ta...@apache.org.
IMPALA-4277: allow overriding of Hive/Hadoop versions/locations

This is to help with IMPALA-4277 to make it easier to build against
Hadoop/Hive distributions where the directory layout doesn't exactly
match our current CDH dependencies, or where we may want to
temporarily override a version without making a source change.
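
A rough sketch of the intended workflow (the version string and paths are
placeholders, not real releases): because the variables are now only
defaulted when unset, exporting them before sourcing the config is enough.

    export IMPALA_HADOOP_VERSION=2.6.0-custom        # placeholder version string
    export HADOOP_INCLUDE_DIR=/opt/hadoop/include    # layout differs from the CDH default
    export HADOOP_LIB_DIR=/opt/hadoop/lib
    export HIVE_SRC_DIR=/opt/hive/src
    source bin/impala-config.sh   # prints the resolved HADOOP_*/HIVE_* locations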

Change-Id: I7da10e38f9c4309f2d193dc25f14a6ea308c9639
Reviewed-on: http://gerrit.cloudera.org:8080/4720
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/df680cfe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/df680cfe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/df680cfe

Branch: refs/heads/hadoop-next
Commit: df680cfe3a99fa295d25d39f2eab4a9cd98509be
Parents: d0a2d1d
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Oct 13 15:00:08 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 05:54:09 2016 +0000

----------------------------------------------------------------------
 bin/impala-config.sh         | 33 ++++++++++++++++++++++-----------
 buildall.sh                  |  2 +-
 cmake_modules/FindHDFS.cmake | 11 +++--------
 common/thrift/CMakeLists.txt |  2 +-
 4 files changed, 27 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 90e8fc0..2a25248 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -295,13 +295,13 @@ if [[ $OSTYPE == "darwin"* ]]; then
   IMPALA_THRIFT_JAVA_VERSION=0.9.2
 fi
 
-export IMPALA_HADOOP_VERSION=2.6.0-cdh5.10.0-SNAPSHOT
-export IMPALA_HBASE_VERSION=1.2.0-cdh5.10.0-SNAPSHOT
-export IMPALA_HIVE_VERSION=1.1.0-cdh5.10.0-SNAPSHOT
-export IMPALA_SENTRY_VERSION=1.5.1-cdh5.10.0-SNAPSHOT
-export IMPALA_LLAMA_VERSION=1.0.0-cdh5.10.0-SNAPSHOT
-export IMPALA_PARQUET_VERSION=1.5.0-cdh5.10.0-SNAPSHOT
-export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
+export IMPALA_HADOOP_VERSION=${IMPALA_HADOOP_VERSION:-2.6.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_HBASE_VERSION=${IMPALA_HBASE_VERSION:-1.2.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_HIVE_VERSION=${IMPALA_HIVE_VERSION:-1.1.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_SENTRY_VERSION=${IMPALA_SENTRY_VERSION:-1.5.1-cdh5.10.0-SNAPSHOT}
+export IMPALA_LLAMA_VERSION=${IMPALA_LLAMA_VERSION:-1.0.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_PARQUET_VERSION=${IMPALA_PARQUET_VERSION:-1.5.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_LLAMA_MINIKDC_VERSION=${IMPALA_LLAMA_MINIKDC_VERSION:-1.0.0}
 
 export IMPALA_FE_DIR="$IMPALA_HOME/fe"
 export IMPALA_BE_DIR="$IMPALA_HOME/be"
@@ -319,12 +319,17 @@ else
   export CDH_COMPONENTS_HOME="$IMPALA_HOME/thirdparty"
 fi
 
-# Hadoop dependencies are snapshots in the Impala tree
+# Typically we build against a snapshot build of Hadoop that includes everything we need
+# for building Impala and running a minicluster.
 export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
+# The include and lib paths are needed to pick up hdfs.h and libhdfs.*
+# Allow overriding in case we want to point to a package/install with a different layout.
+export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR:-"${HADOOP_HOME}/include"}
+export HADOOP_LIB_DIR=${HADOOP_LIB_DIR:-"${HADOOP_HOME}/lib"}
 
 : ${HADOOP_CLASSPATH=}
-# Please note that the * is inside quotes, thus it won't get exanded by bash but
+# Please note that the * is inside quotes, thus it won't get expanded by bash but
 # by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft
 export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:${HADOOP_HOME}/share/hadoop/tools/lib/*"
 # YARN is configured to use LZO so the LZO jar needs to be in the hadoop classpath.
@@ -341,6 +346,9 @@ export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"
 
 export HIVE_HOME="$CDH_COMPONENTS_HOME/hive-${IMPALA_HIVE_VERSION}/"
 export PATH="$HIVE_HOME/bin:$PATH"
+# Allow overriding of Hive source location in case we want to build Impala without
+# a complete Hive build.
+export HIVE_SRC_DIR=${HIVE_SRC_DIR:-"${HIVE_HOME}/src"}
 export HIVE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
 
 # Hive looks for jar files in a single directory from HIVE_AUX_JARS_PATH plus
@@ -392,7 +400,7 @@ export USER="${USER-`id -un`}"
 #LIBHDFS_OPTS="-Xcheck:jni -Xcheck:nabounds"
 # - Points to the location of libbackend.so.
 LIBHDFS_OPTS="${LIBHDFS_OPTS:-}"
-LIBHDFS_OPTS="${LIBHDFS_OPTS} -Djava.library.path=${HADOOP_HOME}/lib/native/"
+LIBHDFS_OPTS="${LIBHDFS_OPTS} -Djava.library.path=${HADOOP_LIB_DIR}/native/"
 # READER BEWARE: This always points to the debug build.
 # TODO: Consider having cmake scripts change this value depending on
 # the build type.
@@ -412,7 +420,7 @@ LIB_JVM=` find "${JAVA_HOME}/"   -name libjvm.so  | head -1`
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH-}"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:`dirname ${LIB_JAVA}`:`dirname ${LIB_JSIG}`"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:`dirname ${LIB_JVM}`"
-LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native"
+LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_LIB_DIR}/native"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_HOME}/be/build/debug/service"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_SNAPPY_PATH}"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_LZO}/build"
@@ -443,9 +451,12 @@ export IMPALA_CONFIG_SOURCED=1
 echo "IMPALA_HOME             = $IMPALA_HOME"
 echo "HADOOP_HOME             = $HADOOP_HOME"
 echo "HADOOP_CONF_DIR         = $HADOOP_CONF_DIR"
+echo "HADOOP_INCLUDE_DIR      = $HADOOP_INCLUDE_DIR"
+echo "HADOOP_LIB_DIR          = $HADOOP_LIB_DIR"
 echo "MINI_DFS_BASE_DATA_DIR  = $MINI_DFS_BASE_DATA_DIR"
 echo "HIVE_HOME               = $HIVE_HOME"
 echo "HIVE_CONF_DIR           = $HIVE_CONF_DIR"
+echo "HIVE_SRC_DIR            = $HIVE_SRC_DIR"
 echo "HBASE_HOME              = $HBASE_HOME"
 echo "HBASE_CONF_DIR          = $HBASE_CONF_DIR"
 echo "MINIKDC_HOME            = $MINIKDC_HOME"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index a7858a3..d7159e7 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -365,7 +365,7 @@ reconfigure_test_cluster() {
 
   # Copy Hadoop-lzo dependencies if available (required to generate Lzo data).
   if stat "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* > /dev/null ; then
-    cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_HOME/lib/native"
+    cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_LIB_DIR/native"
   else
     echo "No hadoop-lzo found"
   fi

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/cmake_modules/FindHDFS.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindHDFS.cmake b/cmake_modules/FindHDFS.cmake
index 6affc56..f0916e1 100644
--- a/cmake_modules/FindHDFS.cmake
+++ b/cmake_modules/FindHDFS.cmake
@@ -28,9 +28,9 @@
 exec_program(hadoop ARGS version OUTPUT_VARIABLE Hadoop_VERSION
              RETURN_VALUE Hadoop_RETURN)
 
-# currently only looking in HADOOP_HOME
+# Only look in HADOOP_INCLUDE_DIR
 find_path(HDFS_INCLUDE_DIR hdfs.h PATHS
-  $ENV{HADOOP_HOME}/include/
+  $ENV{HADOOP_INCLUDE_DIR}
   # make sure we don't accidentally pick up a different version
   NO_DEFAULT_PATH
 )
@@ -44,12 +44,7 @@ else ()
 endif()
 
 message(STATUS "Architecture: ${arch_hint}")
-
-if ("${arch_hint}" STREQUAL "x64")
-  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native)
-else ()
-  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native)
-endif ()
+set(HDFS_LIB_PATHS $ENV{HADOOP_LIB_DIR}/native)
 
 message(STATUS "HDFS_LIB_PATHS: ${HDFS_LIB_PATHS}")
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/common/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/common/thrift/CMakeLists.txt b/common/thrift/CMakeLists.txt
index 3104ee2..08fabd8 100644
--- a/common/thrift/CMakeLists.txt
+++ b/common/thrift/CMakeLists.txt
@@ -119,7 +119,7 @@ function(THRIFT_GEN_DS VAR)
 endfunction(THRIFT_GEN_DS)
 
 message("Using Thrift compiler: ${THRIFT_COMPILER}")
-set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_HOME}/src/metastore/if)
+set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_SRC_DIR}/metastore/if)
 set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources)
 set(FE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/fe/generated-sources)
 # TODO: avoid duplicating generated java classes


[03/32] incubator-impala git commit: IMPALA-4231: fix codegen time regression

Posted by ta...@apache.org.
IMPALA-4231: fix codegen time regression

The commit "IMPALA-3567 Part 2, IMPALA-3899: factor out PHJ builder"
slightly increased codegen time, which caused TPC-H Q2 to sometimes
regress significantly because of races in runtime filter arrival.

This patch attempts to fix the regression by improving codegen time in a
few places.

* Revert to using the old bool/Status return pattern. The regular Status
  return pattern results in significantly more complex IR because it has
  to emit code to copy and free statuses. I spent some time trying to
  convince it to optimise the extra code out, but didn't have much success.
* Remove some code that cannot be specialized from cross-compilation.
* Add noexcept to some functions that are used from the IR to ensure
  exception-handling IR is not emitted. This is less important after the
  first change but still should help produce cleaner IR.

Performance:
I was able to reproduce a regression locally, which is fixed by this
patch. I'm in the process of trying to verify the fix on a cluster.

Change-Id: Idf0fdedabd488550b6db90167a30c582949d608d
Reviewed-on: http://gerrit.cloudera.org:8080/4623
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c7fe4385
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c7fe4385
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c7fe4385

Branch: refs/heads/hadoop-next
Commit: c7fe4385d927509443a1c4e2c6e9a802d2dcf63b
Parents: 89b41c6
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri Sep 30 15:18:54 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 14 02:53:59 2016 +0000

----------------------------------------------------------------------
 be/src/common/status.h                          |  30 +--
 be/src/exec/hash-table.cc                       |   8 +-
 be/src/exec/hash-table.h                        |  16 +-
 be/src/exec/partitioned-aggregation-node-ir.cc  |   6 +-
 be/src/exec/partitioned-aggregation-node.cc     |  10 +-
 be/src/exec/partitioned-aggregation-node.h      |   9 +-
 be/src/exec/partitioned-hash-join-builder-ir.cc |  20 +-
 be/src/exec/partitioned-hash-join-builder.cc    |  19 +-
 be/src/exec/partitioned-hash-join-builder.h     |  14 +-
 be/src/exec/partitioned-hash-join-node-ir.cc    |  33 ++--
 be/src/exec/partitioned-hash-join-node.h        |   9 +-
 be/src/runtime/buffered-tuple-stream.cc         |  21 ++-
 be/src/runtime/buffered-tuple-stream.h          |  31 +--
 be/src/runtime/buffered-tuple-stream.inline.h   |   8 +-
 be/src/runtime/raw-value.cc                     | 169 +++++++++++++++++
 be/src/runtime/raw-value.h                      |   7 +-
 be/src/runtime/raw-value.inline.h               | 188 -------------------
 be/src/util/bloom-filter.cc                     |  72 +++++++
 be/src/util/bloom-filter.h                      |  80 +-------
 19 files changed, 382 insertions(+), 368 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/common/status.h
----------------------------------------------------------------------
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 35a9a94..9a9ea67 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -99,6 +99,10 @@ class Status {
     if (UNLIKELY(status.msg_ != NULL)) CopyMessageFrom(status);
   }
 
+  /// Move constructor that moves the error message (if any) and resets 'other' to the
+  /// default OK Status.
+  ALWAYS_INLINE Status(Status&& other) : msg_(other.msg_) { other.msg_ = NULL; }
+
   /// Status using only the error code as a parameter. This can be used for error messages
   /// that don't take format parameters.
   Status(TErrorCode::type code);
@@ -153,6 +157,15 @@ class Status {
     return *this;
   }
 
+  /// Move assignment that moves the error message (if any) and resets 'other' to the
+  /// default OK Status.
+  ALWAYS_INLINE Status& operator=(Status&& other) {
+    if (UNLIKELY(msg_ != NULL)) FreeMessage();
+    msg_ = other.msg_;
+    other.msg_ = NULL;
+    return *this;
+  }
+
   ALWAYS_INLINE ~Status() {
     // The UNLIKELY and inlining here are important hints for the compiler to
     // streamline the common case of Status::OK(). Use FreeMessage() which is
@@ -244,21 +257,12 @@ class Status {
 };
 
 /// some generally useful macros
-#define RETURN_IF_ERROR(stmt) \
-  do { \
-    Status __status__ = (stmt); \
-    if (UNLIKELY(!__status__.ok())) return __status__; \
+#define RETURN_IF_ERROR(stmt)                                     \
+  do {                                                            \
+    Status __status__ = (stmt);                                   \
+    if (UNLIKELY(!__status__.ok())) return std::move(__status__); \
   } while (false)
 
-#define RETURN_IF_ERROR_PREPEND(expr, prepend) \
-  do { \
-    Status __status__ = (stmt); \
-    if (UNLIKELY(!__status__.ok())) { \
-      return Status(strings::Substitute("$0: $1", prepend, __status__.GetDetail())); \
-    } \
-  } while (false)
-
-
 #define ABORT_IF_ERROR(stmt) \
   do { \
     Status __status__ = (stmt); \
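
The move constructor and the std::move in RETURN_IF_ERROR above mean that
propagating an error transfers the heap-allocated message pointer to the
caller instead of copying and then freeing it. A minimal sketch of that idea,
using hypothetical names rather than Impala's actual Status internals:

#include <string>
#include <utility>

struct MiniStatus {
  std::string* msg = nullptr;                           // nullptr means "OK"
  MiniStatus() = default;
  explicit MiniStatus(std::string m) : msg(new std::string(std::move(m))) {}
  MiniStatus(const MiniStatus& o)                       // copy: allocates a new message
      : msg(o.msg ? new std::string(*o.msg) : nullptr) {}
  MiniStatus(MiniStatus&& o) noexcept : msg(o.msg) { o.msg = nullptr; }  // move: steals it
  ~MiniStatus() { delete msg; }
  bool ok() const { return msg == nullptr; }
};

// With a move constructor available, "return std::move(some_status);" hands
// 'msg' to the destination object; the copy path would allocate a new string
// and later free both, which is the extra IR the macro change avoids on
// error paths.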

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index 0d780b9..6626b33 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -149,7 +149,7 @@ uint32_t HashTableCtx::Hash(const void* input, int len, uint32_t hash) const {
 }
 
 uint32_t HashTableCtx::HashRow(
-    const uint8_t* expr_values, const uint8_t* expr_values_null) const {
+    const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept {
   DCHECK_LT(level_, seeds_.size());
   if (expr_values_cache_.var_result_offset() == -1) {
     /// This handles NULLs implicitly since a constant seed value was put
@@ -162,7 +162,7 @@ uint32_t HashTableCtx::HashRow(
 }
 
 bool HashTableCtx::EvalRow(const TupleRow* row, const vector<ExprContext*>& ctxs,
-    uint8_t* expr_values, uint8_t* expr_values_null) {
+    uint8_t* expr_values, uint8_t* expr_values_null) noexcept {
   bool has_null = false;
   for (int i = 0; i < ctxs.size(); ++i) {
     void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
@@ -213,7 +213,7 @@ uint32_t HashTableCtx::HashVariableLenRow(const uint8_t* expr_values,
 
 template <bool FORCE_NULL_EQUALITY>
 bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values,
-    const uint8_t* expr_values_null) const {
+    const uint8_t* expr_values_null) const noexcept {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     void* val = build_expr_ctxs_[i]->GetValue(build_row);
     if (val == NULL) {
@@ -331,7 +331,7 @@ void HashTableCtx::ExprValuesCache::ResetIterators() {
   cur_expr_values_hash_ = expr_values_hash_array_.get();
 }
 
-void HashTableCtx::ExprValuesCache::Reset() {
+void HashTableCtx::ExprValuesCache::Reset() noexcept {
   ResetIterators();
   // Set the end pointer after resetting the other pointers so they point to
   // the same location.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index fead1f7..4edd130 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -256,7 +256,7 @@ class HashTableCtx {
     void Close(MemTracker* tracker);
 
     /// Resets the cache states (iterators, end pointers etc) before writing.
-    void Reset();
+    void Reset() noexcept;
 
     /// Resets the iterators to the start before reading. Will record the current position
     /// of the iterators in end pointer before resetting so AtEnd() can determine if all
@@ -406,7 +406,7 @@ class HashTableCtx {
   /// This will be replaced by codegen.  We don't want this inlined for replacing
   /// with codegen'd functions so the function name does not change.
   uint32_t IR_NO_INLINE HashRow(
-      const uint8_t* expr_values, const uint8_t* expr_values_null) const;
+      const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept;
 
   /// Wrapper function for calling correct HashUtil function in non-codegen'd case.
   uint32_t Hash(const void* input, int len, uint32_t hash) const;
@@ -416,15 +416,15 @@ class HashTableCtx {
   /// inlined when cross compiled because we need to be able to differentiate between
   /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the
   /// codegen'd function.
-  bool IR_NO_INLINE EvalBuildRow(const TupleRow* row, uint8_t* expr_values,
-      uint8_t* expr_values_null) {
+  bool IR_NO_INLINE EvalBuildRow(
+      const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept {
     return EvalRow(row, build_expr_ctxs_, expr_values, expr_values_null);
   }
 
   /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' and nullness
   /// into 'expr_values_null'. This will be replaced by codegen.
-  bool IR_NO_INLINE EvalProbeRow(const TupleRow* row, uint8_t* expr_values,
-      uint8_t* expr_values_null) {
+  bool IR_NO_INLINE EvalProbeRow(
+      const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept {
     return EvalRow(row, probe_expr_ctxs_, expr_values, expr_values_null);
   }
 
@@ -437,7 +437,7 @@ class HashTableCtx {
   /// 'expr_values_null'. Returns whether any expr evaluated to NULL. This will be
   /// replaced by codegen.
   bool EvalRow(const TupleRow* row, const std::vector<ExprContext*>& ctxs,
-      uint8_t* expr_values, uint8_t* expr_values_null);
+      uint8_t* expr_values, uint8_t* expr_values_null) noexcept;
 
   /// Returns true if the values of build_exprs evaluated over 'build_row' equal the
   /// values in 'expr_values' with nullness 'expr_values_null'. FORCE_NULL_EQUALITY is
@@ -445,7 +445,7 @@ class HashTableCtx {
   /// 'finds_nulls_'. This will be replaced by codegen.
   template <bool FORCE_NULL_EQUALITY>
   bool IR_NO_INLINE Equals(const TupleRow* build_row, const uint8_t* expr_values,
-      const uint8_t* expr_values_null) const;
+      const uint8_t* expr_values_null) const noexcept;
 
   /// Helper function that calls Equals() with the current row. Always inlined so that
   /// it does not appear in cross-compiled IR.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-aggregation-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node-ir.cc b/be/src/exec/partitioned-aggregation-node-ir.cc
index ed95844..83362e7 100644
--- a/be/src/exec/partitioned-aggregation-node-ir.cc
+++ b/be/src/exec/partitioned-aggregation-node-ir.cc
@@ -153,7 +153,7 @@ Status PartitionedAggregationNode::AddIntermediateTuple(Partition* __restrict__
       insert_it.SetTuple(intermediate_tuple, hash);
       return Status::OK();
     } else if (!process_batch_status_.ok()) {
-      return process_batch_status_;
+      return std::move(process_batch_status_);
     }
 
     // We did not have enough memory to add intermediate_tuple to the stream.
@@ -198,13 +198,13 @@ Status PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize,
           !TryAddToHashTable(ht_ctx, hash_partitions_[partition_idx],
             GetHashTable(partition_idx), in_row, hash, &remaining_capacity[partition_idx],
             &process_batch_status_)) {
-        RETURN_IF_ERROR(process_batch_status_);
+        RETURN_IF_ERROR(std::move(process_batch_status_));
         // Tuple is not going into hash table, add it to the output batch.
         Tuple* intermediate_tuple = ConstructIntermediateTuple(agg_fn_ctxs_,
             out_batch->tuple_data_pool(), &process_batch_status_);
         if (UNLIKELY(intermediate_tuple == NULL)) {
           DCHECK(!process_batch_status_.ok());
-          return process_batch_status_;
+          return std::move(process_batch_status_);
         }
         UpdateTuple(&agg_fn_ctxs_[0], intermediate_tuple, in_row);
         out_batch_iterator.Get()->SetTuple(0, intermediate_tuple);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc
index f926725..629e407 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -965,7 +965,7 @@ Tuple* PartitionedAggregationNode::ConstructSingletonOutputTuple(
 }
 
 Tuple* PartitionedAggregationNode::ConstructIntermediateTuple(
-    const vector<FunctionContext*>& agg_fn_ctxs, MemPool* pool, Status* status) {
+    const vector<FunctionContext*>& agg_fn_ctxs, MemPool* pool, Status* status) noexcept {
   const int fixed_size = intermediate_tuple_desc_->byte_size();
   const int varlen_size = GroupingExprsVarlenSize();
   const int tuple_data_size = fixed_size + varlen_size;
@@ -985,8 +985,8 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple(
 }
 
 Tuple* PartitionedAggregationNode::ConstructIntermediateTuple(
-    const vector<FunctionContext*>& agg_fn_ctxs,
-    BufferedTupleStream* stream, Status* status) {
+    const vector<FunctionContext*>& agg_fn_ctxs, BufferedTupleStream* stream,
+    Status* status) noexcept {
   DCHECK(stream != NULL && status != NULL);
   // Allocate space for the entire tuple in the stream.
   const int fixed_size = intermediate_tuple_desc_->byte_size();
@@ -1090,8 +1090,8 @@ void PartitionedAggregationNode::InitAggSlots(
   }
 }
 
-void PartitionedAggregationNode::UpdateTuple(FunctionContext** agg_fn_ctxs,
-    Tuple* tuple, TupleRow* row, bool is_merge) {
+void PartitionedAggregationNode::UpdateTuple(
+    FunctionContext** agg_fn_ctxs, Tuple* tuple, TupleRow* row, bool is_merge) noexcept {
   DCHECK(tuple != NULL || aggregate_evaluators_.empty());
   for (int i = 0; i < aggregate_evaluators_.size(); ++i) {
     if (is_merge) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-aggregation-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.h b/be/src/exec/partitioned-aggregation-node.h
index c766ab2..0c0f3e8 100644
--- a/be/src/exec/partitioned-aggregation-node.h
+++ b/be/src/exec/partitioned-aggregation-node.h
@@ -444,14 +444,13 @@ class PartitionedAggregationNode : public ExecNode {
   /// full, it will attempt to switch to IO-buffers.
   Tuple* ConstructIntermediateTuple(
       const std::vector<impala_udf::FunctionContext*>& agg_fn_ctxs,
-      BufferedTupleStream* stream, Status* status);
+      BufferedTupleStream* stream, Status* status) noexcept;
 
   /// Constructs intermediate tuple, allocating memory from pool instead of the stream.
   /// Returns NULL and sets status if there is not enough memory to allocate the tuple.
   Tuple* ConstructIntermediateTuple(
-      const std::vector<impala_udf::FunctionContext*>& agg_fn_ctxs,
-      MemPool* pool, Status* status);
-
+      const std::vector<impala_udf::FunctionContext*>& agg_fn_ctxs, MemPool* pool,
+      Status* status) noexcept;
 
   /// Returns the number of bytes of variable-length data for the grouping values stored
   /// in 'ht_ctx_'.
@@ -477,7 +476,7 @@ class PartitionedAggregationNode : public ExecNode {
   /// This function is replaced by codegen (which is why we don't use a vector argument
   /// for agg_fn_ctxs).. Any var-len data is allocated from the FunctionContexts.
   void UpdateTuple(impala_udf::FunctionContext** agg_fn_ctxs, Tuple* tuple, TupleRow* row,
-      bool is_merge = false);
+      bool is_merge = false) noexcept;
 
   /// Called on the intermediate tuple of each group after all input rows have been
   /// consumed and aggregated. Computes the final aggregate values to be returned in

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-hash-join-builder-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder-ir.cc b/be/src/exec/partitioned-hash-join-builder-ir.cc
index 21fd9e4..11974e3 100644
--- a/be/src/exec/partitioned-hash-join-builder-ir.cc
+++ b/be/src/exec/partitioned-hash-join-builder-ir.cc
@@ -29,15 +29,16 @@
 
 using namespace impala;
 
-inline Status PhjBuilder::AppendRow(BufferedTupleStream* stream, TupleRow* row) {
-  Status status;
-  if (LIKELY(stream->AddRow(row, &status))) return Status::OK();
-  RETURN_IF_ERROR(status);
-  return AppendRowStreamFull(stream, row);
+inline bool PhjBuilder::AppendRow(
+    BufferedTupleStream* stream, TupleRow* row, Status* status) {
+  if (LIKELY(stream->AddRow(row, status))) return true;
+  if (UNLIKELY(!status->ok())) return false;
+  return AppendRowStreamFull(stream, row, status);
 }
 
 Status PhjBuilder::ProcessBuildBatch(
     RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) {
+  Status status;
   HashTableCtx::ExprValuesCache* expr_vals_cache = ctx->expr_values_cache();
   expr_vals_cache->Reset();
   FOREACH_ROW(build_batch, 0, build_batch_iter) {
@@ -47,7 +48,10 @@ Status PhjBuilder::ProcessBuildBatch(
         // TODO: remove with codegen/template
         // If we are NULL aware and this build row has NULL in the eq join slot,
         // append it to the null_aware partition. We will need it later.
-        RETURN_IF_ERROR(AppendRow(null_aware_partition_->build_rows(), build_row));
+        if (UNLIKELY(
+                !AppendRow(null_aware_partition_->build_rows(), build_row, &status))) {
+          return std::move(status);
+        }
       }
       continue;
     }
@@ -66,7 +70,9 @@ Status PhjBuilder::ProcessBuildBatch(
     const uint32_t hash = expr_vals_cache->CurExprValuesHash();
     const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
     Partition* partition = hash_partitions_[partition_idx];
-    RETURN_IF_ERROR(AppendRow(partition->build_rows(), build_row));
+    if (UNLIKELY(!AppendRow(partition->build_rows(), build_row, &status))) {
+      return std::move(status);
+    }
   }
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-hash-join-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.cc b/be/src/exec/partitioned-hash-join-builder.cc
index b17bbff..bf5b42a 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -260,23 +260,26 @@ Status PhjBuilder::CreateHashPartitions(int level) {
   return Status::OK();
 }
 
-Status PhjBuilder::AppendRowStreamFull(BufferedTupleStream* stream, TupleRow* row) {
-  Status status;
+bool PhjBuilder::AppendRowStreamFull(
+    BufferedTupleStream* stream, TupleRow* row, Status* status) noexcept {
   while (true) {
     // Check if the stream is still using small buffers and try to switch to IO-buffers.
     if (stream->using_small_buffers()) {
       bool got_buffer;
-      RETURN_IF_ERROR(stream->SwitchToIoBuffers(&got_buffer));
+      *status = stream->SwitchToIoBuffers(&got_buffer);
+      if (!status->ok()) return false;
+
       if (got_buffer) {
-        if (LIKELY(stream->AddRow(row, &status))) return Status::OK();
-        RETURN_IF_ERROR(status);
+        if (LIKELY(stream->AddRow(row, status))) return true;
+        if (!status->ok()) return false;
       }
     }
     // We ran out of memory. Pick a partition to spill. If we ran out of unspilled
     // partitions, SpillPartition() will return an error status.
-    RETURN_IF_ERROR(SpillPartition(BufferedTupleStream::UNPIN_ALL_EXCEPT_CURRENT));
-    if (stream->AddRow(row, &status)) return Status::OK();
-    RETURN_IF_ERROR(status);
+    *status = SpillPartition(BufferedTupleStream::UNPIN_ALL_EXCEPT_CURRENT);
+    if (!status->ok()) return false;
+    if (stream->AddRow(row, status)) return true;
+    if (!status->ok()) return false;
     // Spilling one partition does not guarantee we can append a row. Keep
     // spilling until we can append this row.
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-hash-join-builder.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.h b/be/src/exec/partitioned-hash-join-builder.h
index 23822b2..7f81e5a 100644
--- a/be/src/exec/partitioned-hash-join-builder.h
+++ b/be/src/exec/partitioned-hash-join-builder.h
@@ -261,14 +261,18 @@ class PhjBuilder : public DataSink {
 
   /// Append 'row' to 'stream'. In the common case, appending the row to the stream
   /// immediately succeeds. Otherwise this function falls back to the slower path of
-  /// AppendRowStreamFull(), which may spill partitions to free memory. Returns an error
-  /// if it was unable to append the row, even after spilling partitions.
-  Status AppendRow(BufferedTupleStream* stream, TupleRow* row);
+  /// AppendRowStreamFull(), which may spill partitions to free memory. Returns false
+  /// and sets 'status' if it was unable to append the row, even after spilling
+  /// partitions. This odd return convention is used to avoid emitting unnecessary code
+  /// for ~Status in perf-critical code.
+  bool AppendRow(BufferedTupleStream* stream, TupleRow* row, Status* status);
 
   /// Slow path for AppendRow() above. It is called when the stream has failed to append
   /// the row. We need to find more memory by either switching to IO-buffers, in case the
-  /// stream still uses small buffers, or spilling a partition.
-  Status AppendRowStreamFull(BufferedTupleStream* stream, TupleRow* row);
+  /// stream still uses small buffers, or spilling a partition. Returns false and sets
+  /// 'status' if it was unable to append the row, even after spilling partitions.
+  bool AppendRowStreamFull(
+      BufferedTupleStream* stream, TupleRow* row, Status* status) noexcept;
 
   /// Frees memory by spilling one of the hash partitions. The 'mode' argument is passed
   /// to the Spill() call for the selected partition. The current policy is to spill the

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-hash-join-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node-ir.cc b/be/src/exec/partitioned-hash-join-node-ir.cc
index 44cb14b..bbed06d 100644
--- a/be/src/exec/partitioned-hash-join-node-ir.cc
+++ b/be/src/exec/partitioned-hash-join-node-ir.cc
@@ -149,10 +149,11 @@ bool IR_ALWAYS_INLINE PartitionedHashJoinNode::ProcessProbeRowLeftSemiJoins(
       // build side. For those rows, we need to process the remaining join
       // predicates later.
       if (builder_->null_aware_partition()->build_rows()->num_rows() != 0) {
-        if (num_other_join_conjuncts > 0) {
-          *status = AppendProbeRow(
-              null_aware_probe_partition_->probe_rows(), current_probe_row_);
-          if (UNLIKELY(!status->ok())) return false;
+        if (num_other_join_conjuncts > 0
+            && UNLIKELY(!AppendProbeRow(null_aware_probe_partition_->probe_rows(),
+                   current_probe_row_, status))) {
+          DCHECK(!status->ok());
+          return false;
         }
         return true;
       }
@@ -217,7 +218,7 @@ bool IR_ALWAYS_INLINE PartitionedHashJoinNode::ProcessProbeRowOuterJoins(
   return true;
 }
 
-template<int const JoinOp>
+template <int const JoinOp>
 bool IR_ALWAYS_INLINE PartitionedHashJoinNode::ProcessProbeRow(
     ExprContext* const* other_join_conjunct_ctxs, int num_other_join_conjuncts,
     ExprContext* const* conjunct_ctxs, int num_conjuncts,
@@ -282,8 +283,11 @@ bool IR_ALWAYS_INLINE PartitionedHashJoinNode::NextProbeRow(
           skip_row = true;
         } else {
           // Condition 3 above.
-          *status = AppendProbeRow(null_probe_rows_.get(), current_probe_row_);
-          if (UNLIKELY(!status->ok())) return false;
+          if (UNLIKELY(
+                  !AppendProbeRow(null_probe_rows_.get(), current_probe_row_, status))) {
+            DCHECK(!status->ok());
+            return false;
+          }
           matched_null_probe_.push_back(false);
           skip_row = true;
         }
@@ -306,8 +310,10 @@ bool IR_ALWAYS_INLINE PartitionedHashJoinNode::NextProbeRow(
           // Skip the current row if we manage to append to the spilled partition's BTS.
           // Otherwise, we need to bail out and report the failure.
           BufferedTupleStream* probe_rows = probe_partition->probe_rows();
-          *status = AppendProbeRow(probe_rows, current_probe_row_);
-          if (UNLIKELY(!status->ok())) return false;
+          if (UNLIKELY(!AppendProbeRow(probe_rows, current_probe_row_, status))) {
+            DCHECK(!status->ok());
+            return false;
+          }
           skip_row = true;
         }
       }
@@ -426,15 +432,12 @@ int PartitionedHashJoinNode::ProcessProbeBatch(TPrefetchMode::type prefetch_mode
   return num_rows_added;
 }
 
-inline Status PartitionedHashJoinNode::AppendProbeRow(
-    BufferedTupleStream* stream, TupleRow* row) {
+inline bool PartitionedHashJoinNode::AppendProbeRow(
+    BufferedTupleStream* stream, TupleRow* row, Status* status) {
   DCHECK(stream->has_write_block());
   DCHECK(!stream->using_small_buffers());
   DCHECK(!stream->is_pinned());
-  Status status;
-  if (LIKELY(stream->AddRow(row, &status))) return Status::OK();
-  DCHECK(!status.ok());
-  return status;
+  return stream->AddRow(row, status);
 }
 
 template int PartitionedHashJoinNode::ProcessProbeBatch<TJoinOp::INNER_JOIN>(

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/exec/partitioned-hash-join-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node.h b/be/src/exec/partitioned-hash-join-node.h
index 9827788..5b9264c 100644
--- a/be/src/exec/partitioned-hash-join-node.h
+++ b/be/src/exec/partitioned-hash-join-node.h
@@ -166,7 +166,10 @@ class PartitionedHashJoinNode : public BlockingJoinNode {
 
   /// Append the probe row 'row' to 'stream'. The stream must be unpinned and must have
   /// a write buffer allocated, so this will succeed unless an error is encountered.
-  Status AppendProbeRow(BufferedTupleStream* stream, TupleRow* row);
+  /// Returns false and sets 'status' to an error if an error is encountered. This odd
+  /// return convention is used to avoid emitting unnecessary code for ~Status in perf-
+  /// critical code.
+  bool AppendProbeRow(BufferedTupleStream* stream, TupleRow* row, Status* status);
 
   /// Probes the hash table for rows matching the current probe row and appends
   /// all the matching build rows (with probe row) to output batch. Returns true
@@ -267,8 +270,8 @@ class PartitionedHashJoinNode : public BlockingJoinNode {
   /// probe_batch_ is entirely consumed.
   /// For RIGHT_ANTI_JOIN, all this function does is to mark whether each build row
   /// had a match.
-  /// Returns the number of rows added to out_batch; -1 on error (and *status will be
-  /// set). This function doesn't commit rows to the output batch so it's the caller's
+  /// Returns the number of rows added to out_batch; -1 on error (and *status will
+  /// be set). This function doesn't commit rows to the output batch so it's the caller's
   /// responsibility to do so.
   template<int const JoinOp>
   int ProcessProbeBatch(TPrefetchMode::type, RowBatch* out_batch, HashTableCtx* ht_ctx,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/runtime/buffered-tuple-stream.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream.cc b/be/src/runtime/buffered-tuple-stream.cc
index b2fce45..e3b3b4a 100644
--- a/be/src/runtime/buffered-tuple-stream.cc
+++ b/be/src/runtime/buffered-tuple-stream.cc
@@ -222,8 +222,8 @@ Status BufferedTupleStream::UnpinBlock(BufferedBlockMgr::Block* block) {
   return Status::OK();
 }
 
-Status BufferedTupleStream::NewWriteBlock(int64_t block_len, int64_t null_indicators_size,
-    bool* got_block) {
+Status BufferedTupleStream::NewWriteBlock(
+    int64_t block_len, int64_t null_indicators_size, bool* got_block) noexcept {
   DCHECK(!closed_);
   DCHECK_GE(null_indicators_size, 0);
   *got_block = false;
@@ -282,7 +282,8 @@ Status BufferedTupleStream::NewWriteBlock(int64_t block_len, int64_t null_indica
   return Status::OK();
 }
 
-Status BufferedTupleStream::NewWriteBlockForRow(int64_t row_size, bool* got_block) {
+Status BufferedTupleStream::NewWriteBlockForRow(
+    int64_t row_size, bool* got_block) noexcept {
   int64_t block_len;
   int64_t null_indicators_size;
   if (use_small_buffers_) {
@@ -694,7 +695,7 @@ void BufferedTupleStream::FixUpCollectionsForRead(const vector<SlotDescriptor*>&
   }
 }
 
-int64_t BufferedTupleStream::ComputeRowSize(TupleRow* row) const {
+int64_t BufferedTupleStream::ComputeRowSize(TupleRow* row) const noexcept {
   int64_t size = 0;
   if (has_nullable_tuple_) {
     for (int i = 0; i < fixed_tuple_sizes_.size(); ++i) {
@@ -733,7 +734,15 @@ int64_t BufferedTupleStream::ComputeRowSize(TupleRow* row) const {
   return size;
 }
 
-bool BufferedTupleStream::DeepCopy(TupleRow* row) {
+bool BufferedTupleStream::AddRowSlow(TupleRow* row, Status* status) noexcept {
+  bool got_block;
+  int64_t row_size = ComputeRowSize(row);
+  *status = NewWriteBlockForRow(row_size, &got_block);
+  if (!status->ok() || !got_block) return false;
+  return DeepCopy(row);
+}
+
+bool BufferedTupleStream::DeepCopy(TupleRow* row) noexcept {
   if (has_nullable_tuple_) {
     return DeepCopyInternal<true>(row);
   } else {
@@ -744,7 +753,7 @@ bool BufferedTupleStream::DeepCopy(TupleRow* row) {
 // TODO: this really needs codegen
 // TODO: in case of duplicate tuples, this can redundantly serialize data.
 template <bool HasNullableTuple>
-bool BufferedTupleStream::DeepCopyInternal(TupleRow* row) {
+bool BufferedTupleStream::DeepCopyInternal(TupleRow* row) noexcept {
   if (UNLIKELY(write_block_ == NULL)) return false;
   DCHECK_GE(write_block_null_indicators_size_, 0);
   DCHECK(write_block_->is_pinned()) << DebugString() << std::endl

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/runtime/buffered-tuple-stream.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream.h b/be/src/runtime/buffered-tuple-stream.h
index d3bfa81..d138150 100644
--- a/be/src/runtime/buffered-tuple-stream.h
+++ b/be/src/runtime/buffered-tuple-stream.h
@@ -238,15 +238,17 @@ class BufferedTupleStream {
   /// Must be called for streams using small buffers to switch to IO-sized buffers.
   /// If it fails to get a buffer (i.e. the switch fails) it resets the use_small_buffers_
   /// back to false.
-  /// TODO: this does not seem like the best mechanism.
+  /// TODO: IMPALA-3200: remove this when small buffers are removed.
   Status SwitchToIoBuffers(bool* got_buffer);
 
-  /// Adds a single row to the stream. Returns false and sets *status if an error
-  /// occurred. BufferedTupleStream will do a deep copy of the memory in the row.
-  /// After AddRow returns false, it should not be called again, unless
-  /// using_small_buffers_ is true, in which case it is valid to call SwitchToIoBuffers()
-  /// then AddRow() again.
-  bool AddRow(TupleRow* row, Status* status);
+  /// Adds a single row to the stream. Returns true if the append succeeded, returns false
+  /// and sets 'status' to OK if appending failed but can be retried or returns false and
+  /// sets 'status' to an error if an error occurred.
+  /// BufferedTupleStream will do a deep copy of the memory in the row. After AddRow()
+  /// returns an error, it should not be called again. If appending failed without an
+  /// error and the stream is using small buffers, it is valid to call
+  /// SwitchToIoBuffers() then AddRow() again.
+  bool AddRow(TupleRow* row, Status* status) noexcept;
 
   /// Allocates space to store a row of with fixed length 'fixed_size' and variable
   /// length data 'varlen_size'. If successful, returns the pointer where fixed length
@@ -458,11 +460,15 @@ class BufferedTupleStream {
   RuntimeProfile::Counter* unpin_timer_;
   RuntimeProfile::Counter* get_new_block_timer_;
 
+  /// The slow path for AddRow() that is called if there is not sufficient space in
+  /// the current block.
+  bool AddRowSlow(TupleRow* row, Status* status) noexcept;
+
   /// Copies 'row' into write_block_. Returns false if there is not enough space in
   /// 'write_block_'. After returning false, write_ptr_ may be left pointing to the
   /// partially-written row, and no more data can be written to write_block_.
   template <bool HAS_NULLABLE_TUPLE>
-  bool DeepCopyInternal(TupleRow* row);
+  bool DeepCopyInternal(TupleRow* row) noexcept;
 
   /// Helper function to copy strings in string_slots from tuple into write_block_.
   /// Updates write_ptr_ to the end of the string data added. Returns false if the data
@@ -480,7 +486,7 @@ class BufferedTupleStream {
       const std::vector<SlotDescriptor*>& collection_slots);
 
   /// Wrapper of the templated DeepCopyInternal() function.
-  bool DeepCopy(TupleRow* row);
+  bool DeepCopy(TupleRow* row) noexcept;
 
   /// Gets a new block of 'block_len' bytes from the block_mgr_, updating write_block_,
   /// write_tuple_idx_, write_ptr_ and write_end_ptr_. 'null_indicators_size' is the
@@ -488,12 +494,13 @@ class BufferedTupleStream {
   /// *got_block is set to true if a block was successfully acquired. Null indicators
   /// (if any) will also be reserved and initialized. If there are no blocks available,
   /// *got_block is set to false and write_block_ is unchanged.
-  Status NewWriteBlock(int64_t block_len, int64_t null_indicators_size, bool* got_block);
+  Status NewWriteBlock(
+      int64_t block_len, int64_t null_indicators_size, bool* got_block) noexcept;
 
   /// A wrapper around NewWriteBlock(). 'row_size' is the size of the tuple row to be
   /// appended to this block. This function determines the block size required in order
   /// to fit the row and null indicators.
-  Status NewWriteBlockForRow(int64_t row_size, bool* got_block);
+  Status NewWriteBlockForRow(int64_t row_size, bool* got_block) noexcept;
 
   /// Reads the next block from the block_mgr_. This blocks if necessary.
   /// Updates read_block_, read_ptr_, read_tuple_idx_ and read_end_ptr_.
@@ -502,7 +509,7 @@ class BufferedTupleStream {
   /// Returns the total additional bytes that this row will consume in write_block_ if
   /// appended to the block. This includes the fixed length part of the row and the
   /// data for inlined_string_slots_ and inlined_coll_slots_.
-  int64_t ComputeRowSize(TupleRow* row) const;
+  int64_t ComputeRowSize(TupleRow* row) const noexcept;
 
   /// Unpins block if it is an IO-sized block and updates tracking stats.
   Status UnpinBlock(BufferedBlockMgr::Block* block);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/runtime/buffered-tuple-stream.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream.inline.h b/be/src/runtime/buffered-tuple-stream.inline.h
index 7a2f247..ba6bb8c 100644
--- a/be/src/runtime/buffered-tuple-stream.inline.h
+++ b/be/src/runtime/buffered-tuple-stream.inline.h
@@ -25,14 +25,10 @@
 
 namespace impala {
 
-inline bool BufferedTupleStream::AddRow(TupleRow* row, Status* status) {
+inline bool BufferedTupleStream::AddRow(TupleRow* row, Status* status) noexcept {
   DCHECK(!closed_);
   if (LIKELY(DeepCopy(row))) return true;
-  bool got_block;
-  int64_t row_size = ComputeRowSize(row);
-  *status = NewWriteBlockForRow(row_size, &got_block);
-  if (!status->ok() || !got_block) return false;
-  return DeepCopy(row);
+  return AddRowSlow(row, status);
 }
 
 inline uint8_t* BufferedTupleStream::AllocateRow(int fixed_size, int varlen_size,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/runtime/raw-value.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc
index 7247b8e..aef58f2 100644
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -191,4 +191,173 @@ void RawValue::Write(const void* value, Tuple* tuple, const SlotDescriptor* slot
   }
 }
 
+uint32_t RawValue::GetHashValue(
+    const void* v, const ColumnType& type, uint32_t seed) noexcept {
+  // The choice of hash function needs to be consistent across all hosts of the cluster.
+
+  // Use HashCombine with arbitrary constant to ensure we don't return seed.
+  if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
+
+  switch (type.type) {
+    case TYPE_CHAR:
+    case TYPE_STRING:
+    case TYPE_VARCHAR:
+      return RawValue::GetHashValueNonNull<impala::StringValue>(
+          reinterpret_cast<const StringValue*>(v), type, seed);
+    case TYPE_BOOLEAN:
+      return RawValue::GetHashValueNonNull<bool>(
+          reinterpret_cast<const bool*>(v), type, seed);
+    case TYPE_TINYINT:
+      return RawValue::GetHashValueNonNull<int8_t>(
+          reinterpret_cast<const int8_t*>(v), type, seed);
+    case TYPE_SMALLINT:
+      return RawValue::GetHashValueNonNull<int16_t>(
+          reinterpret_cast<const int16_t*>(v), type, seed);
+    case TYPE_INT:
+      return RawValue::GetHashValueNonNull<int32_t>(
+          reinterpret_cast<const int32_t*>(v), type, seed);
+    case TYPE_BIGINT:
+      return RawValue::GetHashValueNonNull<int64_t>(
+          reinterpret_cast<const int64_t*>(v), type, seed);
+    case TYPE_FLOAT:
+      return RawValue::GetHashValueNonNull<float>(
+          reinterpret_cast<const float*>(v), type, seed);
+    case TYPE_DOUBLE:
+      return RawValue::GetHashValueNonNull<double>(
+          reinterpret_cast<const double*>(v), type, seed);
+    case TYPE_TIMESTAMP:
+      return RawValue::GetHashValueNonNull<TimestampValue>(
+          reinterpret_cast<const TimestampValue*>(v), type, seed);
+    case TYPE_DECIMAL:
+      switch (type.GetByteSize()) {
+        case 4:
+          return RawValue::GetHashValueNonNull<Decimal4Value>(
+              reinterpret_cast<const impala::Decimal4Value*>(v), type, seed);
+        case 8:
+          return RawValue::GetHashValueNonNull<Decimal8Value>(
+              reinterpret_cast<const Decimal8Value*>(v), type, seed);
+        case 16:
+          return RawValue::GetHashValueNonNull<Decimal16Value>(
+              reinterpret_cast<const Decimal16Value*>(v), type, seed);
+          DCHECK(false);
+      }
+    default: DCHECK(false); return 0;
+  }
+}
+
+uint32_t RawValue::GetHashValueFnv(const void* v, const ColumnType& type, uint32_t seed) {
+  // Use HashCombine with arbitrary constant to ensure we don't return seed.
+  if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
+
+  switch (type.type) {
+    case TYPE_STRING:
+    case TYPE_VARCHAR: {
+      const StringValue* string_value = reinterpret_cast<const StringValue*>(v);
+      if (string_value->len == 0) {
+        return HashUtil::HashCombine32(HASH_VAL_EMPTY, seed);
+      }
+      return HashUtil::FnvHash64to32(string_value->ptr, string_value->len, seed);
+    }
+    case TYPE_BOOLEAN:
+      return HashUtil::HashCombine32(*reinterpret_cast<const bool*>(v), seed);
+    case TYPE_TINYINT: return HashUtil::FnvHash64to32(v, 1, seed);
+    case TYPE_SMALLINT: return HashUtil::FnvHash64to32(v, 2, seed);
+    case TYPE_INT: return HashUtil::FnvHash64to32(v, 4, seed);
+    case TYPE_BIGINT: return HashUtil::FnvHash64to32(v, 8, seed);
+    case TYPE_FLOAT: return HashUtil::FnvHash64to32(v, 4, seed);
+    case TYPE_DOUBLE: return HashUtil::FnvHash64to32(v, 8, seed);
+    case TYPE_TIMESTAMP: return HashUtil::FnvHash64to32(v, 12, seed);
+    case TYPE_CHAR:
+      return HashUtil::FnvHash64to32(StringValue::CharSlotToPtr(v, type), type.len, seed);
+    case TYPE_DECIMAL: return HashUtil::FnvHash64to32(v, type.GetByteSize(), seed);
+    default: DCHECK(false); return 0;
+  }
+}
+
+void RawValue::PrintValue(
+    const void* value, const ColumnType& type, int scale, std::stringstream* stream) {
+  if (value == NULL) {
+    *stream << "NULL";
+    return;
+  }
+
+  int old_precision = stream->precision();
+  std::ios_base::fmtflags old_flags = stream->flags();
+  if (scale > -1) {
+    stream->precision(scale);
+    // Setting 'fixed' causes precision to set the number of digits printed after the
+    // decimal (by default it sets the maximum number of digits total).
+    *stream << std::fixed;
+  }
+
+  const StringValue* string_val = NULL;
+  switch (type.type) {
+    case TYPE_BOOLEAN: {
+      bool val = *reinterpret_cast<const bool*>(value);
+      *stream << (val ? "true" : "false");
+      return;
+    }
+    case TYPE_TINYINT:
+      // Extra casting for chars since they should not be interpreted as ASCII.
+      *stream << static_cast<int>(*reinterpret_cast<const int8_t*>(value));
+      break;
+    case TYPE_SMALLINT: *stream << *reinterpret_cast<const int16_t*>(value); break;
+    case TYPE_INT: *stream << *reinterpret_cast<const int32_t*>(value); break;
+    case TYPE_BIGINT: *stream << *reinterpret_cast<const int64_t*>(value); break;
+    case TYPE_FLOAT: {
+      float val = *reinterpret_cast<const float*>(value);
+      if (LIKELY(std::isfinite(val))) {
+        *stream << val;
+      } else if (std::isinf(val)) {
+        // 'Infinity' is Java's text representation of inf. By staying close to Java, we
+        // allow Hive to read text tables containing non-finite values produced by
+        // Impala. (The same logic applies to 'NaN', below).
+        *stream << (val < 0 ? "-Infinity" : "Infinity");
+      } else if (std::isnan(val)) {
+        *stream << "NaN";
+      }
+    } break;
+    case TYPE_DOUBLE: {
+      double val = *reinterpret_cast<const double*>(value);
+      if (LIKELY(std::isfinite(val))) {
+        *stream << val;
+      } else if (std::isinf(val)) {
+        // See TYPE_FLOAT for rationale.
+        *stream << (val < 0 ? "-Infinity" : "Infinity");
+      } else if (std::isnan(val)) {
+        *stream << "NaN";
+      }
+    } break;
+    case TYPE_VARCHAR:
+    case TYPE_STRING:
+      string_val = reinterpret_cast<const StringValue*>(value);
+      if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);
+      stream->write(string_val->ptr, string_val->len);
+      break;
+    case TYPE_TIMESTAMP:
+      *stream << *reinterpret_cast<const TimestampValue*>(value);
+      break;
+    case TYPE_CHAR:
+      stream->write(StringValue::CharSlotToPtr(value, type), type.len);
+      break;
+    case TYPE_DECIMAL:
+      switch (type.GetByteSize()) {
+        case 4:
+          *stream << reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
+          break;
+        case 8:
+          *stream << reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
+          break;
+        case 16:
+          *stream << reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
+          break;
+        default: DCHECK(false) << type;
+      }
+      break;
+    default: DCHECK(false);
+  }
+  stream->precision(old_precision);
+  // Undo setting stream to fixed
+  stream->flags(old_flags);
+}
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/runtime/raw-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.h b/be/src/runtime/raw-value.h
index bc76b2c..5ec8ed1 100644
--- a/be/src/runtime/raw-value.h
+++ b/be/src/runtime/raw-value.h
@@ -54,8 +54,8 @@ class RawValue {
 
   /// Returns hash value for 'v' interpreted as 'type'.  The resulting hash value
   /// is combined with the seed value.
-  static inline uint32_t GetHashValue(const void* v, const ColumnType& type,
-      uint32_t seed = 0);
+  static uint32_t GetHashValue(
+      const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;
 
   /// Templatized version of GetHashValue, use if type is known ahead. GetHashValue
   /// handles nulls.
@@ -74,8 +74,7 @@ class RawValue {
   /// GetHashValue() does not have this property and cannot be safely used as the first
   /// step in data repartitioning. However, GetHashValue() can be significantly faster.
   /// TODO: fix GetHashValue
-  static inline uint32_t GetHashValueFnv(const void* v, const ColumnType& type,
-      uint32_t seed);
+  static uint32_t GetHashValueFnv(const void* v, const ColumnType& type, uint32_t seed);
 
   /// Compares both values.
   /// Return value is < 0  if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/runtime/raw-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.inline.h b/be/src/runtime/raw-value.inline.h
index a1f1d75..63c9a07 100644
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -211,194 +211,6 @@ inline uint32_t RawValue::GetHashValue(const T* v, const ColumnType& type,
   if (UNLIKELY(v == NULL)) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
   return RawValue::GetHashValueNonNull<T>(v, type, seed);
 }
-
-inline uint32_t RawValue::GetHashValue(const void* v, const ColumnType& type,
-    uint32_t seed) {
-  //The choice of hash function needs to be consistent across all hosts of the cluster.
-
-  // Use HashCombine with arbitrary constant to ensure we don't return seed.
-  if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
-
-  switch (type.type) {
-    case TYPE_CHAR:
-    case TYPE_STRING:
-    case TYPE_VARCHAR:
-      return RawValue::GetHashValueNonNull<impala::StringValue>(
-        reinterpret_cast<const StringValue*>(v), type, seed);
-    case TYPE_BOOLEAN:
-      return RawValue::GetHashValueNonNull<bool>(
-        reinterpret_cast<const bool*>(v), type, seed);
-    case TYPE_TINYINT:
-      return RawValue::GetHashValueNonNull<int8_t>(
-        reinterpret_cast<const int8_t*>(v), type, seed);
-    case TYPE_SMALLINT:
-      return RawValue::GetHashValueNonNull<int16_t>(
-        reinterpret_cast<const int16_t*>(v), type, seed);
-    case TYPE_INT:
-      return RawValue::GetHashValueNonNull<int32_t>(
-        reinterpret_cast<const int32_t*>(v), type, seed);
-    case TYPE_BIGINT:
-      return RawValue::GetHashValueNonNull<int64_t>(
-        reinterpret_cast<const int64_t*>(v), type, seed);
-    case TYPE_FLOAT:
-      return  RawValue::GetHashValueNonNull<float>(
-        reinterpret_cast<const float*>(v), type, seed);
-    case TYPE_DOUBLE:
-      return RawValue::GetHashValueNonNull<double>(
-        reinterpret_cast<const double*>(v), type, seed);
-    case TYPE_TIMESTAMP:
-      return  RawValue::GetHashValueNonNull<TimestampValue>(
-        reinterpret_cast<const TimestampValue*>(v), type, seed);
-    case TYPE_DECIMAL:
-      switch(type.GetByteSize()) {
-        case 4: return
-          RawValue::GetHashValueNonNull<Decimal4Value>(
-            reinterpret_cast<const impala::Decimal4Value*>(v), type, seed);
-        case 8:
-          return RawValue::GetHashValueNonNull<Decimal8Value>(
-            reinterpret_cast<const Decimal8Value*>(v), type, seed);
-        case 16:
-          return RawValue::GetHashValueNonNull<Decimal16Value>(
-            reinterpret_cast<const Decimal16Value*>(v), type, seed);
-        DCHECK(false);
-    }
-    default:
-      DCHECK(false);
-      return 0;
-  }
-}
-
-inline uint32_t RawValue::GetHashValueFnv(const void* v, const ColumnType& type,
-    uint32_t seed) {
-  // Use HashCombine with arbitrary constant to ensure we don't return seed.
-  if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
-
-  switch (type.type ) {
-    case TYPE_STRING:
-    case TYPE_VARCHAR: {
-      const StringValue* string_value = reinterpret_cast<const StringValue*>(v);
-      if (string_value->len == 0) {
-        return HashUtil::HashCombine32(HASH_VAL_EMPTY, seed);
-      }
-      return HashUtil::FnvHash64to32(string_value->ptr, string_value->len, seed);
-    }
-    case TYPE_BOOLEAN:
-      return HashUtil::HashCombine32(*reinterpret_cast<const bool*>(v), seed);
-    case TYPE_TINYINT: return HashUtil::FnvHash64to32(v, 1, seed);
-    case TYPE_SMALLINT: return HashUtil::FnvHash64to32(v, 2, seed);
-    case TYPE_INT: return HashUtil::FnvHash64to32(v, 4, seed);
-    case TYPE_BIGINT: return HashUtil::FnvHash64to32(v, 8, seed);
-    case TYPE_FLOAT: return HashUtil::FnvHash64to32(v, 4, seed);
-    case TYPE_DOUBLE: return HashUtil::FnvHash64to32(v, 8, seed);
-    case TYPE_TIMESTAMP: return HashUtil::FnvHash64to32(v, 12, seed);
-    case TYPE_CHAR: return HashUtil::FnvHash64to32(StringValue::CharSlotToPtr(v, type),
-                                                   type.len, seed);
-    case TYPE_DECIMAL: return HashUtil::FnvHash64to32(v, type.GetByteSize(), seed);
-    default:
-      DCHECK(false);
-      return 0;
-  }
-}
-
-inline void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,
-    std::stringstream* stream) {
-  if (value == NULL) {
-    *stream << "NULL";
-    return;
-  }
-
-  int old_precision = stream->precision();
-  std::ios_base::fmtflags old_flags = stream->flags();
-  if (scale > -1) {
-    stream->precision(scale);
-    // Setting 'fixed' causes precision to set the number of digits printed after the
-    // decimal (by default it sets the maximum number of digits total).
-    *stream << std::fixed;
-  }
-
-  const StringValue* string_val = NULL;
-  switch (type.type) {
-    case TYPE_BOOLEAN: {
-      bool val = *reinterpret_cast<const bool*>(value);
-      *stream << (val ? "true" : "false");
-      return;
-    }
-    case TYPE_TINYINT:
-      // Extra casting for chars since they should not be interpreted as ASCII.
-      *stream << static_cast<int>(*reinterpret_cast<const int8_t*>(value));
-      break;
-    case TYPE_SMALLINT:
-      *stream << *reinterpret_cast<const int16_t*>(value);
-      break;
-    case TYPE_INT:
-      *stream << *reinterpret_cast<const int32_t*>(value);
-      break;
-    case TYPE_BIGINT:
-      *stream << *reinterpret_cast<const int64_t*>(value);
-      break;
-    case TYPE_FLOAT:
-      {
-        float val = *reinterpret_cast<const float*>(value);
-        if (LIKELY(std::isfinite(val))) {
-          *stream << val;
-        } else if (std::isinf(val)) {
-          // 'Infinity' is Java's text representation of inf. By staying close to Java, we
-          // allow Hive to read text tables containing non-finite values produced by
-          // Impala. (The same logic applies to 'NaN', below).
-          *stream << (val < 0 ? "-Infinity" : "Infinity");
-        } else if (std::isnan(val)) {
-          *stream << "NaN";
-        }
-      }
-      break;
-    case TYPE_DOUBLE:
-      {
-        double val = *reinterpret_cast<const double*>(value);
-        if (LIKELY(std::isfinite(val))) {
-          *stream << val;
-        } else if (std::isinf(val)) {
-          // See TYPE_FLOAT for rationale.
-          *stream << (val < 0 ? "-Infinity" : "Infinity");
-        } else if (std::isnan(val)) {
-          *stream << "NaN";
-        }
-      }
-      break;
-    case TYPE_VARCHAR:
-    case TYPE_STRING:
-      string_val = reinterpret_cast<const StringValue*>(value);
-      if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);
-      stream->write(string_val->ptr, string_val->len);
-      break;
-    case TYPE_TIMESTAMP:
-      *stream << *reinterpret_cast<const TimestampValue*>(value);
-      break;
-    case TYPE_CHAR:
-      stream->write(StringValue::CharSlotToPtr(value, type), type.len);
-      break;
-    case TYPE_DECIMAL:
-      switch (type.GetByteSize()) {
-        case 4:
-          *stream << reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
-          break;
-        case 8:
-          *stream << reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
-          break;
-        case 16:
-          *stream << reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
-          break;
-        default:
-          DCHECK(false) << type;
-      }
-      break;
-    default:
-      DCHECK(false);
-  }
-  stream->precision(old_precision);
-  // Undo setting stream to fixed
-  stream->flags(old_flags);
-}
-
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/util/bloom-filter.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter.cc b/be/src/util/bloom-filter.cc
index 7d8c8f7..6fd53f5 100644
--- a/be/src/util/bloom-filter.cc
+++ b/be/src/util/bloom-filter.cc
@@ -83,6 +83,78 @@ void BloomFilter::ToThrift(const BloomFilter* filter, TBloomFilter* thrift) {
   filter->ToThrift(thrift);
 }
 
+// The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
+// compile with -fno-strict-aliasing.
+
+void BloomFilter::BucketInsert(const uint32_t bucket_idx, const uint32_t hash) {
+  // new_bucket will be all zeros except for eight 1-bits, one in each 32-bit word. It is
+  // 16-byte aligned so it can be read as a __m128i using aligned SIMD loads in the second
+  // part of this method.
+  uint32_t new_bucket[8] __attribute__((aligned(16)));
+  for (int i = 0; i < 8; ++i) {
+    // Rehash 'hash' and use the top LOG_BUCKET_WORD_BITS bits, following Dietzfelbinger.
+    new_bucket[i] =
+        (REHASH[i] * hash) >> ((1 << LOG_BUCKET_WORD_BITS) - LOG_BUCKET_WORD_BITS);
+    new_bucket[i] = 1U << new_bucket[i];
+  }
+  for (int i = 0; i < 2; ++i) {
+    __m128i new_bucket_sse =
+        _mm_load_si128(reinterpret_cast<__m128i*>(new_bucket + 4 * i));
+    __m128i* existing_bucket = reinterpret_cast<__m128i*>(&directory_[bucket_idx][4 * i]);
+    *existing_bucket = _mm_or_si128(*existing_bucket, new_bucket_sse);
+  }
+}
+
+__m256i BloomFilter::MakeMask(const uint32_t hash) {
+   const __m256i ones = _mm256_set1_epi32(1);
+   const __m256i rehash = _mm256_setr_epi32(IMPALA_BLOOM_HASH_CONSTANTS);
+  // Load hash into a YMM register, repeated eight times
+  __m256i hash_data = _mm256_set1_epi32(hash);
+  // Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
+  // odd constants, then keep the 5 most significant bits from each product.
+  hash_data = _mm256_mullo_epi32(rehash, hash_data);
+  hash_data = _mm256_srli_epi32(hash_data, 27);
+  // Use these 5 bits to shift a single bit to a location in each 32-bit lane
+  return _mm256_sllv_epi32(ones, hash_data);
+}
+
+void BloomFilter::BucketInsertAVX2(
+    const uint32_t bucket_idx, const uint32_t hash) {
+  const __m256i mask = MakeMask(hash);
+  __m256i* const bucket = &reinterpret_cast<__m256i*>(directory_)[bucket_idx];
+  _mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));
+  // For SSE compatibility, unset the high bits of each YMM register so SSE instructions
+  // dont have to save them off before using XMM registers.
+  _mm256_zeroupper();
+}
+
+bool BloomFilter::BucketFindAVX2(
+    const uint32_t bucket_idx, const uint32_t hash) const {
+  const __m256i mask = MakeMask(hash);
+  const __m256i bucket = reinterpret_cast<__m256i*>(directory_)[bucket_idx];
+  // We should return true if 'bucket' has a one wherever 'mask' does. _mm256_testc_si256
+  // takes the negation of its first argument and ands that with its second argument. In
+  // our case, the result is zero everywhere iff there is a one in 'bucket' wherever
+  // 'mask' is one. testc returns 1 if the result is 0 everywhere and returns 0 otherwise.
+  const bool result = _mm256_testc_si256(bucket, mask);
+  _mm256_zeroupper();
+  return result;
+}
+
+bool BloomFilter::BucketFind(
+    const uint32_t bucket_idx, const uint32_t hash) const {
+  for (int i = 0; i < BUCKET_WORDS; ++i) {
+    BucketWord hval =
+        (REHASH[i] * hash) >> ((1 << LOG_BUCKET_WORD_BITS) - LOG_BUCKET_WORD_BITS);
+    hval = 1U << hval;
+    if (!(directory_[bucket_idx][i] & hval)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+
 void BloomFilter::Or(const TBloomFilter& in, TBloomFilter* out) {
   DCHECK(out != NULL);
   DCHECK_EQ(in.log_heap_space, out->log_heap_space);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7fe4385/be/src/util/bloom-filter.h
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter.h b/be/src/util/bloom-filter.h
index 7a94995..4342814 100644
--- a/be/src/util/bloom-filter.h
+++ b/be/src/util/bloom-filter.h
@@ -25,9 +25,9 @@
 
 #include <immintrin.h>
 
-#include "gutil/macros.h"
-
+#include "common/compiler-util.h"
 #include "gen-cpp/ImpalaInternalService_types.h"
+#include "gutil/macros.h"
 #include "runtime/buffered-block-mgr.h"
 
 namespace impala {
@@ -173,7 +173,7 @@ class BloomFilter {
 // the advantage of requiring fewer random bits: log2(32) * 8 = 5 * 8 = 40 random bits for
 // a split Bloom filter, but log2(256) * 8 = 64 random bits for a standard Bloom filter.
 
-inline void BloomFilter::Insert(const uint32_t hash) {
+inline void ALWAYS_INLINE BloomFilter::Insert(const uint32_t hash) {
   const uint32_t bucket_idx = HashUtil::Rehash32to32(hash) & directory_mask_;
   if (CpuInfo::IsSupported(CpuInfo::AVX2)) {
     BucketInsertAVX2(bucket_idx, hash);
@@ -182,7 +182,7 @@ inline void BloomFilter::Insert(const uint32_t hash) {
   }
 }
 
-inline bool BloomFilter::Find(const uint32_t hash) const {
+inline bool ALWAYS_INLINE BloomFilter::Find(const uint32_t hash) const {
   const uint32_t bucket_idx = HashUtil::Rehash32to32(hash) & directory_mask_;
   if (CpuInfo::IsSupported(CpuInfo::AVX2)) {
     return BucketFindAVX2(bucket_idx, hash);
@@ -191,78 +191,6 @@ inline bool BloomFilter::Find(const uint32_t hash) const {
   }
 }
 
-// The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
-// compile with -fno-strict-aliasing.
-
-inline void BloomFilter::BucketInsert(const uint32_t bucket_idx, const uint32_t hash) {
-  // new_bucket will be all zeros except for eight 1-bits, one in each 32-bit word. It is
-  // 16-byte aligned so it can be read as a __m128i using aligned SIMD loads in the second
-  // part of this method.
-  uint32_t new_bucket[8] __attribute__((aligned(16)));
-  for (int i = 0; i < 8; ++i) {
-    // Rehash 'hash' and use the top LOG_BUCKET_WORD_BITS bits, following Dietzfelbinger.
-    new_bucket[i] =
-        (REHASH[i] * hash) >> ((1 << LOG_BUCKET_WORD_BITS) - LOG_BUCKET_WORD_BITS);
-    new_bucket[i] = 1U << new_bucket[i];
-  }
-  for (int i = 0; i < 2; ++i) {
-    __m128i new_bucket_sse =
-        _mm_load_si128(reinterpret_cast<__m128i*>(new_bucket + 4 * i));
-    __m128i* existing_bucket = reinterpret_cast<__m128i*>(&directory_[bucket_idx][4 * i]);
-    *existing_bucket = _mm_or_si128(*existing_bucket, new_bucket_sse);
-  }
-}
-
-inline __m256i BloomFilter::MakeMask(const uint32_t hash) {
-   const __m256i ones = _mm256_set1_epi32(1);
-   const __m256i rehash = _mm256_setr_epi32(IMPALA_BLOOM_HASH_CONSTANTS);
-  // Load hash into a YMM register, repeated eight times
-  __m256i hash_data = _mm256_set1_epi32(hash);
-  // Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
-  // odd constants, then keep the 5 most significant bits from each product.
-  hash_data = _mm256_mullo_epi32(rehash, hash_data);
-  hash_data = _mm256_srli_epi32(hash_data, 27);
-  // Use these 5 bits to shift a single bit to a location in each 32-bit lane
-  return _mm256_sllv_epi32(ones, hash_data);
-}
-
-inline void BloomFilter::BucketInsertAVX2(
-    const uint32_t bucket_idx, const uint32_t hash) {
-  const __m256i mask = MakeMask(hash);
-  __m256i* const bucket = &reinterpret_cast<__m256i*>(directory_)[bucket_idx];
-  _mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));
-  // For SSE compatibility, unset the high bits of each YMM register so SSE instructions
-  // dont have to save them off before using XMM registers.
-  _mm256_zeroupper();
-}
-
-inline bool BloomFilter::BucketFindAVX2(
-    const uint32_t bucket_idx, const uint32_t hash) const {
-  const __m256i mask = MakeMask(hash);
-  const __m256i bucket = reinterpret_cast<__m256i*>(directory_)[bucket_idx];
-  // We should return true if 'bucket' has a one wherever 'mask' does. _mm256_testc_si256
-  // takes the negation of its first argument and ands that with its second argument. In
-  // our case, the result is zero everywhere iff there is a one in 'bucket' wherever
-  // 'mask' is one. testc returns 1 if the result is 0 everywhere and returns 0 otherwise.
-  const bool result = _mm256_testc_si256(bucket, mask);
-  _mm256_zeroupper();
-  return result;
-}
-
-inline bool BloomFilter::BucketFind(
-    const uint32_t bucket_idx, const uint32_t hash) const {
-  for (int i = 0; i < BUCKET_WORDS; ++i) {
-    BucketWord hval =
-        (REHASH[i] * hash) >> ((1 << LOG_BUCKET_WORD_BITS) - LOG_BUCKET_WORD_BITS);
-    hval = 1U << hval;
-    if (!(directory_[bucket_idx][i] & hval)) {
-      return false;
-    }
-  }
-  return true;
-}
-
 }  // namespace impala
 
-#undef IMPALA_BLOOM_HASH_CONSTANTS
 #endif  // IMPALA_UTIL_BLOOM_H


[25/32] incubator-impala git commit: Buffer pool: Add basic counters to buffer pool client

Posted by ta...@apache.org.
Buffer pool: Add basic counters to buffer pool client

Change-Id: I9a5a57b7cfccf67ee498e68964f1e077075ee325
Reviewed-on: http://gerrit.cloudera.org:8080/4714
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e3a08914
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e3a08914
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e3a08914

Branch: refs/heads/hadoop-next
Commit: e3a08914451a63fe65e8f66afc743739f4570ba4
Parents: 07da767
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri Oct 7 09:23:59 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 04:48:43 2016 +0000

----------------------------------------------------------------------
 be/src/bufferpool/buffer-pool-counters.h      | 47 ++++++++++++++++++++++
 be/src/bufferpool/buffer-pool-test.cc         | 30 +++++++-------
 be/src/bufferpool/buffer-pool.cc              | 33 ++++++++++++---
 be/src/bufferpool/buffer-pool.h               | 15 +++++--
 be/src/bufferpool/reservation-tracker-test.cc |  9 +----
 5 files changed, 102 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool-counters.h
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool-counters.h b/be/src/bufferpool/buffer-pool-counters.h
new file mode 100644
index 0000000..6f3801e
--- /dev/null
+++ b/be/src/bufferpool/buffer-pool-counters.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_BUFFER_POOL_COUNTERS_H
+#define IMPALA_BUFFER_POOL_COUNTERS_H
+
+#include "util/runtime-profile.h"
+
+namespace impala {
+
+/// A set of counters for each buffer pool client.
+struct BufferPoolClientCounters {
+ public:
+  /// Amount of time spent trying to get a buffer.
+  RuntimeProfile::Counter* get_buffer_time;
+
+  /// Amount of time spent waiting for reads from disk to complete.
+  RuntimeProfile::Counter* read_wait_time;
+
+  /// Amount of time spent waiting for writes to disk to complete.
+  RuntimeProfile::Counter* write_wait_time;
+
+  /// The peak total size of unpinned buffers.
+  RuntimeProfile::HighWaterMarkCounter* peak_unpinned_bytes;
+
+  /// The total bytes of data unpinned. Every time a page's pin count goes from 1 to 0,
+  /// this counter is incremented by the page size.
+  RuntimeProfile::Counter* total_unpinned_bytes;
+};
+
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool-test.cc
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool-test.cc b/be/src/bufferpool/buffer-pool-test.cc
index 16cf12c..793bcb9 100644
--- a/be/src/bufferpool/buffer-pool-test.cc
+++ b/be/src/bufferpool/buffer-pool-test.cc
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <boost/bind.hpp>
 #include <boost/scoped_ptr.hpp>
 #include <boost/thread/thread.hpp>
@@ -29,7 +28,7 @@
 #include "common/init.h"
 #include "common/object-pool.h"
 #include "testutil/death-test-util.h"
-#include "testutil/test-macros.h"
+#include "testutil/gtest-util.h"
 
 #include "common/names.h"
 
@@ -125,7 +124,8 @@ void BufferPoolTest::RegisterQueriesAndClients(BufferPool* pool, int query_id_hi
       EXPECT_TRUE(
           client_reservations[i][j].IncreaseReservationToFit(initial_client_reservation));
       string name = Substitute("Client $0 for query $1", j, query_id);
-      EXPECT_OK(pool->RegisterClient(name, &client_reservations[i][j], &clients[i][j]));
+      EXPECT_OK(pool->RegisterClient(
+          name, &client_reservations[i][j], NewProfile(), &clients[i][j]));
     }
 
     for (int j = 0; j < clients_per_query; ++j) {
@@ -209,7 +209,7 @@ TEST_F(BufferPoolTest, PageCreation) {
   client_tracker->InitChildTracker(NewProfile(), &global_reservations_, NULL, total_mem);
   ASSERT_TRUE(client_tracker->IncreaseReservation(total_mem));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   vector<BufferPool::PageHandle> handles(num_pages);
 
@@ -256,7 +256,7 @@ TEST_F(BufferPoolTest, BufferAllocation) {
   client_tracker->InitChildTracker(NewProfile(), &global_reservations_, NULL, total_mem);
   ASSERT_TRUE(client_tracker->IncreaseReservationToFit(total_mem));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   vector<BufferPool::BufferHandle> handles(num_buffers);
 
@@ -302,7 +302,8 @@ TEST_F(BufferPoolTest, BufferTransfer) {
     client_trackers[i].InitChildTracker(
         NewProfile(), &global_reservations_, NULL, TEST_BUFFER_LEN);
     ASSERT_TRUE(client_trackers[i].IncreaseReservationToFit(TEST_BUFFER_LEN));
-    ASSERT_OK(pool.RegisterClient("test client", &client_trackers[i], &clients[i]));
+    ASSERT_OK(pool.RegisterClient(
+        "test client", &client_trackers[i], NewProfile(), &clients[i]));
   }
 
   // Transfer the page around between the clients repeatedly in a circle.
@@ -344,7 +345,7 @@ TEST_F(BufferPoolTest, Pin) {
       NewProfile(), &global_reservations_, NULL, child_reservation);
   ASSERT_TRUE(client_tracker->IncreaseReservationToFit(child_reservation));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   BufferPool::PageHandle handle1, handle2;
 
@@ -395,7 +396,7 @@ TEST_F(BufferPoolTest, PinWithoutReservation) {
   client_tracker->InitChildTracker(
       NewProfile(), &global_reservations_, NULL, TEST_BUFFER_LEN);
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   BufferPool::PageHandle handle;
   IMPALA_ASSERT_DEBUG_DEATH(pool.CreatePage(&client, TEST_BUFFER_LEN, &handle), "");
@@ -423,7 +424,7 @@ TEST_F(BufferPoolTest, ExtractBuffer) {
       NewProfile(), &global_reservations_, NULL, child_reservation);
   ASSERT_TRUE(client_tracker->IncreaseReservationToFit(child_reservation));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   BufferPool::PageHandle page;
   BufferPool::BufferHandle buffer;
@@ -499,7 +500,7 @@ void BufferPoolTest::CreatePageLoop(
   ReservationTracker client_tracker;
   client_tracker.InitChildTracker(NewProfile(), parent_tracker, NULL, TEST_BUFFER_LEN);
   BufferPool::Client client;
-  ASSERT_OK(pool->RegisterClient("test client", &client_tracker, &client));
+  ASSERT_OK(pool->RegisterClient("test client", &client_tracker, NewProfile(), &client));
   for (int i = 0; i < num_ops; ++i) {
     BufferPool::PageHandle handle;
     ASSERT_TRUE(client_tracker.IncreaseReservation(TEST_BUFFER_LEN));
@@ -525,7 +526,8 @@ TEST_F(BufferPoolTest, CapacityExhausted) {
   BufferPool::PageHandle handle1, handle2, handle3;
 
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", &global_reservations_, &client));
+  ASSERT_OK(
+      pool.RegisterClient("test client", &global_reservations_, NewProfile(), &client));
   ASSERT_TRUE(global_reservations_.IncreaseReservation(TEST_BUFFER_LEN));
   ASSERT_OK(pool.CreatePage(&client, TEST_BUFFER_LEN, &handle1));
 
@@ -549,8 +551,4 @@ TEST_F(BufferPoolTest, CapacityExhausted) {
 }
 }
 
-int main(int argc, char** argv) {
-  ::testing::InitGoogleTest(&argc, argv);
-  impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
-  return RUN_ALL_TESTS();
-}
+IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool.cc
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool.cc b/be/src/bufferpool/buffer-pool.cc
index eaa4262..3035694 100644
--- a/be/src/bufferpool/buffer-pool.cc
+++ b/be/src/bufferpool/buffer-pool.cc
@@ -25,6 +25,7 @@
 #include "common/names.h"
 #include "gutil/strings/substitute.h"
 #include "util/bit-util.h"
+#include "util/runtime-profile-counters.h"
 #include "util/uid-util.h"
 
 namespace impala {
@@ -176,14 +177,26 @@ BufferPool::~BufferPool() {
 }
 
 Status BufferPool::RegisterClient(
-    const string& name, ReservationTracker* reservation, Client* client) {
+    const string& name, ReservationTracker* reservation, RuntimeProfile* profile,
+    Client* client) {
   DCHECK(!client->is_registered());
   DCHECK(reservation != NULL);
+  client->InitCounters(profile);
   client->reservation_ = reservation;
   client->name_ = name;
   return Status::OK();
 }
 
+void BufferPool::Client::InitCounters(RuntimeProfile* profile) {
+  counters_.get_buffer_time = ADD_TIMER(profile, "BufferPoolGetBufferTime");
+  counters_.read_wait_time = ADD_TIMER(profile, "BufferPoolReadWaitTime");
+  counters_.write_wait_time = ADD_TIMER(profile, "BufferPoolWriteWaitTime");
+  counters_.peak_unpinned_bytes =
+      profile->AddHighWaterMarkCounter("BufferPoolPeakUnpinnedBytes", TUnit::BYTES);
+  counters_.total_unpinned_bytes =
+      ADD_COUNTER(profile, "BufferPoolTotalUnpinnedBytes", TUnit::BYTES);
+}
+
 void BufferPool::DeregisterClient(Client* client) {
   if (!client->is_registered()) return;
   client->reservation_->Close();
@@ -256,13 +269,16 @@ Status BufferPool::Pin(Client* client, PageHandle* handle) {
   Page* page = handle->page_;
   {
     lock_guard<SpinLock> pl(page->lock); // Lock page while we work on its state.
-    if (!page->buffer.is_open()) {
-      // No changes have been made to state yet, so we can cleanly return on error.
-      RETURN_IF_ERROR(AllocateBufferInternal(client, page->len, &page->buffer));
+    if (page->pin_count == 0)  {
+      if (!page->buffer.is_open()) {
+        // No changes have been made to state yet, so we can cleanly return on error.
+        RETURN_IF_ERROR(AllocateBufferInternal(client, page->len, &page->buffer));
+
+        // TODO: will need to initiate/wait for read if the page is not in-memory.
+      }
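+      // The page no longer counts towards the bytes currently unpinned; the negative
+      // delta lowers the counter's current value without disturbing the recorded peak.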
+      COUNTER_ADD(client->counters_.peak_unpinned_bytes, -handle->len());
     }
     page->IncrementPinCount(handle);
-
-    // TODO: will need to initiate/wait for read if the page is not in-memory.
   }
 
   client->reservation_->AllocateFrom(page->len);
@@ -286,12 +302,16 @@ void BufferPool::UnpinLocked(Client* client, PageHandle* handle) {
   page->DecrementPinCount(handle);
   client->reservation_->ReleaseTo(page->len);
 
+  COUNTER_ADD(client->counters_.total_unpinned_bytes, handle->len());
+  COUNTER_ADD(client->counters_.peak_unpinned_bytes, handle->len());
+
   // TODO: can evict now. Only need to preserve contents if 'page->dirty' is true.
 }
 
 void BufferPool::ExtractBuffer(
     Client* client, PageHandle* page_handle, BufferHandle* buffer_handle) {
   DCHECK(page_handle->is_pinned());
+
   DCHECK_EQ(page_handle->client_, client);
 
   Page* page = page_handle->page_;
@@ -316,6 +336,7 @@ Status BufferPool::AllocateBufferInternal(
   DCHECK(!buffer->is_open());
   DCHECK_GE(len, min_buffer_len_);
   DCHECK_EQ(len, BitUtil::RoundUpToPowerOfTwo(len));
+  SCOPED_TIMER(client->counters_.get_buffer_time);
 
   // If there is headroom in 'buffer_bytes_remaining_', we can just allocate a new buffer.
   if (TryDecreaseBufferBytesRemaining(len)) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool.h
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool.h b/be/src/bufferpool/buffer-pool.h
index 44b5574..6a9641d 100644
--- a/be/src/bufferpool/buffer-pool.h
+++ b/be/src/bufferpool/buffer-pool.h
@@ -24,6 +24,7 @@
 #include <string>
 
 #include "bufferpool/buffer-allocator.h"
+#include "bufferpool/buffer-pool-counters.h"
 #include "common/atomic.h"
 #include "common/status.h"
 #include "gutil/macros.h"
@@ -167,10 +168,11 @@ class BufferPool {
 
   /// Register a client. Returns an error status and does not register the client if the
   /// arguments are invalid. 'name' is an arbitrary name used to identify the client in
-  /// any errors messages or logging. 'client' is the client to register. 'client' should
-  /// not already be registered.
+  /// any error messages or logging. Counters for this client are added to the (non-NULL)
+  /// 'profile'. 'client' is the client to register. 'client' should not already be
+  /// registered.
   Status RegisterClient(const std::string& name, ReservationTracker* reservation,
-      Client* client);
+      RuntimeProfile* profile, Client* client);
 
   /// Deregister 'client' if it is registered. Idempotent.
   void DeregisterClient(Client* client);
@@ -305,12 +307,19 @@ class BufferPool::Client {
   friend class BufferPool;
   DISALLOW_COPY_AND_ASSIGN(Client);
 
+  /// Initialize 'counters_' and add the counters to 'profile'.
+  void InitCounters(RuntimeProfile* profile);
+
   /// A name identifying the client.
   std::string name_;
 
   /// The reservation tracker for the client. NULL means the client isn't registered.
   /// All pages pinned by the client count as usage against 'reservation_'.
   ReservationTracker* reservation_;
+
+  /// The RuntimeProfile counters for this client. All non-NULL if is_registered()
+  /// is true.
+  BufferPoolClientCounters counters_;
 };
 
 /// A handle to a buffer allocated from the buffer pool. Each BufferHandle should only

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/reservation-tracker-test.cc
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/reservation-tracker-test.cc b/be/src/bufferpool/reservation-tracker-test.cc
index 93bf7b8..66ce287 100644
--- a/be/src/bufferpool/reservation-tracker-test.cc
+++ b/be/src/bufferpool/reservation-tracker-test.cc
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <limits>
 #include <string>
 #include <vector>
@@ -24,7 +23,7 @@
 #include "common/init.h"
 #include "common/object-pool.h"
 #include "runtime/mem-tracker.h"
-#include "testutil/test-macros.h"
+#include "testutil/gtest-util.h"
 
 #include "common/names.h"
 
@@ -376,8 +375,4 @@ TEST_F(ReservationTrackerTest, MemTrackerIntegrationMultiLevel) {
 }
 }
 
-int main(int argc, char** argv) {
-  ::testing::InitGoogleTest(&argc, argv);
-  impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
-  return RUN_ALL_TESTS();
-}
+IMPALA_TEST_MAIN();


[31/32] incubator-impala git commit: IMPALA-2905: Move QueryResultSet implementations into separate module

Posted by ta...@apache.org.
IMPALA-2905: Move QueryResultSet implementations into separate module

This mostly mechanical change moves the definition and implementation of
the Beeswax- and HS2-specific result sets into their own module. Result
sets are now uniformly created by one of two factory methods, so the
implementation is decoupled from the client.
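
As a rough sketch of the shape this introduces (illustrative only: the Thrift
metadata and row-set types are stubbed out here, and the real declarations
follow in query-result-set.h):

  #include <memory>
  #include <string>
  #include <vector>

  // Stand-in for TResultSetMetadata; the real factories take the Thrift types.
  struct Metadata {};

  class ResultSet {  // plays the role of QueryResultSet
   public:
    virtual ~ResultSet() {}
    virtual void AddOneRow(const std::string& row) = 0;
    // Clients call a factory and see only the abstract interface; the concrete
    // subclasses (Ascii, HS2 columnar, HS2 row-oriented) stay private to the module.
    static std::unique_ptr<ResultSet> CreateAscii(
        const Metadata& metadata, std::vector<std::string>* rowset);
  };

  namespace {
  class AsciiResultSet : public ResultSet {
   public:
    explicit AsciiResultSet(std::vector<std::string>* rowset) : rowset_(rowset) {}
    virtual void AddOneRow(const std::string& row) { rowset_->push_back(row); }
   private:
    std::vector<std::string>* rowset_;  // not owned
  };
  }

  std::unique_ptr<ResultSet> ResultSet::CreateAscii(
      const Metadata& metadata, std::vector<std::string>* rowset) {
    return std::unique_ptr<ResultSet>(new AsciiResultSet(rowset));
  }

The server fetch paths then hold the returned pointer in a scoped_ptr and never
name the Beeswax/HS2 classes directly, as the diffs below show.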

Change-Id: I6ab883b62d3ec7012240edf8d56889349e7c0e32
Reviewed-on: http://gerrit.cloudera.org:8080/4736
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3f5380dc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3f5380dc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3f5380dc

Branch: refs/heads/hadoop-next
Commit: 3f5380dc73f3ab907443a2858d4fe0de6e3685e7
Parents: 080a678
Author: Henry Robinson <he...@cloudera.com>
Authored: Sat Oct 15 16:47:24 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 09:30:09 2016 +0000

----------------------------------------------------------------------
 be/src/service/CMakeLists.txt           |   1 +
 be/src/service/impala-beeswax-server.cc |  98 +-----
 be/src/service/impala-hs2-server.cc     | 324 +-----------------
 be/src/service/query-result-set.cc      | 478 +++++++++++++++++++++++++++
 be/src/service/query-result-set.h       |  21 +-
 5 files changed, 503 insertions(+), 419 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3f5380dc/be/src/service/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index aa12ceb..35130ff 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -33,6 +33,7 @@ add_library(Service
   impala-beeswax-server.cc
   query-exec-state.cc
   query-options.cc
+  query-result-set.cc
   child-query.cc
   impalad-main.cc
 )

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3f5380dc/be/src/service/impala-beeswax-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index ee7f958..b50499e 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -47,100 +47,8 @@ using namespace beeswax;
     }                                                           \
   } while (false)
 
-namespace {
-
-/// Ascii output precision for double/float
-constexpr int ASCII_PRECISION = 16;
-}
-
 namespace impala {
 
-// Ascii result set for Beeswax.
-// Beeswax returns rows in ascii, using "\t" as column delimiter.
-class AsciiQueryResultSet : public QueryResultSet {
- public:
-  // Rows are added into rowset.
-  AsciiQueryResultSet(const TResultSetMetadata& metadata, vector<string>* rowset)
-    : metadata_(metadata), result_set_(rowset), owned_result_set_(NULL) {
-  }
-
-  // Rows are added into a new rowset that is owned by this result set.
-  AsciiQueryResultSet(const TResultSetMetadata& metadata)
-    : metadata_(metadata), result_set_(new vector<string>()),
-      owned_result_set_(result_set_) {
-  }
-
-  virtual ~AsciiQueryResultSet() { }
-
-  // Convert expr values (col_values) to ASCII using "\t" as column delimiter and store
-  // it in this result set.
-  // TODO: Handle complex types.
-  virtual Status AddOneRow(const vector<void*>& col_values, const vector<int>& scales) {
-    int num_col = col_values.size();
-    DCHECK_EQ(num_col, metadata_.columns.size());
-    stringstream out_stream;
-    out_stream.precision(ASCII_PRECISION);
-    for (int i = 0; i < num_col; ++i) {
-      // ODBC-187 - ODBC can only take "\t" as the delimiter
-      out_stream << (i > 0 ? "\t" : "");
-      DCHECK_EQ(1, metadata_.columns[i].columnType.types.size());
-      RawValue::PrintValue(col_values[i],
-          ColumnType::FromThrift(metadata_.columns[i].columnType),
-          scales[i], &out_stream);
-    }
-    result_set_->push_back(out_stream.str());
-    return Status::OK();
-  }
-
-  // Convert TResultRow to ASCII using "\t" as column delimiter and store it in this
-  // result set.
-  virtual Status AddOneRow(const TResultRow& row) {
-    int num_col = row.colVals.size();
-    DCHECK_EQ(num_col, metadata_.columns.size());
-    stringstream out_stream;
-    out_stream.precision(ASCII_PRECISION);
-    for (int i = 0; i < num_col; ++i) {
-      // ODBC-187 - ODBC can only take "\t" as the delimiter
-      out_stream << (i > 0 ? "\t" : "");
-      out_stream << row.colVals[i];
-    }
-    result_set_->push_back(out_stream.str());
-    return Status::OK();
-  }
-
-  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) {
-    const AsciiQueryResultSet* o = static_cast<const AsciiQueryResultSet*>(other);
-    if (start_idx >= o->result_set_->size()) return 0;
-    const int rows_added =
-        min(static_cast<size_t>(num_rows), o->result_set_->size() - start_idx);
-    result_set_->insert(result_set_->end(), o->result_set_->begin() + start_idx,
-        o->result_set_->begin() + start_idx + rows_added);
-    return rows_added;
-  }
-
-  virtual int64_t ByteSize(int start_idx, int num_rows) {
-    int64_t bytes = 0;
-    const int end = min(static_cast<size_t>(num_rows), result_set_->size() - start_idx);
-    for (int i = start_idx; i < start_idx + end; ++i) {
-      bytes += sizeof(result_set_[i]) + result_set_[i].capacity();
-    }
-    return bytes;
-  }
-
-  virtual size_t size() { return result_set_->size(); }
-
- private:
-  // Metadata of the result set
-  const TResultSetMetadata& metadata_;
-
-  // Points to the result set to be filled. The result set this points to may be owned by
-  // this object, in which case owned_result_set_ is set.
-  vector<string>* result_set_;
-
-  // Set to result_set_ if result_set_ is owned.
-  scoped_ptr<vector<string>> owned_result_set_;
-};
-
 void ImpalaServer::query(QueryHandle& query_handle, const Query& query) {
   VLOG_QUERY << "query(): query=" << query.query;
   ScopedSessionState session_handle(this);
@@ -588,9 +496,9 @@ Status ImpalaServer::FetchInternal(const TUniqueId& query_id,
   Status fetch_rows_status;
   query_results->data.clear();
   if (!exec_state->eos()) {
-    AsciiQueryResultSet result_set(*(exec_state->result_metadata()),
-        &(query_results->data));
-    fetch_rows_status = exec_state->FetchRows(fetch_size, &result_set);
+    scoped_ptr<QueryResultSet> result_set(QueryResultSet::CreateAsciiQueryResultSet(
+        *exec_state->result_metadata(), &query_results->data));
+    fetch_rows_status = exec_state->FetchRows(fetch_size, result_set.get());
   }
   query_results->__set_has_more(!exec_state->eos());
   query_results->__isset.data = true;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3f5380dc/be/src/service/impala-hs2-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index de0e2f3..488a1ee 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -85,315 +85,10 @@ namespace impala {
 
 const string IMPALA_RESULT_CACHING_OPT = "impala.resultset.cache.size";
 
-// Utility functions for computing the size of HS2 Thrift structs in bytes.
-static inline
-int64_t ByteSize(const thrift::TColumnValue& val) {
-  return sizeof(val) + val.stringVal.value.capacity();
-}
-
-static int64_t ByteSize(const thrift::TRow& row) {
-  int64_t bytes = sizeof(row);
-  for (const thrift::TColumnValue& c: row.colVals) {
-    bytes += ByteSize(c);
-  }
-  return bytes;
-}
-
-// Returns the size, in bytes, of a Hive TColumn structure, only taking into account those
-// values in the range [start_idx, end_idx).
-static uint32_t TColumnByteSize(const thrift::TColumn& col, uint32_t start_idx,
-    uint32_t end_idx) {
-  DCHECK_LE(start_idx, end_idx);
-  uint32_t num_rows = end_idx - start_idx;
-  if (num_rows == 0) return 0L;
-
-  if (col.__isset.boolVal) return (num_rows * sizeof(bool)) + col.boolVal.nulls.size();
-  if (col.__isset.byteVal) return num_rows + col.byteVal.nulls.size();
-  if (col.__isset.i16Val) return (num_rows * sizeof(int16_t)) + col.i16Val.nulls.size();
-  if (col.__isset.i32Val) return (num_rows * sizeof(int32_t)) + col.i32Val.nulls.size();
-  if (col.__isset.i64Val) return (num_rows * sizeof(int64_t)) + col.i64Val.nulls.size();
-  if (col.__isset.doubleVal) {
-    return (num_rows * sizeof(double)) + col.doubleVal.nulls.size();
-  }
-  if (col.__isset.stringVal) {
-    uint32_t bytes = 0;
-    for (int i = start_idx; i < end_idx; ++i) bytes += col.stringVal.values[i].size();
-    return bytes + col.stringVal.nulls.size();
-  }
-
-  return 0;
-}
-
 // Helper function to translate between Beeswax and HiveServer2 type
 static TOperationState::type QueryStateToTOperationState(
     const beeswax::QueryState::type& query_state);
 
-// Result set container for Hive protocol versions >= V6, where results are returned in
-// column-orientation.
-class HS2ColumnarResultSet : public QueryResultSet {
- public:
-  HS2ColumnarResultSet(const TResultSetMetadata& metadata, TRowSet* rowset = NULL)
-      : metadata_(metadata), result_set_(rowset), num_rows_(0) {
-    if (rowset == NULL) {
-      owned_result_set_.reset(new TRowSet());
-      result_set_ = owned_result_set_.get();
-    }
-    InitColumns();
-  }
-
-  virtual ~HS2ColumnarResultSet() { }
-
-  // Add a row of expr values
-  virtual Status AddOneRow(const vector<void*>& col_values, const vector<int>& scales) {
-    int num_col = col_values.size();
-    DCHECK_EQ(num_col, metadata_.columns.size());
-    for (int i = 0; i < num_col; ++i) {
-      ExprValueToHS2TColumn(col_values[i], metadata_.columns[i].columnType, num_rows_,
-          &(result_set_->columns[i]));
-    }
-    ++num_rows_;
-    return Status::OK();
-  }
-
-  // Add a row from a TResultRow
-  virtual Status AddOneRow(const TResultRow& row) {
-    int num_col = row.colVals.size();
-    DCHECK_EQ(num_col, metadata_.columns.size());
-    for (int i = 0; i < num_col; ++i) {
-      TColumnValueToHS2TColumn(row.colVals[i], metadata_.columns[i].columnType, num_rows_,
-          &(result_set_->columns[i]));
-    }
-    ++num_rows_;
-    return Status::OK();
-  }
-
-  // Copy all columns starting at 'start_idx' and proceeding for a maximum of 'num_rows'
-  // from 'other' into this result set
-  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) {
-    const HS2ColumnarResultSet* o = static_cast<const HS2ColumnarResultSet*>(other);
-    DCHECK_EQ(metadata_.columns.size(), o->metadata_.columns.size());
-    if (start_idx >= o->num_rows_) return 0;
-    const int rows_added = min<int64_t>(num_rows, o->num_rows_ - start_idx);
-    for (int j = 0; j < metadata_.columns.size(); ++j) {
-      thrift::TColumn* from = &o->result_set_->columns[j];
-      thrift::TColumn* to = &result_set_->columns[j];
-      switch (metadata_.columns[j].columnType.types[0].scalar_type.type) {
-        case TPrimitiveType::NULL_TYPE:
-        case TPrimitiveType::BOOLEAN:
-          StitchNulls(num_rows_, rows_added, start_idx, from->boolVal.nulls,
-              &(to->boolVal.nulls));
-          to->boolVal.values.insert(
-              to->boolVal.values.end(),
-              from->boolVal.values.begin() + start_idx,
-              from->boolVal.values.begin() + start_idx + rows_added);
-          break;
-        case TPrimitiveType::TINYINT:
-          StitchNulls(num_rows_, rows_added, start_idx, from->byteVal.nulls,
-              &(to->byteVal.nulls));
-          to->byteVal.values.insert(
-              to->byteVal.values.end(),
-              from->byteVal.values.begin() + start_idx,
-              from->byteVal.values.begin() + start_idx + rows_added);
-          break;
-        case TPrimitiveType::SMALLINT:
-          StitchNulls(num_rows_, rows_added, start_idx, from->i16Val.nulls,
-              &(to->i16Val.nulls));
-          to->i16Val.values.insert(
-              to->i16Val.values.end(),
-              from->i16Val.values.begin() + start_idx,
-              from->i16Val.values.begin() + start_idx + rows_added);
-          break;
-        case TPrimitiveType::INT:
-          StitchNulls(num_rows_, rows_added, start_idx, from->i32Val.nulls,
-              &(to->i32Val.nulls));
-          to->i32Val.values.insert(
-              to->i32Val.values.end(),
-              from->i32Val.values.begin() + start_idx,
-              from->i32Val.values.begin() + start_idx + rows_added);
-          break;
-        case TPrimitiveType::BIGINT:
-          StitchNulls(num_rows_, rows_added, start_idx, from->i64Val.nulls,
-              &(to->i64Val.nulls));
-          to->i64Val.values.insert(
-              to->i64Val.values.end(),
-              from->i64Val.values.begin() + start_idx,
-              from->i64Val.values.begin() + start_idx + rows_added);
-          break;
-        case TPrimitiveType::FLOAT:
-        case TPrimitiveType::DOUBLE:
-          StitchNulls(num_rows_, rows_added, start_idx, from->doubleVal.nulls,
-              &(to->doubleVal.nulls));
-          to->doubleVal.values.insert(
-              to->doubleVal.values.end(),
-              from->doubleVal.values.begin() + start_idx,
-              from->doubleVal.values.begin() + start_idx + rows_added);
-          break;
-        case TPrimitiveType::TIMESTAMP:
-        case TPrimitiveType::DECIMAL:
-        case TPrimitiveType::STRING:
-        case TPrimitiveType::VARCHAR:
-        case TPrimitiveType::CHAR:
-          StitchNulls(num_rows_, rows_added, start_idx, from->stringVal.nulls,
-              &(to->stringVal.nulls));
-          to->stringVal.values.insert(to->stringVal.values.end(),
-              from->stringVal.values.begin() + start_idx,
-              from->stringVal.values.begin() + start_idx + rows_added);
-          break;
-        default:
-          DCHECK(false) << "Unsupported type: " << TypeToString(ThriftToType(
-              metadata_.columns[j].columnType.types[0].scalar_type.type));
-          break;
-      }
-    }
-    num_rows_ += rows_added;
-    return rows_added;
-  }
-
-  virtual int64_t ByteSize(int start_idx, int num_rows) {
-    const int end = min(start_idx + num_rows, (int)size());
-    int64_t bytes = 0L;
-    for (const thrift::TColumn& c: result_set_->columns) {
-      bytes += TColumnByteSize(c, start_idx, end);
-    }
-    return bytes;
-  }
-
-  virtual size_t size() { return num_rows_; }
-
- private:
-  // Metadata of the result set
-  const TResultSetMetadata& metadata_;
-
-  // Points to the TRowSet to be filled. The row set this points to may be owned by
-  // this object, in which case owned_result_set_ is set.
-  TRowSet* result_set_;
-
-  // Set to result_set_ if result_set_ is owned.
-  scoped_ptr<TRowSet> owned_result_set_;
-
-  int64_t num_rows_;
-
-  void InitColumns() {
-    result_set_->__isset.columns = true;
-    for (const TColumn& col: metadata_.columns) {
-      DCHECK(col.columnType.types.size() == 1) <<
-          "Structured columns unsupported in HS2 interface";
-      thrift::TColumn column;
-      switch (col.columnType.types[0].scalar_type.type) {
-        case TPrimitiveType::NULL_TYPE:
-        case TPrimitiveType::BOOLEAN:
-          column.__isset.boolVal = true;
-          break;
-        case TPrimitiveType::TINYINT:
-          column.__isset.byteVal = true;
-          break;
-        case TPrimitiveType::SMALLINT:
-          column.__isset.i16Val = true;
-          break;
-        case TPrimitiveType::INT:
-          column.__isset.i32Val = true;
-          break;
-        case TPrimitiveType::BIGINT:
-          column.__isset.i64Val = true;
-          break;
-        case TPrimitiveType::FLOAT:
-        case TPrimitiveType::DOUBLE:
-          column.__isset.doubleVal = true;
-          break;
-        case TPrimitiveType::TIMESTAMP:
-        case TPrimitiveType::DECIMAL:
-        case TPrimitiveType::VARCHAR:
-        case TPrimitiveType::CHAR:
-        case TPrimitiveType::STRING:
-          column.__isset.stringVal = true;
-          break;
-        default:
-          DCHECK(false) << "Unhandled column type: "
-                        << TypeToString(
-                            ThriftToType(col.columnType.types[0].scalar_type.type));
-      }
-      result_set_->columns.push_back(column);
-    }
-  }
-};
-
-// TRow result set for HiveServer2
-class HS2RowOrientedResultSet : public QueryResultSet {
- public:
-  // Rows are added into rowset.
-  HS2RowOrientedResultSet(const TResultSetMetadata& metadata, TRowSet* rowset = NULL)
-      : metadata_(metadata), result_set_(rowset) {
-    if (rowset == NULL) {
-      owned_result_set_.reset(new TRowSet());
-      result_set_ = owned_result_set_.get();
-    }
-  }
-
-  virtual ~HS2RowOrientedResultSet() { }
-
-  // Convert expr value to HS2 TRow and store it in TRowSet.
-  virtual Status AddOneRow(const vector<void*>& col_values, const vector<int>& scales) {
-    int num_col = col_values.size();
-    DCHECK_EQ(num_col, metadata_.columns.size());
-    result_set_->rows.push_back(TRow());
-    TRow& trow = result_set_->rows.back();
-    trow.colVals.resize(num_col);
-    for (int i = 0; i < num_col; ++i) {
-      ExprValueToHS2TColumnValue(col_values[i],
-          metadata_.columns[i].columnType, &(trow.colVals[i]));
-    }
-    return Status::OK();
-  }
-
-  // Convert TResultRow to HS2 TRow and store it in TRowSet.
-  virtual Status AddOneRow(const TResultRow& row) {
-    int num_col = row.colVals.size();
-    DCHECK_EQ(num_col, metadata_.columns.size());
-    result_set_->rows.push_back(TRow());
-    TRow& trow = result_set_->rows.back();
-    trow.colVals.resize(num_col);
-    for (int i = 0; i < num_col; ++i) {
-      TColumnValueToHS2TColumnValue(row.colVals[i], metadata_.columns[i].columnType,
-          &(trow.colVals[i]));
-    }
-    return Status::OK();
-  }
-
-  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) {
-    const HS2RowOrientedResultSet* o = static_cast<const HS2RowOrientedResultSet*>(other);
-    if (start_idx >= o->result_set_->rows.size()) return 0;
-    const int rows_added =
-        min(static_cast<size_t>(num_rows), o->result_set_->rows.size() - start_idx);
-    for (int i = start_idx; i < start_idx + rows_added; ++i) {
-      result_set_->rows.push_back(o->result_set_->rows[i]);
-    }
-    return rows_added;
-  }
-
-  virtual int64_t ByteSize(int start_idx, int num_rows) {
-    int64_t bytes = 0;
-    const int end =
-        min(static_cast<size_t>(num_rows), result_set_->rows.size() - start_idx);
-    for (int i = start_idx; i < start_idx + end; ++i) {
-      bytes += impala::ByteSize(result_set_->rows[i]);
-    }
-    return bytes;
-  }
-
-  virtual size_t size() { return result_set_->rows.size(); }
-
- private:
-  // Metadata of the result set
-  const TResultSetMetadata& metadata_;
-
-  // Points to the TRowSet to be filled. The row set this points to may be owned by
-  // this object, in which case owned_result_set_ is set.
-  TRowSet* result_set_;
-
-  // Set to result_set_ if result_set_ is owned.
-  scoped_ptr<TRowSet> owned_result_set_;
-};
-
 void ImpalaServer::ExecuteMetadataOp(const THandleIdentifier& session_handle,
     TMetadataOpRequest* request, TOperationHandle* handle, thrift::TStatus* status) {
   TUniqueId session_id;
@@ -473,18 +168,6 @@ void ImpalaServer::ExecuteMetadataOp(const THandleIdentifier& session_handle,
   status->__set_statusCode(thrift::TStatusCode::SUCCESS_STATUS);
 }
 
-namespace {
-
-QueryResultSet* CreateHS2ResultSet(
-    TProtocolVersion::type version, const TResultSetMetadata& metadata, TRowSet* rowset) {
-  if (version < TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V6) {
-    return new HS2RowOrientedResultSet(metadata, rowset);
-  } else {
-    return new HS2ColumnarResultSet(metadata, rowset);
-  }
-}
-}
-
 Status ImpalaServer::FetchInternal(const TUniqueId& query_id, int32_t fetch_size,
     bool fetch_first, TFetchResultsResp* fetch_results) {
   shared_ptr<QueryExecState> exec_state = GetQueryExecState(query_id, false);
@@ -522,8 +205,8 @@ Status ImpalaServer::FetchInternal(const TUniqueId& query_id, int32_t fetch_size
   bool is_child_query = exec_state->parent_query_id() != TUniqueId();
   TProtocolVersion::type version = is_child_query ?
       TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V1 : session->hs2_version;
-  scoped_ptr<QueryResultSet> result_set(CreateHS2ResultSet(version,
-      *(exec_state->result_metadata()), &(fetch_results->results)));
+  scoped_ptr<QueryResultSet> result_set(QueryResultSet::CreateHS2ResultSet(
+      version, *(exec_state->result_metadata()), &(fetch_results->results)));
   RETURN_IF_ERROR(exec_state->FetchRows(fetch_size, result_set.get()));
   fetch_results->__isset.results = true;
   fetch_results->__set_hasMoreRows(!exec_state->eos());
@@ -763,7 +446,8 @@ void ImpalaServer::ExecuteStatement(TExecuteStatementResp& return_val,
   // Optionally enable result caching on the QueryExecState.
   if (cache_num_rows > 0) {
     status = exec_state->SetResultCache(
-        CreateHS2ResultSet(session->hs2_version, *exec_state->result_metadata(), nullptr),
+        QueryResultSet::CreateHS2ResultSet(
+            session->hs2_version, *exec_state->result_metadata(), nullptr),
         cache_num_rows);
     if (!status.ok()) {
       UnregisterQuery(exec_state->query_id(), false, &status);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3f5380dc/be/src/service/query-result-set.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-result-set.cc b/be/src/service/query-result-set.cc
new file mode 100644
index 0000000..3b17af7
--- /dev/null
+++ b/be/src/service/query-result-set.cc
@@ -0,0 +1,478 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "service/query-result-set.h"
+
+#include <sstream>
+#include <boost/scoped_ptr.hpp>
+
+#include "rpc/thrift-util.h"
+#include "runtime/raw-value.h"
+#include "runtime/types.h"
+#include "service/hs2-util.h"
+
+#include "common/names.h"
+
+using ThriftTColumn = apache::hive::service::cli::thrift::TColumn;
+using ThriftTColumnValue = apache::hive::service::cli::thrift::TColumnValue;
+using apache::hive::service::cli::thrift::TProtocolVersion;
+using apache::hive::service::cli::thrift::TRow;
+using apache::hive::service::cli::thrift::TRowSet;
+
+namespace {
+
+/// Ascii output precision for double/float
+constexpr int ASCII_PRECISION = 16;
+}
+
+namespace impala {
+
+/// Ascii result set for Beeswax. Rows are returned in ascii text encoding, using "\t" as
+/// column delimiter.
+class AsciiQueryResultSet : public QueryResultSet {
+ public:
+  /// Rows are added into 'rowset'.
+  AsciiQueryResultSet(const TResultSetMetadata& metadata, vector<string>* rowset)
+    : metadata_(metadata), result_set_(rowset) {}
+
+  virtual ~AsciiQueryResultSet() {}
+
+  /// Convert one row's expr values stored in 'col_values' to ASCII using "\t" as column
+  /// delimiter and store it in this result set.
+  /// TODO: Handle complex types.
+  virtual Status AddOneRow(const vector<void*>& col_values, const vector<int>& scales);
+
+  /// Convert TResultRow to ASCII using "\t" as column delimiter and store it in this
+  /// result set.
+  virtual Status AddOneRow(const TResultRow& row);
+
+  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows);
+  virtual int64_t ByteSize(int start_idx, int num_rows);
+  virtual size_t size() { return result_set_->size(); }
+
+ private:
+  /// Metadata of the result set
+  const TResultSetMetadata& metadata_;
+
+  /// Points to the result set to be filled. Not owned by this object.
+  vector<string>* result_set_;
+};
+
+/// Result set container for Hive protocol versions >= V6, where results are returned in
+/// column-orientation.
+class HS2ColumnarResultSet : public QueryResultSet {
+ public:
+  HS2ColumnarResultSet(const TResultSetMetadata& metadata, TRowSet* rowset);
+
+  virtual ~HS2ColumnarResultSet(){};
+
+  /// Add a row of expr values
+  virtual Status AddOneRow(const vector<void*>& col_values, const vector<int>& scales);
+
+  /// Add a row from a TResultRow
+  virtual Status AddOneRow(const TResultRow& row);
+
+  /// Copy all columns starting at 'start_idx' and proceeding for a maximum of 'num_rows'
+  /// from 'other' into this result set
+  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows);
+
+  virtual int64_t ByteSize(int start_idx, int num_rows);
+  virtual size_t size() { return num_rows_; }
+
+ private:
+  /// Metadata of the result set
+  const TResultSetMetadata& metadata_;
+
+  /// Points to the TRowSet to be filled. The row set
+  /// this points to may be owned by
+  /// this object, in which case owned_result_set_ is set.
+  TRowSet* result_set_;
+
+  /// Set to result_set_ if result_set_ is owned.
+  boost::scoped_ptr<TRowSet> owned_result_set_;
+
+  int64_t num_rows_;
+
+  void InitColumns();
+};
+
+/// Row oriented result set for HiveServer2, used to serve HS2 requests with protocol
+/// version <= V5.
+class HS2RowOrientedResultSet : public QueryResultSet {
+ public:
+  /// Rows are added into rowset.
+  HS2RowOrientedResultSet(const TResultSetMetadata& metadata, TRowSet* rowset);
+
+  virtual ~HS2RowOrientedResultSet() {}
+
+  /// Convert expr values to HS2 TRow and store it in a TRowSet.
+  virtual Status AddOneRow(const vector<void*>& col_values, const vector<int>& scales);
+
+  /// Convert TResultRow to HS2 TRow and store it in a TRowSet
+  virtual Status AddOneRow(const TResultRow& row);
+
+  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows);
+  virtual int64_t ByteSize(int start_idx, int num_rows);
+  virtual size_t size() { return result_set_->rows.size(); }
+
+ private:
+  /// Metadata of the result set
+  const TResultSetMetadata& metadata_;
+
+  /// Points to the TRowSet to be filled. The row set
+  /// this points to may be owned by
+  /// this object, in which case owned_result_set_ is set.
+  TRowSet* result_set_;
+
+  /// Set to result_set_ if result_set_ is owned.
+  scoped_ptr<TRowSet> owned_result_set_;
+};
+
+QueryResultSet* QueryResultSet::CreateAsciiQueryResultSet(
+    const TResultSetMetadata& metadata, vector<string>* rowset) {
+  return new AsciiQueryResultSet(metadata, rowset);
+}
+
+QueryResultSet* QueryResultSet::CreateHS2ResultSet(
+    TProtocolVersion::type version, const TResultSetMetadata& metadata, TRowSet* rowset) {
+  if (version < TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V6) {
+    return new HS2RowOrientedResultSet(metadata, rowset);
+  } else {
+    return new HS2ColumnarResultSet(metadata, rowset);
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+Status AsciiQueryResultSet::AddOneRow(
+    const vector<void*>& col_values, const vector<int>& scales) {
+  int num_col = col_values.size();
+  DCHECK_EQ(num_col, metadata_.columns.size());
+  stringstream out_stream;
+  out_stream.precision(ASCII_PRECISION);
+  for (int i = 0; i < num_col; ++i) {
+    // ODBC-187 - ODBC can only take "\t" as the delimiter
+    out_stream << (i > 0 ? "\t" : "");
+    DCHECK_EQ(1, metadata_.columns[i].columnType.types.size());
+    RawValue::PrintValue(col_values[i],
+        ColumnType::FromThrift(metadata_.columns[i].columnType), scales[i], &out_stream);
+  }
+  result_set_->push_back(out_stream.str());
+  return Status::OK();
+}
+
+Status AsciiQueryResultSet::AddOneRow(const TResultRow& row) {
+  int num_col = row.colVals.size();
+  DCHECK_EQ(num_col, metadata_.columns.size());
+  stringstream out_stream;
+  out_stream.precision(ASCII_PRECISION);
+  for (int i = 0; i < num_col; ++i) {
+    // ODBC-187 - ODBC can only take "\t" as the delimiter
+    out_stream << (i > 0 ? "\t" : "");
+    out_stream << row.colVals[i];
+  }
+  result_set_->push_back(out_stream.str());
+  return Status::OK();
+}
+
+int AsciiQueryResultSet::AddRows(
+    const QueryResultSet* other, int start_idx, int num_rows) {
+  const AsciiQueryResultSet* o = static_cast<const AsciiQueryResultSet*>(other);
+  if (start_idx >= o->result_set_->size()) return 0;
+  const int rows_added =
+      min(static_cast<size_t>(num_rows), o->result_set_->size() - start_idx);
+  result_set_->insert(result_set_->end(), o->result_set_->begin() + start_idx,
+      o->result_set_->begin() + start_idx + rows_added);
+  return rows_added;
+}
+
+int64_t AsciiQueryResultSet::ByteSize(int start_idx, int num_rows) {
+  int64_t bytes = 0;
+  const int end = min(static_cast<size_t>(num_rows), result_set_->size() - start_idx);
+  for (int i = start_idx; i < start_idx + end; ++i) {
+    bytes += sizeof(result_set_[i]) + result_set_[i].capacity();
+  }
+  return bytes;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+// Utility functions for computing the size of HS2 Thrift structs in bytes.
+inline int64_t ByteSize(const ThriftTColumnValue& val) {
+  return sizeof(val) + val.stringVal.value.capacity();
+}
+
+int64_t ByteSize(const TRow& row) {
+  int64_t bytes = sizeof(row);
+  for (const ThriftTColumnValue& c : row.colVals) {
+    bytes += ByteSize(c);
+  }
+  return bytes;
+}
+
+// Returns the size, in bytes, of a Hive TColumn structure, only taking into account those
+// values in the range [start_idx, end_idx).
+uint32_t TColumnByteSize(const ThriftTColumn& col, uint32_t start_idx, uint32_t end_idx) {
+  DCHECK_LE(start_idx, end_idx);
+  uint32_t num_rows = end_idx - start_idx;
+  if (num_rows == 0) return 0L;
+
+  if (col.__isset.boolVal) return (num_rows * sizeof(bool)) + col.boolVal.nulls.size();
+  if (col.__isset.byteVal) return num_rows + col.byteVal.nulls.size();
+  if (col.__isset.i16Val) return (num_rows * sizeof(int16_t)) + col.i16Val.nulls.size();
+  if (col.__isset.i32Val) return (num_rows * sizeof(int32_t)) + col.i32Val.nulls.size();
+  if (col.__isset.i64Val) return (num_rows * sizeof(int64_t)) + col.i64Val.nulls.size();
+  if (col.__isset.doubleVal) {
+    return (num_rows * sizeof(double)) + col.doubleVal.nulls.size();
+  }
+  if (col.__isset.stringVal) {
+    uint32_t bytes = 0;
+    for (int i = start_idx; i < end_idx; ++i) bytes += col.stringVal.values[i].size();
+    return bytes + col.stringVal.nulls.size();
+  }
+
+  return 0;
+}
+}
+
+// Result set container for Hive protocol versions >= V6, where results are returned in
+// column-orientation.
+HS2ColumnarResultSet::HS2ColumnarResultSet(
+    const TResultSetMetadata& metadata, TRowSet* rowset)
+  : metadata_(metadata), result_set_(rowset), num_rows_(0) {
+  if (rowset == NULL) {
+    owned_result_set_.reset(new TRowSet());
+    result_set_ = owned_result_set_.get();
+  }
+  InitColumns();
+}
+
+// Add a row of expr values
+Status HS2ColumnarResultSet::AddOneRow(
+    const vector<void*>& col_values, const vector<int>& scales) {
+  int num_col = col_values.size();
+  DCHECK_EQ(num_col, metadata_.columns.size());
+  for (int i = 0; i < num_col; ++i) {
+    ExprValueToHS2TColumn(col_values[i], metadata_.columns[i].columnType, num_rows_,
+        &(result_set_->columns[i]));
+  }
+  ++num_rows_;
+  return Status::OK();
+}
+
+// Add a row from a TResultRow
+Status HS2ColumnarResultSet::AddOneRow(const TResultRow& row) {
+  int num_col = row.colVals.size();
+  DCHECK_EQ(num_col, metadata_.columns.size());
+  for (int i = 0; i < num_col; ++i) {
+    TColumnValueToHS2TColumn(row.colVals[i], metadata_.columns[i].columnType, num_rows_,
+        &(result_set_->columns[i]));
+  }
+  ++num_rows_;
+  return Status::OK();
+}
+
+// Copy all columns starting at 'start_idx' and proceeding for a maximum of 'num_rows'
+// from 'other' into this result set
+int HS2ColumnarResultSet::AddRows(
+    const QueryResultSet* other, int start_idx, int num_rows) {
+  const HS2ColumnarResultSet* o = static_cast<const HS2ColumnarResultSet*>(other);
+  DCHECK_EQ(metadata_.columns.size(), o->metadata_.columns.size());
+  if (start_idx >= o->num_rows_) return 0;
+  const int rows_added = min<int64_t>(num_rows, o->num_rows_ - start_idx);
+  for (int j = 0; j < metadata_.columns.size(); ++j) {
+    ThriftTColumn* from = &o->result_set_->columns[j];
+    ThriftTColumn* to = &result_set_->columns[j];
+    switch (metadata_.columns[j].columnType.types[0].scalar_type.type) {
+      case TPrimitiveType::NULL_TYPE:
+      case TPrimitiveType::BOOLEAN:
+        StitchNulls(
+            num_rows_, rows_added, start_idx, from->boolVal.nulls, &(to->boolVal.nulls));
+        to->boolVal.values.insert(to->boolVal.values.end(),
+            from->boolVal.values.begin() + start_idx,
+            from->boolVal.values.begin() + start_idx + rows_added);
+        break;
+      case TPrimitiveType::TINYINT:
+        StitchNulls(
+            num_rows_, rows_added, start_idx, from->byteVal.nulls, &(to->byteVal.nulls));
+        to->byteVal.values.insert(to->byteVal.values.end(),
+            from->byteVal.values.begin() + start_idx,
+            from->byteVal.values.begin() + start_idx + rows_added);
+        break;
+      case TPrimitiveType::SMALLINT:
+        StitchNulls(
+            num_rows_, rows_added, start_idx, from->i16Val.nulls, &(to->i16Val.nulls));
+        to->i16Val.values.insert(to->i16Val.values.end(),
+            from->i16Val.values.begin() + start_idx,
+            from->i16Val.values.begin() + start_idx + rows_added);
+        break;
+      case TPrimitiveType::INT:
+        StitchNulls(
+            num_rows_, rows_added, start_idx, from->i32Val.nulls, &(to->i32Val.nulls));
+        to->i32Val.values.insert(to->i32Val.values.end(),
+            from->i32Val.values.begin() + start_idx,
+            from->i32Val.values.begin() + start_idx + rows_added);
+        break;
+      case TPrimitiveType::BIGINT:
+        StitchNulls(
+            num_rows_, rows_added, start_idx, from->i64Val.nulls, &(to->i64Val.nulls));
+        to->i64Val.values.insert(to->i64Val.values.end(),
+            from->i64Val.values.begin() + start_idx,
+            from->i64Val.values.begin() + start_idx + rows_added);
+        break;
+      case TPrimitiveType::FLOAT:
+      case TPrimitiveType::DOUBLE:
+        StitchNulls(num_rows_, rows_added, start_idx, from->doubleVal.nulls,
+            &(to->doubleVal.nulls));
+        to->doubleVal.values.insert(to->doubleVal.values.end(),
+            from->doubleVal.values.begin() + start_idx,
+            from->doubleVal.values.begin() + start_idx + rows_added);
+        break;
+      case TPrimitiveType::TIMESTAMP:
+      case TPrimitiveType::DECIMAL:
+      case TPrimitiveType::STRING:
+      case TPrimitiveType::VARCHAR:
+      case TPrimitiveType::CHAR:
+        StitchNulls(num_rows_, rows_added, start_idx, from->stringVal.nulls,
+            &(to->stringVal.nulls));
+        to->stringVal.values.insert(to->stringVal.values.end(),
+            from->stringVal.values.begin() + start_idx,
+            from->stringVal.values.begin() + start_idx + rows_added);
+        break;
+      default:
+        DCHECK(false) << "Unsupported type: "
+                      << TypeToString(ThriftToType(
+                             metadata_.columns[j].columnType.types[0].scalar_type.type));
+        break;
+    }
+  }
+  num_rows_ += rows_added;
+  return rows_added;
+}
+
+int64_t HS2ColumnarResultSet::ByteSize(int start_idx, int num_rows) {
+  const int end = min(start_idx + num_rows, (int)size());
+  int64_t bytes = 0L;
+  for (const ThriftTColumn& c : result_set_->columns) {
+    bytes += TColumnByteSize(c, start_idx, end);
+  }
+  return bytes;
+}
+
+void HS2ColumnarResultSet::InitColumns() {
+  result_set_->__isset.columns = true;
+  for (const TColumn& col : metadata_.columns) {
+    DCHECK(col.columnType.types.size() == 1)
+        << "Structured columns unsupported in HS2 interface";
+    ThriftTColumn column;
+    switch (col.columnType.types[0].scalar_type.type) {
+      case TPrimitiveType::NULL_TYPE:
+      case TPrimitiveType::BOOLEAN:
+        column.__isset.boolVal = true;
+        break;
+      case TPrimitiveType::TINYINT:
+        column.__isset.byteVal = true;
+        break;
+      case TPrimitiveType::SMALLINT:
+        column.__isset.i16Val = true;
+        break;
+      case TPrimitiveType::INT:
+        column.__isset.i32Val = true;
+        break;
+      case TPrimitiveType::BIGINT:
+        column.__isset.i64Val = true;
+        break;
+      case TPrimitiveType::FLOAT:
+      case TPrimitiveType::DOUBLE:
+        column.__isset.doubleVal = true;
+        break;
+      case TPrimitiveType::TIMESTAMP:
+      case TPrimitiveType::DECIMAL:
+      case TPrimitiveType::VARCHAR:
+      case TPrimitiveType::CHAR:
+      case TPrimitiveType::STRING:
+        column.__isset.stringVal = true;
+        break;
+      default:
+        DCHECK(false) << "Unhandled column type: "
+                      << TypeToString(
+                             ThriftToType(col.columnType.types[0].scalar_type.type));
+    }
+    result_set_->columns.push_back(column);
+  }
+}
+
+HS2RowOrientedResultSet::HS2RowOrientedResultSet(
+    const TResultSetMetadata& metadata, TRowSet* rowset)
+  : metadata_(metadata), result_set_(rowset) {
+  if (rowset == NULL) {
+    owned_result_set_.reset(new TRowSet());
+    result_set_ = owned_result_set_.get();
+  }
+}
+
+Status HS2RowOrientedResultSet::AddOneRow(
+    const vector<void*>& col_values, const vector<int>& scales) {
+  int num_col = col_values.size();
+  DCHECK_EQ(num_col, metadata_.columns.size());
+  result_set_->rows.push_back(TRow());
+  TRow& trow = result_set_->rows.back();
+  trow.colVals.resize(num_col);
+  for (int i = 0; i < num_col; ++i) {
+    ExprValueToHS2TColumnValue(
+        col_values[i], metadata_.columns[i].columnType, &(trow.colVals[i]));
+  }
+  return Status::OK();
+}
+
+Status HS2RowOrientedResultSet::AddOneRow(const TResultRow& row) {
+  int num_col = row.colVals.size();
+  DCHECK_EQ(num_col, metadata_.columns.size());
+  result_set_->rows.push_back(TRow());
+  TRow& trow = result_set_->rows.back();
+  trow.colVals.resize(num_col);
+  for (int i = 0; i < num_col; ++i) {
+    TColumnValueToHS2TColumnValue(
+        row.colVals[i], metadata_.columns[i].columnType, &(trow.colVals[i]));
+  }
+  return Status::OK();
+}
+
+int HS2RowOrientedResultSet::AddRows(
+    const QueryResultSet* other, int start_idx, int num_rows) {
+  const HS2RowOrientedResultSet* o = static_cast<const HS2RowOrientedResultSet*>(other);
+  if (start_idx >= o->result_set_->rows.size()) return 0;
+  const int rows_added =
+      min(static_cast<size_t>(num_rows), o->result_set_->rows.size() - start_idx);
+  for (int i = start_idx; i < start_idx + rows_added; ++i) {
+    result_set_->rows.push_back(o->result_set_->rows[i]);
+  }
+  return rows_added;
+}
+
+int64_t HS2RowOrientedResultSet::ByteSize(int start_idx, int num_rows) {
+  int64_t bytes = 0;
+  const int end =
+      min(static_cast<size_t>(num_rows), result_set_->rows.size() - start_idx);
+  for (int i = start_idx; i < start_idx + end; ++i) {
+    bytes += impala::ByteSize(result_set_->rows[i]);
+  }
+  return bytes;
+}
+}
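
A note on the per-type cases above: each branch delegates null handling to
StitchNulls(), which has to splice bit-packed HS2 null indicators into the target
column at an arbitrary row offset. A minimal sketch of that kind of bitmap append
(helper name, bit order and growth strategy are assumptions for illustration, not
Impala's actual implementation):

  #include <cstdint>
  #include <string>

  // Appends 'count' null-indicator bits taken from 'src' (starting at bit
  // 'src_offset') onto 'dst', which already holds 'dst_bits' valid bits.
  // Assumes one bit per row, least-significant bit first, unused trailing bits zero.
  void AppendNullBits(int64_t dst_bits, int64_t count, int64_t src_offset,
                      const std::string& src, std::string* dst) {
    for (int64_t i = 0; i < count; ++i) {
      int64_t src_bit = src_offset + i;
      bool is_null =
          (static_cast<unsigned char>(src[src_bit / 8]) >> (src_bit % 8)) & 1;
      int64_t dst_bit = dst_bits + i;
      if (dst_bit / 8 >= static_cast<int64_t>(dst->size())) dst->push_back(0);
      if (is_null) (*dst)[dst_bit / 8] |= static_cast<char>(1 << (dst_bit % 8));
    }
  }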

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3f5380dc/be/src/service/query-result-set.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-result-set.h b/be/src/service/query-result-set.h
index b444ca3..e0c88d7 100644
--- a/be/src/service/query-result-set.h
+++ b/be/src/service/query-result-set.h
@@ -20,15 +20,17 @@
 
 #include "common/status.h"
 #include "gen-cpp/Data_types.h"
+#include "gen-cpp/Results_types.h"
+#include "gen-cpp/TCLIService_types.h"
 
 #include <vector>
 
 namespace impala {
 
-/// Stores client-ready query result rows returned by
-/// QueryExecState::FetchRows(). Subclasses implement AddRows() / AddOneRow() to
-/// specialise how Impala's row batches are converted to client-API result
-/// representations.
+/// Wraps a client-API specific result representation, and implements the logic required
+/// to translate into that format from Impala's row format.
+///
+/// Subclasses implement AddRows() / AddOneRow() to specialise that logic.
 class QueryResultSet {
  public:
   QueryResultSet() {}
@@ -58,6 +60,17 @@ class QueryResultSet {
 
   /// Returns the size of this result set in number of rows.
   virtual size_t size() = 0;
+
+  /// Returns a result set suitable for Beeswax-based clients.
+  static QueryResultSet* CreateAsciiQueryResultSet(
+      const TResultSetMetadata& metadata, std::vector<std::string>* rowset);
+
+  /// Returns a result set suitable for HS2-based clients. If 'rowset' is nullptr, the
+  /// returned object will allocate and manage its own rowset.
+  static QueryResultSet* CreateHS2ResultSet(
+      apache::hive::service::cli::thrift::TProtocolVersion::type version,
+      const TResultSetMetadata& metadata,
+      apache::hive::service::cli::thrift::TRowSet* rowset);
 };
 }
 



[04/32] incubator-impala git commit: IMPALA-4291: Reduce LLVM module's preparation time

Posted by ta...@apache.org.
IMPALA-4291: Reduce LLVM module's preparation time

Previously, when creating a LlvmCodeGen object, we
ran an O(mn) algorithm to map each IRFunction::Type
to the actual llvm::Function object in the module,
where m is the size of the IRFunction::Type enum and
n is the total number of functions in the module.
This is a waste of time if we only use a few
functions from the module.

This change reduces the preparation time of a simple
query from 23ms to 10ms.

select count(*) from tpch100_parquet.lineitem where l_orderkey > 20;
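
The change below replaces that up-front scan with lazy, exact-name lookups that are
cached per LlvmCodeGen object (see GetFunction() in llvm-codegen.cc). A minimal sketch
of the pattern, using simplified stand-in types rather than Impala's actual classes:

  #include <array>
  #include <string>
  #include <unordered_map>
  #include <vector>

  struct Function { std::string name; };

  enum IRFunctionType { AVG_MERGE = 0, HLL_MERGE, FN_END };

  // Exact mangled names, indexed by enum value (illustrative placeholders here;
  // gen_ir_descriptions.py now emits the real mangled symbols).
  const std::array<const char*, FN_END> kFnNames = {
      "mangled_name_of_AvgMerge", "mangled_name_of_HllMerge"};

  // Stand-in for llvm::Module::getFunction(): exact-name lookup in a symbol table.
  class Module {
   public:
    Function* getFunction(const std::string& name) {
      auto it = fns_.find(name);
      return it == fns_.end() ? nullptr : &it->second;
    }
    void add(const std::string& name) { fns_[name] = Function{name}; }
   private:
    std::unordered_map<std::string, Function> fns_;
  };

  class FunctionCache {
   public:
    explicit FunctionCache(Module* m) : module_(m), loaded_(FN_END, nullptr) {}

    // Resolved lazily on first use instead of mapping every enum value up front,
    // so codegen setup only pays for the functions a query actually needs.
    Function* Get(IRFunctionType type) {
      Function* fn = loaded_[type];
      if (fn == nullptr) {
        fn = module_->getFunction(kFnNames[type]);  // no substring scan over the module
        loaded_[type] = fn;
      }
      return fn;
    }

   private:
    Module* module_;
    std::vector<Function*> loaded_;
  };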

Change-Id: I61ab9fa8cca5a0909bb716c3c62819da3e3b3041
Reviewed-on: http://gerrit.cloudera.org:8080/4691
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/47b8aa3a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/47b8aa3a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/47b8aa3a

Branch: refs/heads/hadoop-next
Commit: 47b8aa3a9e7682ebb182696901916900d3323039
Parents: c7fe438
Author: Michael Ho <kw...@cloudera.com>
Authored: Sat Oct 8 22:43:35 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 14 03:02:35 2016 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py | 224 +++++++++++++++++++----------
 be/src/codegen/llvm-codegen.cc        |  74 ++++------
 2 files changed, 175 insertions(+), 123 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/47b8aa3a/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index 60244a7..2b08509 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -38,90 +38,156 @@ options, args = parser.parse_args()
 #   - be/src/generated-sources/impala-ir/impala-ir-function-names.h
 #     This file contains a mapping of <string, enum>
 
-# Mapping of enum to compiled function name.  The compiled function name only has to
-# be a substring of the actual, mangled compiler generated name.
+# Mapping of enum to compiled function name. The compiled function name has to be
+# the actual mangled compiler generated name. One can easily obtain that by disassembling
+# the bit code module.
 # TODO: should we work out the mangling rules?
 ir_functions = [
-  ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING", "ProcessRowBatchWithGrouping"],
-  ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING", "ProcessRowBatchNoGrouping"],
-  ["AGG_NODE_GET_EXPR_CTX", "GetAggExprCtx"],
-  ["AGG_NODE_GET_FN_CTX", "GetAggFnCtx"],
+  ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING",
+   "_ZN6impala15AggregationNode27ProcessRowBatchWithGroupingEPNS_8RowBatchE"],
+  ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING",
+   "_ZN6impala15AggregationNode25ProcessRowBatchNoGroupingEPNS_8RowBatchE"],
+  ["AGG_NODE_GET_EXPR_CTX",
+   "_ZNK6impala15AggregationNode13GetAggExprCtxEi"],
+  ["AGG_NODE_GET_FN_CTX",
+   "_ZNK6impala15AggregationNode11GetAggFnCtxEi"],
   ["PART_AGG_NODE_PROCESS_BATCH_UNAGGREGATED",
-      "PartitionedAggregationNode12ProcessBatchILb0"],
+   "_ZN6impala26PartitionedAggregationNode12ProcessBatchILb0EEENS_6StatusEPNS_8RowBatchENS_13TPrefetchMode4typeEPNS_12HashTableCtxE"],
   ["PART_AGG_NODE_PROCESS_BATCH_AGGREGATED",
-      "PartitionedAggregationNode12ProcessBatchILb1"],
+   "_ZN6impala26PartitionedAggregationNode12ProcessBatchILb1EEENS_6StatusEPNS_8RowBatchENS_13TPrefetchMode4typeEPNS_12HashTableCtxE"],
   ["PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING",
-      "PartitionedAggregationNode22ProcessBatchNoGrouping"],
+   "_ZN6impala26PartitionedAggregationNode22ProcessBatchNoGroupingEPNS_8RowBatchE"],
   ["PART_AGG_NODE_PROCESS_BATCH_STREAMING",
-      "PartitionedAggregationNode21ProcessBatchStreaming"],
+   "_ZN6impala26PartitionedAggregationNode21ProcessBatchStreamingEbNS_13TPrefetchMode4typeEPNS_8RowBatchES4_PNS_12HashTableCtxEPi"],
   ["PART_AGG_NODE_GET_EXPR_CTX",
-      "PartitionedAggregationNode17GetAggExprContext"],
-  ["AVG_UPDATE_BIGINT", "9AvgUpdateIN10impala_udf9BigIntVal"],
-  ["AVG_UPDATE_DOUBLE", "9AvgUpdateIN10impala_udf9DoubleVal"],
-  ["AVG_UPDATE_TIMESTAMP", "TimestampAvgUpdate"],
-  ["AVG_UPDATE_DECIMAL", "DecimalAvgUpdate"],
-  ["AVG_MERGE", "8AvgMerge"],
-  ["AVG_MERGE_DECIMAL", "DecimalAvgMerge"],
-  ["CODEGEN_ANYVAL_STRING_VAL_EQ", "StringValEq"],
-  ["CODEGEN_ANYVAL_STRING_VALUE_EQ", "StringValueEq"],
-  ["CODEGEN_ANYVAL_TIMESTAMP_VAL_EQ", "TimestampValEq"],
-  ["CODEGEN_ANYVAL_TIMESTAMP_VALUE_EQ", "TimestampValueEq"],
-  ["EXPR_GET_BOOLEAN_VAL", "4Expr13GetBooleanVal"],
-  ["EXPR_GET_TINYINT_VAL", "4Expr13GetTinyIntVal"],
-  ["EXPR_GET_SMALLINT_VAL", "4Expr14GetSmallIntVal"],
-  ["EXPR_GET_INT_VAL", "4Expr9GetIntVal"],
-  ["EXPR_GET_BIGINT_VAL", "4Expr12GetBigIntVal"],
-  ["EXPR_GET_FLOAT_VAL", "4Expr11GetFloatVal"],
-  ["EXPR_GET_DOUBLE_VAL", "4Expr12GetDoubleVal"],
-  ["EXPR_GET_STRING_VAL", "4Expr12GetStringVal"],
-  ["EXPR_GET_TIMESTAMP_VAL", "4Expr15GetTimestampVal"],
-  ["EXPR_GET_DECIMAL_VAL", "4Expr13GetDecimalVal"],
+   "_ZNK6impala26PartitionedAggregationNode17GetAggExprContextEi"],
+  ["AVG_UPDATE_BIGINT",
+   "_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["AVG_UPDATE_DOUBLE",
+   "_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["AVG_UPDATE_TIMESTAMP",
+   "_ZN6impala18AggregateFunctions18TimestampAvgUpdateEPN10impala_udf15FunctionContextERKNS1_12TimestampValEPNS1_9StringValE"],
+  ["AVG_UPDATE_DECIMAL",
+   "_ZN6impala18AggregateFunctions16DecimalAvgUpdateEPN10impala_udf15FunctionContextERKNS1_10DecimalValEPNS1_9StringValE"],
+  ["AVG_MERGE",
+   "_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_"],
+  ["AVG_MERGE_DECIMAL",
+   "_ZN6impala18AggregateFunctions15DecimalAvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_"],
+  ["CODEGEN_ANYVAL_STRING_VAL_EQ",
+   "_Z11StringValEqRKN10impala_udf9StringValES2_"],
+  ["CODEGEN_ANYVAL_STRING_VALUE_EQ",
+   "_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE"],
+  ["CODEGEN_ANYVAL_TIMESTAMP_VAL_EQ",
+   "_Z14TimestampValEqRKN10impala_udf12TimestampValES2_"],
+  ["CODEGEN_ANYVAL_TIMESTAMP_VALUE_EQ",
+   "_Z16TimestampValueEqRKN10impala_udf12TimestampValERKN6impala14TimestampValueE"],
+  ["EXPR_GET_BOOLEAN_VAL",
+   "_ZN6impala4Expr13GetBooleanValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_TINYINT_VAL",
+   "_ZN6impala4Expr13GetTinyIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_SMALLINT_VAL",
+   "_ZN6impala4Expr14GetSmallIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_INT_VAL",
+   "_ZN6impala4Expr9GetIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_BIGINT_VAL",
+   "_ZN6impala4Expr12GetBigIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_FLOAT_VAL",
+   "_ZN6impala4Expr11GetFloatValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_DOUBLE_VAL",
+   "_ZN6impala4Expr12GetDoubleValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_STRING_VAL",
+   "_ZN6impala4Expr12GetStringValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_TIMESTAMP_VAL",
+   "_ZN6impala4Expr15GetTimestampValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_DECIMAL_VAL",
+   "_ZN6impala4Expr13GetDecimalValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
   ["HASH_CRC", "IrCrcHash"],
   ["HASH_FNV", "IrFnvHash"],
   ["HASH_MURMUR", "IrMurmurHash"],
-  ["HASH_JOIN_PROCESS_BUILD_BATCH", "12HashJoinNode17ProcessBuildBatch"],
-  ["HASH_JOIN_PROCESS_PROBE_BATCH", "12HashJoinNode17ProcessProbeBatch"],
-  ["PHJ_PROCESS_BUILD_BATCH", "10PhjBuilder17ProcessBuildBatch"],
-  ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN", "ProcessProbeBatchILi0"],
-  ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN", "ProcessProbeBatchILi1"],
-  ["PHJ_PROCESS_PROBE_BATCH_LEFT_SEMI_JOIN", "ProcessProbeBatchILi2"],
-  ["PHJ_PROCESS_PROBE_BATCH_LEFT_ANTI_JOIN", "ProcessProbeBatchILi3"],
-  ["PHJ_PROCESS_PROBE_BATCH_NULL_AWARE_LEFT_ANTI_JOIN", "ProcessProbeBatchILi4"],
-  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_OUTER_JOIN", "ProcessProbeBatchILi5"],
-  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_SEMI_JOIN", "ProcessProbeBatchILi6"],
-  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_ANTI_JOIN", "ProcessProbeBatchILi7"],
-  ["PHJ_PROCESS_PROBE_BATCH_FULL_OUTER_JOIN", "ProcessProbeBatchILi8"],
-  ["PHJ_INSERT_BATCH", "10PhjBuilder9Partition11InsertBatch"],
-  ["HASH_TABLE_GET_HASH_SEED", "GetHashSeed"],
-  ["HASH_TABLE_GET_BUILD_EXPR_CTX", "HashTableCtx15GetBuildExprCtx"],
-  ["HASH_TABLE_GET_PROBE_EXPR_CTX", "HashTableCtx15GetProbeExprCtx"],
-  ["HLL_UPDATE_BOOLEAN", "HllUpdateIN10impala_udf10BooleanVal"],
-  ["HLL_UPDATE_TINYINT", "HllUpdateIN10impala_udf10TinyIntVal"],
-  ["HLL_UPDATE_SMALLINT", "HllUpdateIN10impala_udf11SmallIntVal"],
-  ["HLL_UPDATE_INT", "HllUpdateIN10impala_udf6IntVal"],
-  ["HLL_UPDATE_BIGINT", "HllUpdateIN10impala_udf9BigIntVal"],
-  ["HLL_UPDATE_FLOAT", "HllUpdateIN10impala_udf8FloatVal"],
-  ["HLL_UPDATE_DOUBLE", "HllUpdateIN10impala_udf9DoubleVal"],
-  ["HLL_UPDATE_STRING", "HllUpdateIN10impala_udf9StringVal"],
-  ["HLL_UPDATE_TIMESTAMP", "HllUpdateIN10impala_udf12TimestampVal"],
-  ["HLL_UPDATE_DECIMAL", "HllUpdateIN10impala_udf10DecimalVal"],
-  ["HLL_MERGE", "HllMerge"],
-  ["DECODE_AVRO_DATA", "DecodeAvroData"],
-  ["READ_UNION_TYPE", "ReadUnionType"],
-  ["READ_AVRO_BOOLEAN", "ReadAvroBoolean"],
-  ["READ_AVRO_INT32", "ReadAvroInt32"],
-  ["READ_AVRO_INT64", "ReadAvroInt64"],
-  ["READ_AVRO_FLOAT", "ReadAvroFloat"],
-  ["READ_AVRO_DOUBLE", "ReadAvroDouble"],
-  ["READ_AVRO_STRING", "ReadAvroString"],
-  ["READ_AVRO_VARCHAR", "ReadAvroVarchar"],
-  ["READ_AVRO_CHAR", "ReadAvroChar"],
-  ["READ_AVRO_DECIMAL", "ReadAvroDecimal"],
-  ["HDFS_SCANNER_WRITE_ALIGNED_TUPLES", "WriteAlignedTuples"],
-  ["HDFS_SCANNER_GET_CONJUNCT_CTX", "GetConjunctCtx"],
-  ["PROCESS_SCRATCH_BATCH", "ProcessScratchBatch"],
+  ["HASH_JOIN_PROCESS_BUILD_BATCH",
+   "_ZN6impala12HashJoinNode17ProcessBuildBatchEPNS_8RowBatchE"],
+  ["HASH_JOIN_PROCESS_PROBE_BATCH",
+   "_ZN6impala12HashJoinNode17ProcessProbeBatchEPNS_8RowBatchES2_i"],
+  ["PHJ_PROCESS_BUILD_BATCH",
+   "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEb"],
+  ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi0EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi1EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_LEFT_SEMI_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi2EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_LEFT_ANTI_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi3EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_NULL_AWARE_LEFT_ANTI_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi4EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_OUTER_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi5EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_SEMI_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi6EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_ANTI_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi7EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_FULL_OUTER_JOIN",
+   "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi8EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_INSERT_BATCH",
+   "_ZN6impala10PhjBuilder9Partition11InsertBatchENS_13TPrefetchMode4typeEPNS_12HashTableCtxEPNS_8RowBatchERKSt6vectorINS_19BufferedTupleStream6RowIdxESaISA_EE"],
+  ["HASH_TABLE_GET_HASH_SEED",
+   "_ZNK6impala12HashTableCtx11GetHashSeedEv"],
+  ["HASH_TABLE_GET_BUILD_EXPR_CTX",
+   "_ZNK6impala12HashTableCtx15GetBuildExprCtxEi"],
+  ["HASH_TABLE_GET_PROBE_EXPR_CTX",
+   "_ZNK6impala12HashTableCtx15GetProbeExprCtxEi"],
+  ["HLL_UPDATE_BOOLEAN",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf10BooleanValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_TINYINT",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf10TinyIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_SMALLINT",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf11SmallIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_INT",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf6IntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_BIGINT",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_FLOAT",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf8FloatValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_DOUBLE",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_STRING",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf9StringValEEEvPNS2_15FunctionContextERKT_PS3_"],
+  ["HLL_UPDATE_TIMESTAMP",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf12TimestampValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_DECIMAL",
+   "_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf10DecimalValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_MERGE",
+   "_ZN6impala18AggregateFunctions8HllMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_"],
+  ["DECODE_AVRO_DATA",
+   "_ZN6impala15HdfsAvroScanner14DecodeAvroDataEiPNS_7MemPoolEPPhS3_PNS_5TupleEPNS_8TupleRowE"],
+  ["READ_UNION_TYPE",
+   "_ZN6impala15HdfsAvroScanner13ReadUnionTypeEiPPhS1_Pb"],
+  ["READ_AVRO_BOOLEAN",
+   "_ZN6impala15HdfsAvroScanner15ReadAvroBooleanENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_INT32",
+   "_ZN6impala15HdfsAvroScanner13ReadAvroInt32ENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_INT64",
+   "_ZN6impala15HdfsAvroScanner13ReadAvroInt64ENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_FLOAT",
+   "_ZN6impala15HdfsAvroScanner13ReadAvroFloatENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_DOUBLE",
+   "_ZN6impala15HdfsAvroScanner14ReadAvroDoubleENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_STRING",
+   "_ZN6impala15HdfsAvroScanner14ReadAvroStringENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_VARCHAR",
+   "_ZN6impala15HdfsAvroScanner15ReadAvroVarcharENS_13PrimitiveTypeEiPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_CHAR",
+   "_ZN6impala15HdfsAvroScanner12ReadAvroCharENS_13PrimitiveTypeEiPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_DECIMAL",
+   "_ZN6impala15HdfsAvroScanner15ReadAvroDecimalEiPPhS1_bPvPNS_7MemPoolE"],
+  ["HDFS_SCANNER_WRITE_ALIGNED_TUPLES",
+   "_ZN6impala11HdfsScanner18WriteAlignedTuplesEPNS_7MemPoolEPNS_8TupleRowEiPNS_13FieldLocationEiiii"],
+  ["HDFS_SCANNER_GET_CONJUNCT_CTX",
+   "_ZNK6impala11HdfsScanner14GetConjunctCtxEi"],
+  ["PROCESS_SCRATCH_BATCH",
+   "_ZN6impala18HdfsParquetScanner19ProcessScratchBatchEPNS_8RowBatchE"],
   ["STRING_TO_BOOL", "IrStringToBool"],
-  ["STRING_TO_INT8", "IrStringToInt8"],
+  ["STRING_TO_INT8", "_Z14IrStringToInt8PKciPN6impala12StringParser11ParseResultE"],
   ["STRING_TO_INT16", "IrStringToInt16"],
   ["STRING_TO_INT32", "IrStringToInt32"],
   ["STRING_TO_INT64", "IrStringToInt64"],
@@ -129,10 +195,14 @@ ir_functions = [
   ["STRING_TO_DOUBLE", "IrStringToDouble"],
   ["IS_NULL_STRING", "IrIsNullString"],
   ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
-  ["RAW_VALUE_COMPARE", "8RawValue7Compare"],
-  ["TOPN_NODE_INSERT_BATCH", "TopNNode11InsertBatch"],
-  ["MEMPOOL_ALLOCATE", "MemPool8AllocateILb0"],
-  ["MEMPOOL_CHECKED_ALLOCATE", "MemPool8AllocateILb1"],
+  ["RAW_VALUE_COMPARE",
+   "_ZN6impala8RawValue7CompareEPKvS2_RKNS_10ColumnTypeE"],
+  ["TOPN_NODE_INSERT_BATCH",
+   "_ZN6impala8TopNNode11InsertBatchEPNS_8RowBatchE"],
+  ["MEMPOOL_ALLOCATE",
+   "_ZN6impala7MemPool8AllocateILb0EEEPhl"],
+  ["MEMPOOL_CHECKED_ALLOCATE",
+   "_ZN6impala7MemPool8AllocateILb1EEEPhl"]
 ]
 
 enums_preamble = '\

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/47b8aa3a/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index b107c51..cc8b46c 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -201,6 +201,15 @@ void LlvmCodeGen::InitializeLlvm(bool load_backend) {
   scoped_ptr<LlvmCodeGen> init_codegen;
   Status status = LlvmCodeGen::CreateFromMemory(&init_pool, "init", &init_codegen);
   ParseGVForFunctions(init_codegen->module_, &gv_ref_ir_fns_);
+
+  // Validate the module by verifying that functions for all IRFunction::Type
+  // can be found.
+  for (int i = IRFunction::FN_START; i < IRFunction::FN_END; ++i) {
+    DCHECK(FN_MAPPINGS[i].fn == i);
+    const string& fn_name = FN_MAPPINGS[i].fn_name;
+    DCHECK(init_codegen->module_->getFunction(fn_name) != NULL)
+        << "Failed to find function " << fn_name;
+  }
 }
 
 LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& id) :
@@ -210,7 +219,8 @@ LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& id) :
   is_corrupt_(false),
   is_compiled_(false),
   context_(new llvm::LLVMContext()),
-  module_(NULL) {
+  module_(NULL),
+  loaded_functions_(IRFunction::FN_END, NULL) {
 
   DCHECK(llvm_initialized_) << "Must call LlvmCodeGen::InitializeLlvm first.";
 
@@ -222,8 +232,6 @@ LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& id) :
   compile_timer_ = ADD_TIMER(&profile_, "CompileTime");
   num_functions_ = ADD_COUNTER(&profile_, "NumFunctions", TUnit::UNIT);
   num_instructions_ = ADD_COUNTER(&profile_, "NumInstructions", TUnit::UNIT);
-
-  loaded_functions_.resize(IRFunction::FN_END);
 }
 
 Status LlvmCodeGen::CreateFromFile(ObjectPool* pool,
@@ -390,49 +398,12 @@ Status LlvmCodeGen::CreateImpalaCodegen(
     return Status("Could not create llvm struct type for StringVal");
   }
 
-  // Fills 'functions' with all the cross-compiled functions that are defined in
-  // the module.
-  vector<Function*> functions;
-  for (Function& fn: codegen->module_->functions()) {
-    if (fn.isMaterializable()) functions.push_back(&fn);
-    if (gv_ref_ir_fns_.find(fn.getName()) != gv_ref_ir_fns_.end()) {
-      codegen->MaterializeFunction(&fn);
-    }
-  }
-  int parsed_functions = 0;
-  for (int i = 0; i < functions.size(); ++i) {
-    string fn_name = functions[i]->getName();
-    for (int j = IRFunction::FN_START; j < IRFunction::FN_END; ++j) {
-      // Substring match to match precompiled functions.  The compiled function names
-      // will be mangled.
-      // TODO: reconsider this.  Substring match is probably not strict enough but
-      // undoing the mangling is no fun either.
-      if (fn_name.find(FN_MAPPINGS[j].fn_name) != string::npos) {
-        // TODO: make this a DCHECK when we resolve IMPALA-2439
-        CHECK(codegen->loaded_functions_[FN_MAPPINGS[j].fn] == NULL)
-            << "Duplicate definition found for function " << FN_MAPPINGS[j].fn_name
-            << ": " << fn_name;
-        functions[i]->addFnAttr(Attribute::AlwaysInline);
-        codegen->loaded_functions_[FN_MAPPINGS[j].fn] = functions[i];
-        ++parsed_functions;
-      }
-    }
-  }
-
-  if (parsed_functions != IRFunction::FN_END) {
-    stringstream ss;
-    ss << "Unable to find these precompiled functions: ";
-    bool first = true;
-    for (int i = IRFunction::FN_START; i != IRFunction::FN_END; ++i) {
-      if (codegen->loaded_functions_[i] == NULL) {
-        if (!first) ss << ", ";
-        ss << FN_MAPPINGS[i].fn_name;
-        first = false;
-      }
-    }
-    return Status(ss.str());
+  // Materialize functions implicitly referenced by the global variables.
+  for (const string& fn_name : gv_ref_ir_fns_) {
+    Function* fn = codegen->module_->getFunction(fn_name);
+    DCHECK(fn != NULL);
+    codegen->MaterializeFunction(fn);
   }
-
   return Status::OK();
 }
 
@@ -687,8 +658,19 @@ Function* LlvmCodeGen::GetFunction(const string& symbol) {
 }
 
 Function* LlvmCodeGen::GetFunction(IRFunction::Type ir_type, bool clone) {
-  DCHECK(loaded_functions_[ir_type] != NULL);
   Function* fn = loaded_functions_[ir_type];
+  if (fn == NULL) {
+    DCHECK(FN_MAPPINGS[ir_type].fn == ir_type);
+    const string& fn_name = FN_MAPPINGS[ir_type].fn_name;
+    fn = module_->getFunction(fn_name);
+    if (fn == NULL) {
+      LOG(ERROR) << "Unable to locate function " << fn_name;
+      return NULL;
+    }
+    // Mixing "NoInline" with "AlwaysInline" will lead to compilation failure.
+    if (!fn->hasFnAttribute(Attribute::NoInline)) fn->addFnAttr(Attribute::AlwaysInline);
+    loaded_functions_[ir_type] = fn;
+  }
   Status status = MaterializeFunction(fn);
   if (UNLIKELY(!status.ok())) return NULL;
   if (clone) return CloneFunction(fn);


[19/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/runtime/plan-fragment-executor.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/plan-fragment-executor.cc b/be/src/runtime/plan-fragment-executor.cc
index aba4a26..5269fe5 100644
--- a/be/src/runtime/plan-fragment-executor.cc
+++ b/be/src/runtime/plan-fragment-executor.cc
@@ -26,22 +26,23 @@
 #include "common/logging.h"
 #include "common/object-pool.h"
 #include "exec/data-sink.h"
-#include "exec/exec-node.h"
 #include "exec/exchange-node.h"
-#include "exec/scan-node.h"
-#include "exec/hdfs-scan-node.h"
+#include "exec/exec-node.h"
 #include "exec/hbase-table-scanner.h"
+#include "exec/hdfs-scan-node.h"
+#include "exec/plan-root-sink.h"
+#include "exec/scan-node.h"
 #include "exprs/expr.h"
-#include "runtime/descriptors.h"
 #include "runtime/data-stream-mgr.h"
+#include "runtime/descriptors.h"
+#include "runtime/mem-tracker.h"
 #include "runtime/row-batch.h"
 #include "runtime/runtime-filter-bank.h"
-#include "runtime/mem-tracker.h"
+#include "util/container-util.h"
 #include "util/cpu-info.h"
 #include "util/debug-util.h"
-#include "util/container-util.h"
-#include "util/parse-util.h"
 #include "util/mem-info.h"
+#include "util/parse-util.h"
 #include "util/periodic-counter-updater.h"
 #include "util/pretty-printer.h"
 
@@ -60,28 +61,45 @@ namespace impala {
 
 const string PlanFragmentExecutor::PER_HOST_PEAK_MEM_COUNTER = "PerHostPeakMemUsage";
 
-PlanFragmentExecutor::PlanFragmentExecutor(ExecEnv* exec_env,
-    const ReportStatusCallback& report_status_cb) :
-    exec_env_(exec_env), plan_(NULL), report_status_cb_(report_status_cb),
-    report_thread_active_(false), done_(false), closed_(false),
-    has_thread_token_(false), is_prepared_(false), is_cancelled_(false),
-    average_thread_tokens_(NULL), mem_usage_sampled_counter_(NULL),
-    thread_usage_sampled_counter_(NULL) {
-}
+PlanFragmentExecutor::PlanFragmentExecutor(
+    ExecEnv* exec_env, const ReportStatusCallback& report_status_cb)
+  : exec_env_(exec_env),
+    exec_tree_(NULL),
+    report_status_cb_(report_status_cb),
+    report_thread_active_(false),
+    closed_(false),
+    has_thread_token_(false),
+    is_prepared_(false),
+    is_cancelled_(false),
+    average_thread_tokens_(NULL),
+    mem_usage_sampled_counter_(NULL),
+    thread_usage_sampled_counter_(NULL) {}
 
 PlanFragmentExecutor::~PlanFragmentExecutor() {
-  Close();
+  DCHECK(!is_prepared_ || closed_);
   // at this point, the report thread should have been stopped
   DCHECK(!report_thread_active_);
 }
 
 Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {
+  Status status = PrepareInternal(request);
+  prepared_promise_.Set(status);
+  return status;
+}
+
+Status PlanFragmentExecutor::WaitForOpen() {
+  DCHECK(prepared_promise_.IsSet()) << "Prepare() must complete before WaitForOpen()";
+  RETURN_IF_ERROR(prepared_promise_.Get());
+  return opened_promise_.Get();
+}
+
+Status PlanFragmentExecutor::PrepareInternal(const TExecPlanFragmentParams& request) {
   lock_guard<mutex> l(prepare_lock_);
   DCHECK(!is_prepared_);
 
   if (is_cancelled_) return Status::CANCELLED;
-
   is_prepared_ = true;
+
   // TODO: Break this method up.
   fragment_sw_.Start();
   const TPlanFragmentInstanceCtx& fragment_instance_ctx = request.fragment_instance_ctx;
@@ -100,6 +118,10 @@ Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {
 
   // total_time_counter() is in the runtime_state_ so start it up now.
   SCOPED_TIMER(profile()->total_time_counter());
+  timings_profile_ =
+      obj_pool()->Add(new RuntimeProfile(obj_pool(), "PlanFragmentExecutor"));
+  profile()->AddChild(timings_profile_);
+  SCOPED_TIMER(ADD_TIMER(timings_profile_, "PrepareTime"));
 
   // reservation or a query option.
   int64_t bytes_limit = -1;
@@ -145,22 +167,22 @@ Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {
 
   // set up plan
   DCHECK(request.__isset.fragment_ctx);
-  RETURN_IF_ERROR(ExecNode::CreateTree(runtime_state_.get(),
-      request.fragment_ctx.fragment.plan, *desc_tbl, &plan_));
-  runtime_state_->set_fragment_root_id(plan_->id());
+  RETURN_IF_ERROR(ExecNode::CreateTree(
+      runtime_state_.get(), request.fragment_ctx.fragment.plan, *desc_tbl, &exec_tree_));
+  runtime_state_->set_fragment_root_id(exec_tree_->id());
 
   if (fragment_instance_ctx.__isset.debug_node_id) {
     DCHECK(fragment_instance_ctx.__isset.debug_action);
     DCHECK(fragment_instance_ctx.__isset.debug_phase);
     ExecNode::SetDebugOptions(fragment_instance_ctx.debug_node_id,
-        fragment_instance_ctx.debug_phase, fragment_instance_ctx.debug_action, plan_);
+        fragment_instance_ctx.debug_phase, fragment_instance_ctx.debug_action,
+        exec_tree_);
   }
 
   // set #senders of exchange nodes before calling Prepare()
   vector<ExecNode*> exch_nodes;
-  plan_->CollectNodes(TPlanNodeType::EXCHANGE_NODE, &exch_nodes);
-  for (ExecNode* exch_node: exch_nodes)
-  {
+  exec_tree_->CollectNodes(TPlanNodeType::EXCHANGE_NODE, &exch_nodes);
+  for (ExecNode* exch_node : exch_nodes) {
     DCHECK_EQ(exch_node->type(), TPlanNodeType::EXCHANGE_NODE);
     int num_senders = FindWithDefault(fragment_instance_ctx.per_exch_num_senders,
         exch_node->id(), 0);
@@ -171,7 +193,7 @@ Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {
   // set scan ranges
   vector<ExecNode*> scan_nodes;
   vector<TScanRangeParams> no_scan_ranges;
-  plan_->CollectScanNodes(&scan_nodes);
+  exec_tree_->CollectScanNodes(&scan_nodes);
   for (int i = 0; i < scan_nodes.size(); ++i) {
     ScanNode* scan_node = static_cast<ScanNode*>(scan_nodes[i]);
     const vector<TScanRangeParams>& scan_ranges = FindWithDefault(
@@ -179,42 +201,47 @@ Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {
     scan_node->SetScanRanges(scan_ranges);
   }
 
-  RuntimeProfile::Counter* prepare_timer = ADD_TIMER(profile(), "PrepareTime");
+  RuntimeProfile::Counter* prepare_timer = ADD_TIMER(profile(), "ExecTreePrepareTime");
   {
     SCOPED_TIMER(prepare_timer);
-    RETURN_IF_ERROR(plan_->Prepare(runtime_state_.get()));
+    RETURN_IF_ERROR(exec_tree_->Prepare(runtime_state_.get()));
   }
 
   PrintVolumeIds(fragment_instance_ctx.per_node_scan_ranges);
 
-  // set up sink, if required
-  if (request.fragment_ctx.fragment.__isset.output_sink) {
-    RETURN_IF_ERROR(DataSink::CreateDataSink(
-        obj_pool(), request.fragment_ctx.fragment.output_sink,
-        request.fragment_ctx.fragment.output_exprs,
-        fragment_instance_ctx, row_desc(), &sink_));
-    sink_mem_tracker_.reset(new MemTracker(
-        -1, sink_->GetName(), runtime_state_->instance_mem_tracker(), true));
-    RETURN_IF_ERROR(sink_->Prepare(runtime_state(), sink_mem_tracker_.get()));
-
-    RuntimeProfile* sink_profile = sink_->profile();
-    if (sink_profile != NULL) {
-      profile()->AddChild(sink_profile);
-    }
-  } else {
-    sink_.reset(NULL);
+  DCHECK(request.fragment_ctx.fragment.__isset.output_sink);
+  RETURN_IF_ERROR(
+      DataSink::CreateDataSink(obj_pool(), request.fragment_ctx.fragment.output_sink,
+          request.fragment_ctx.fragment.output_exprs, fragment_instance_ctx,
+          exec_tree_->row_desc(), &sink_));
+  sink_mem_tracker_.reset(
+      new MemTracker(-1, sink_->GetName(), runtime_state_->instance_mem_tracker(), true));
+  RETURN_IF_ERROR(sink_->Prepare(runtime_state(), sink_mem_tracker_.get()));
+
+  RuntimeProfile* sink_profile = sink_->profile();
+  if (sink_profile != NULL) {
+    profile()->AddChild(sink_profile);
+  }
+
+  if (request.fragment_ctx.fragment.output_sink.type == TDataSinkType::PLAN_ROOT_SINK) {
+    root_sink_ = reinterpret_cast<PlanRootSink*>(sink_.get());
+    // Release the thread token on the root fragment instance. This fragment spends most
+    // of the time waiting and doing very little work. Holding on to the token causes
+    // underutilization of the machine. If there are 12 queries on this node, that's 12
+    // tokens reserved for no reason.
+    ReleaseThreadToken();
   }
 
   // set up profile counters
-  profile()->AddChild(plan_->runtime_profile());
+  profile()->AddChild(exec_tree_->runtime_profile());
   rows_produced_counter_ =
       ADD_COUNTER(profile(), "RowsProduced", TUnit::UNIT);
   per_host_mem_usage_ =
       ADD_COUNTER(profile(), PER_HOST_PEAK_MEM_COUNTER, TUnit::BYTES);
 
-  row_batch_.reset(new RowBatch(plan_->row_desc(), runtime_state_->batch_size(),
-        runtime_state_->instance_mem_tracker()));
-  VLOG(2) << "plan_root=\n" << plan_->DebugString();
+  row_batch_.reset(new RowBatch(exec_tree_->row_desc(), runtime_state_->batch_size(),
+      runtime_state_->instance_mem_tracker()));
+  VLOG(2) << "plan_root=\n" << exec_tree_->DebugString();
   return Status::OK();
 }
 
@@ -251,12 +278,21 @@ void PlanFragmentExecutor::PrintVolumeIds(
 }
 
 Status PlanFragmentExecutor::Open() {
-  VLOG_QUERY << "Open(): instance_id="
-      << runtime_state_->fragment_instance_id();
+  SCOPED_TIMER(profile()->total_time_counter());
+  SCOPED_TIMER(ADD_TIMER(timings_profile_, "OpenTime"));
+  VLOG_QUERY << "Open(): instance_id=" << runtime_state_->fragment_instance_id();
+  Status status = OpenInternal();
+  UpdateStatus(status);
+  opened_promise_.Set(status);
+  return status;
+}
 
-  RETURN_IF_ERROR(runtime_state_->desc_tbl().PrepareAndOpenPartitionExprs(runtime_state_.get()));
+Status PlanFragmentExecutor::OpenInternal() {
+  RETURN_IF_ERROR(
+      runtime_state_->desc_tbl().PrepareAndOpenPartitionExprs(runtime_state_.get()));
 
-  // we need to start the profile-reporting thread before calling Open(), since it
+  // we need to start the profile-reporting thread before calling exec_tree_->Open(),
+  // since it
   // may block
   if (!report_status_cb_.empty() && FLAGS_status_report_interval > 0) {
     unique_lock<mutex> l(report_thread_lock_);
@@ -271,22 +307,25 @@ Status PlanFragmentExecutor::Open() {
 
   OptimizeLlvmModule();
 
-  Status status = OpenInternal();
-  if (sink_.get() != NULL) {
-    // We call Close() here rather than in OpenInternal() because we want to make sure
-    // that Close() gets called even if there was an error in OpenInternal().
-    // We also want to call sink_->Close() here rather than in PlanFragmentExecutor::Close
-    // because we do not want the sink_ to hold on to all its resources as we will never
-    // use it after this.
-    sink_->Close(runtime_state());
-    // If there's a sink and no error, OpenInternal() completed the fragment execution.
-    if (status.ok()) {
-      done_ = true;
-      FragmentComplete();
-    }
+  {
+    SCOPED_TIMER(ADD_TIMER(timings_profile_, "ExecTreeOpenTime"));
+    RETURN_IF_ERROR(exec_tree_->Open(runtime_state_.get()));
   }
+  return sink_->Open(runtime_state_.get());
+}
 
-  if (!status.ok() && !status.IsCancelled() && !status.IsMemLimitExceeded()) {
+Status PlanFragmentExecutor::Exec() {
+  SCOPED_TIMER(ADD_TIMER(timings_profile_, "ExecTime"));
+  {
+    lock_guard<mutex> l(status_lock_);
+    RETURN_IF_ERROR(status_);
+  }
+  Status status = ExecInternal();
+
+  // If there's no error, ExecInternal() completed the fragment instance's execution.
+  if (status.ok()) {
+    FragmentComplete();
+  } else if (!status.IsCancelled() && !status.IsMemLimitExceeded()) {
     // Log error message in addition to returning in Status. Queries that do not
     // fetch results (e.g. insert) may not receive the message directly and can
     // only retrieve the log.
@@ -296,21 +335,23 @@ Status PlanFragmentExecutor::Open() {
   return status;
 }
 
-Status PlanFragmentExecutor::OpenInternal() {
-  SCOPED_TIMER(profile()->total_time_counter());
-  RETURN_IF_ERROR(plan_->Open(runtime_state_.get()));
-  if (sink_.get() == NULL) return Status::OK();
-
-  // If there is a sink, do all the work of driving it here, so that
-  // when this returns the query has actually finished
-  RETURN_IF_ERROR(sink_->Open(runtime_state_.get()));
-  while (!done_) {
+Status PlanFragmentExecutor::ExecInternal() {
+  RuntimeProfile::Counter* plan_exec_timer =
+      ADD_TIMER(timings_profile_, "ExecTreeExecTime");
+  bool exec_tree_complete = false;
+  do {
+    Status status;
     row_batch_->Reset();
-    RETURN_IF_ERROR(plan_->GetNext(runtime_state_.get(), row_batch_.get(), &done_));
-    if (VLOG_ROW_IS_ON) row_batch_->VLogRows("PlanFragmentExecutor::OpenInternal()");
+    {
+      SCOPED_TIMER(plan_exec_timer);
+      status = exec_tree_->GetNext(
+          runtime_state_.get(), row_batch_.get(), &exec_tree_complete);
+    }
+    if (VLOG_ROW_IS_ON) row_batch_->VLogRows("PlanFragmentExecutor::ExecInternal()");
     COUNTER_ADD(rows_produced_counter_, row_batch_->num_rows());
+    RETURN_IF_ERROR(status);
     RETURN_IF_ERROR(sink_->Send(runtime_state(), row_batch_.get()));
-  }
+  } while (!exec_tree_complete);
 
   // Flush the sink *before* stopping the report thread. Flush may need to add some
   // important information to the last report that gets sent. (e.g. table sinks record the
@@ -376,13 +417,20 @@ void PlanFragmentExecutor::SendReport(bool done) {
     status = status_;
   }
 
+  // If status is not OK, we need to make sure that only one sender sends a 'done'
+  // response.
+  // TODO: Clean all this up - move 'done' reporting to Close()?
+  if (!done && !status.ok()) {
+    done = completed_report_sent_.CompareAndSwap(0, 1);
+  }
+
   // Update the counter for the peak per host mem usage.
   per_host_mem_usage_->Set(runtime_state()->query_mem_tracker()->peak_consumption());
 
   // This will send a report even if we are cancelled.  If the query completed correctly
   // but fragments still need to be cancelled (e.g. limit reached), the coordinator will
   // be waiting for a final report and profile.
-  report_status_cb_(status, profile(), done || !status.ok());
+  report_status_cb_(status, profile(), done);
 }
 
 void PlanFragmentExecutor::StopReportThread() {
@@ -395,36 +443,6 @@ void PlanFragmentExecutor::StopReportThread() {
   report_thread_->Join();
 }
 
-Status PlanFragmentExecutor::GetNext(RowBatch** batch) {
-  SCOPED_TIMER(profile()->total_time_counter());
-  VLOG_FILE << "GetNext(): instance_id=" << runtime_state_->fragment_instance_id();
-
-  Status status = Status::OK();
-  row_batch_->Reset();
-  // Loop until we've got a non-empty batch, hit an error or exhausted the input.
-  while (!done_) {
-    status = plan_->GetNext(runtime_state_.get(), row_batch_.get(), &done_);
-    if (VLOG_ROW_IS_ON) row_batch_->VLogRows("PlanFragmentExecutor::GetNext()");
-    if (!status.ok()) break;
-    if (row_batch_->num_rows() > 0) break;
-    row_batch_->Reset();
-  }
-  UpdateStatus(status);
-  COUNTER_ADD(rows_produced_counter_, row_batch_->num_rows());
-
-  if (done_) {
-    VLOG_QUERY << "Finished executing fragment query_id=" << PrintId(query_id_)
-        << " instance_id=" << PrintId(runtime_state_->fragment_instance_id());
-    FragmentComplete();
-    // Once all rows are returned, signal that we're done with an empty batch.
-    *batch = row_batch_->num_rows() == 0 ? NULL : row_batch_.get();
-    return status;
-  }
-
-  *batch = row_batch_.get();
-  return status;
-}
-
 void PlanFragmentExecutor::FragmentComplete() {
   // Check the atomic flag. If it is set, then a fragment complete report has already
   // been sent.
@@ -463,7 +481,7 @@ void PlanFragmentExecutor::UpdateStatus(const Status& status) {
 }
 
 void PlanFragmentExecutor::Cancel() {
-  VLOG_QUERY << "Cancelling plan fragment...";
+  VLOG_QUERY << "Cancelling fragment instance...";
   lock_guard<mutex> l(prepare_lock_);
   is_cancelled_ = true;
   if (!is_prepared_) {
@@ -476,18 +494,10 @@ void PlanFragmentExecutor::Cancel() {
   runtime_state_->stream_mgr()->Cancel(runtime_state_->fragment_instance_id());
 }
 
-const RowDescriptor& PlanFragmentExecutor::row_desc() {
-  return plan_->row_desc();
-}
-
 RuntimeProfile* PlanFragmentExecutor::profile() {
   return runtime_state_->runtime_profile();
 }
 
-bool PlanFragmentExecutor::ReachedLimit() {
-  return plan_->ReachedLimit();
-}
-
 void PlanFragmentExecutor::ReleaseThreadToken() {
   if (has_thread_token_) {
     has_thread_token_ = false;
@@ -500,19 +510,23 @@ void PlanFragmentExecutor::ReleaseThreadToken() {
 
 void PlanFragmentExecutor::Close() {
   if (closed_) return;
+  if (!is_prepared_) return;
+  if (sink_.get() != nullptr) sink_->Close(runtime_state());
+
   row_batch_.reset();
   if (sink_mem_tracker_ != NULL) {
     sink_mem_tracker_->UnregisterFromParent();
     sink_mem_tracker_.reset();
   }
-  // Prepare may not have been called, which sets runtime_state_
-  if (runtime_state_.get() != NULL) {
-    if (plan_ != NULL) plan_->Close(runtime_state_.get());
-    runtime_state_->UnregisterReaderContexts();
-    exec_env_->thread_mgr()->UnregisterPool(runtime_state_->resource_pool());
-    runtime_state_->desc_tbl().ClosePartitionExprs(runtime_state_.get());
-    runtime_state_->filter_bank()->Close();
-  }
+
+  // Prepare should always have been called, and so runtime_state_ should be set
+  DCHECK(prepared_promise_.IsSet());
+  if (exec_tree_ != NULL) exec_tree_->Close(runtime_state_.get());
+  runtime_state_->UnregisterReaderContexts();
+  exec_env_->thread_mgr()->UnregisterPool(runtime_state_->resource_pool());
+  runtime_state_->desc_tbl().ClosePartitionExprs(runtime_state_.get());
+  runtime_state_->filter_bank()->Close();
+
   if (mem_usage_sampled_counter_ != NULL) {
     PeriodicCounterUpdater::StopTimeSeriesCounter(mem_usage_sampled_counter_);
     mem_usage_sampled_counter_ = NULL;
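
A side note on the SendReport() change above: the 'done' flag is claimed through an
atomic compare-and-swap so that, when execution fails, only one caller promotes its
report to the final one. A small sketch of that idiom using std::atomic (Impala's own
AtomicInt wrapper and the surrounding reporting plumbing are not shown):

  #include <atomic>

  // Returns true for exactly one caller; every later caller gets false. Analogous to
  // completed_report_sent_.CompareAndSwap(0, 1) in the patch above.
  class FinalReportGuard {
   public:
    bool TryClaimFinalReport() {
      int expected = 0;
      return flag_.compare_exchange_strong(expected, 1);
    }
   private:
    std::atomic<int> flag_{0};
  };

The first failing sender that wins the exchange marks its report as 'done', so the
coordinator receives exactly one final status for the fragment instance.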

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/runtime/plan-fragment-executor.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/plan-fragment-executor.h b/be/src/runtime/plan-fragment-executor.h
index f4355ea..82d3001 100644
--- a/be/src/runtime/plan-fragment-executor.h
+++ b/be/src/runtime/plan-fragment-executor.h
@@ -23,9 +23,10 @@
 #include <boost/scoped_ptr.hpp>
 #include <boost/function.hpp>
 
-#include "common/status.h"
 #include "common/object-pool.h"
+#include "common/status.h"
 #include "runtime/runtime-state.h"
+#include "util/promise.h"
 #include "util/runtime-profile-counters.h"
 #include "util/thread.h"
 
@@ -33,6 +34,7 @@ namespace impala {
 
 class HdfsFsCache;
 class ExecNode;
+class PlanRootSink;
 class RowDescriptor;
 class RowBatch;
 class DataSink;
@@ -45,21 +47,28 @@ class TPlanFragment;
 class TPlanExecParams;
 
 /// PlanFragmentExecutor handles all aspects of the execution of a single plan fragment,
-/// including setup and tear-down, both in the success and error case.
-/// Tear-down frees all memory allocated for this plan fragment and closes all data
-/// streams; it happens automatically in the d'tor.
-//
-/// The executor makes an aggregated profile for the entire fragment available,
-/// which includes profile information for the plan itself as well as the output
-/// sink, if any.
+/// including setup and tear-down, both in the success and error case. Tear-down, which
+/// happens in Close(), frees all memory allocated for this plan fragment and closes all
+/// data streams.
+///
+/// The lifecycle of a PlanFragmentExecutor is as follows:
+///     if (Prepare().ok()) {
+///       Open()
+///       Exec()
+///     }
+///     Close()
+///
+/// The executor makes an aggregated profile for the entire fragment available, which
+/// includes profile information for the plan itself as well as the output sink.
+///
 /// The ReportStatusCallback passed into the c'tor is invoked periodically to report the
 /// execution status. The frequency of those reports is controlled by the flag
 /// status_report_interval; setting that flag to 0 disables periodic reporting altogether
-/// Regardless of the value of that flag, if a report callback is specified, it is
-/// invoked at least once at the end of execution with an overall status and profile
-/// (and 'done' indicator). The only exception is when execution is cancelled, in which
-/// case the callback is *not* invoked (the coordinator already knows that execution
-/// stopped, because it initiated the cancellation).
+/// Regardless of the value of that flag, if a report callback is specified, it is invoked
+/// at least once at the end of execution with an overall status and profile (and 'done'
+/// indicator). The only exception is when execution is cancelled, in which case the
+/// callback is *not* invoked (the coordinator already knows that execution stopped,
+/// because it initiated the cancellation).
 //
 /// Aside from Cancel(), which may be called asynchronously, this class is not
 /// thread-safe.
@@ -76,49 +85,37 @@ class PlanFragmentExecutor {
       ReportStatusCallback;
 
   /// report_status_cb, if !empty(), is used to report the accumulated profile
-  /// information periodically during execution (Open() or GetNext()).
+  /// information periodically during execution.
   PlanFragmentExecutor(ExecEnv* exec_env, const ReportStatusCallback& report_status_cb);
 
-  /// Closes the underlying plan fragment and frees up all resources allocated
-  /// in Open()/GetNext().
-  /// It is an error to delete a PlanFragmentExecutor with a report callback
-  /// before Open()/GetNext() (depending on whether the fragment has a sink)
-  /// indicated that execution is finished.
+  /// It is an error to delete a PlanFragmentExecutor with a report callback before Exec()
+  /// indicated that execution is finished, or to delete one that has not been Close()'d
+  /// if Prepare() has been called.
   ~PlanFragmentExecutor();
 
   /// Prepare for execution. Call this prior to Open().
   ///
-  /// runtime_state() and row_desc() will not be valid until Prepare() is
-  /// called. runtime_state() will always be valid after Prepare() returns, unless the
-  /// query was cancelled before Prepare() was called.  If request.query_options.mem_limit
-  /// > 0, it is used as an approximate limit on the number of bytes this query can
-  /// consume at runtime.  The query will be aborted (MEM_LIMIT_EXCEEDED) if it goes over
-  /// that limit.
+  /// runtime_state() will not be valid until Prepare() is called. runtime_state() will
+  /// always be valid after Prepare() returns, unless the query was cancelled before
+  /// Prepare() was called.  If request.query_options.mem_limit > 0, it is used as an
+  /// approximate limit on the number of bytes this query can consume at runtime.  The
+  /// query will be aborted (MEM_LIMIT_EXCEEDED) if it goes over that limit.
   ///
   /// If Cancel() is called before Prepare(), Prepare() is a no-op and returns
   /// Status::CANCELLED;
   Status Prepare(const TExecPlanFragmentParams& request);
 
-  /// Start execution. Call this prior to GetNext().
-  /// If this fragment has a sink, Open() will send all rows produced
-  /// by the fragment to that sink. Therefore, Open() may block until
-  /// all rows are produced (and a subsequent call to GetNext() will not return
-  /// any rows).
-  /// This also starts the status-reporting thread, if the interval flag
-  /// is > 0 and a callback was specified in the c'tor.
-  /// If this fragment has a sink, report_status_cb will have been called for the final
-  /// time when Open() returns, and the status-reporting thread will have been stopped.
+  /// Opens the fragment plan and sink. Starts the profile reporting thread, if required.
   Status Open();
 
-  /// Return results through 'batch'. Sets '*batch' to NULL if no more results.
-  /// '*batch' is owned by PlanFragmentExecutor and must not be deleted.
-  /// When *batch == NULL, GetNext() should not be called anymore. Also, report_status_cb
-  /// will have been called for the final time and the status-reporting thread
-  /// will have been stopped.
-  Status GetNext(RowBatch** batch);
+  /// Executes the fragment by repeatedly driving the sink with batches produced by the
+  /// exec node tree. report_status_cb will have been called for the final time when
+  /// Exec() returns, and the status-reporting thread will have been stopped.
+  Status Exec();
 
-  /// Closes the underlying plan fragment and frees up all resources allocated
-  /// in Open()/GetNext().
+  /// Closes the underlying plan fragment and frees up all resources allocated in
+  /// Prepare() and Open(). Must be called if Prepare() has been called - no matter
+  /// whether or not Prepare() succeeded.
   void Close();
 
   /// Initiate cancellation. If called concurrently with Prepare(), will wait for
@@ -131,25 +128,30 @@ class PlanFragmentExecutor {
   /// It is legal to call Cancel() if Prepare() returned an error.
   void Cancel();
 
-  /// Returns true if this query has a limit and it has been reached.
-  bool ReachedLimit();
-
-  /// Releases the thread token for this fragment executor.
-  void ReleaseThreadToken();
-
   /// call these only after Prepare()
   RuntimeState* runtime_state() { return runtime_state_.get(); }
-  const RowDescriptor& row_desc();
 
   /// Profile information for plan and output sink.
   RuntimeProfile* profile();
 
+  /// Blocks until Prepare() is completed.
+  Status WaitForPrepare() { return prepared_promise_.Get(); }
+
+  /// Blocks until exec tree and sink are both opened. It is an error to call this before
+  /// Prepare() has completed. If Prepare() returned an error, WaitForOpen() will
+  /// return that error without blocking.
+  Status WaitForOpen();
+
+  /// Returns fragment instance's sink if this is the root fragment instance. Valid after
+  /// Prepare() returns; if Prepare() fails may be nullptr.
+  PlanRootSink* root_sink() { return root_sink_; }
+
   /// Name of the counter that is tracking per query, per host peak mem usage.
   static const std::string PER_HOST_PEAK_MEM_COUNTER;
 
  private:
   ExecEnv* exec_env_;  // not owned
-  ExecNode* plan_;  // lives in runtime_state_->obj_pool()
+  ExecNode* exec_tree_; // lives in runtime_state_->obj_pool()
   TUniqueId query_id_;
 
   /// profile reporting-related
@@ -166,9 +168,6 @@ class PlanFragmentExecutor {
   boost::condition_variable report_thread_started_cv_;
   bool report_thread_active_;  // true if we started the thread
 
-  /// true if plan_->GetNext() indicated that it's done
-  bool done_;
-
   /// true if Close() has been called
   bool closed_;
 
@@ -190,14 +189,20 @@ class PlanFragmentExecutor {
   /// (e.g. mem_trackers_) from 'runtime_state_' to 'sink_' need to be severed prior to
   /// the dtor of 'runtime_state_'.
   boost::scoped_ptr<RuntimeState> runtime_state_;
-  /// Output sink for rows sent to this fragment. May not be set, in which case rows are
-  /// returned via GetNext's row batch
-  /// Created in Prepare (if required), owned by this object.
+
+  /// Profile for timings for each stage of the plan fragment instance's lifecycle.
+  RuntimeProfile* timings_profile_;
+
+  /// Output sink for rows sent to this fragment. Created in Prepare(), owned by this
+  /// object.
   boost::scoped_ptr<DataSink> sink_;
   boost::scoped_ptr<MemTracker> sink_mem_tracker_;
 
+  /// Set if this fragment instance is the root of the entire plan, so that a consumer can
+  /// pull results by calling root_sink_->GetNext(). Same object as sink_.
+  PlanRootSink* root_sink_ = nullptr;
+
   boost::scoped_ptr<RowBatch> row_batch_;
-  boost::scoped_ptr<TRowBatch> thrift_batch_;
 
   /// Protects is_prepared_ and is_cancelled_, and is also used to coordinate between
   /// Prepare() and Cancel() to ensure mutual exclusion.
@@ -207,6 +212,12 @@ class PlanFragmentExecutor {
   /// error. If Cancel() was called before Prepare(), is_prepared_ will not be set.
   bool is_prepared_;
 
+  /// Set when Prepare() returns.
+  Promise<Status> prepared_promise_;
+
+  /// Set when OpenInternal() returns.
+  Promise<Status> opened_promise_;
+
   /// True if and only if Cancel() has been called.
   bool is_cancelled_;
 
@@ -267,21 +278,25 @@ class PlanFragmentExecutor {
   void FragmentComplete();
 
   /// Optimizes the code-generated functions in runtime_state_->llvm_codegen().
-  /// Must be called between plan_->Prepare() and plan_->Open().
-  /// This is somewhat time consuming so we don't want it to do it in
-  /// PlanFragmentExecutor()::Prepare() to allow starting plan fragments more
-  /// quickly and in parallel (in a deep plan tree, the fragments are started
-  /// in level order).
+  /// Must be called after exec_tree_->Prepare() and before exec_tree_->Open().
   void OptimizeLlvmModule();
 
   /// Executes Open() logic and returns resulting status. Does not set status_.
-  /// If this plan fragment has no sink, OpenInternal() does nothing.
-  /// If this plan fragment has a sink and OpenInternal() returns without an
-  /// error condition, all rows will have been sent to the sink, the sink will
-  /// have been closed, a final report will have been sent and the report thread will
-  /// have been stopped. sink_ will be set to NULL after successful execution.
   Status OpenInternal();
 
+  /// Pulls row batches from fragment instance and pushes them to sink_ in a loop. Returns
+  /// OK if the input was exhausted and sent to the sink successfully, an error otherwise.
+  /// If ExecInternal() returns without an error condition, all rows will have been sent
+  /// to the sink, the sink will have been closed, a final report will have been sent and
+  /// the report thread will have been stopped.
+  Status ExecInternal();
+
+  /// Performs all the logic of Prepare() and returns resulting status.
+  Status PrepareInternal(const TExecPlanFragmentParams& request);
+
+  /// Releases the thread token for this fragment executor.
+  void ReleaseThreadToken();
+
   /// Stops report thread, if one is running. Blocks until report thread terminates.
   /// Idempotent.
   void StopReportThread();
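
Taken together, the lifecycle documented above can be exercised roughly as follows
(hypothetical driver for illustration only; it assumes the PlanFragmentExecutor API
declared in this header and trims error propagation and cancellation):

  // Fragment execution thread, following the documented contract:
  //   if (Prepare().ok()) { Open(); Exec(); }
  //   Close();
  void RunFragmentInstance(impala::PlanFragmentExecutor* executor,
                           const impala::TExecPlanFragmentParams& request) {
    if (executor->Prepare(request).ok()) {
      executor->Open();  // opens exec tree and sink, starts the reporting thread
      executor->Exec();  // drives row batches into the sink; returns early if Open() failed
    }
    executor->Close();   // required whenever Prepare() was called, even if it failed
  }

  // A consumer thread (e.g. the coordinator, for the root instance) can meanwhile block
  // on the promises before pulling results through root_sink():
  //   RETURN_IF_ERROR(executor->WaitForPrepare());
  //   RETURN_IF_ERROR(executor->WaitForOpen());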

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/scheduling/query-schedule.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/query-schedule.cc b/be/src/scheduling/query-schedule.cc
index 5ad84df..1eb36e3 100644
--- a/be/src/scheduling/query-schedule.cc
+++ b/be/src/scheduling/query-schedule.cc
@@ -198,36 +198,28 @@ const TPlanFragment& FInstanceExecParams::fragment() const {
 }
 
 int QuerySchedule::GetNumFragmentInstances() const {
-  if (mt_fragment_exec_params_.empty()) return num_fragment_instances_;
   int result = 0;
-  for (const MtFragmentExecParams& fragment_exec_params: mt_fragment_exec_params_) {
-    result += fragment_exec_params.instance_exec_params.size();
+  if (mt_fragment_exec_params_.empty()) {
+    DCHECK(!fragment_exec_params_.empty());
+    for (const FragmentExecParams& fragment_exec_params : fragment_exec_params_) {
+      result += fragment_exec_params.hosts.size();
+    }
+  } else {
+    for (const MtFragmentExecParams& fragment_exec_params : mt_fragment_exec_params_) {
+      result += fragment_exec_params.instance_exec_params.size();
+    }
   }
   return result;
 }
 
-int QuerySchedule::GetNumRemoteFInstances() const {
-  bool has_coordinator_fragment = GetCoordFragment() != nullptr;
-  int result = GetNumFragmentInstances();
-  bool is_mt_execution = request_.query_ctx.request.query_options.mt_dop > 0;
-  if (is_mt_execution && has_coordinator_fragment) --result;
-  return result;
-}
-
-int QuerySchedule::GetTotalFInstances() const {
-  int result = GetNumRemoteFInstances();
-  return GetCoordFragment() != nullptr ? result + 1 : result;
-}
-
 const TPlanFragment* QuerySchedule::GetCoordFragment() const {
+  // Only have coordinator fragment for statements that return rows.
+  if (request_.stmt_type != TStmtType::QUERY) return nullptr;
   bool is_mt_exec = request_.query_ctx.request.query_options.mt_dop > 0;
   const TPlanFragment* fragment = is_mt_exec
       ? &request_.mt_plan_exec_info[0].fragments[0] : &request_.fragments[0];
-  if (fragment->partition.type == TPartitionType::UNPARTITIONED) {
+
     return fragment;
-  } else {
-    return nullptr;
-  }
 }
 
 void QuerySchedule::GetTPlanFragments(vector<const TPlanFragment*>* fragments) const {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/scheduling/query-schedule.h
----------------------------------------------------------------------
diff --git a/be/src/scheduling/query-schedule.h b/be/src/scheduling/query-schedule.h
index 39ce268..77c9cd6 100644
--- a/be/src/scheduling/query-schedule.h
+++ b/be/src/scheduling/query-schedule.h
@@ -140,34 +140,9 @@ class QuerySchedule {
   /// Helper methods used by scheduler to populate this QuerySchedule.
   void IncNumScanRanges(int64_t delta) { num_scan_ranges_ += delta; }
 
-  /// The following 4 functions need to be replaced once we stop special-casing
-  /// the coordinator instance in the coordinator.
-  /// The replacement is a single function int GetNumFInstances() (which includes
-  /// the coordinator instance).
-
-  /// TODO-MT: remove; this is actually only the number of remote instances
-  /// (from the coordinator's perspective)
-  void set_num_fragment_instances(int64_t num_fragment_instances) {
-    num_fragment_instances_ = num_fragment_instances;
-  }
-
-  /// Returns the number of fragment instances registered with this schedule.
-  /// MT: total number of fragment instances
-  /// ST: value set with set_num_fragment_instances(); excludes coord instance
-  /// (in effect the number of remote instances)
-  /// TODO-MT: get rid of special-casing of coordinator instance and always return the
-  /// total
+  /// Returns the total number of fragment instances.
   int GetNumFragmentInstances() const;
 
-  /// Returns the total number of fragment instances, incl. coordinator fragment.
-  /// TODO-MT: remove
-  int GetTotalFInstances() const;
-
-  /// Returns the number of remote fragment instances (excludes coordinator).
-  /// Works for both MT and ST.
-  /// TODO-MT: remove
-  int GetNumRemoteFInstances() const;
-
   /// Return the coordinator fragment, or nullptr if there isn't one.
   const TPlanFragment* GetCoordFragment() const;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/scheduling/simple-scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler.cc b/be/src/scheduling/simple-scheduler.cc
index 5b6303e..9b52d5a 100644
--- a/be/src/scheduling/simple-scheduler.cc
+++ b/be/src/scheduling/simple-scheduler.cc
@@ -663,11 +663,6 @@ void SimpleScheduler::ComputeFragmentExecParams(const TQueryExecRequest& exec_re
           CreateInstanceId(schedule->query_id(), num_fragment_instances));
     }
   }
-  if (exec_request.fragments[0].partition.type == TPartitionType::UNPARTITIONED) {
-    // the root fragment is executed directly by the coordinator
-    --num_fragment_instances;
-  }
-  schedule->set_num_fragment_instances(num_fragment_instances);
 
   // compute destinations and # senders per exchange node
   // (the root fragment doesn't have a destination)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/fragment-exec-state.cc
----------------------------------------------------------------------
diff --git a/be/src/service/fragment-exec-state.cc b/be/src/service/fragment-exec-state.cc
index 76e11d1..cc56c19 100644
--- a/be/src/service/fragment-exec-state.cc
+++ b/be/src/service/fragment-exec-state.cc
@@ -54,8 +54,10 @@ Status FragmentMgr::FragmentExecState::Prepare() {
 }
 
 void FragmentMgr::FragmentExecState::Exec() {
-  // Open() does the full execution, because all plan fragments have sinks
-  executor_.Open();
+  if (Prepare().ok()) {
+    executor_.Open();
+    executor_.Exec();
+  }
   executor_.Close();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/fragment-exec-state.h
----------------------------------------------------------------------
diff --git a/be/src/service/fragment-exec-state.h b/be/src/service/fragment-exec-state.h
index 6cff7ce..c795cd8 100644
--- a/be/src/service/fragment-exec-state.h
+++ b/be/src/service/fragment-exec-state.h
@@ -47,9 +47,6 @@ class FragmentMgr::FragmentExecState {
   /// the fragment and returns OK.
   Status Cancel();
 
-  /// Call Prepare() and create and initialize data sink.
-  Status Prepare();
-
   /// Main loop of plan fragment execution. Blocks until execution finishes.
   void Exec();
 
@@ -67,6 +64,8 @@ class FragmentMgr::FragmentExecState {
   /// Publishes filter with ID 'filter_id' to this fragment's filter bank.
   void PublishFilter(int32_t filter_id, const TBloomFilter& thrift_bloom_filter);
 
+  PlanFragmentExecutor* executor() { return &executor_; }
+
  private:
   TQueryCtx query_ctx_;
   TPlanFragmentInstanceCtx fragment_instance_ctx_;
@@ -98,6 +97,9 @@ class FragmentMgr::FragmentExecState {
   /// the reporting RPC. `profile` may be NULL if a runtime profile has not been created
   /// for this fragment (e.g. when the fragment has failed during preparation).
   void ReportStatusCb(const Status& status, RuntimeProfile* profile, bool done);
+
+  /// Call Prepare() and create and initialize data sink.
+  Status Prepare();
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/fragment-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/service/fragment-mgr.cc b/be/src/service/fragment-mgr.cc
index 64e9a78..8e8fc05 100644
--- a/be/src/service/fragment-mgr.cc
+++ b/be/src/service/fragment-mgr.cc
@@ -54,9 +54,6 @@ Status FragmentMgr::ExecPlanFragment(const TExecPlanFragmentParams& exec_params)
     return process_mem_tracker->MemLimitExceeded(NULL, msg, 0);
   }
 
-  // Remote fragments must always have a sink. Remove when IMPALA-2905 is resolved.
-  DCHECK(exec_params.fragment_ctx.fragment.__isset.output_sink);
-
   shared_ptr<FragmentExecState> exec_state(
       new FragmentExecState(exec_params, ExecEnv::GetInstance()));
 
@@ -64,6 +61,8 @@ Status FragmentMgr::ExecPlanFragment(const TExecPlanFragmentParams& exec_params)
   // only happen after this RPC returns) can always find this fragment.
   {
     lock_guard<SpinLock> l(fragment_exec_state_map_lock_);
+    DCHECK(fragment_exec_state_map_.find(exec_state->fragment_instance_id())
+        == fragment_exec_state_map_.end());
     fragment_exec_state_map_.insert(
         make_pair(exec_state->fragment_instance_id(), exec_state));
   }
@@ -84,8 +83,7 @@ Status FragmentMgr::ExecPlanFragment(const TExecPlanFragmentParams& exec_params)
 void FragmentMgr::FragmentThread(TUniqueId fragment_instance_id) {
   shared_ptr<FragmentExecState> exec_state = GetFragmentExecState(fragment_instance_id);
   if (exec_state.get() == NULL) return;
-  Status status = exec_state->Prepare();
-  if (status.ok()) exec_state->Exec();
+  exec_state->Exec();
 
   // We're done with this plan fragment
   {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/impala-beeswax-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index 3daa36b..ee7f958 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -17,55 +17,19 @@
 
 #include "service/impala-server.h"
 
-#include <algorithm>
 #include <boost/algorithm/string/join.hpp>
-#include <boost/date_time/posix_time/posix_time_types.hpp>
-#include <boost/unordered_set.hpp>
-#include <jni.h>
-#include <thrift/protocol/TDebugProtocol.h>
-#include <gtest/gtest.h>
-#include <boost/bind.hpp>
-#include <boost/algorithm/string.hpp>
-#include <gperftools/heap-profiler.h>
-#include <gperftools/malloc_extension.h>
-
-#include "codegen/llvm-codegen.h"
+
 #include "common/logging.h"
-#include "common/version.h"
-#include "exec/exec-node.h"
-#include "exec/hdfs-table-sink.h"
-#include "exec/scan-node.h"
-#include "exprs/expr.h"
-#include "runtime/data-stream-mgr.h"
-#include "runtime/client-cache.h"
-#include "runtime/descriptors.h"
-#include "runtime/data-stream-sender.h"
-#include "runtime/row-batch.h"
-#include "runtime/plan-fragment-executor.h"
-#include "runtime/hdfs-fs-cache.h"
+#include "gen-cpp/Frontend_types.h"
+#include "rpc/thrift-util.h"
 #include "runtime/exec-env.h"
-#include "runtime/mem-tracker.h"
 #include "runtime/raw-value.inline.h"
 #include "runtime/timestamp-value.h"
-#include "scheduling/simple-scheduler.h"
 #include "service/query-exec-state.h"
 #include "service/query-options.h"
-#include "util/container-util.h"
-#include "util/debug-util.h"
+#include "service/query-result-set.h"
 #include "util/impalad-metrics.h"
-#include "util/string-parser.h"
-#include "rpc/thrift-util.h"
-#include "rpc/thrift-server.h"
-#include "util/jni-util.h"
 #include "util/webserver.h"
-#include "gen-cpp/Types_types.h"
-#include "gen-cpp/ImpalaService.h"
-#include "gen-cpp/DataSinks_types.h"
-#include "gen-cpp/Types_types.h"
-#include "gen-cpp/ImpalaService.h"
-#include "gen-cpp/ImpalaService_types.h"
-#include "gen-cpp/ImpalaInternalService.h"
-#include "gen-cpp/Frontend_types.h"
 
 #include "common/names.h"
 
@@ -83,11 +47,17 @@ using namespace beeswax;
     }                                                           \
   } while (false)
 
+namespace {
+
+/// Ascii output precision for double/float
+constexpr int ASCII_PRECISION = 16;
+}
+
 namespace impala {
 
 // Ascii result set for Beeswax.
 // Beeswax returns rows in ascii, using "\t" as column delimiter.
-class ImpalaServer::AsciiQueryResultSet : public ImpalaServer::QueryResultSet {
+class AsciiQueryResultSet : public QueryResultSet {
  public:
   // Rows are added into rowset.
   AsciiQueryResultSet(const TResultSetMetadata& metadata, vector<string>* rowset)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/impala-hs2-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index ee79b4b..de0e2f3 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -36,13 +36,14 @@
 #include "exprs/expr.h"
 #include "rpc/thrift-util.h"
 #include "runtime/raw-value.h"
+#include "service/hs2-util.h"
 #include "service/query-exec-state.h"
 #include "service/query-options.h"
+#include "service/query-result-set.h"
 #include "util/debug-util.h"
-#include "util/runtime-profile-counters.h"
 #include "util/impalad-metrics.h"
+#include "util/runtime-profile-counters.h"
 #include "util/string-parser.h"
-#include "service/hs2-util.h"
 
 #include "common/names.h"
 
@@ -129,7 +130,7 @@ static TOperationState::type QueryStateToTOperationState(
 
 // Result set container for Hive protocol versions >= V6, where results are returned in
 // column-orientation.
-class ImpalaServer::HS2ColumnarResultSet : public ImpalaServer::QueryResultSet {
+class HS2ColumnarResultSet : public QueryResultSet {
  public:
   HS2ColumnarResultSet(const TResultSetMetadata& metadata, TRowSet* rowset = NULL)
       : metadata_(metadata), result_set_(rowset), num_rows_(0) {
@@ -317,7 +318,7 @@ class ImpalaServer::HS2ColumnarResultSet : public ImpalaServer::QueryResultSet {
 };
 
 // TRow result set for HiveServer2
-class ImpalaServer::HS2RowOrientedResultSet : public ImpalaServer::QueryResultSet {
+class HS2RowOrientedResultSet : public QueryResultSet {
  public:
   // Rows are added into rowset.
   HS2RowOrientedResultSet(const TResultSetMetadata& metadata, TRowSet* rowset = NULL)
@@ -393,16 +394,6 @@ class ImpalaServer::HS2RowOrientedResultSet : public ImpalaServer::QueryResultSe
   scoped_ptr<TRowSet> owned_result_set_;
 };
 
-ImpalaServer::QueryResultSet* ImpalaServer::CreateHS2ResultSet(
-    TProtocolVersion::type version, const TResultSetMetadata& metadata,
-    TRowSet* rowset) {
-  if (version < TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V6) {
-    return new HS2RowOrientedResultSet(metadata, rowset);
-  } else {
-    return new HS2ColumnarResultSet(metadata, rowset);
-  }
-}
-
 void ImpalaServer::ExecuteMetadataOp(const THandleIdentifier& session_handle,
     TMetadataOpRequest* request, TOperationHandle* handle, thrift::TStatus* status) {
   TUniqueId session_id;
@@ -482,6 +473,18 @@ void ImpalaServer::ExecuteMetadataOp(const THandleIdentifier& session_handle,
   status->__set_statusCode(thrift::TStatusCode::SUCCESS_STATUS);
 }
 
+namespace {
+
+QueryResultSet* CreateHS2ResultSet(
+    TProtocolVersion::type version, const TResultSetMetadata& metadata, TRowSet* rowset) {
+  if (version < TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V6) {
+    return new HS2RowOrientedResultSet(metadata, rowset);
+  } else {
+    return new HS2ColumnarResultSet(metadata, rowset);
+  }
+}
+}
+
 Status ImpalaServer::FetchInternal(const TUniqueId& query_id, int32_t fetch_size,
     bool fetch_first, TFetchResultsResp* fetch_results) {
   shared_ptr<QueryExecState> exec_state = GetQueryExecState(query_id, false);
@@ -759,8 +762,9 @@ void ImpalaServer::ExecuteStatement(TExecuteStatementResp& return_val,
 
   // Optionally enable result caching on the QueryExecState.
   if (cache_num_rows > 0) {
-    status = exec_state->SetResultCache(CreateHS2ResultSet(session->hs2_version,
-            *exec_state->result_metadata()), cache_num_rows);
+    status = exec_state->SetResultCache(
+        CreateHS2ResultSet(session->hs2_version, *exec_state->result_metadata(), nullptr),
+        cache_num_rows);
     if (!status.ok()) {
       UnregisterQuery(exec_state->query_id(), false, &status);
       HS2_RETURN_ERROR(return_val, status.GetDetail(), SQLSTATE_GENERAL_ERROR);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/impala-internal-service.h
----------------------------------------------------------------------
diff --git a/be/src/service/impala-internal-service.h b/be/src/service/impala-internal-service.h
index a238f65..af54c35 100644
--- a/be/src/service/impala-internal-service.h
+++ b/be/src/service/impala-internal-service.h
@@ -18,8 +18,6 @@
 #ifndef IMPALA_SERVICE_IMPALA_INTERNAL_SERVICE_H
 #define IMPALA_SERVICE_IMPALA_INTERNAL_SERVICE_H
 
-#include <boost/shared_ptr.hpp>
-
 #include "gen-cpp/ImpalaInternalService.h"
 #include "gen-cpp/ImpalaInternalService_types.h"
 #include "service/impala-server.h"
@@ -32,9 +30,12 @@ namespace impala {
 /// ImpalaInternalService service.
 class ImpalaInternalService : public ImpalaInternalServiceIf {
  public:
-  ImpalaInternalService(const boost::shared_ptr<ImpalaServer>& impala_server,
-      const boost::shared_ptr<FragmentMgr>& fragment_mgr)
-      : impala_server_(impala_server), fragment_mgr_(fragment_mgr) { }
+  ImpalaInternalService() {
+    impala_server_ = ExecEnv::GetInstance()->impala_server();
+    DCHECK(impala_server_ != nullptr);
+    fragment_mgr_ = ExecEnv::GetInstance()->fragment_mgr();
+    DCHECK(fragment_mgr_ != nullptr);
+  }
 
   virtual void ExecPlanFragment(TExecPlanFragmentResult& return_val,
       const TExecPlanFragmentParams& params) {
@@ -74,10 +75,10 @@ class ImpalaInternalService : public ImpalaInternalServiceIf {
 
  private:
   /// Manages fragment reporting and data transmission
-  boost::shared_ptr<ImpalaServer> impala_server_;
+  ImpalaServer* impala_server_;
 
   /// Manages fragment execution
-  boost::shared_ptr<FragmentMgr> fragment_mgr_;
+  FragmentMgr* fragment_mgr_;
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 7f9d862..bf83eec 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -206,7 +206,6 @@ const string HS2_SERVER_NAME = "hiveserver2-frontend";
 const char* ImpalaServer::SQLSTATE_SYNTAX_ERROR_OR_ACCESS_VIOLATION = "42000";
 const char* ImpalaServer::SQLSTATE_GENERAL_ERROR = "HY000";
 const char* ImpalaServer::SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED = "HYC00";
-const int ImpalaServer::ASCII_PRECISION = 16; // print 16 digits for double/float
 
 const int MAX_NM_MISSED_HEARTBEATS = 5;
 
@@ -1866,9 +1865,7 @@ Status CreateImpalaServer(ExecEnv* exec_env, int beeswax_port, int hs2_port, int
   }
 
   if (be_port != 0 && be_server != NULL) {
-    boost::shared_ptr<FragmentMgr> fragment_mgr(new FragmentMgr());
-    boost::shared_ptr<ImpalaInternalService> thrift_if(
-        new ImpalaInternalService(handler, fragment_mgr));
+    boost::shared_ptr<ImpalaInternalService> thrift_if(new ImpalaInternalService());
     boost::shared_ptr<TProcessor> be_processor(
         new ImpalaInternalServiceProcessor(thrift_if));
     boost::shared_ptr<TProcessorEventHandler> event_handler(

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/impala-server.h
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h
index 2104c5e..53f3384 100644
--- a/be/src/service/impala-server.h
+++ b/be/src/service/impala-server.h
@@ -249,45 +249,6 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf,
 
   boost::scoped_ptr<ImpalaHttpHandler> http_handler_;
 
-  /// Query result set stores converted rows returned by QueryExecState.fetchRows(). It
-  /// provides an interface to convert Impala rows to external API rows.
-  /// It is an abstract class. Subclass must implement AddOneRow().
-  class QueryResultSet {
-   public:
-    QueryResultSet() {}
-    virtual ~QueryResultSet() {}
-
-    /// Add the row (list of expr value) from a select query to this result set. When a row
-    /// comes from a select query, the row is in the form of expr values (void*). 'scales'
-    /// contains the values' scales (# of digits after decimal), with -1 indicating no
-    /// scale specified.
-    virtual Status AddOneRow(
-        const std::vector<void*>& row, const std::vector<int>& scales) = 0;
-
-    /// Add the TResultRow to this result set. When a row comes from a DDL/metadata
-    /// operation, the row in the form of TResultRow.
-    virtual Status AddOneRow(const TResultRow& row) = 0;
-
-    /// Copies rows in the range [start_idx, start_idx + num_rows) from the other result
-    /// set into this result set. Returns the number of rows added to this result set.
-    /// Returns 0 if the given range is out of bounds of the other result set.
-    virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) = 0;
-
-    /// Returns the approximate size of this result set in bytes.
-    int64_t ByteSize() { return ByteSize(0, size()); }
-
-    /// Returns the approximate size of the given range of rows in bytes.
-    virtual int64_t ByteSize(int start_idx, int num_rows) = 0;
-
-    /// Returns the size of this result set in number of rows.
-    virtual size_t size() = 0;
-  };
-
-  /// Result set implementations for Beeswax and HS2
-  class AsciiQueryResultSet;
-  class HS2RowOrientedResultSet;
-  class HS2ColumnarResultSet;
-
   struct SessionState;
 
   /// Execution state of a query.
@@ -299,14 +260,6 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf,
   static const char* SQLSTATE_GENERAL_ERROR;
   static const char* SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED;
 
-  /// Ascii output precision for double/float
-  static const int ASCII_PRECISION;
-
-  QueryResultSet* CreateHS2ResultSet(
-      apache::hive::service::cli::thrift::TProtocolVersion::type version,
-      const TResultSetMetadata& metadata,
-      apache::hive::service::cli::thrift::TRowSet* rowset = NULL);
-
   /// Return exec state for given query_id, or NULL if not found.
   /// If 'lock' is true, the returned exec state's lock() will be acquired before
   /// the query_exec_state_map_lock_ is released.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/query-exec-state.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-exec-state.cc b/be/src/service/query-exec-state.cc
index d55ac54..1532ecf 100644
--- a/be/src/service/query-exec-state.cc
+++ b/be/src/service/query-exec-state.cc
@@ -19,14 +19,15 @@
 #include <limits>
 #include <gutil/strings/substitute.h>
 
-#include "exprs/expr.h"
 #include "exprs/expr-context.h"
+#include "exprs/expr.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/row-batch.h"
 #include "runtime/runtime-state.h"
-#include "service/impala-server.h"
 #include "service/frontend.h"
+#include "service/impala-server.h"
 #include "service/query-options.h"
+#include "service/query-result-set.h"
 #include "util/debug-util.h"
 #include "util/impalad-metrics.h"
 #include "util/runtime-profile-counters.h"
@@ -191,6 +192,7 @@ Status ImpalaServer::QueryExecState::Exec(TExecRequest* exec_request) {
             exec_request_.set_query_option_request.value,
             &session_->default_query_options,
             &session_->set_query_options_mask));
+        SetResultSet({}, {});
       } else {
         // "SET" returns a table of all query options.
         map<string, string> config;
@@ -421,17 +423,10 @@ Status ImpalaServer::QueryExecState::ExecQueryOrDmlRequest(
     summary_profile_.AddInfoString(TABLES_WITH_CORRUPT_STATS_KEY, ss.str());
   }
 
-  // If desc_tbl is not set, query has SELECT with no FROM. In that
-  // case, the query can only have a single fragment, and that fragment needs to be
-  // executed by the coordinator. This check confirms that.
-  // If desc_tbl is set, the query may or may not have a coordinator fragment.
   bool is_mt_exec = query_exec_request.query_ctx.request.query_options.mt_dop > 0;
   const TPlanFragment& fragment = is_mt_exec
       ? query_exec_request.mt_plan_exec_info[0].fragments[0]
       : query_exec_request.fragments[0];
-  bool has_coordinator_fragment =
-      fragment.partition.type == TPartitionType::UNPARTITIONED;
-  DCHECK(has_coordinator_fragment || query_exec_request.__isset.desc_tbl);
 
   {
     lock_guard<mutex> l(lock_);
@@ -449,7 +444,7 @@ Status ImpalaServer::QueryExecState::ExecQueryOrDmlRequest(
   }
 
   coord_.reset(new Coordinator(*schedule_, exec_env_, query_events_));
-  status = coord_->Exec(&output_expr_ctxs_);
+  status = coord_->Exec();
   {
     lock_guard<mutex> l(lock_);
     RETURN_IF_ERROR(UpdateQueryStatus(status));
@@ -538,12 +533,11 @@ void ImpalaServer::QueryExecState::Done() {
   query_events_->MarkEvent("Unregister query");
 
   if (coord_.get() != NULL) {
-    Expr::Close(output_expr_ctxs_, coord_->runtime_state());
     // Release any reserved resources.
     Status status = exec_env_->scheduler()->Release(schedule_.get());
     if (!status.ok()) {
       LOG(WARNING) << "Failed to release resources of query " << schedule_->query_id()
-            << " because of error: " << status.GetDetail();
+                   << " because of error: " << status.GetDetail();
     }
     coord_->TearDown();
   }
@@ -626,7 +620,6 @@ Status ImpalaServer::QueryExecState::WaitInternal() {
 
   if (coord_.get() != NULL) {
     RETURN_IF_ERROR(coord_->Wait());
-    RETURN_IF_ERROR(Expr::Open(output_expr_ctxs_, coord_->runtime_state()));
     RETURN_IF_ERROR(UpdateCatalog());
   }
 
@@ -719,6 +712,10 @@ Status ImpalaServer::QueryExecState::FetchRowsInternal(const int32_t max_rows,
     return Status::OK();
   }
 
+  if (coord_.get() == nullptr) {
+    return Status("Client tried to fetch rows on a query that produces no results.");
+  }
+
   int32_t num_rows_fetched_from_cache = 0;
   if (result_cache_max_size_ > 0 && result_cache_ != NULL) {
     // Satisfy the fetch from the result cache if possible.
@@ -729,27 +726,7 @@ Status ImpalaServer::QueryExecState::FetchRowsInternal(const int32_t max_rows,
     if (num_rows_fetched_from_cache >= max_rows) return Status::OK();
   }
 
-  // List of expr values to hold evaluated rows from the query
-  vector<void*> result_row;
-  result_row.resize(output_expr_ctxs_.size());
-
-  // List of scales for floating point values in result_row
-  vector<int> scales;
-  scales.resize(result_row.size());
-
-  if (coord_ == NULL) {
-    // Query with LIMIT 0.
-    query_state_ = QueryState::FINISHED;
-    eos_ = true;
-    return Status::OK();
-  }
-
   query_state_ = QueryState::FINISHED;  // results will be ready after this call
-  // Fetch the next batch if we've returned the current batch entirely
-  if (current_batch_ == NULL || current_batch_row_ >= current_batch_->num_rows()) {
-    RETURN_IF_ERROR(FetchNextBatch());
-  }
-  if (current_batch_ == NULL) return Status::OK();
 
   // Maximum number of rows to be fetched from the coord.
   int32_t max_coord_rows = max_rows;
@@ -759,22 +736,26 @@ Status ImpalaServer::QueryExecState::FetchRowsInternal(const int32_t max_rows,
   }
   {
     SCOPED_TIMER(row_materialization_timer_);
-    // Convert the available rows, limited by max_coord_rows
-    int available = current_batch_->num_rows() - current_batch_row_;
-    int fetched_count = available;
-    // max_coord_rows <= 0 means no limit
-    if (max_coord_rows > 0 && max_coord_rows < available) fetched_count = max_coord_rows;
-    for (int i = 0; i < fetched_count; ++i) {
-      TupleRow* row = current_batch_->GetRow(current_batch_row_);
-      RETURN_IF_ERROR(GetRowValue(row, &result_row, &scales));
-      RETURN_IF_ERROR(fetched_rows->AddOneRow(result_row, scales));
-      ++num_rows_fetched_;
-      ++current_batch_row_;
+    size_t before = fetched_rows->size();
+    // Temporarily release lock so calls to Cancel() are not blocked. fetch_rows_lock_
+    // (already held) ensures that we do not call coord_->GetNext() multiple times
+    // concurrently.
+    // TODO: Simplify this.
+    lock_.unlock();
+    Status status = coord_->GetNext(fetched_rows, max_coord_rows, &eos_);
+    lock_.lock();
+    int num_fetched = fetched_rows->size() - before;
+    DCHECK(max_coord_rows <= 0 || num_fetched <= max_coord_rows) << Substitute(
+        "Fetched more rows ($0) than asked for ($1)", num_fetched, max_coord_rows);
+    num_rows_fetched_ += num_fetched;
+
+    RETURN_IF_ERROR(status);
+    // Check if query status has changed during GetNext() call
+    if (!query_status_.ok()) {
+      eos_ = true;
+      return query_status_;
     }
   }
-  ExprContext::FreeLocalAllocations(output_expr_ctxs_);
-  // Check if there was an error evaluating a row value.
-  RETURN_IF_ERROR(coord_->runtime_state()->CheckQueryState());
 
   // Update the result cache if necessary.
   if (result_cache_max_size_ > 0 && result_cache_.get() != NULL) {
@@ -833,16 +814,6 @@ Status ImpalaServer::QueryExecState::FetchRowsInternal(const int32_t max_rows,
   return Status::OK();
 }
 
-Status ImpalaServer::QueryExecState::GetRowValue(TupleRow* row, vector<void*>* result,
-                                                 vector<int>* scales) {
-  DCHECK(result->size() >= output_expr_ctxs_.size());
-  for (int i = 0; i < output_expr_ctxs_.size(); ++i) {
-    (*result)[i] = output_expr_ctxs_[i]->GetValue(row);
-    (*scales)[i] = output_expr_ctxs_[i]->root()->output_scale();
-  }
-  return Status::OK();
-}
-
 Status ImpalaServer::QueryExecState::Cancel(bool check_inflight, const Status* cause) {
   Coordinator* coord;
   {
@@ -931,28 +902,6 @@ Status ImpalaServer::QueryExecState::UpdateCatalog() {
   return Status::OK();
 }
 
-Status ImpalaServer::QueryExecState::FetchNextBatch() {
-  DCHECK(!eos_);
-  DCHECK(coord_.get() != NULL);
-
-  // Temporarily release lock so calls to Cancel() are not blocked.  fetch_rows_lock_
-  // ensures that we do not call coord_->GetNext() multiple times concurrently.
-  lock_.unlock();
-  Status status = coord_->GetNext(&current_batch_, coord_->runtime_state());
-  lock_.lock();
-  if (!status.ok()) return status;
-
-  // Check if query status has changed during GetNext() call
-  if (!query_status_.ok()) {
-    current_batch_ = NULL;
-    return query_status_;
-  }
-
-  current_batch_row_ = 0;
-  eos_ = current_batch_ == NULL;
-  return Status::OK();
-}
-
 void ImpalaServer::QueryExecState::SetResultSet(const vector<string>& results) {
   request_result_set_.reset(new vector<TResultRow>);
   request_result_set_->resize(results.size());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/query-exec-state.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-exec-state.h b/be/src/service/query-exec-state.h
index 0a763ff..54ee929 100644
--- a/be/src/service/query-exec-state.h
+++ b/be/src/service/query-exec-state.h
@@ -248,7 +248,7 @@ class ImpalaServer::QueryExecState {
   /// Resource assignment determined by scheduler. Owned by obj_pool_.
   boost::scoped_ptr<QuerySchedule> schedule_;
 
-  /// not set for ddl queries, or queries with "limit 0"
+  /// Not set for ddl queries.
   boost::scoped_ptr<Coordinator> coord_;
 
   /// Runs statements that query or modify the catalog via the CatalogService.
@@ -293,7 +293,7 @@ class ImpalaServer::QueryExecState {
   MonotonicStopWatch client_wait_sw_;
 
   RuntimeProfile::EventSequence* query_events_;
-  std::vector<ExprContext*> output_expr_ctxs_;
+
   bool is_cancelled_; // if true, Cancel() was called.
   bool eos_;  // if true, there are no more rows to return
   // We enforce the invariant that query_status_ is not OK iff query_state_
@@ -356,13 +356,6 @@ class ImpalaServer::QueryExecState {
   /// Caller needs to hold fetch_rows_lock_ and lock_.
   Status FetchRowsInternal(const int32_t max_rows, QueryResultSet* fetched_rows);
 
-  /// Fetch the next row batch and store the results in current_batch_. Only called for
-  /// non-DDL / DML queries. current_batch_ is set to NULL if execution is complete or the
-  /// query was cancelled.
-  /// Caller needs to hold fetch_rows_lock_ and lock_. Blocks, during which time lock_ is
-  /// released.
-  Status FetchNextBatch();
-
   /// Evaluates 'output_expr_ctxs_' against 'row' and output the evaluated row in
   /// 'result'. The values' scales (# of digits after decimal) are stored in 'scales'.
   /// result and scales must have been resized to the number of columns before call.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/service/query-result-set.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-result-set.h b/be/src/service/query-result-set.h
new file mode 100644
index 0000000..b444ca3
--- /dev/null
+++ b/be/src/service/query-result-set.h
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_SERVICE_QUERY_RESULT_SET_H
+#define IMPALA_SERVICE_QUERY_RESULT_SET_H
+
+#include "common/status.h"
+#include "gen-cpp/Data_types.h"
+
+#include <vector>
+
+namespace impala {
+
+/// Stores client-ready query result rows returned by
+/// QueryExecState::FetchRows(). Subclasses implement AddRows() / AddOneRow() to
+/// specialise how Impala's row batches are converted to client-API result
+/// representations.
+class QueryResultSet {
+ public:
+  QueryResultSet() {}
+  virtual ~QueryResultSet() {}
+
+  /// Add a single row to this result set. The row is a vector of pointers to values,
+  /// whose memory belongs to the caller. 'scales' contains the scales for decimal values
+  /// (# of digits after decimal), with -1 indicating no scale specified or the
+  /// corresponding value is not a decimal.
+  virtual Status AddOneRow(
+      const std::vector<void*>& row, const std::vector<int>& scales) = 0;
+
+  /// Add the TResultRow to this result set. When a row comes from a DDL/metadata
+  /// operation, the row is in the form of a TResultRow.
+  virtual Status AddOneRow(const TResultRow& row) = 0;
+
+  /// Copies rows in the range [start_idx, start_idx + num_rows) from the other result
+  /// set into this result set. Returns the number of rows added to this result set.
+  /// Returns 0 if the given range is out of bounds of the other result set.
+  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) = 0;
+
+  /// Returns the approximate size of this result set in bytes.
+  int64_t ByteSize() { return ByteSize(0, size()); }
+
+  /// Returns the approximate size of the given range of rows in bytes.
+  virtual int64_t ByteSize(int start_idx, int num_rows) = 0;
+
+  /// Returns the size of this result set in number of rows.
+  virtual size_t size() = 0;
+};
+}
+
+#endif

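With QueryResultSet extracted into its own header, the Beeswax and HS2 result sets
(AsciiQueryResultSet, HS2ColumnarResultSet, HS2RowOrientedResultSet) become plain subclasses
of this interface. A throwaway sketch of the minimum a subclass has to supply; this class is
illustrative only and not part of the patch:

    // Toy result set that only counts rows, to show the required overrides.
    class CountingResultSet : public QueryResultSet {
     public:
      virtual Status AddOneRow(const std::vector<void*>& row,
          const std::vector<int>& scales) {
        ++num_rows_;
        return Status::OK();
      }
      virtual Status AddOneRow(const TResultRow& row) {
        ++num_rows_;
        return Status::OK();
      }
      virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) {
        // A real implementation copies rows out of 'other'; here we just count.
        num_rows_ += num_rows;
        return num_rows;
      }
      virtual int64_t ByteSize(int start_idx, int num_rows) { return 0; }
      virtual size_t size() { return num_rows_; }

     private:
      size_t num_rows_ = 0;
    };
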
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/be/src/testutil/in-process-servers.cc
----------------------------------------------------------------------
diff --git a/be/src/testutil/in-process-servers.cc b/be/src/testutil/in-process-servers.cc
index cf0e28e..b28f7fc 100644
--- a/be/src/testutil/in-process-servers.cc
+++ b/be/src/testutil/in-process-servers.cc
@@ -34,6 +34,7 @@
 
 DECLARE_string(ssl_server_certificate);
 DECLARE_string(ssl_private_key);
+DECLARE_int32(be_port);
 
 using namespace apache::thrift;
 using namespace impala;
@@ -43,6 +44,9 @@ InProcessImpalaServer* InProcessImpalaServer::StartWithEphemeralPorts(
   for (int tries = 0; tries < 10; ++tries) {
     int backend_port = FindUnusedEphemeralPort();
     if (backend_port == -1) continue;
+    // This flag is read directly in several places to find the address of the local
+    // backend interface.
+    FLAGS_be_port = backend_port;
 
     int subscriber_port = FindUnusedEphemeralPort();
     if (subscriber_port == -1) continue;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/common/thrift/DataSinks.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/DataSinks.thrift b/common/thrift/DataSinks.thrift
index 12a75b9..83c63b7 100644
--- a/common/thrift/DataSinks.thrift
+++ b/common/thrift/DataSinks.thrift
@@ -26,7 +26,8 @@ include "Partitions.thrift"
 enum TDataSinkType {
   DATA_STREAM_SINK,
   TABLE_SINK,
-  JOIN_BUILD_SINK
+  JOIN_BUILD_SINK,
+  PLAN_ROOT_SINK
 }
 
 enum TSinkAction {
@@ -87,10 +88,10 @@ struct TJoinBuildSink {
 
 // Union type of all table sinks.
 struct TTableSink {
-  1: required Types.TTableId  target_table_id
+  1: required Types.TTableId target_table_id
   2: required TTableSinkType type
   3: required TSinkAction action
-  4: optional THdfsTableSink  hdfs_table_sink
+  4: optional THdfsTableSink hdfs_table_sink
   5: optional TKuduTableSink kudu_table_sink
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
index b02bc73..392b961 100644
--- a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
@@ -25,6 +25,8 @@ import java.util.Set;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.TreeNode;
+import org.apache.impala.planner.DataSink;
+import org.apache.impala.planner.PlanRootSink;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
 import com.google.common.collect.Lists;
@@ -409,6 +411,10 @@ public abstract class QueryStmt extends StatementBase {
     resultExprs_ = Expr.substituteList(resultExprs_, smap, analyzer, true);
   }
 
+  public DataSink createDataSink() {
+    return new PlanRootSink();
+  }
+
   public ArrayList<OrderByElement> cloneOrderByElements() {
     if (orderByElements_ == null) return null;
     ArrayList<OrderByElement> result =

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java b/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
new file mode 100644
index 0000000..a199f54
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/planner/PlanRootSink.java
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.planner;
+
+import org.apache.impala.thrift.TDataSink;
+import org.apache.impala.thrift.TDataSinkType;
+import org.apache.impala.thrift.TExplainLevel;
+
+/**
+ * Sink for the root of a query plan that produces result rows. Allows coordination
+ * between the sender which produces those rows, and the consumer which sends them to the
+ * client, despite both executing concurrently.
+ */
+public class PlanRootSink extends DataSink {
+
+  public String getExplainString(String prefix, String detailPrefix,
+      TExplainLevel explainLevel) {
+    return String.format("%sPLAN-ROOT SINK\n", prefix);
+  }
+
+  protected TDataSink toThrift() {
+    return new TDataSink(TDataSinkType.PLAN_ROOT_SINK);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/fe/src/main/java/org/apache/impala/planner/Planner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index 405eebe..ed4c677 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -149,6 +149,8 @@ public class Planner {
       } else if (ctx_.isDelete()) {
         // Set up delete sink for root fragment
         rootFragment.setSink(ctx_.getAnalysisResult().getDeleteStmt().createDataSink());
+      } else if (ctx_.isQuery()) {
+        rootFragment.setSink(ctx_.getAnalysisResult().getQueryStmt().createDataSink());
       }
       QueryStmt queryStmt = ctx_.getQueryStmt();
       queryStmt.substituteResultExprs(rootNodeSmap, ctx_.getRootAnalyzer());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/PlannerContext.java b/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
index 29cca13..3275a7a 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
@@ -90,6 +90,7 @@ public class PlannerContext {
   public boolean isInsertOrCtas() {
     return analysisResult_.isInsertStmt() || analysisResult_.isCreateTableAsSelectStmt();
   }
+  public boolean isQuery() { return analysisResult_.isQueryStmt(); }
 
   public boolean hasSubplan() { return !subplans_.isEmpty(); }
   public SubplanNode getSubplan() { return subplans_.getFirst(); }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test b/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
index 47bfb23..d7838f9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
@@ -3,12 +3,16 @@ select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tin
 avg(tinyint_col)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
 |
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count:merge(*), count:merge(tinyint_col), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
 |
@@ -26,6 +30,8 @@ avg(tinyint_col)
 from functional.alltypesagg
 group by 2, 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
 |  group by: bigint_col, tinyint_col
@@ -33,6 +39,8 @@ group by 2, 1
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -54,6 +62,8 @@ from functional.testtbl
 having count(id) > 0
 order by avg(zip) limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=10]
 |  order by: avg(zip) ASC
 |
@@ -64,6 +74,8 @@ order by avg(zip) limit 10
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:TOP-N [LIMIT=10]
 |  order by: avg(zip) ASC
 |
@@ -85,6 +97,8 @@ from functional.alltypesagg
 group by int_col + int_col, int_col * int_col, int_col + int_col
 having (int_col * int_col) < 0 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: int_col + int_col, int_col * int_col
 |  having: int_col * int_col < 0
@@ -93,6 +107,8 @@ having (int_col * int_col) < 0 limit 10
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -116,6 +132,8 @@ functional.alltypes t1 inner join functional.alltypestiny t2
 group by t1.tinyint_col, t2.smallint_col
 having count(t2.int_col) = count(t1.bigint_col)
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*), count(t2.int_col), count(t1.bigint_col)
 |  group by: t1.tinyint_col, t2.smallint_col
@@ -141,6 +159,8 @@ select 1 from
    group by int_col) t
 where t.x > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: avg(bigint_col)
 |  group by: int_col
@@ -157,6 +177,8 @@ select count(*) from
    select * from functional.alltypessmall) t
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  limit: 10
@@ -169,6 +191,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |  limit: 10
@@ -194,6 +218,8 @@ select count(*) from
 group by t.bigint_col
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: bigint_col
@@ -207,6 +233,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -237,6 +265,8 @@ from
    select * from functional.alltypessmall) t
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(int_col)
 |  limit: 10
@@ -252,6 +282,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(int_col)
 |  limit: 10
@@ -286,6 +318,8 @@ from
 group by t.bigint_col
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(int_col)
 |  group by: t.bigint_col
@@ -302,6 +336,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -334,6 +370,8 @@ from
    select * from functional.alltypessmall) t
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(int_col), count:merge(smallint_col)
 |  limit: 10
@@ -350,6 +388,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(int_col), count:merge(smallint_col)
 |  limit: 10
@@ -386,6 +426,8 @@ from
 group by t.bigint_col
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(int_col), count:merge(smallint_col)
 |  group by: t.bigint_col
@@ -403,6 +445,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -438,6 +482,8 @@ from
 group by t.bigint_col
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  output: count(int_col), count:merge(smallint_col)
 |  group by: t.bigint_col
@@ -458,6 +504,8 @@ limit 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -495,6 +543,8 @@ limit 10
 # test that aggregations are not placed below an unpartitioned exchange with a limit
 select count(*) from (select * from functional.alltypes limit 10) t
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -502,6 +552,8 @@ select count(*) from (select * from functional.alltypes limit 10) t
    partitions=24/24 files=24 size=478.45KB
    limit: 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -518,6 +570,8 @@ select count(*) from
    union all
    (select * from functional.alltypessmall) limit 10) t
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -530,6 +584,8 @@ select count(*) from
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -555,6 +611,8 @@ select * from (
   limit 2) v
 limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(cnt)
 |  limit: 1
@@ -580,6 +638,8 @@ limit 1
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> t1.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(cnt)
 |  limit: 1
@@ -629,6 +689,8 @@ select * from
    group by 1, 2, 3, 4) v
 where v.a = v.b and v.b = v.c and v.c = v.d and v.a = v.c and v.a = v.d
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: tinyint_col, smallint_col, int_col + int_col, coalesce(bigint_col, year)
 |  having: int_col + int_col = coalesce(bigint_col, year), smallint_col = int_col + int_col
@@ -643,6 +705,8 @@ select cnt from
    from functional.alltypestiny
    group by bool_col, x) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: bool_col, CAST(NULL AS INT)
@@ -656,6 +720,8 @@ select cnt from
    from functional.alltypestiny
    group by bool_col, x) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(int_col)
 |  group by: bool_col, NULL
@@ -669,6 +735,8 @@ select cnt from
 # test simple group_concat with distinct
 select group_concat(distinct string_col) from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: group_concat(string_col)
 |
@@ -678,6 +746,8 @@ select group_concat(distinct string_col) from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: group_concat:merge(string_col)
 |
@@ -702,6 +772,8 @@ select day, group_concat(distinct string_col)
 from (select * from functional.alltypesagg where id % 100 = day order by id limit 99999) a
 group by day
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: group_concat(string_col)
 |  group by: day
@@ -716,6 +788,8 @@ group by day
    partitions=11/11 files=11 size=814.73KB
    predicates: id % 100 = day
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: group_concat(string_col)
 |  group by: day
@@ -739,6 +813,8 @@ select count(distinct cast(timestamp_col as string)),
 group_concat(distinct cast(timestamp_col as string))
 from functional.alltypesagg group by year
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(CAST(timestamp_col AS STRING)), group_concat(CAST(timestamp_col AS STRING))
 |  group by: year
@@ -749,6 +825,8 @@ from functional.alltypesagg group by year
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -769,6 +847,8 @@ from functional.alltypesagg group by year
 # test group_concat distinct with other non-distinct aggregate functions
  select group_concat(distinct string_col), count(*) from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: group_concat(string_col), count:merge(*)
 |
@@ -779,6 +859,8 @@ from functional.alltypesagg group by year
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: group_concat:merge(string_col), count:merge(*)
 |
@@ -804,6 +886,8 @@ from functional.alltypesagg group by year
 select group_concat(distinct string_col, '-'), sum(int_col), count(distinct string_col)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: group_concat(string_col, '-'), count(string_col), sum:merge(int_col)
 |
@@ -814,6 +898,8 @@ from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: group_concat:merge(string_col, '-'), count:merge(string_col), sum:merge(int_col)
 |
@@ -841,6 +927,8 @@ select month, year, count(*), count(distinct date_string_col),
 group_concat(distinct date_string_col, '-') from functional.alltypesagg
 group by month, year
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(date_string_col), group_concat(date_string_col, '-'), count:merge(*)
 |  group by: month, year
@@ -852,6 +940,8 @@ group by month, year
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -875,6 +965,8 @@ group by month, year
 select group_concat(distinct string_col), group_concat(distinct string_col, '-'),
 group_concat(distinct string_col, '---')  from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: group_concat(string_col), group_concat(string_col, '-'), group_concat(string_col, '---')
 |
@@ -884,6 +976,8 @@ group_concat(distinct string_col, '---')  from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: group_concat:merge(string_col), group_concat:merge(string_col, '-'), group_concat:merge(string_col, '---')
 |
@@ -906,6 +1000,8 @@ group_concat(distinct string_col, '---')  from functional.alltypesagg
 # IMPALA-852: Aggregation only in the HAVING clause.
 select 1 from functional.alltypestiny having count(*) > 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  having: count(*) > 0
@@ -923,6 +1019,8 @@ group by 1
 having count(*) < 150000
 limit 1000000
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |  limit: 1000000
 |
@@ -957,6 +1055,8 @@ select col from (
 where col > 50
 limit 50
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(c_custkey)
 |  having: count(c_custkey) > 50
@@ -992,6 +1092,8 @@ select straight_join c_custkey, count(distinct c_custkey)
 from tpch_parquet.orders inner join [shuffle] tpch_parquet.customer on c_custkey = o_custkey
 group by 1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:AGGREGATE [FINALIZE]
@@ -1029,6 +1131,8 @@ group by 1, 2
 having count(*) > 10
 limit 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |


[30/32] incubator-impala git commit: IMPALA-4253: impala-server.backends.client-cache.total-clients shows negative value

Posted by ta...@apache.org.
IMPALA-4253: impala-server.backends.client-cache.total-clients shows negative value

Fixed double decrement in case a cached connection is broken
and cannot be re-created.

Change-Id: Ic9e28055cb232cdb543c4c9f05a558ab0f73f777
Reviewed-on: http://gerrit.cloudera.org:8080/4668
Reviewed-by: Juan Yu <jy...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/080a6784
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/080a6784
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/080a6784

Branch: refs/heads/hadoop-next
Commit: 080a67848b3f96eb49cdaab14f32eba8091a4320
Parents: 5a91964
Author: Juan Yu <jy...@cloudera.com>
Authored: Fri Oct 7 14:57:24 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 09:29:31 2016 +0000

----------------------------------------------------------------------
 be/src/runtime/client-cache.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/080a6784/be/src/runtime/client-cache.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/client-cache.cc b/be/src/runtime/client-cache.cc
index 155474c..578ef29 100644
--- a/be/src/runtime/client-cache.cc
+++ b/be/src/runtime/client-cache.cc
@@ -86,12 +86,16 @@ Status ClientCacheHelper::ReopenClient(ClientFactory factory_method,
   // clean up internal buffers it reopens. To work around this issue, create a new client
   // instead.
   ClientKey old_client_key = *client_key;
-  if (metrics_enabled_) total_clients_metric_->Increment(-1);
   Status status = CreateClient(client_impl->address(), factory_method, client_key);
   // Only erase the existing client from the map if creation of the new one succeeded.
   // This helps to ensure the proper accounting of metrics in the presence of
   // re-connection failures (the original client should be released as usual).
   if (status.ok()) {
+    // CreateClient() will increment total_clients_metric_ if it succeeds.
+    if (metrics_enabled_) {
+      total_clients_metric_->Increment(-1);
+      DCHECK_GE(total_clients_metric_->value(), 0);
+    }
     lock_guard<mutex> lock(client_map_lock_);
     client_map_.erase(client);
   } else {
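
The change above enforces a simple accounting rule: the counter for the old, broken client is
decremented only after a replacement client has been created successfully, because a failed
re-creation still sends the old client through the normal release path, which performs its own
decrement. The following is a minimal standalone sketch of that rule, not the actual Impala code;
the class FakeClientCache and its members are invented purely for illustration.

  #include <cassert>
  #include <iostream>

  struct FakeClientCache {
    int total_clients = 0;  // stands in for total_clients_metric_

    // Creating a client increments the counter on success only.
    bool CreateClient(bool creation_succeeds) {
      if (creation_succeeds) ++total_clients;
      return creation_succeeds;
    }

    // Releasing a broken client decrements the counter exactly once.
    void ReleaseClient() { --total_clients; }

    // Reopen a broken client by creating a replacement. The old client's
    // decrement happens only if the replacement was created; otherwise the
    // caller still releases the old client, and an eager decrement here
    // would count it twice (the bug this commit fixed).
    bool ReopenClient(bool creation_succeeds) {
      if (CreateClient(creation_succeeds)) {
        --total_clients;  // retire the old client's count
        assert(total_clients >= 0);
        return true;
      }
      return false;  // old client will be released (and decremented) later
    }
  };

  int main() {
    FakeClientCache cache;
    cache.CreateClient(true);          // one cached client
    if (!cache.ReopenClient(false)) {  // re-creation fails
      cache.ReleaseClient();           // normal release path
    }
    std::cout << "total clients: " << cache.total_clients << std::endl;  // 0, not -1
    return 0;
  }

With the pre-fix ordering (decrement before attempting creation), the same failure scenario would
leave the counter at -1, which is the negative metric value reported in IMPALA-4253.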


[18/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index 0ef95b0..8c25730 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -5,6 +5,8 @@ max(tinyint_col) over(partition by int_col)
 from functional.alltypes
 group by int_col, tinyint_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 03:ANALYTIC
@@ -32,6 +34,8 @@ max(int_col) over(partition by int_col, bool_col),
 max(int_col) over(partition by int_col, tinyint_col)
 from functional.alltypes
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 04:ANALYTIC
@@ -60,6 +64,8 @@ max(int_col) over(partition by int_col),
 min(int_col) over(order by int_col)
 from functional.alltypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:ANALYTIC
 |  functions: min(int_col)
 |  order by: int_col ASC
@@ -78,6 +84,8 @@ from functional.alltypes
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:ANALYTIC
 |  functions: min(int_col)
 |  order by: int_col ASC
@@ -115,6 +123,8 @@ max(int_col) over(partition by int_col order by bigint_col),
 max(int_col) over(partition by int_col order by bigint_col desc)
 from functional.alltypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:ANALYTIC
 |  functions: max(int_col)
 |  partition by: bool_col
@@ -155,6 +165,8 @@ from functional.alltypes
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:EXCHANGE [UNPARTITIONED]
 |
 08:ANALYTIC
@@ -226,6 +238,8 @@ min(int_col) over(partition by int_col, smallint_col order by bigint_col),
 max(int_col) over(partition by int_col, smallint_col order by int_col)
 from functional.alltypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:ANALYTIC
 |  functions: min(int_col), max(int_col)
 |  order by: bigint_col ASC
@@ -276,6 +290,8 @@ from functional.alltypes
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:ANALYTIC
 |  functions: min(int_col), max(int_col)
 |  order by: bigint_col ASC
@@ -336,12 +352,16 @@ from functional.alltypes
 # basic analytic with default window and no partition/ordering
 select count(*) over() from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:ANALYTIC
 |  functions: count(*)
 |
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:ANALYTIC
 |  functions: count(*)
 |
@@ -354,6 +374,8 @@ select count(*) over() from functional.alltypesagg
 select tinyint_col, sum(bigint_col) over(partition by tinyint_col) sum_of_bigints
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:ANALYTIC
 |  functions: sum(bigint_col)
 |  partition by: tinyint_col
@@ -364,6 +386,8 @@ from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 02:ANALYTIC
@@ -381,6 +405,8 @@ from functional.alltypesagg
 # basic analytic with default window and ordering
 select int_col, rank() over(order by int_col) from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:ANALYTIC
 |  functions: rank()
 |  order by: int_col ASC
@@ -392,6 +418,8 @@ select int_col, rank() over(order by int_col) from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 02:ANALYTIC
 |  functions: rank()
 |  order by: int_col ASC
@@ -413,6 +441,8 @@ select bigint_col, count(double_col)
 from functional.alltypesagg
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:ANALYTIC
 |  functions: count(double_col)
 |  partition by: tinyint_col + 1, double_col / 2
@@ -426,6 +456,8 @@ limit 10
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |  limit: 10
 |
@@ -463,6 +495,8 @@ avg(double_col)
 from functional.alltypes
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:ANALYTIC
 |  functions: avg(double_col)
 |  order by: int_col DESC
@@ -490,6 +524,8 @@ limit 10
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:ANALYTIC
 |  functions: avg(double_col)
 |  order by: int_col DESC
@@ -528,6 +564,8 @@ select a.tinyint_col, a.int_col, count(a.double_col)
 from functional.alltypes a inner join functional.alltypessmall b on a.id = b.id
 order by a.tinyint_col, a.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SORT
 |  order by: tinyint_col ASC, int_col ASC
 |
@@ -551,6 +589,8 @@ order by a.tinyint_col, a.int_col
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: tinyint_col ASC, int_col ASC
 |
@@ -596,6 +636,8 @@ from functional.alltypes
 group by 1
 order by 1, 2, 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:SORT
 |  order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC
 |
@@ -624,6 +666,8 @@ order by 1, 2, 3
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC
 |
@@ -700,6 +744,8 @@ min(int_col)
   rows between unbounded preceding and 2 following)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:ANALYTIC
 |  functions: count(double_col), last_value(double_col)
 |  partition by: tinyint_col, double_col
@@ -754,6 +800,8 @@ from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:EXCHANGE [UNPARTITIONED]
 |
 10:ANALYTIC
@@ -829,6 +877,8 @@ sum(smallint_col)
   rows between 2 preceding and 2 following)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:ANALYTIC
 |  functions: sum(tinyint_col)
 |  order by: int_col DESC
@@ -862,6 +912,8 @@ from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:ANALYTIC
 |  functions: sum(tinyint_col)
 |  order by: int_col DESC
@@ -912,6 +964,8 @@ select double_col, a, b, a + b, double_col + a from
      (select * from functional.alltypes) v1) v2
 order by 2, 3, 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: a ASC, b ASC, a + b ASC
 |
@@ -928,6 +982,8 @@ order by 2, 3, 4
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: a ASC, b ASC, a + b ASC
 |
@@ -959,6 +1015,8 @@ with v2 as
 select double_col, a, b, a + b, double_col + a from v2
 order by 2, 3, 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: a ASC, b ASC, a + b ASC
 |
@@ -975,6 +1033,8 @@ order by 2, 3, 4
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: a ASC, b ASC, a + b ASC
 |
@@ -1007,6 +1067,8 @@ select b from
    from functional.alltypes) v
 where e < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SELECT
 |  predicates: count(1) < 10
 |
@@ -1029,6 +1091,8 @@ where e < 10
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 05:SELECT
@@ -1068,6 +1132,8 @@ union all
  from functional.alltypestiny)
 order by 1 desc nulls first
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: min(id) OVER(...) DESC NULLS FIRST
 |
@@ -1110,6 +1176,8 @@ order by 1 desc nulls first
 09:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: min(id) OVER(...) DESC NULLS FIRST
 |
@@ -1172,6 +1240,8 @@ where int_col in
   (select min(bigint_col) over(partition by bool_col)
    from functional.alltypestiny t2 where t2.id < 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: int_col = min(bigint_col)
 |  runtime filters: RF000 <- min(bigint_col)
@@ -1191,6 +1261,8 @@ where int_col in
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
@@ -1247,6 +1319,8 @@ where
   v.a != v.e and
   v.b != v.c
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SELECT
 |  predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
 |
@@ -1279,6 +1353,8 @@ where
    partitions=24/24 files=24 size=478.45KB
    predicates: int_col <= 10, int_col >= 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:SELECT
 |  predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
 |
@@ -1330,6 +1406,8 @@ inner join functional.alltypes t2
 on (t1.id = t2.id and t1.a = t2.int_col)
 where t2.id < 10 and t2.int_col < 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: id = t2.id, sum(int_col) = t2.int_col
 |
@@ -1362,6 +1440,8 @@ left outer join functional.alltypes t2
 on (t1.id = t2.id and t1.a = t2.int_col)
 where t2.id < 10 and t2.int_col < 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: id = t2.id, sum(int_col) = t2.int_col
 |  other predicates: t2.id < 10, t2.int_col < 20
@@ -1387,6 +1467,8 @@ select
 row_number() over(partition by tinyint_col order by id)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:ANALYTIC
 |  functions: row_number()
 |  partition by: tinyint_col
@@ -1410,6 +1492,8 @@ lead(int_col, 8, 20) over(partition by int_col order by id),
 lag(int_col, 8, 20) over(partition by int_col order by id)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:ANALYTIC
 |  functions: lag(int_col, 8, 20)
 |  partition by: int_col
@@ -1478,6 +1562,8 @@ count(bigint_col) over(partition by tinyint_col order by id
 #                       range between unbounded preceding and 10 following)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:ANALYTIC
 |  functions: count(bigint_col)
 |  partition by: tinyint_col
@@ -1544,6 +1630,8 @@ first_value(tinyint_col ignore nulls) over (order by id
                                             rows between 1 following and 2 following)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:ANALYTIC
 |  functions: last_value_ignore_nulls(tinyint_col)
 |  order by: id DESC
@@ -1607,6 +1695,8 @@ last_value(bigint_col) over(partition by tinyint_col order by id
                             range between unbounded preceding and unbounded following)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:ANALYTIC
 |  functions: last_value(bigint_col)
 |  partition by: tinyint_col
@@ -1654,6 +1744,8 @@ select DENSE_RANK() OVER (ORDER BY t1.day ASC)
 FROM functional.alltypesagg t1
 WHERE EXISTS (SELECT t1.year AS int_col_1 FROM functional.alltypesagg t1)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:ANALYTIC
 |  functions: dense_rank()
 |  order by: day ASC
@@ -1682,6 +1774,8 @@ FROM (
   FROM functional.alltypes t1 ) t1
 WHERE id IS NULL and tinyint_col != 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -1705,6 +1799,8 @@ WHERE id IS NULL and tinyint_col != 5
    partitions=4/4 files=4 size=460B
    predicates: t1.id IS NULL, t1.tinyint_col != 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -1747,6 +1843,8 @@ select * from
    from functional.alltypes) v
 where year = 2009 and id = 1 and int_col < 10 and s = 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SELECT
 |  predicates: id = 1, int_col < 10, sum(int_col) = 4
 |
@@ -1775,6 +1873,8 @@ select * from
    from functional.alltypes) v
 where year = 2009 and tinyint_col + 1 = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:SELECT
 |  predicates: tinyint_col + 1 = 1
 |
@@ -1826,6 +1926,8 @@ select * from
    from functional.alltypes) v
 where year = 2009 and tinyint_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SELECT
 |  predicates: tinyint_col = 1, year = 2009
 |
@@ -1866,6 +1968,8 @@ select * from
    from functional.alltypestiny where id = tinyint_col) v
 where month = int_col and int_col = 1 and tinyint_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SELECT
 |  predicates: month = int_col
 |
@@ -1894,6 +1998,8 @@ select * from
    on t1.id = t2.id) v
 where v.x + v.y < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SELECT
 |  predicates: tinyint_col + int_col < 10
 |
@@ -1928,6 +2034,8 @@ select * from
    from functional.alltypestiny) v
 where x = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:SELECT
 |  predicates: int_col + 1 = 1
 |
@@ -1964,6 +2072,8 @@ left outer join
    from functional.alltypestiny) t2
 on (t1.id = t2.a + 100)
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:ANALYTIC
 |  functions: avg(if(TupleIsNull(), NULL, coalesce(id + bigint_col, 40)))
 |  order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC
@@ -2023,6 +2133,8 @@ from
   on (t1.id = t2.a + 100)) t3
 group by d
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: avg(sum(t1.id)), sum(avg(g)), count(id)
 |  group by: if(TupleIsNull(), NULL, coalesce(int_col, 20))
@@ -2090,6 +2202,8 @@ full outer join
    on (c.id = v3.id)) v4
 on (v2.id = v4.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 14:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: id = id
 |
@@ -2152,6 +2266,8 @@ left outer join
    on (t1.id = t2.id)) b
 on (a.id = b.id and b.int_col < 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = t1.id
 |
@@ -2198,6 +2314,8 @@ first_value(int_col) over (order by bigint_col
                            rows between unbounded preceding and current row)
 from functional.alltypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:ANALYTIC
 |  functions: last_value(int_col), first_value(int_col)
 |  order by: bigint_col ASC

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/complex-types-file-formats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/complex-types-file-formats.test b/testdata/workloads/functional-planner/queries/PlannerTest/complex-types-file-formats.test
index f0431a2..9c68c65 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/complex-types-file-formats.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/complex-types-file-formats.test
@@ -1,6 +1,8 @@
 # Scanning an unpartitioned Parquet table with complex types plans ok.
 select s.f1 from functional_parquet.complextypes_fileformat t, t.a
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [CROSS JOIN]
@@ -47,6 +49,8 @@ Complex types are supported for these file formats: PARQUET.
 # only scalar type columns are allowed.
 select id from functional_rc_snap.complextypes_fileformat
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional_rc_snap.complextypes_fileformat]
    partitions=1/1 files=1 size=56B
 ====
@@ -54,6 +58,8 @@ select id from functional_rc_snap.complextypes_fileformat
 # queries should work.
 select count(*) from functional_rc_snap.complextypes_fileformat
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -70,6 +76,8 @@ Complex types are supported for these file formats: PARQUET.
 # are allowed.
 select id from functional_seq_snap.complextypes_fileformat
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional_seq_snap.complextypes_fileformat]
    partitions=1/1 files=1 size=87B
 ====
@@ -85,6 +93,8 @@ not implemented: Scan of partition
 # columns are selected.
 select id from functional_hbase.allcomplextypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.allcomplextypes]
 ====
 # Scanning an HBase table with complex-types columns fails if a complex-typed
@@ -115,6 +125,8 @@ Complex types are supported for these file formats: PARQUET.
 # Scanning a Parquet partition of a multi-format table with complex types plans ok.
 select s.f1 from functional.complextypes_multifileformat t, t.a where p = 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [CROSS JOIN]
@@ -143,6 +155,8 @@ Complex types are supported for these file formats: PARQUET.
 # queries should work.
 select count(*) from functional.complextypes_multifileformat where p = 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test b/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
index 29e1864..9f97d62 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
@@ -4,6 +4,8 @@ from functional.alltypes a
 where a.int_col = a.tinyint_col and
       a.bool_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.bool_col, a.int_col = a.tinyint_col
@@ -14,6 +16,8 @@ from functional.alltypes a
 where a.string_col LIKE '%a%' and
       a.int_col = a.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col = a.tinyint_col, a.string_col LIKE '%a%'
@@ -24,6 +28,8 @@ from functional.alltypes a
 where (a.int_col = a.tinyint_col or a.int_col = a.smallint_col) and
       a.int_col = a.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col = a.bigint_col, (a.int_col = a.tinyint_col OR a.int_col = a.smallint_col)
@@ -33,6 +39,8 @@ select *
 from functional.alltypes a
 where a.int_col + 5 = a.bigint_col - 10 and a.int_col = a.tinyint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col = a.tinyint_col, a.int_col + 5 = a.bigint_col - 10
@@ -43,6 +51,8 @@ from functional.alltypes a
 where a.int_col = a.tinyint_col and
       (case a.int_col when 0 then true when 1 then true when 2 then true else false end)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col = a.tinyint_col, (CASE a.int_col WHEN 0 THEN TRUE WHEN 1 THEN TRUE WHEN 2 THEN TRUE ELSE FALSE END), (CASE a.tinyint_col WHEN 0 THEN TRUE WHEN 1 THEN TRUE WHEN 2 THEN TRUE ELSE FALSE END)
@@ -53,6 +63,8 @@ select *
 from functional.alltypes a
 where a.date_string_col LIKE 'a%a' and a.date_string_col LIKE '%a%'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.date_string_col LIKE '%a%', a.date_string_col LIKE 'a%a'
@@ -62,6 +74,8 @@ select *
 from functional.alltypes a
 where a.int_col IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and a.int_col = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col = 1, a.int_col IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
@@ -71,6 +85,8 @@ select *
 from functional.alltypes a
 where a.timestamp_col > '2000-01-01' and a.int_col = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col = 0, a.timestamp_col > '2000-01-01'
@@ -82,6 +98,8 @@ where a.string_col = "looooooooooooooooong string" and
       a.string_col = "medium string" and
       a.string_col = "a"
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.string_col = 'a', a.string_col = 'medium string', a.string_col = 'looooooooooooooooong string'
@@ -92,6 +110,8 @@ from functional.alltypes a
 where a.timestamp_col - interval 1 day > '2000-01-01' and
       a.timestamp_col < '2020-01-01'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.timestamp_col < '2020-01-01', a.timestamp_col - INTERVAL 1 day > '2000-01-01'
@@ -101,6 +121,8 @@ select *
 from functional.alltypes a
 where ceil(a.double_col) > 0 and a.double_col > 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.double_col > 0, ceil(a.double_col) > 0
@@ -110,6 +132,8 @@ select *
 from functional.alltypes a
 where cast(a.int_col as double) > 0 and a.int_col > 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col > 0, CAST(a.int_col AS DOUBLE) > 0
@@ -119,6 +143,8 @@ select *
 from functional.alltypes a
 where a.string_col = "string" and a.int_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.int_col IS NULL, a.string_col = 'string'
@@ -131,6 +157,8 @@ where a.string_col LIKE '%a%' and
       (a.int_col = a.tinyint_col or a.int_col = a.smallint_col) and
       a.int_col = a.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.bool_col, a.int_col = a.bigint_col, (a.int_col = a.tinyint_col OR a.int_col = a.smallint_col), a.string_col LIKE '%a%'
@@ -141,6 +169,8 @@ select *
 from functional.alltypes a
 where a.int_col = 0 and a.id = 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: a.id = 0, a.int_col = 0

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
index 76b0fec..3c03cd5 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
@@ -1,8 +1,12 @@
 select 1 + 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
index 5662f5d..fd7dc1e 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
@@ -5,6 +5,8 @@ where tinyint_col < 256 and
       float_col != 0 and
       cast(int_col as bigint) < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN DATA SOURCE [functional.alltypes_datasource]
 data source predicates: tinyint_col < 256
 predicates: float_col != 0, CAST(int_col AS BIGINT) < 10
@@ -22,6 +24,8 @@ where 10 > int_col and
       not true = bool_col and
       not 5.0 = double_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN DATA SOURCE [functional.alltypes_datasource]
 data source predicates: 10 > int_col, string_col != 'Foo'
 predicates: 5 > double_col, NOT 5.0 = double_col, NOT TRUE = bool_col, string_col != 'Bar'
@@ -34,6 +38,8 @@ where int_col < 10 and
       string_col in ("Foo", "Bar") and
       bool_col != false
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN DATA SOURCE [functional.alltypes_datasource]
 data source predicates: int_col < 10, bool_col != FALSE
 predicates: double_col > 5, string_col IN ('Foo', 'Bar')
@@ -50,6 +56,8 @@ and a.int_col = b.id and a.bigint_col = b.id
 # redundant predicates to test minimal spanning tree of equivalent slots
 where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |
@@ -69,6 +77,8 @@ and smallint_col IS DISTINCT FROM 3
 and int_col is not distinct from 4
 and bigint_col is not distinct from 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN DATA SOURCE [functional.alltypes_datasource]
 data source predicates: id IS NOT DISTINCT FROM 1, tinyint_col IS DISTINCT FROM 2, int_col IS NOT DISTINCT FROM 4
 predicates: bigint_col IS NOT DISTINCT FROM 5, bool_col IS NOT DISTINCT FROM TRUE, smallint_col IS DISTINCT FROM 3

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
index f91dae5..07726c9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
@@ -2,6 +2,8 @@ select tinyint_col, count(*)
 from functional.alltypesagg
 group by 1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -20,6 +22,8 @@ group by 1
 select count(distinct id)
 from functional.alltypesagg
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(id)
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test b/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
index fca99ed..b895ad0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
@@ -1,12 +1,16 @@
 # Distinct estimate
 select distinctpc(l_orderkey) from tpch.lineitem
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: distinctpc(l_orderkey)
 |
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: distinctpc:merge(l_orderkey)
 |
@@ -21,6 +25,8 @@ select distinctpc(l_orderkey) from tpch.lineitem
 # Distinct estimate with distinct
 select count(distinct l_orderkey), distinctpc(l_orderkey) from tpch.lineitem
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(l_orderkey), distinctpc:merge(l_orderkey)
 |
@@ -31,6 +37,8 @@ select count(distinct l_orderkey), distinctpc(l_orderkey) from tpch.lineitem
 00:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(l_orderkey), distinctpc:merge(l_orderkey)
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test b/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
index e5f3bce..b5361a6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
@@ -2,12 +2,16 @@
 select distinct *
 from functional.testtbl
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
 |
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -25,12 +29,16 @@ from functional.testtbl
 select distinct id, zip
 from functional.testtbl
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: id, zip
 |
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -48,6 +56,8 @@ from functional.testtbl
 select count(distinct id, zip)
 from functional.testtbl
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(if(id IS NULL, NULL, zip))
 |
@@ -57,6 +67,8 @@ from functional.testtbl
 00:SCAN HDFS [functional.testtbl]
    partitions=1/1 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(if(id IS NULL, NULL, zip))
 |
@@ -81,6 +93,8 @@ select tinyint_col, count(distinct int_col, bigint_col)
 from functional.alltypesagg
 group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(if(int_col IS NULL, NULL, bigint_col))
 |  group by: tinyint_col
@@ -91,6 +105,8 @@ group by 1
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -113,6 +129,8 @@ select tinyint_col, count(distinct int_col), sum(distinct int_col)
 from functional.alltypesagg
 group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(int_col), sum(int_col)
 |  group by: tinyint_col
@@ -123,6 +141,8 @@ group by 1
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -144,6 +164,8 @@ group by 1
 select sum(distinct int_col)
 from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: sum(int_col)
 |
@@ -153,6 +175,8 @@ from functional.alltypesagg
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum:merge(int_col)
 |
@@ -178,6 +202,8 @@ select tinyint_col, count(distinct int_col),
 min(distinct smallint_col), max(distinct string_col)
 from functional.alltypesagg group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(int_col), min:merge(smallint_col), max:merge(string_col)
 |  group by: tinyint_col
@@ -189,6 +215,8 @@ from functional.alltypesagg group by 1
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -213,6 +241,8 @@ select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col),
 sum(int_col), min(smallint_col), max(bigint_col)
 from functional.alltypesagg group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col)
 |  group by: tinyint_col
@@ -224,6 +254,8 @@ from functional.alltypesagg group by 1
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -250,6 +282,8 @@ select t1.c, t2.c from
 (select count(distinct int_col) as c from functional.alltypestiny) t1 inner join
 (select count(distinct bigint_col) as c from functional.alltypestiny) t2 on (t1.c = t2.c)
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: count(int_col) = count(bigint_col)
 |
@@ -271,6 +305,8 @@ select t1.c, t2.c from
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: count(int_col) = count(bigint_col)
 |
@@ -323,6 +359,8 @@ select t1.c, t2.c from
 select count(distinct tinyint_col) from functional.alltypes
 having count(bigint_col) > 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(tinyint_col), count:merge(bigint_col)
 |  having: zeroifnull(count(bigint_col)) > 0
@@ -334,6 +372,8 @@ having count(bigint_col) > 0
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(tinyint_col), count:merge(bigint_col)
 |  having: zeroifnull(count(bigint_col)) > 0
@@ -362,6 +402,8 @@ select 1 from
   (select count(distinct 1) x from functional.alltypes) t
 where t.x is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(1)
 |  having: count(1) IS NOT NULL
@@ -372,6 +414,8 @@ where t.x is not null
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(1)
 |  having: count(1) IS NOT NULL
@@ -398,6 +442,8 @@ select 1 from
   (select count(distinct 1) x, count(1) y from functional.alltypes) t
 where t.x + t.y > 10 and t.x > 0 and t.y > 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(1), count:merge(1)
 |  having: count(1) > 0, zeroifnull(count(1)) > 1, count(1) + zeroifnull(count(1)) > 10
@@ -409,6 +455,8 @@ where t.x + t.y > 10 and t.x > 0 and t.y > 1
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(1), count:merge(1)
 |  having: count(1) > 0, zeroifnull(count(1)) > 1, count(1) + zeroifnull(count(1)) > 10
@@ -434,6 +482,8 @@ where t.x + t.y > 10 and t.x > 0 and t.y > 1
 # IMPALA-2266: Test non-grouping distinct aggregation inside an inline view.
 select * from (select count(distinct int_col) cd from functional.alltypes) v
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(int_col)
 |
@@ -456,6 +506,8 @@ select * from (select count(distinct int_col) cd from functional.alltypes) v
 # IMPALA-2266: Test grouping distinct aggregation inside an inline view.
 select * from (select count(distinct int_col) cd from functional.alltypes group by bool_col) v
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
@@ -476,6 +528,8 @@ select * from (select count(distinct int_col) cd from functional.alltypes group
 # IMPALA-4042: count(distinct NULL) fails on a view
 select count(distinct null) from functional.alltypes_view
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(NULL)
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
index 27413c0..7933b3a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
@@ -5,11 +5,15 @@ left outer join functional.alltypes t2
 on t1.id = t2.id
 where false
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # HBase scan turns into empty-set node due to a constant conjunct.
 select * from functional_hbase.alltypessmall where false
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Data source scan turns into empty-set node due to a constant conjunct.
@@ -19,6 +23,8 @@ inner join functional.alltypestiny b
 on a.id = b.id
 where length("a") > 7
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Constant conjunct in ON clause turns query block into an empty-set node.
@@ -27,6 +33,8 @@ from functional.alltypestiny t1
 inner join functional.alltypes t2
 on (t1.id = t2.id and (false or false))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Constant conjunct in WHERE clause turns query block into an aggregation
@@ -35,6 +43,8 @@ select count(int_col), avg(double_col), count(*)
 from functional.alltypes
 where null
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(int_col), avg(double_col), count(*)
 |
@@ -47,6 +57,8 @@ from functional.alltypestiny t1
 inner join functional.alltypes t2
 on (t1.id = t2.id and (false or false))
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -61,6 +73,8 @@ on t1.id = t2.id
 group by t1.int_col
 having ifnull(null, false)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Constant conjunct causes empty-set inline view.
@@ -74,6 +88,8 @@ inner join
    where 1 + 3 > 10) e
 on e.id = f.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: f.id = t1.id
 |  runtime filters: RF000 <- t1.id
@@ -91,6 +107,8 @@ select * from functional.alltypes where "abc" = "cde"
 union all
 select * from functional.alltypestiny
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HDFS [functional.alltypestiny]
@@ -110,6 +128,8 @@ full outer join
    where null) t2
 on a.id = t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.id = id
 |
@@ -125,6 +145,8 @@ from functional.alltypessmall a
 left outer join functional.alltypestiny b
 on (a.id = b.id and 1 + 1 > 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: a.id = b.id
 |  other join predicates: 1 + 1 > 10
@@ -142,6 +164,8 @@ from functional.alltypessmall a
 right outer join functional.alltypestiny b
 on (a.id = b.id and !true)
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [RIGHT OUTER JOIN]
 |  hash predicates: a.id = b.id
 |  other join predicates: NOT TRUE
@@ -161,6 +185,8 @@ from functional.alltypessmall a
 full outer join functional.alltypestiny b
 on (a.id = b.id and null = "abc")
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.id = b.id
 |  other join predicates: NULL = 'abc'
@@ -178,6 +204,8 @@ left outer join functional.alltypes t2
 on t1.id = t2.id
 limit 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Limit 0 turns query block into an empty-set node.
@@ -185,6 +213,8 @@ select count(int_col), avg(double_col), count(*)
 from functional.alltypes
 limit 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Limit 0 causes empty-set inline view.
@@ -198,6 +228,8 @@ inner join
    limit 0) e
 on e.id = f.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: f.id = t1.id
 |  runtime filters: RF000 <- t1.id
@@ -215,6 +247,8 @@ select * from functional.alltypes limit 0
 union all
 select * from functional.alltypestiny
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HDFS [functional.alltypestiny]
@@ -231,6 +265,8 @@ union all
 (select * from functional.alltypestiny)
 limit 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # Inline view with a constant select stmt that is guaranteed to be empty.
@@ -241,6 +277,8 @@ w1 where w1.c1 is null
 union all
 select int_col from functional.alltypesagg
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--03:SCAN HDFS [functional.alltypesagg]
@@ -254,6 +292,8 @@ select int_col from functional.alltypesagg
 # IMPALA-1234: Analytic with constant empty result set failed precondition check in FE
 select MIN(int_col) OVER () FROM functional.alltypes limit 0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # IMPALA-1860: INSERT/CTAS should evaluate and apply constant predicates.
@@ -291,6 +331,8 @@ from
   (select id, int_col, bigint_col from functional.alltypestiny) T
 where false
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(id), count(int_col)
 |
@@ -304,6 +346,8 @@ from
   (select id, int_col from functional.alltypestiny) T2 on (T1.id = T2.id)
 where T1.bigint_col < 10 and 1 > 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(id + int_col)
 |
@@ -317,6 +361,8 @@ from
   functional.alltypessmall T2 on T1.id = T2.id
 where T2.bigint_col < 10 and false
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:AGGREGATE [FINALIZE]
 |  output: count(T1.int_col)
 |
@@ -338,6 +384,8 @@ union all
 select coalesce(10.4, int_col)
 from functional.alltypes where false
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:UNION
 |
 01:AGGREGATE [FINALIZE]
@@ -359,6 +407,8 @@ select 1
 union all select bigint_col
 from functional.alltypestiny
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:UNION
 |  constant-operands=1
 |
@@ -375,6 +425,8 @@ from functional.alltypestiny
 select * from (select 10 as i, 2 as j, '2013' as s) as t
 where t.i < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====
 # IMPALA-2216: Make sure the final output exprs are substituted, even
@@ -406,6 +458,8 @@ left outer join
       where null) nv) v4
 where c_custkey < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--16:NESTED LOOP JOIN [LEFT OUTER JOIN]
@@ -457,6 +511,8 @@ from tpch_nested_parquet.customer c,
  ) v1
 where c_custkey = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--07:NESTED LOOP JOIN [CROSS JOIN]
@@ -479,5 +535,7 @@ where c_custkey = 1
 # IMPALA-2215: Having clause without aggregation.
 select 1 from (select 1) v having 1 > 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:EMPTYSET
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
index 57b2cce..91aff74 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
@@ -1,6 +1,8 @@
 # full scan of string typed row-key
 select * from functional_hbase.stringids
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
 ====
 # predicate on row key doesn't get transformed into scan parameter, because
@@ -8,6 +10,8 @@ select * from functional_hbase.stringids
 select * from functional_hbase.alltypessmall
 where id < 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: id < 5
 ---- SCANRANGELOCATIONS
@@ -16,6 +20,8 @@ NODE 0:
   HBASE KEYRANGE port=16202 3:7
   HBASE KEYRANGE port=16203 7:<unbounded>
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -26,6 +32,8 @@ select * from functional_hbase.stringids
 where id = '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 5
    stop key: 5\0
@@ -34,6 +42,8 @@ and tinyint_col = 5
 NODE 0:
   HBASE KEYRANGE port=16202 5:5\0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -45,6 +55,8 @@ select * from functional_hbase.stringids
 where id > '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 5\0
    predicates: tinyint_col = 5
@@ -53,6 +65,8 @@ NODE 0:
   HBASE KEYRANGE port=16202 5\0:7
   HBASE KEYRANGE port=16203 7:<unbounded>
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -63,6 +77,8 @@ select * from functional_hbase.stringids
 where id >= '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 5
    predicates: tinyint_col = 5
@@ -71,6 +87,8 @@ NODE 0:
   HBASE KEYRANGE port=16202 5:7
   HBASE KEYRANGE port=16203 7:<unbounded>
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -81,6 +99,8 @@ select * from functional_hbase.stringids
 where id < '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    stop key: 5
    predicates: tinyint_col = 5
@@ -89,6 +109,8 @@ NODE 0:
   HBASE KEYRANGE port=16201 <unbounded>:3
   HBASE KEYRANGE port=16202 3:5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -99,6 +121,8 @@ select * from functional_hbase.stringids
 where id <= '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -107,6 +131,8 @@ select * from functional_hbase.stringids
 where id > '4' and id < '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 4\0
    stop key: 5
@@ -115,6 +141,8 @@ and tinyint_col = 5
 NODE 0:
   HBASE KEYRANGE port=16202 4\0:5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -126,6 +154,8 @@ select * from functional_hbase.stringids
 where id >= '4' and id < '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 4
    stop key: 5
@@ -134,6 +164,8 @@ and tinyint_col = 5
 NODE 0:
   HBASE KEYRANGE port=16202 4:5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -145,6 +177,8 @@ select * from functional_hbase.stringids
 where id > '4' and id <= '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 4\0
    stop key: 5\0
@@ -153,6 +187,8 @@ and tinyint_col = 5
 NODE 0:
   HBASE KEYRANGE port=16202 4\0:5\0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -164,6 +200,8 @@ select * from functional_hbase.stringids
 where id >= '4' and id <= '5'
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 4
    stop key: 5\0
@@ -172,6 +210,8 @@ and tinyint_col = 5
 NODE 0:
   HBASE KEYRANGE port=16202 4:5\0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -185,6 +225,8 @@ from functional_hbase.alltypessmall
 where id < 5
 group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: int_col
@@ -192,6 +234,8 @@ group by 1
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: id < 5
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -210,10 +254,14 @@ group by 1
 # predicates on string columns against a constant string are converted to HBase filters
 select * from functional_hbase.alltypessmall where string_col = '4'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col EQUAL '4'
    predicates: string_col = '4'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -223,10 +271,14 @@ select * from functional_hbase.alltypessmall where string_col = '4'
 # test all comparison ops
 select * from functional_hbase.alltypessmall where string_col != '4'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col NOT_EQUAL '4'
    predicates: string_col != '4'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -235,10 +287,14 @@ select * from functional_hbase.alltypessmall where string_col != '4'
 ====
 select * from functional_hbase.alltypessmall where string_col < '4'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col LESS '4'
    predicates: string_col < '4'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -247,10 +303,14 @@ select * from functional_hbase.alltypessmall where string_col < '4'
 ====
 select * from functional_hbase.alltypessmall where string_col > '4'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col GREATER '4'
    predicates: string_col > '4'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -259,10 +319,14 @@ select * from functional_hbase.alltypessmall where string_col > '4'
 ====
 select * from functional_hbase.alltypessmall where string_col <= '4'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col LESS_OR_EQUAL '4'
    predicates: string_col <= '4'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -271,10 +335,14 @@ select * from functional_hbase.alltypessmall where string_col <= '4'
 ====
 select * from functional_hbase.alltypessmall where string_col >= '4'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col GREATER_OR_EQUAL '4'
    predicates: string_col >= '4'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -285,6 +353,8 @@ select * from functional_hbase.alltypessmall where string_col >= '4'
 select * from functional_hbase.alltypessmall
 where string_col >= '4' and string_col != '2' and date_string_col = '04/03/09'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters:
   d:string_col NOT_EQUAL '2'
@@ -292,6 +362,8 @@ where string_col >= '4' and string_col != '2' and date_string_col = '04/03/09'
   d:date_string_col EQUAL '04/03/09'
    predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
@@ -304,6 +376,8 @@ where string_col >= '4' and string_col != '2' and date_string_col = '04/03/09'
 # mix of predicates and functional_hbase. filters
 select * from functional_hbase.alltypessmall where string_col = '4' and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters: d:string_col EQUAL '4'
    predicates: tinyint_col = 5, string_col = '4'
@@ -312,6 +386,8 @@ select * from functional_hbase.alltypessmall where string_col = '4' and tinyint_
 select * from functional_hbase.stringids
 where string_col = '4' and tinyint_col = 5 and id >= '4' and id <= '5'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 4
    stop key: 5\0
@@ -321,6 +397,8 @@ where string_col = '4' and tinyint_col = 5 and id >= '4' and id <= '5'
 NODE 0:
   HBASE KEYRANGE port=16202 4:5\0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -332,12 +410,16 @@ NODE 0:
 # predicates involving casts (ie, non-string comparisons) cannot be turned into filters
 select * from functional_hbase.alltypessmall where cast(string_col as int) >= 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: CAST(string_col AS INT) >= 4
 ====
 # non-const comparisons cannot be turned into filters
 select * from functional_hbase.alltypessmall where string_col >= date_string_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypessmall]
    predicates: string_col >= date_string_col
 ====
@@ -346,6 +428,8 @@ select * from functional_hbase.stringids
 where id = concat('', '5')
 and tinyint_col = 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 5
    stop key: 5\0
@@ -356,6 +440,8 @@ select * from functional_hbase.stringids
 where string_col = '4' and tinyint_col = 5
   and id >= concat('', '4') and id <= concat('5', '')
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    start key: 4
    stop key: 5\0
@@ -365,6 +451,8 @@ where string_col = '4' and tinyint_col = 5
 NODE 0:
   HBASE KEYRANGE port=16202 4:5\0
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
@@ -376,8 +464,12 @@ NODE 0:
 # IMP-1188 - row key predicate is null.
 select * from functional_hbase.stringids where id = null
 ---- PLAN
+PLAN-ROOT SINK
+|
 empty scan node
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 empty scan node
@@ -385,8 +477,12 @@ empty scan node
 # IMP-1188 - row key lower bound is bigger than upper bound.
 select * from functional_hbase.stringids where id > 'b' and id < 'a'
 ---- PLAN
+PLAN-ROOT SINK
+|
 empty scan node
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 empty scan node
@@ -396,6 +492,8 @@ empty scan node
 select * from functional_hbase.stringids
 where cast(id as int) < 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.stringids]
    predicates: CAST(id AS INT) < 5
 ====
@@ -407,6 +505,8 @@ where cast(id as int) < 5
 select * from functional_hbase.alltypesagg
 where bigint_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypesagg]
    predicates: bigint_col IS NULL
 ====
@@ -415,6 +515,8 @@ where bigint_col is null
 select bigint_col, day from functional_hbase.alltypesagg
 where bigint_col is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypesagg]
    predicates: bigint_col IS NULL
 ====
@@ -422,6 +524,8 @@ where bigint_col is null
 select * from functional_hbase.alltypesagg
 where bigint_col is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypesagg]
    predicates: bigint_col IS NOT NULL
 ====
@@ -429,6 +533,8 @@ where bigint_col is not null
 select * from functional_hbase.alltypesagg
 where bigint_col is null and day = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypesagg]
    predicates: bigint_col IS NULL, day = 1
 ====
@@ -436,6 +542,8 @@ where bigint_col is null and day = 1
 select * from functional_hbase.alltypesagg
 where bigint_col is not null and bool_col = true
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HBASE [functional_hbase.alltypesagg]
    predicates: bigint_col IS NOT NULL, bool_col = TRUE
 ---- SCANRANGELOCATIONS
@@ -444,6 +552,8 @@ NODE 0:
   HBASE KEYRANGE port=16202 3:7
   HBASE KEYRANGE port=16203 7:<unbounded>
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypesagg]
@@ -453,12 +563,16 @@ NODE 0:
 select count(*) from functional_hbase.alltypesagg
 where bigint_col = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
 00:SCAN HBASE [functional_hbase.alltypesagg]
    predicates: bigint_col = 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -474,6 +588,8 @@ where bigint_col = 10
 select count(*) from functional_hbase.alltypesagg
 where bigint_col = 10 and day = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -492,6 +608,8 @@ where
   a.int_col = b.int_col and
   c.int_col = b.int_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: b.int_col = a.int_col
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
index 785d111..9d43a25 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
@@ -2,9 +2,13 @@
 select * FROM functional.alltypes
 where cast(year as string) = to_date( from_unixtime(unix_timestamp()) )
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -16,6 +20,8 @@ from functional.testtbl
 where name like 'm%'
 group by 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  group by: zip
@@ -24,6 +30,8 @@ group by 1
    partitions=1/1 files=0 size=0B
    predicates: name LIKE 'm%'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE [FINALIZE]
@@ -43,6 +51,8 @@ group by 1
 # all partitions are selected
 select * from functional.alltypes
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- SCANRANGELOCATIONS
@@ -72,6 +82,8 @@ NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=8/100801.txt 0:20853
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=9/100901.txt 0:20179
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -80,6 +92,8 @@ NODE 0:
 # predicate on first partition key
 select id, month from functional.alltypes where year = 2009
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=238.68KB
 ---- SCANRANGELOCATIONS
@@ -97,6 +111,8 @@ NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=8/090801.txt 0:20853
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=9/090901.txt 0:20179
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -105,141 +121,191 @@ NODE 0:
 # same predicate, phrased differently
 select * from functional.alltypes where year = 2009.0
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=238.68KB
 ====
 select * from functional.alltypes where 2009 = year
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=238.68KB
 ====
 select * from functional.alltypes where 2009 <=> year
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=238.68KB
 ====
 # compound predicate on the second partition key
 select * from functional.alltypes where !(month > 2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=4/24 files=4 size=76.83KB
 ====
 # nested compound predicates on the second partition key
 select * from functional.alltypes where !(!(month=1))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=2/24 files=2 size=40.32KB
 ====
 select * from functional.alltypes where !(!(month<=>1))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=2/24 files=2 size=40.32KB
 ====
 # predicates on both partition keys one of which is a compound predicate with NOT
 select * from functional.alltypes where year=2009 and !(month < 6)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=7/24 files=7 size=140.58KB
 ====
 # compound predicates on both partition keys
 select * from functional.alltypes where !(year < 2009) and !(month < 6)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=14/24 files=14 size=281.15KB
 ====
 # compound predicate on a conjunct
 select * from functional.alltypes where !(year = 2009 and month > 6)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=18/24 files=18 size=357.58KB
 ====
 select * from functional.alltypes where !(year <=> 2009 and month > 6)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=18/24 files=18 size=357.58KB
 ====
 select * from functional.alltypes where !(year <=> 2009) or !(month > 6)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=18/24 files=18 size=357.58KB
 ====
 # compound predicate on a disjunct
 select * from functional.alltypes where !(month = 6 or month = 8)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=20/24 files=20 size=398.31KB
 ====
 select * from functional.alltypes where !(month <=> 6 or month <=> 8)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=20/24 files=20 size=398.31KB
 ====
 # not predicate with is null
 select * from functional.alltypes where not (year = 2009 or month is null)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=239.77KB
 ====
 # not predicate with "<=> null" as a synonym of "is null"
 select * from functional.alltypes where not (year = 2009 or month <=> null)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=239.77KB
 ====
 # nested not predicates with is null
 select * from functional.alltypes where not (not (month is null))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ====
 # nested not predicates with "<=> null" as a synonym of "is null"
 select * from functional.alltypes where not (not (month <=> null))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ====
 # nested not predicates with disjunct
 select * from functional.alltypes where not (not (month is null or year = 2009))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=238.68KB
 ====
 # nested not predicates with disjunct and "<=> null" as a synonym of "is null"
 select * from functional.alltypes where not (not (month <=> null or year = 2009))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=238.68KB
 ====
 # predicate on second partition key
 select * from functional.alltypes where month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=2/24 files=2 size=40.32KB
 ====
 # predicate on both partition keys
 select * from functional.alltypes where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=1/24 files=1 size=19.95KB
 ====
 # single-sided range on 2nd key
 select * from functional.alltypes where year=2009 and month > 6
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=6/24 files=6 size=120.87KB
 ====
 select * from functional.alltypes where year=2009 and month < 6
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=5/24 files=5 size=98.11KB
 ====
 select * from functional.alltypes where year=2009 and month in (1, 3, 5, 7)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=4/24 files=4 size=80.74KB
 ====
 select * from functional.alltypes where year<=>2009 and month in (1, 3, 5, 7)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=4/24 files=4 size=80.74KB
 ====
@@ -247,6 +313,8 @@ select * from functional.alltypes where year<=>2009 and month in (1, 3, 5, 7)
 select * from functional.alltypes
 where year=2009 and month in (1, 3, 5, 7) and month is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=4/24 files=4 size=80.74KB
 ====
@@ -254,29 +322,39 @@ where year=2009 and month in (1, 3, 5, 7) and month is not null
 select * from functional.alltypes
 where year=2009 and month in (1, 3, 5, 7) and month is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ====
 select * from functional.alltypes where year=2009 and (month in (1, 3, 5) or month = 7)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=4/24 files=4 size=80.74KB
 ====
 # single-sided ranges on both keys
 select * from functional.alltypes where year<=2009 and month < 6
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=5/24 files=5 size=98.11KB
 ====
 # range on 2nd key
 select * from functional.alltypes where month < 9 and month > 6
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=4/24 files=4 size=81.46KB
 ====
 # multiple predicates on first key; 2nd one applied as predicate
 select * from functional.alltypes where year < 2010 and year < 2009 and month > 6
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ====
@@ -284,6 +362,8 @@ select * from functional.alltypes where year < 2010 and year < 2009 and month >
 select * from functional.alltypes
 where year < 2010 and (month > 6 or month = 1 or month in (3, 4))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=9/24 files=9 size=180.49KB
 ====
@@ -291,18 +371,24 @@ where year < 2010 and (month > 6 or month = 1 or month in (3, 4))
 select * from functional.alltypes
 where year < 2010 and (month > 6 or month <=> 1 or month in (3, 4))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=9/24 files=9 size=180.49KB
 ====
 # between predicate on second key
 select * from functional.alltypes where year = 2009 and month between 6 and 8
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=3/24 files=3 size=60.43KB
 ====
 # between predicate on second key
 select * from functional.alltypes where year <=> 2009 and month between 6 and 8
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=3/24 files=3 size=60.43KB
 ====
@@ -310,6 +396,8 @@ select * from functional.alltypes where year <=> 2009 and month between 6 and 8
 select * from functional.alltypes
 where year between 2009 and 2009 and month between 6 and 8
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=3/24 files=3 size=60.43KB
 ====
@@ -317,6 +405,8 @@ where year between 2009 and 2009 and month between 6 and 8
 select * from functional.alltypes
 where year = 2009 and (month between 6 and 7 or month between 7 and 8)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=3/24 files=3 size=60.43KB
 ---- SCANRANGELOCATIONS
@@ -329,6 +419,8 @@ NODE 0:
 select * from functional.alltypes
 where year = 2009 and (month between 5+1 and 8-1 or month between 9-2 and 1+7)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=3/24 files=3 size=60.43KB
 ---- SCANRANGELOCATIONS
@@ -340,11 +432,15 @@ NODE 0:
 # slot binding still determined
 select * from functional.alltypes where year - 1 = 2009
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=239.77KB
 ====
 select * from functional.alltypes where year - 1 <=> 2009
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=12/24 files=12 size=239.77KB
 ====
@@ -352,82 +448,110 @@ select * from functional.alltypes where year - 1 <=> 2009
 # IS NULL predicate on a partition key with nulls
 select * from functional.alltypesagg where day is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # <=> null predicate on a partition key with nulls
 select * from functional.alltypesagg where day <=> null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # IS NOT NULL predicate on a partition key with nulls
 select * from functional.alltypesagg where day is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=10/11 files=10 size=743.67KB
 ====
 # IS DISTINCT FROM NULL predicate on a partition key with nulls
 select * from functional.alltypesagg where day is distinct from null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=10/11 files=10 size=743.67KB
 ====
 select * from functional.alltypesagg where day = day
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=10/11 files=10 size=743.67KB
 ====
 select * from functional.alltypesagg where day <=> day
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ====
 # partition key predicates which are in conjunctive normal form (case 1)
 select * from functional.alltypesagg where day is null and day = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=0/11 files=0 size=0B
 ====
 # partition key predicates which are in conjunctive normal form (case 1)
 select * from functional.alltypesagg where day <=> null and day = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=0/11 files=0 size=0B
 ====
 # partition key predicates which are in conjunctive normal form (case 2)
 select * from functional.alltypesagg where day is null and month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates which are in conjunctive normal form (case 2)
 select * from functional.alltypesagg where day <=> null and month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates which are in conjunctive normal form (case 3)
 select * from functional.alltypesagg where month = 1 and (day is null or day = 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=145.53KB
 ====
 # partition key predicates which are in conjunctive normal form (case 3)
 select * from functional.alltypesagg where month = 1 and (day <=> null or day = 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=145.53KB
 ====
 # partition key predicates which are in conjunctive normal form (case 4)
 select * from functional.alltypesagg where month = 1 and (day is null or year = 2010)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ====
 # partition key predicates which are in conjunctive normal form (case 4)
 select * from functional.alltypesagg where month = 1 and (day <=> null or year = 2010)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ====
@@ -435,6 +559,8 @@ select * from functional.alltypesagg where month = 1 and (day <=> null or year =
 select * from functional.alltypesagg
 where (year = 2010 or month = 1) and (day is not null or day = 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=10/11 files=10 size=743.67KB
 ====
@@ -442,42 +568,56 @@ where (year = 2010 or month = 1) and (day is not null or day = 10)
 select * from functional.alltypesagg
 where (year = 2010 or month = 1) and (day is distinct from null or day = 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=10/11 files=10 size=743.67KB
 ====
 # partition key predicates which are in disjunctive normal form (case 1)
 select * from functional.alltypesagg where day is null or month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ====
 # partition key predicates which are in disjunctive normal form (case 1)
 select * from functional.alltypesagg where day <=> null or month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=11/11 files=11 size=814.73KB
 ====
 # partition key predicates which are in disjunctive normal form (case 2)
 select * from functional.alltypesagg where day is null or day = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=145.53KB
 ====
 # partition key predicates which are in disjunctive normal form (case 2)
 select * from functional.alltypesagg where day <=> null or day = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=145.53KB
 ====
 # partition key predicates which are in disjunctive normal form (case 3)
 select * from functional.alltypesagg where day = 10 or (day is null and year = 2010)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=145.53KB
 ====
 # partition key predicates which are in disjunctive normal form (case 3)
 select * from functional.alltypesagg where day = 10 or (day <=> null and year = 2010)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=145.53KB
 ====
@@ -485,6 +625,8 @@ select * from functional.alltypesagg where day = 10 or (day <=> null and year =
 select * from functional.alltypesagg
 where (month = 1 and day = 1) or (day is null and year = 2010)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=144.45KB
 ====
@@ -492,54 +634,72 @@ where (month = 1 and day = 1) or (day is null and year = 2010)
 select * from functional.alltypesagg
 where (month = 1 and day = 1) or (day <=> null and year = 2010)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=2/11 files=2 size=144.45KB
 ====
 # partition key predicates with negation (case 1)
 select * from functional.alltypesagg where not (day is not null)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates with negation (case 1)
 select * from functional.alltypesagg where not (day is distinct from null)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates with negation (case 2)
 select * from functional.alltypesagg where not (not (day is null))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates with negation (case 2)
 select * from functional.alltypesagg where not (not (day <=> null))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates with negation (case 3)
 select * from functional.alltypesagg where not (day is not null and month = 1)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates with negation (case 3)
 select * from functional.alltypesagg where not (day is distinct from null and month = 1)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
 # partition key predicates with negation (case 3)
 select * from functional.alltypesagg where not (day is not null or day < 9)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=0/11 files=0 size=0B
 ====
 # partition key predicates with negation (case 3)
 select * from functional.alltypesagg where not (day is distinct from null or day < 9)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=0/11 files=0 size=0B
 ====
@@ -547,6 +707,8 @@ select * from functional.alltypesagg where not (day is distinct from null or day
 select * from functional.alltypesagg
 where not (day is not null and (not (day < 9 and month = 1)))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=9/11 files=9 size=665.77KB
 ====
@@ -554,6 +716,8 @@ where not (day is not null and (not (day < 9 and month = 1)))
 select * from functional.alltypesagg
 where not (day is distinct from null and (not (day < 9 and month = 1)))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=9/11 files=9 size=665.77KB
 ====
@@ -561,6 +725,8 @@ where not (day is distinct from null and (not (day < 9 and month = 1)))
 select * from functional.alltypesagg
 where not (day is not null or (day = 1 and (not (month = 1 or year = 2010))))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
@@ -568,6 +734,8 @@ where not (day is not null or (day = 1 and (not (month = 1 or year = 2010))))
 select * from functional.alltypesagg
 where not (day is distinct from null or (day = 1 and (not (month = 1 or year = 2010))))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
@@ -575,6 +743,8 @@ where not (day is distinct from null or (day = 1 and (not (month = 1 or year = 2
 select * from functional.alltypesagg
 where year + 1 = 2011 and month + 1 <= 3 and day is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
@@ -582,6 +752,8 @@ where year + 1 = 2011 and month + 1 <= 3 and day is null
 select * from functional.alltypesagg
 where year + 1 = 2011 and month + 1 <= 3 and day <=> null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=1/11 files=1 size=71.05KB
 ====
@@ -591,6 +763,8 @@ where day = 5 or (day >= 1 and day <= 2) or (day > 6 and day < 8)
 or day is null or day in (4) or not(day is not null)
 or not (day not in (10)) or not (day != 8)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=8/11 files=8 size=591.30KB
 ---- SCANRANGELOCATIONS
@@ -610,6 +784,8 @@ where day = 5 or (day >= 1 and day <= 2) or (day > 6 and day < 8)
 or day <=> null or day in (4) or not(day is distinct from null)
 or not (day not in (10)) or not (day != 8)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg]
    partitions=8/11 files=8 size=591.30KB
 ---- SCANRANGELOCATIONS
@@ -626,47 +802,65 @@ NODE 0:
 # Predicates on a partition key with no values (see IMPALA-4128).
 select * from functional.emptytable where f2 = 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 != 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 > 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 in (10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 not in (10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 is null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 select * from functional.emptytable where f2 is not null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.emptytable]
    partitions=0/0 files=0 size=0B
 ====
 # multi-file non-partitioned table
 select * from functional.alltypesaggmultifilesNoPart
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesaggmultifilesnopart]
    partitions=1/1 files=4 size=805.23KB
 ---- SCANRANGELOCATIONS
@@ -679,6 +873,8 @@ NODE 0:
 # multi-file partitioned table
 select * from functional.alltypesaggmultifiles where day <= 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesaggmultifiles]
    partitions=2/11 files=8 size=145.97KB
 ====
@@ -688,39 +884,53 @@ select * from functional.alltypesaggmultifiles where day <= 2
 # Test single binary predicate on a partition column
 select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1]
    partitions=1/1234 files=1 size=2B
 ====
 select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j <=> 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1]
    partitions=1/1234 files=1 size=2B
 ====
 # Test disjunctive predicate on a partition column
 select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j = 1 or j = 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1]
    partitions=2/1234 files=2 size=4B
 ====
 select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j <=> 1 or j <=> 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1]
    partitions=2/1234 files=2 size=4B
 ====
 # Test conjunctive predicate on a partition column
 select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j = 1 and j = 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1]
    partitions=0/1234 files=0 size=0B
 ====
 select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j <=> 1 and j <=> 2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1]
    partitions=0/1234 files=0 size=0B
 ====
 # Partition pruning when a binary predicate contains a NullLiteral (IMPALA-1535)
 select * from functional.alltypestiny t1 where t1.year != null or t1.year = null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny t1]
    partitions=0/4 files=0 size=0B
 ====
@@ -728,6 +938,8 @@ select * from functional.alltypestiny t1 where t1.year != null or t1.year = null
 select * from functional.alltypestiny t1
 where t1.year IS DISTINCT FROM null or t1.year = null
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny t1]
    partitions=4/4 files=4 size=460B
 ====
@@ -735,6 +947,8 @@ where t1.year IS DISTINCT FROM null or t1.year = null
 # expression
 select * from functional.alltypesagg t1 where t1.year + null != t1.day
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=0/11 files=0 size=0B
 ====
@@ -742,6 +956,8 @@ select * from functional.alltypesagg t1 where t1.year + null != t1.day
 # expression and IS DISTINCT FROM
 select * from functional.alltypesagg t1 where t1.year + null IS DISTINCT FROM t1.day
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=10/11 files=10 size=743.67KB
 ====
@@ -749,6 +965,8 @@ select * from functional.alltypesagg t1 where t1.year + null IS DISTINCT FROM t1
 # (a single partition is scanned)
 select * from functional.alltypesagg t1 where day in (10, null)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=1/11 files=1 size=74.48KB
 ====
@@ -756,6 +974,8 @@ select * from functional.alltypesagg t1 where day in (10, null)
 # (all partitions are pruned)
 select * from functional.alltypesagg t1 where day not in (10, null)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=0/11 files=0 size=0B
 ====
@@ -763,6 +983,8 @@ select * from functional.alltypesagg t1 where day not in (10, null)
 select * from functional.alltypesagg t1
 where t1.day = instr("this is a test", "this") or t1.year = year(now()) + 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=1/11 files=1 size=73.39KB
 ====
@@ -771,6 +993,8 @@ where t1.day = instr("this is a test", "this") or t1.year = year(now()) + 100
 select * from functional.alltypesagg t1
 where t1.day in (1, cast(2.0 as INT), year(now()) + 100)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=2/11 files=2 size=147.87KB
 ====
@@ -779,6 +1003,8 @@ where t1.day in (1, cast(2.0 as INT), year(now()) + 100)
 select * from functional.alltypesagg t1
 where -t1.day in(-1 - 1) or cast(t1.day as string) like '%1%'
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=3/11 files=3 size=222.34KB
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test
index 8fd3ce2..229c1e6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/implicit-joins.test
@@ -5,6 +5,8 @@ from (select * from functional.alltypestiny) t1
   join (select * from functional.alltypestiny) t2 on (t1.id = t2.id)
   join functional.alltypestiny t3 on (coalesce(t1.id, t3.id) = t3.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: (coalesce(functional.alltypestiny.id, t3.id) = t3.id)
 |
@@ -30,6 +32,8 @@ from
 where
   v.x < v.y
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:NESTED LOOP JOIN [INNER JOIN]
 |  predicates: t1.id < t2.id
 |
@@ -47,6 +51,8 @@ from
 where
   t1.id = t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--02:SCAN HDFS [functional.alltypes t3]
@@ -71,6 +77,8 @@ from
 where
   t1.id = t3.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--01:SCAN HDFS [functional.alltypes t2]
@@ -98,6 +106,8 @@ from
 where
   t2.id = t3.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.id = t3.id
 |  runtime filters: RF000 <- t3.id
@@ -121,6 +131,8 @@ from
   functional.alltypes t3 on t3.id = t2.id,
   functional.alltypes t4
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--03:SCAN HDFS [functional.alltypes t4]
@@ -147,6 +159,8 @@ select a.* from
   functional.alltypessmall b full outer join
   functional.alltypes c on a.id = c.id and a.id < b.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: a.id = c.id
 |  other join predicates: a.id < b.id
@@ -169,6 +183,8 @@ functional.alltypestiny c,
 functional.alltypes d
 where a.id = d.id and b.id = c.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = a.id
 |  runtime filters: RF000 <- a.id
@@ -201,6 +217,8 @@ from
     on t1.id < t2.id,
   functional.alltypes t3
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -232,6 +250,8 @@ from
 where
   t1.id = t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -267,6 +287,8 @@ from
 where
   t1.id = t2.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -298,6 +320,8 @@ from
   functional.alltypes t0 left semi join
   functional.alltypes t1 on ( t0.id < t1.id )
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -320,6 +344,8 @@ where
   b.id in (select avg(id) from functional.alltypes group by month) and
   a.id < b.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -348,6 +374,8 @@ from functional.alltypes t1,
 functional.alltypes t2 join functional.alltypes t3 on (t1.id = t2.id),
 functional.alltypes t4
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--03:SCAN HDFS [functional.alltypes t4]



[12/32] incubator-impala git commit: IMPALA-3348: Avoid per-slot check vector size in KuduScanner

Posted by ta...@apache.org.
IMPALA-3348: Avoid per-slot check vector size in KuduScanner

Fixes a small perf issue by avoiding extra calls to check
a vector size on every slot.
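
The diff below is the change as committed. For illustration only, here is a standalone C++ sketch of the same pattern using made-up SlotDescriptor and ExampleScanner names (not Impala's real classes): the size of the slot vector is cached in an int member that is updated whenever the vector grows, so the per-row hot loop compares against a plain integer rather than calling vector::size().

#include <cstdio>
#include <vector>

// Illustrative stand-ins only; the real change is to KuduScanner in
// be/src/exec/kudu-scanner.cc.
struct SlotDescriptor { int offset; };

class ExampleScanner {
 public:
  void AddStringSlot(SlotDescriptor* slot) {
    string_slots_.push_back(slot);
    ++num_string_slots_;  // kept in sync so the hot loop never calls size()
  }

  // Hot path: iterate using the cached count instead of string_slots_.size().
  void RelocateValues() {
    for (int i = 0; i < num_string_slots_; ++i) {
      std::printf("relocating slot at offset %d\n", string_slots_[i]->offset);
    }
  }

 private:
  std::vector<SlotDescriptor*> string_slots_;
  int num_string_slots_ = 0;  // cached size of string_slots_
};

int main() {
  SlotDescriptor a{0}, b{16};
  ExampleScanner scanner;
  scanner.AddStringSlot(&a);
  scanner.AddStringSlot(&b);
  scanner.RelocateValues();
  return 0;
}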

Testing: Ran EE tests.

Change-Id: Ie76d33c3d00e3be6d238226d28c4100bb65aac58
Reviewed-on: http://gerrit.cloudera.org:8080/4688
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ca3fd401
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ca3fd401
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ca3fd401

Branch: refs/heads/hadoop-next
Commit: ca3fd401be827680444590fbdf7c1b723434f2c5
Parents: 7fad3e5
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Sun Jun 12 12:22:23 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Sat Oct 15 02:49:58 2016 +0000

----------------------------------------------------------------------
 be/src/exec/kudu-scanner.cc | 6 ++++--
 be/src/exec/kudu-scanner.h  | 4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ca3fd401/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index 11bef14..4fcb40a 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -59,7 +59,8 @@ KuduScanner::KuduScanner(KuduScanNode* scan_node, RuntimeState* state)
     state_(state),
     cur_kudu_batch_num_read_(0),
     last_alive_time_micros_(0),
-    tuple_num_null_bytes_(scan_node_->tuple_desc()->num_null_bytes()) {
+    tuple_num_null_bytes_(scan_node_->tuple_desc()->num_null_bytes()),
+    num_string_slots_(0) {
 }
 
 Status KuduScanner::Open() {
@@ -68,6 +69,7 @@ Status KuduScanner::Open() {
   for (int i = 0; i < scan_node_->tuple_desc_->slots().size(); ++i) {
     if (scan_node_->tuple_desc_->slots()[i]->type().IsStringType()) {
       string_slots_.push_back(scan_node_->tuple_desc_->slots()[i]);
+      ++num_string_slots_;
     }
   }
   return scan_node_->GetConjunctCtxs(&conjunct_ctxs_);
@@ -236,7 +238,7 @@ bool KuduScanner::IsSlotNull(Tuple* tuple, const SlotDescriptor& slot) {
 }
 
 Status KuduScanner::RelocateValuesFromKudu(Tuple* tuple, MemPool* mem_pool) {
-  for (int i = 0; i < string_slots_.size(); ++i) {
+  for (int i = 0; i < num_string_slots_; ++i) {
     const SlotDescriptor* slot = string_slots_[i];
     // NULL handling was done in KuduRowToImpalaTuple.
     if (IsSlotNull(tuple, *slot)) continue;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ca3fd401/be/src/exec/kudu-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.h b/be/src/exec/kudu-scanner.h
index b31bb73..d868b05 100644
--- a/be/src/exec/kudu-scanner.h
+++ b/be/src/exec/kudu-scanner.h
@@ -126,6 +126,10 @@ class KuduScanner {
 
   /// List of string slots that need relocation for their auxiliary memory.
   std::vector<SlotDescriptor*> string_slots_;
+
+  /// Number of string slots that need relocation (i.e. size of string_slots_), stored
+  /// separately to avoid calling vector::size() in the hot path (IMPALA-3348).
+  int num_string_slots_;
 };
 
 } /// namespace impala


[10/32] incubator-impala git commit: IMPALA-4102: Remote Kudu reads should be reported

Posted by ta...@apache.org.
IMPALA-4102: Remote Kudu reads should be reported

Adds a profile counter for the number of kudu scan tokens
(ranges) that are "expected" to be remote.
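
The committed change is in the diff below. For illustration, a self-contained sketch of the counting step, using an invented ScanRangeParams stand-in rather than the real Thrift TScanRangeParams type or the RuntimeProfile counter API, might look like this:

#include <cstdio>
#include <vector>

// Illustrative stand-in; the real struct is the Thrift-generated
// TScanRangeParams, whose optional field carries an __isset flag.
struct ScanRangeParams {
  bool is_remote_set;  // was the optional is_remote field populated?
  bool is_remote;
};

int CountRemoteTokens(const std::vector<ScanRangeParams>& params) {
  int num_remote = 0;
  for (const ScanRangeParams& p : params) {
    // Only count ranges whose optional field was actually set.
    if (p.is_remote_set && p.is_remote) ++num_remote;
  }
  return num_remote;
}

int main() {
  std::vector<ScanRangeParams> params = {
      {true, true}, {true, false}, {false, false}, {true, true}};
  // In the real scan node this value is published through a profile counter
  // (named "KuduRemoteScanTokens" in the patch) rather than printed.
  std::printf("KuduRemoteScanTokens: %d\n", CountRemoteTokens(params));
  return 0;
}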

Testing: Manual; have been running with this on the Kudu
cluster. Cannot easily simulate this in the minicluster
because the scheduler considers multiple impalads on the
same host to be local for the purposes of determining
locality. See BackendConfig::LookUpBackendIp().

Change-Id: I74fd5773c4ae10267de80b6572d93197a4131696
Reviewed-on: http://gerrit.cloudera.org:8080/4687
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/a1c9cb36
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/a1c9cb36
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/a1c9cb36

Branch: refs/heads/hadoop-next
Commit: a1c9cb364655d0a65c4b00cb37757d55fc3131a1
Parents: 1a5c43e
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Thu Sep 8 17:30:14 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Sat Oct 15 00:43:56 2016 +0000

----------------------------------------------------------------------
 be/src/exec/kudu-scan-node.cc | 5 +++++
 be/src/exec/kudu-scan-node.h  | 2 ++
 2 files changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a1c9cb36/be/src/exec/kudu-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scan-node.cc b/be/src/exec/kudu-scan-node.cc
index faa093e..6e97378 100644
--- a/be/src/exec/kudu-scan-node.cc
+++ b/be/src/exec/kudu-scan-node.cc
@@ -62,6 +62,7 @@ namespace impala {
 
 const string KuduScanNode::KUDU_READ_TIMER = "TotalKuduReadTime";
 const string KuduScanNode::KUDU_ROUND_TRIPS = "TotalKuduScanRoundTrips";
+const string KuduScanNode::KUDU_REMOTE_TOKENS = "KuduRemoteScanTokens";
 
 KuduScanNode::KuduScanNode(ObjectPool* pool, const TPlanNode& tnode,
     const DescriptorTbl& descs)
@@ -97,6 +98,7 @@ Status KuduScanNode::Prepare(RuntimeState* state) {
   kudu_read_timer_ = ADD_CHILD_TIMER(runtime_profile(), KUDU_READ_TIMER,
       SCANNER_THREAD_TOTAL_WALLCLOCK_TIME);
   kudu_round_trips_ = ADD_COUNTER(runtime_profile(), KUDU_ROUND_TRIPS, TUnit::UNIT);
+  kudu_remote_tokens_ = ADD_COUNTER(runtime_profile(), KUDU_REMOTE_TOKENS, TUnit::UNIT);
 
   DCHECK(state->desc_tbl().GetTupleDescriptor(tuple_id_) != NULL);
 
@@ -104,9 +106,12 @@ Status KuduScanNode::Prepare(RuntimeState* state) {
 
   // Initialize the list of scan tokens to process from the TScanRangeParams.
   DCHECK(scan_range_params_ != NULL);
+  int num_remote_tokens = 0;
   for (const TScanRangeParams& params: *scan_range_params_) {
+    if (params.__isset.is_remote && params.is_remote) ++num_remote_tokens;
     scan_tokens_.push_back(params.scan_range.kudu_scan_token);
   }
+  COUNTER_SET(kudu_remote_tokens_, num_remote_tokens);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a1c9cb36/be/src/exec/kudu-scan-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scan-node.h b/be/src/exec/kudu-scan-node.h
index b1848d7..d4f742a 100644
--- a/be/src/exec/kudu-scan-node.h
+++ b/be/src/exec/kudu-scan-node.h
@@ -101,8 +101,10 @@ class KuduScanNode : public ScanNode {
 
   RuntimeProfile::Counter* kudu_read_timer_;
   RuntimeProfile::Counter* kudu_round_trips_;
+  RuntimeProfile::Counter* kudu_remote_tokens_;
   static const std::string KUDU_READ_TIMER;
   static const std::string KUDU_ROUND_TRIPS;
+  static const std::string KUDU_REMOTE_TOKENS;
 
   /// The id of the callback added to the thread resource manager when a thread
   /// is available. Used to remove the callback before this scan node is destroyed.


[14/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/with-clause.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/with-clause.test b/testdata/workloads/functional-planner/queries/PlannerTest/with-clause.test
index 639f73c..e0d2d8c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/with-clause.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/with-clause.test
@@ -1,9 +1,13 @@
 # Basic test with a single with-clause view.
 with t as (select int_col x, bigint_col y from functional.alltypes) select x, y from t
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -13,9 +17,13 @@ with t as (select int_col x, bigint_col y from functional.alltypes) select x, y
 with t as (select int_col x, bigint_col y from functional.alltypes_view)
 select x, y from t
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -26,9 +34,13 @@ with t1 as (select int_col x, bigint_col y from functional.alltypes),
 t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30))
 select x, y from t2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ====
@@ -37,6 +49,8 @@ with t1 as (select int_col x, bigint_col y from functional.alltypes),
 t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30))
 select * from t1 union all select * from t2 union all select * from t3
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--03:UNION
@@ -48,6 +62,8 @@ select * from t1 union all select * from t2 union all select * from t3
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -71,6 +87,8 @@ t2 as (select int_col x, bigint_col y from functional.alltypestiny),
 t3 as (select int_col x, bigint_col y from functional.alltypessmall)
 select * from t1, t2, t3 where t1.x = t2.x and t2.x = t3.x
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -90,6 +108,8 @@ select * from t1, t2, t3 where t1.x = t2.x and t2.x = t3.x
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.int_col, RF001 -> int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -134,6 +154,8 @@ where c1 > 0
 order by c3, c1 desc
 limit 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:TOP-N [LIMIT=3]
 |  order by: c3 ASC, c1 DESC
 |
@@ -155,6 +177,8 @@ limit 3
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:TOP-N [LIMIT=3]
 |  order by: c3 ASC, c1 DESC
 |
@@ -191,6 +215,8 @@ limit 3
 with t as (select int_col x, bigint_col y from functional.alltypestiny)
 select * from t t1 inner join t t2 on (t1.x = t2.x) inner join t t3 on (t2.x = t3.x)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -210,6 +236,8 @@ select * from t t1 inner join t t2 on (t1.x = t2.x) inner join t t3 on (t2.x = t
    partitions=4/4 files=4 size=460B
    runtime filters: RF000 -> functional.alltypestiny.int_col, RF001 -> int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -242,6 +270,8 @@ select * from t t1 inner join t t2 on (t1.x = t2.x) inner join t t3 on (t2.x = t
 with t as (select int_col x, bigint_col y from functional.alltypestiny)
 select * from t t1 inner join t t2 using(x) inner join t t3 using(x)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -261,6 +291,8 @@ select * from t t1 inner join t t2 using(x) inner join t t3 using(x)
    partitions=4/4 files=4 size=460B
    runtime filters: RF000 -> functional.alltypestiny.int_col, RF001 -> int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -293,6 +325,8 @@ select * from t t1 inner join t t2 using(x) inner join t t3 using(x)
 with t as (select int_col x, bigint_col y from functional.alltypestiny)
 select * from t t1 left outer join t t2 using(x) full outer join t t3 using(x)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: int_col = int_col
 |
@@ -308,6 +342,8 @@ select * from t t1 left outer join t t2 using(x) full outer join t t3 using(x)
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
@@ -338,6 +374,8 @@ select * from t t1 left outer join t t2 using(x) full outer join t t3 using(x)
 with t as (select int_col x, bigint_col y from functional.alltypestiny)
 select * from t t1 inner join [broadcast] t t2 using(x) inner join [shuffle] t t3 using(x)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = int_col
 |  runtime filters: RF000 <- int_col
@@ -357,6 +395,8 @@ select * from t t1 inner join [broadcast] t t2 using(x) inner join [shuffle] t t
    partitions=4/4 files=4 size=460B
    runtime filters: RF000 -> functional.alltypestiny.int_col, RF001 -> int_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -389,6 +429,8 @@ with t1 as (values('a', 'b'))
 (with t2 as (values('c', 'd')) select * from t2) union all
 (with t3 as (values('e', 'f')) select * from t3) order by 1 limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=1]
 |  order by: 'c' ASC
 |
@@ -400,6 +442,8 @@ with t1 as (values('a', 'b'))
 01:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=1]
 |  order by: 'c' ASC
 |
@@ -416,6 +460,8 @@ with t1 as (values('a', 'b'))
 (with t2 as (values('c', 'd')) select * from t2) union all
 (with t3 as (values('e', 'f')) select * from t3) order by 1 limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=1]
 |  order by: 'c' ASC
 |
@@ -427,6 +473,8 @@ with t1 as (values('a', 'b'))
 01:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:TOP-N [LIMIT=1]
 |  order by: 'c' ASC
 |
@@ -493,6 +541,8 @@ with t1 as (
   from functional.alltypestiny)
 select * from t1 where bigint_col = bigint_col2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
    predicates: bigint_col = bigint_col
@@ -503,6 +553,8 @@ select pos from functional.allcomplextypes t inner join
    select pos from w) v
 on v.pos = t.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [INNER JOIN]
@@ -520,6 +572,8 @@ select pos from functional.allcomplextypes t inner join
   (with w1 as (with w2 as (select pos from t.int_array_col) select * from w2)
    select pos from w1) v
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [CROSS JOIN]
@@ -536,6 +590,8 @@ with w1 as (select pos from functional.allcomplextypes t,
   (with w2 as (select pos from t.int_array_col) select * from w2) v)
 select * from w1
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [CROSS JOIN]
@@ -553,6 +609,8 @@ select pos from functional.allcomplextypes t inner join
    w2 as (select key, value from t.map_map_col.value)
    select a1.*, m2.* from w1 a1, w1 a2, w2 m1, w2 m2) v on v.value = t.id
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:SUBPLAN
 |
 |--10:NESTED LOOP JOIN [INNER JOIN]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/tests/custom_cluster/test_client_ssl.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_client_ssl.py b/tests/custom_cluster/test_client_ssl.py
index f3ac523..4bc7bea 100644
--- a/tests/custom_cluster/test_client_ssl.py
+++ b/tests/custom_cluster/test_client_ssl.py
@@ -96,6 +96,7 @@ class TestClientSsl(CustomClusterTestSuite):
   @CustomClusterTestSuite.with_args(impalad_args=SSL_WILDCARD_ARGS,
                                     statestored_args=SSL_WILDCARD_ARGS,
                                     catalogd_args=SSL_WILDCARD_ARGS)
+  @pytest.mark.xfail(run=True, reason="IMPALA-4295 on Centos6")
   def test_wildcard_ssl(self, vector):
     """ Test for IMPALA-3159: Test with a certificate which has a wildcard for the
     CommonName.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/tests/failure/test_failpoints.py
----------------------------------------------------------------------
diff --git a/tests/failure/test_failpoints.py b/tests/failure/test_failpoints.py
index 1468f79..512d6f7 100644
--- a/tests/failure/test_failpoints.py
+++ b/tests/failure/test_failpoints.py
@@ -122,7 +122,7 @@ class TestFailpoints(ImpalaTestSuite):
 
     for node_id in node_ids:
       debug_action = '%d:%s:%s' % (node_id, location, FAILPOINT_ACTION_MAP[action])
-      LOG.info('Current dubug action: SET DEBUG_ACTION=%s' % debug_action)
+      LOG.info('Current debug action: SET DEBUG_ACTION=%s' % debug_action)
       vector.get_value('exec_option')['debug_action'] = debug_action
 
       if action == 'CANCEL':

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/tests/hs2/test_hs2.py
----------------------------------------------------------------------
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index 20bc9c7..6912b69 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -291,12 +291,16 @@ class TestHS2(HS2TestSuite):
     execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
     TestHS2.check_response(execute_statement_resp)
 
-    # Fetch results to make sure errors are generated
-    fetch_results_req = TCLIService.TFetchResultsReq()
-    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
-    fetch_results_req.maxRows = 100
-    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
-    TestHS2.check_response(fetch_results_resp)
+    # Fetch results to make sure errors are generated. Errors are only guaranteed to be
+    # seen by the coordinator after FetchResults() returns eos.
+    has_more_results = True
+    while has_more_results:
+      fetch_results_req = TCLIService.TFetchResultsReq()
+      fetch_results_req.operationHandle = execute_statement_resp.operationHandle
+      fetch_results_req.maxRows = 100
+      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
+      TestHS2.check_response(fetch_results_resp)
+      has_more_results = fetch_results_resp.hasMoreRows
 
     get_log_req = TCLIService.TGetLogReq()
     get_log_req.operationHandle = execute_statement_resp.operationHandle

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/tests/hs2/test_json_endpoints.py
----------------------------------------------------------------------
diff --git a/tests/hs2/test_json_endpoints.py b/tests/hs2/test_json_endpoints.py
index 3053547..a5e73da 100644
--- a/tests/hs2/test_json_endpoints.py
+++ b/tests/hs2/test_json_endpoints.py
@@ -68,6 +68,10 @@ class TestJsonEndpoints(HS2TestSuite):
     fetch_results_req.maxRows = 100
     fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
     TestJsonEndpoints.check_response(fetch_results_resp)
+    # Fetch one more time to ensure that query is at EOS (first fetch might return 0-size
+    # row batch)
+    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
+    TestJsonEndpoints.check_response(fetch_results_resp)
     queries_json = self._get_json_queries(http_addr)
     assert len(queries_json["in_flight_queries"]) == 1
     assert queries_json["num_in_flight_queries"] == 1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/tests/shell/util.py
----------------------------------------------------------------------
diff --git a/tests/shell/util.py b/tests/shell/util.py
index cb0b3ea..4507706 100755
--- a/tests/shell/util.py
+++ b/tests/shell/util.py
@@ -91,9 +91,10 @@ def run_impala_shell_cmd(shell_args, expect_success=True, stdin_input=None):
   """
   result = run_impala_shell_cmd_no_expect(shell_args, stdin_input)
   if expect_success:
-    assert result.rc == 0, "Cmd %s was expected to succeed: %s" % (cmd, result.stderr)
+    assert result.rc == 0, "Cmd %s was expected to succeed: %s" % (shell_args,
+                                                                   result.stderr)
   else:
-    assert result.rc != 0, "Cmd %s was expected to fail" % cmd
+    assert result.rc != 0, "Cmd %s was expected to fail" % shell_args
   return result
 
 def run_impala_shell_cmd_no_expect(shell_args, stdin_input=None):


[29/32] incubator-impala git commit: IMPALA-4310: Make push_to_asf.py respect --apache_remote

Posted by ta...@apache.org.
IMPALA-4310: Make push_to_asf.py respect --apache_remote

Change-Id: I03e15753e685b1b8cf953e8009fb473c9c12aa93
Reviewed-on: http://gerrit.cloudera.org:8080/4747
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Henry Robinson <he...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5a919648
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5a919648
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5a919648

Branch: refs/heads/hadoop-next
Commit: 5a91964893a7ed2cadf8287471e25f8833ee45b1
Parents: 0686cc4
Author: Henry Robinson <he...@cloudera.com>
Authored: Mon Oct 17 23:06:16 2016 -0700
Committer: Henry Robinson <he...@cloudera.com>
Committed: Tue Oct 18 06:34:22 2016 +0000

----------------------------------------------------------------------
 bin/push_to_asf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5a919648/bin/push_to_asf.py
----------------------------------------------------------------------
diff --git a/bin/push_to_asf.py b/bin/push_to_asf.py
index 5865fb8..a438fa4 100755
--- a/bin/push_to_asf.py
+++ b/bin/push_to_asf.py
@@ -248,7 +248,7 @@ def do_update(branch, gerrit_sha, apache_sha):
     return
 
   # Everything has been confirmed. Do the actual push
-  cmd = ['git', 'push', 'apache']
+  cmd = ['git', 'push', OPTIONS.apache_remote]
   if OPTIONS.dry_run:
     cmd.append('--dry-run')
   cmd.append('%s:refs/heads/%s' % (push_sha, branch))


[05/32] incubator-impala git commit: IMPALA-3943: Address post-merge comments.

Posted by ta...@apache.org.
IMPALA-3943: Address post-merge comments.

Adds code comments and issues a warning for Parquet files
with num_rows=0 but at least one non-empty row group.

Change-Id: I72ccf00191afddb8583ac961f1eaf11e5eb28791
Reviewed-on: http://gerrit.cloudera.org:8080/4696
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2a04b0e2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2a04b0e2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2a04b0e2

Branch: refs/heads/hadoop-next
Commit: 2a04b0e21a763e3d1f4269e7a8c3027ff0cab934
Parents: 47b8aa3
Author: Alex Behm <al...@cloudera.com>
Authored: Wed Oct 12 13:31:54 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 14 05:41:22 2016 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-parquet-scanner.cc             | 24 ++++++++++++++++++--
 common/thrift/generate_error_codes.py           |  3 +++
 .../queries/QueryTest/parquet-zero-rows.test    |  4 ++++
 3 files changed, 29 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2a04b0e2/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index 7782e8a..86dbbf8 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -410,7 +410,10 @@ Status HdfsParquetScanner::NextRowGroup() {
     ++row_group_idx_;
     if (row_group_idx_ >= file_metadata_.row_groups.size()) break;
     const parquet::RowGroup& row_group = file_metadata_.row_groups[row_group_idx_];
-    if (row_group.num_rows == 0 || file_metadata_.num_rows == 0) continue;
+    // Also check 'file_metadata_.num_rows' to make sure 'select count(*)' and 'select *'
+    // behave consistently for corrupt files that have 'file_metadata_.num_rows == 0'
+    // but some data in row groups.
+    if (row_group.num_rows == 0 || file_metadata_.num_rows == 0) continue;
 
     const DiskIoMgr::ScanRange* split_range = static_cast<ScanRangeMetadata*>(
         metadata_range_->meta_data())->original_split;
@@ -897,7 +900,24 @@ Status HdfsParquetScanner::ProcessFooter() {
   RETURN_IF_ERROR(ParquetMetadataUtils::ValidateFileVersion(file_metadata_, filename()));
 
   // IMPALA-3943: Do not throw an error for empty files for backwards compatibility.
-  if (file_metadata_.num_rows == 0) return Status::OK();
+  if (file_metadata_.num_rows == 0) {
+    // Warn if the num_rows is inconsistent with the row group metadata.
+    if (!file_metadata_.row_groups.empty()) {
+      bool has_non_empty_row_group = false;
+      for (const parquet::RowGroup& row_group : file_metadata_.row_groups) {
+        if (row_group.num_rows > 0) {
+          has_non_empty_row_group = true;
+          break;
+        }
+      }
+      // Warn if there is at least one non-empty row group.
+      if (has_non_empty_row_group) {
+        ErrorMsg msg(TErrorCode::PARQUET_ZERO_ROWS_IN_NON_EMPTY_FILE, filename());
+        state_->LogError(msg);
+      }
+    }
+    return Status::OK();
+  }
 
   // Parse out the created by application version string
   if (file_metadata_.__isset.created_by) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2a04b0e2/common/thrift/generate_error_codes.py
----------------------------------------------------------------------
diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index 216d1c1..131947a 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -286,6 +286,9 @@ error_codes = (
    "while spilling data to disk."),
 
   ("BUFFER_ALLOCATION_FAILED", 92, "Unexpected error allocating $0 byte buffer."),
+
+  ("PARQUET_ZERO_ROWS_IN_NON_EMPTY_FILE", 93, "File '$0' is corrupt: metadata indicates "
+   "a zero row count but there is at least one non-empty row group."),
 )
 
 import sys

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2a04b0e2/testdata/workloads/functional-query/queries/QueryTest/parquet-zero-rows.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-zero-rows.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-zero-rows.test
index e7de245..0dbf3e7 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet-zero-rows.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-zero-rows.test
@@ -17,6 +17,8 @@ select * from zero_rows_one_row_group
 ---- TYPES
 int
 ---- RESULTS
+---- ERRORS
+File '__HDFS_FILENAME__' is corrupt: metadata indicates a zero row count but there is at least one non-empty row group.
 ====
 ---- QUERY
 select count(*) from zero_rows_one_row_group
@@ -24,4 +26,6 @@ select count(*) from zero_rows_one_row_group
 bigint
 ---- RESULTS
 0
+---- ERRORS
+File '__HDFS_FILENAME__' is corrupt: metadata indicates a zero row count but there is at least one non-empty row group.
 ====
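
As a side note (not part of the patch): the inconsistency the new PARQUET_ZERO_ROWS_IN_NON_EMPTY_FILE warning targets, a file-level num_rows of 0 while some row group still reports rows, can also be spotted outside Impala. The following is an illustrative sketch that assumes pyarrow is available; it is only meant to show the same check against a Parquet footer.

    # Illustrative sketch only: flag Parquet files whose footer reports zero rows
    # while at least one row group is non-empty, mirroring the warning added above.
    import sys
    import pyarrow.parquet as pq

    def has_zero_rows_but_nonempty_row_group(path):
        md = pq.ParquetFile(path).metadata
        if md.num_rows != 0:
            return False
        # Scan row-group metadata for any group that still contains rows.
        return any(md.row_group(i).num_rows > 0 for i in range(md.num_row_groups))

    if __name__ == "__main__":
        for f in sys.argv[1:]:
            if has_zero_rows_but_nonempty_row_group(f):
                print("%s: metadata indicates zero rows but has a non-empty row group" % f)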


[15/32] incubator-impala git commit: IMPALA-2905: Handle coordinator fragment lifecycle like all others

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
index 4de9722..8144524 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test
@@ -28,6 +28,8 @@ order by
   brand_id
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=100]
 |  order by: dt.d_year ASC, sum(ss_ext_sales_price) DESC, item.i_brand_id ASC
 |
@@ -56,6 +58,8 @@ limit 100
    predicates: dt.d_moy = 12, (dt.d_date_sk BETWEEN 2451149 AND 2451179 OR dt.d_date_sk BETWEEN 2451514 AND 2451544 OR dt.d_date_sk BETWEEN 2451880 AND 2451910 OR dt.d_date_sk BETWEEN 2452245 AND 2452275 OR dt.d_date_sk BETWEEN 2452610 AND 2452640)
    runtime filters: RF000 -> dt.d_date_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: dt.d_year ASC, sum(ss_ext_sales_price) DESC, item.i_brand_id ASC
 |  limit: 100
@@ -98,6 +102,8 @@ limit 100
    predicates: dt.d_moy = 12, (dt.d_date_sk BETWEEN 2451149 AND 2451179 OR dt.d_date_sk BETWEEN 2451514 AND 2451544 OR dt.d_date_sk BETWEEN 2451880 AND 2451910 OR dt.d_date_sk BETWEEN 2452245 AND 2452275 OR dt.d_date_sk BETWEEN 2452610 AND 2452640)
    runtime filters: RF000 -> dt.d_date_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: dt.d_year ASC, sum(ss_ext_sales_price) DESC, item.i_brand_id ASC
 |  limit: 100
@@ -178,6 +184,8 @@ order by
   i_item_id
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=100]
 |  order by: i_item_id ASC
 |
@@ -220,6 +228,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_item_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_cdemo_sk, RF003 -> ss_promo_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_item_id ASC
 |  limit: 100
@@ -282,6 +292,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_item_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_cdemo_sk, RF003 -> ss_promo_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_item_id ASC
 |  limit: 100
@@ -430,6 +442,8 @@ order by
   s_store_name
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:TOP-N [LIMIT=100]
 |  order by: s_store_name ASC
 |
@@ -479,6 +493,8 @@ limit 100
    partitions=7/120 files=7 size=1.02MB
    runtime filters: RF001 -> store_sales.ss_store_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name ASC
 |  limit: 100
@@ -552,6 +568,8 @@ limit 100
    partitions=7/120 files=7 size=1.02MB
    runtime filters: RF001 -> store_sales.ss_store_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name ASC
 |  limit: 100
@@ -679,6 +697,8 @@ order by
   i_manufact
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:TOP-N [LIMIT=100]
 |  order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
 |
@@ -730,6 +750,8 @@ limit 100
    partitions=1/1 files=1 size=5.25MB
    runtime filters: RF004 -> ca_address_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
 |  limit: 100
@@ -801,6 +823,8 @@ limit 100
    partitions=1/1 files=1 size=5.25MB
    runtime filters: RF004 -> ca_address_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
 |  limit: 100
@@ -924,6 +948,8 @@ order by
   s_state
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=100]
 |  order by: i_item_id ASC, s_state ASC
 |
@@ -966,6 +992,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_item_sk, RF001 -> ss_store_sk, RF002 -> ss_sold_date_sk, RF003 -> ss_cdemo_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_item_id ASC, s_state ASC
 |  limit: 100
@@ -1028,6 +1056,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_item_sk, RF001 -> ss_store_sk, RF002 -> ss_sold_date_sk, RF003 -> ss_cdemo_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_item_id ASC, s_state ASC
 |  limit: 100
@@ -1151,6 +1181,8 @@ order by
   c_preferred_cust_flag desc
 limit 100000
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=100000]
 |  order by: c_last_name ASC, c_first_name ASC, c_salutation ASC, c_preferred_cust_flag DESC
 |
@@ -1195,6 +1227,8 @@ limit 100000
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> c_customer_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, c_first_name ASC, c_salutation ASC, c_preferred_cust_flag DESC
 |  limit: 100000
@@ -1257,6 +1291,8 @@ limit 100000
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> c_customer_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, c_first_name ASC, c_salutation ASC, c_preferred_cust_flag DESC
 |  limit: 100000
@@ -1362,6 +1398,8 @@ order by
   item.i_category
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=100]
 |  order by: sum(ss_ext_sales_price) DESC, dt.d_year ASC, item.i_category_id ASC, item.i_category ASC
 |
@@ -1389,6 +1427,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> store_sales.ss_sold_date_sk, RF001 -> store_sales.ss_item_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(ss_ext_sales_price) DESC, dt.d_year ASC, item.i_category_id ASC, item.i_category ASC
 |  limit: 100
@@ -1430,6 +1470,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> store_sales.ss_sold_date_sk, RF001 -> store_sales.ss_item_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(ss_ext_sales_price) DESC, dt.d_year ASC, item.i_category_id ASC, item.i_category ASC
 |  limit: 100
@@ -1514,6 +1556,8 @@ order by
   sat_sales
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=100]
 |  order by: s_store_name ASC, s_store_id ASC, sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) ASC
 |
@@ -1541,6 +1585,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name ASC, s_store_id ASC, sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) ASC
 |  limit: 100
@@ -1582,6 +1628,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name ASC, s_store_id ASC, sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END) ASC, sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) ASC
 |  limit: 100
@@ -1710,6 +1758,8 @@ order by
   ss_ticket_number
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 14:TOP-N [LIMIT=100]
 |  order by: c_last_name ASC, c_first_name ASC, ca_city ASC, bought_city ASC, ss_ticket_number ASC
 |
@@ -1770,6 +1820,8 @@ limit 100
    partitions=1/1 files=1 size=5.25MB
    runtime filters: RF000 -> current_addr.ca_address_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, c_first_name ASC, ca_city ASC, bought_city ASC, ss_ticket_number ASC
 |  limit: 100
@@ -1852,6 +1904,8 @@ limit 100
    partitions=1/1 files=1 size=5.25MB
    runtime filters: RF000 -> current_addr.ca_address_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, c_first_name ASC, ca_city ASC, bought_city ASC, ss_ticket_number ASC
 |  limit: 100
@@ -1984,6 +2038,8 @@ order by
   brand_id
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=100]
 |  order by: dt.d_year ASC, sum(ss_ext_sales_price) DESC, item.i_brand_id ASC
 |
@@ -2011,6 +2067,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> store_sales.ss_sold_date_sk, RF001 -> store_sales.ss_item_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: dt.d_year ASC, sum(ss_ext_sales_price) DESC, item.i_brand_id ASC
 |  limit: 100
@@ -2052,6 +2110,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> store_sales.ss_sold_date_sk, RF001 -> store_sales.ss_item_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: dt.d_year ASC, sum(ss_ext_sales_price) DESC, item.i_brand_id ASC
 |  limit: 100
@@ -2133,6 +2193,8 @@ order by
   i_manufact_id
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:TOP-N [LIMIT=100]
 |  order by: sum_sales ASC, i_manufact_id ASC
 |
@@ -2168,6 +2230,8 @@ limit 100
    predicates: d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 14:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum_sales ASC, i_manufact_id ASC
 |  limit: 100
@@ -2219,6 +2283,8 @@ limit 100
    predicates: d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 14:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum_sales ASC, i_manufact_id ASC
 |  limit: 100
@@ -2305,6 +2371,8 @@ order by
   i_brand_id
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=100]
 |  order by: sum(ss_ext_sales_price) DESC, i_brand_id ASC
 |
@@ -2332,6 +2400,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_sold_date_sk, RF001 -> ss_item_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(ss_ext_sales_price) DESC, i_brand_id ASC
 |  limit: 100
@@ -2373,6 +2443,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_sold_date_sk, RF001 -> ss_item_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(ss_ext_sales_price) DESC, i_brand_id ASC
 |  limit: 100
@@ -2505,6 +2577,8 @@ order by
   d_week_seq1
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 17:TOP-N [LIMIT=100]
 |  order by: s_store_name1 ASC, s_store_id1 ASC, d_week_seq1 ASC
 |
@@ -2576,6 +2650,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF002 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 32:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name1 ASC, s_store_id1 ASC, d_week_seq1 ASC
 |  limit: 100
@@ -2683,6 +2759,8 @@ limit 100
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF002 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 32:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name1 ASC, s_store_id1 ASC, d_week_seq1 ASC
 |  limit: 100
@@ -2855,6 +2933,8 @@ order by
   sum_sales
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:TOP-N [LIMIT=100]
 |  order by: i_manager_id ASC, avg_monthly_sales ASC, sum_sales ASC
 |
@@ -2900,6 +2980,8 @@ limit 100
    predicates: tpcds.date_dim.d_date_sk <= 2452275, tpcds.date_dim.d_date_sk >= 2451911, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_manager_id ASC, avg_monthly_sales ASC, sum_sales ASC
 |  limit: 100
@@ -2961,6 +3043,8 @@ limit 100
    predicates: tpcds.date_dim.d_date_sk <= 2452275, tpcds.date_dim.d_date_sk >= 2451911, d_month_seq IN (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
    runtime filters: RF001 -> d_date_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_manager_id ASC, avg_monthly_sales ASC, sum_sales ASC
 |  limit: 100
@@ -3090,6 +3174,8 @@ order by
   i_item_desc
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 14:TOP-N [LIMIT=100]
 |  order by: s_store_name ASC, i_item_desc ASC
 |
@@ -3150,6 +3236,8 @@ limit 100
    partitions=1/1 files=1 size=4.82MB
    runtime filters: RF000 -> i_item_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 26:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name ASC, i_item_desc ASC
 |  limit: 100
@@ -3242,6 +3330,8 @@ limit 100
    partitions=1/1 files=1 size=4.82MB
    runtime filters: RF000 -> i_item_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 26:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_store_name ASC, i_item_desc ASC
 |  limit: 100
@@ -3404,6 +3494,8 @@ order by
   ss_ticket_number
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 14:TOP-N [LIMIT=100]
 |  order by: c_last_name ASC, ss_ticket_number ASC
 |
@@ -3462,6 +3554,8 @@ limit 100
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> customer.c_current_addr_sk, RF001 -> c_customer_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, ss_ticket_number ASC
 |  limit: 100
@@ -3542,6 +3636,8 @@ limit 100
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> customer.c_current_addr_sk, RF001 -> c_customer_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, ss_ticket_number ASC
 |  limit: 100
@@ -3685,6 +3781,8 @@ order by
   cnt desc
 limit 1000
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=1000]
 |  order by: cnt DESC
 |
@@ -3728,6 +3826,8 @@ limit 1000
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> c_customer_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: cnt DESC
 |  limit: 1000
@@ -3791,6 +3891,8 @@ limit 1000
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> c_customer_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: cnt DESC
 |  limit: 1000
@@ -3914,6 +4016,8 @@ order by
   profit
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=100]
 |  order by: c_last_name ASC, c_first_name ASC, substr(s_city, 1, 30) ASC, profit ASC
 |
@@ -3957,6 +4061,8 @@ limit 100
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> c_customer_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, c_first_name ASC, substr(s_city, 1, 30) ASC, profit ASC
 |  limit: 100
@@ -4020,6 +4126,8 @@ limit 100
    partitions=1/1 files=1 size=12.60MB
    runtime filters: RF000 -> c_customer_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: c_last_name ASC, c_first_name ASC, substr(s_city, 1, 30) ASC, profit ASC
 |  limit: 100
@@ -4129,6 +4237,8 @@ where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales
 order by sum_sales - avg_monthly_sales, s_store_name
 limit 100) tmp2
 ---- PLAN
+PLAN-ROOT SINK
+|
 11:TOP-N [LIMIT=100]
 |  order by: sum_sales - avg_monthly_sales ASC, s_store_name ASC
 |
@@ -4173,6 +4283,8 @@ limit 100) tmp2
    partitions=24/120 files=24 size=4.16MB
    runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_item_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum_sales - avg_monthly_sales ASC, s_store_name ASC
 |  limit: 100
@@ -4233,6 +4345,8 @@ limit 100) tmp2
    partitions=24/120 files=24 size=4.16MB
    runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_item_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum_sales - avg_monthly_sales ASC, s_store_name ASC
 |  limit: 100
@@ -4321,6 +4435,8 @@ WHERE
   AND hd.hd_dep_count = 5
   AND s.s_store_name = 'ese'
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -4352,6 +4468,8 @@ WHERE
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss.ss_store_sk, RF001 -> ss.ss_hdemo_sk, RF002 -> ss.ss_sold_time_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -4394,6 +4512,8 @@ WHERE
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss.ss_store_sk, RF001 -> ss.ss_hdemo_sk, RF002 -> ss.ss_sold_time_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 12:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -4480,6 +4600,8 @@ order by
   revenueratio
 limit 1000
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:TOP-N [LIMIT=1000]
 |  order by: i_category ASC, i_class ASC, i_item_id ASC, i_item_desc ASC, sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) ASC
 |
@@ -4515,6 +4637,8 @@ limit 1000
    predicates: tpcds.date_dim.d_date_sk <= 2451941, tpcds.date_dim.d_date_sk >= 2451911, d_date <= '2001-01-31', d_date >= '2001-01-01'
    runtime filters: RF000 -> d_date_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 13:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_category ASC, i_class ASC, i_item_id ASC, i_item_desc ASC, sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) ASC
 |  limit: 1000
@@ -4564,6 +4688,8 @@ limit 1000
    predicates: tpcds.date_dim.d_date_sk <= 2451941, tpcds.date_dim.d_date_sk >= 2451911, d_date <= '2001-01-31', d_date >= '2001-01-01'
    runtime filters: RF000 -> d_date_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 13:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: i_category ASC, i_class ASC, i_item_id ASC, i_item_desc ASC, sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) ASC
 |  limit: 1000
@@ -4648,6 +4774,8 @@ select * from (
  having count(*) >= 10
  order by cnt limit 100) as t
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:TOP-N [LIMIT=100]
 |  order by: count(*) ASC
 |
@@ -4715,6 +4843,8 @@ select * from (
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF002 -> s.ss_item_sk, RF003 -> s.ss_sold_date_sk, RF005 -> s.ss_customer_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 31:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) ASC
 |  limit: 100
@@ -4820,6 +4950,8 @@ select * from (
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF002 -> s.ss_item_sk, RF003 -> s.ss_sold_date_sk, RF005 -> s.ss_customer_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 31:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) ASC
 |  limit: 100
@@ -5000,6 +5132,8 @@ with v1 as (
  limit 100
  ) as v3
 ---- PLAN
+PLAN-ROOT SINK
+|
 35:TOP-N [LIMIT=100]
 |  order by: sum_sales - avg_monthly_sales ASC, d_year ASC
 |
@@ -5136,6 +5270,8 @@ with v1 as (
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_item_sk
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 53:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum_sales - avg_monthly_sales ASC, d_year ASC
 |  limit: 100
@@ -5316,6 +5452,8 @@ with v1 as (
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_item_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 53:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum_sales - avg_monthly_sales ASC, d_year ASC
 |  limit: 100
@@ -5582,6 +5720,8 @@ from
    and   d_moy  = 11) all_sales
 order by promotions, total
 ---- PLAN
+PLAN-ROOT SINK
+|
 27:SORT
 |  order by: promotions ASC, total ASC
 |
@@ -5688,6 +5828,8 @@ order by promotions, total
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF005 -> ss_item_sk, RF004 -> ss_promo_sk, RF003 -> ss_sold_date_sk, RF001 -> ss_customer_sk, RF002 -> ss_store_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 27:SORT
 |  order by: promotions ASC, total ASC
 |
@@ -5972,6 +6114,8 @@ from
           (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
      and store.s_store_name = 'ese') s8
 ---- PLAN
+PLAN-ROOT SINK
+|
 70:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--63:AGGREGATE [FINALIZE]
@@ -6220,6 +6364,8 @@ from
    partitions=120/120 files=120 size=21.31MB
    runtime filters: RF002 -> ss_sold_time_sk, RF001 -> ss_hdemo_sk
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 70:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
 |  join table id: 00
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index fc6f4e2..d883b74 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -22,6 +22,8 @@ order by
   l_returnflag,
   l_linestatus
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -33,6 +35,8 @@ order by
    partitions=1/1 files=1 size=718.94MB
    predicates: l_shipdate <= '1998-09-02'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -53,6 +57,8 @@ order by
    partitions=1/1 files=1 size=718.94MB
    predicates: l_shipdate <= '1998-09-02'
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -120,6 +126,8 @@ order by
   p_partkey
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 18:TOP-N [LIMIT=100]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
@@ -195,6 +203,8 @@ limit 100
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 30:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |  limit: 100
@@ -298,6 +308,8 @@ limit 100
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF004 -> ps_suppkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 30:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |  limit: 100
@@ -459,6 +471,8 @@ order by
   o_orderdate
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=10]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
 |
@@ -488,6 +502,8 @@ limit 10
    predicates: l_shipdate > '1995-03-15'
    runtime filters: RF001 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
 |  limit: 10
@@ -531,6 +547,8 @@ limit 10
    predicates: l_shipdate > '1995-03-15'
    runtime filters: RF001 -> l_orderkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
 |  limit: 10
@@ -606,6 +624,8 @@ group by
 order by
   o_orderpriority
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: o_orderpriority ASC
 |
@@ -626,6 +646,8 @@ order by
    predicates: l_commitdate < l_receiptdate
    runtime filters: RF000 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_orderpriority ASC
 |
@@ -659,6 +681,8 @@ order by
    predicates: l_commitdate < l_receiptdate
    runtime filters: RF000 -> l_orderkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_orderpriority ASC
 |
@@ -723,6 +747,8 @@ group by
 order by
   revenue desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -775,6 +801,8 @@ order by
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -846,6 +874,8 @@ order by
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -949,6 +979,8 @@ where
   and l_discount between 0.05 and 0.07
   and l_quantity < 24
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice * l_discount)
 |
@@ -956,6 +988,8 @@ where
    partitions=1/1 files=1 size=718.94MB
    predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * l_discount)
 |
@@ -968,6 +1002,8 @@ where
    partitions=1/1 files=1 size=718.94MB
    predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * l_discount)
 |
@@ -1021,6 +1057,8 @@ order by
   cust_nation,
   l_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -1072,6 +1110,8 @@ order by
    predicates: l_shipdate <= '1996-12-31', l_shipdate >= '1995-01-01'
    runtime filters: RF003 -> l_suppkey, RF004 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -1144,6 +1184,8 @@ order by
    predicates: l_shipdate <= '1996-12-31', l_shipdate >= '1995-01-01'
    runtime filters: RF003 -> l_suppkey, RF004 -> l_orderkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -1276,6 +1318,8 @@ group by
 order by
   o_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:SORT
 |  order by: o_year ASC
 |
@@ -1343,6 +1387,8 @@ order by
    partitions=1/1 files=1 size=23.08MB
    runtime filters: RF002 -> c_nationkey, RF003 -> c_custkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 28:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_year ASC
 |
@@ -1437,6 +1483,8 @@ order by
    partitions=1/1 files=1 size=23.08MB
    runtime filters: RF002 -> c_nationkey, RF003 -> c_custkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 28:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_year ASC
 |
@@ -1593,6 +1641,8 @@ order by
   nation,
   o_year desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: nation ASC, o_year DESC
 |
@@ -1642,6 +1692,8 @@ order by
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF001 -> l_partkey, RF002 -> l_suppkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: nation ASC, o_year DESC
 |
@@ -1712,6 +1764,8 @@ order by
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF001 -> l_partkey, RF002 -> l_suppkey, RF003 -> l_suppkey, RF004 -> l_orderkey, RF005 -> l_partkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: nation ASC, o_year DESC
 |
@@ -1838,6 +1892,8 @@ order by
   revenue desc
 limit 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:TOP-N [LIMIT=20]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -1873,6 +1929,8 @@ limit 20
    partitions=1/1 files=1 size=23.08MB
    runtime filters: RF000 -> c_nationkey, RF001 -> c_custkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 14:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |  limit: 20
@@ -1924,6 +1982,8 @@ limit 20
    partitions=1/1 files=1 size=23.08MB
    runtime filters: RF000 -> c_nationkey, RF001 -> c_custkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 14:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |  limit: 20
@@ -2023,6 +2083,8 @@ where
 order by
   value desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:SORT
 |  order by: value DESC
 |
@@ -2076,6 +2138,8 @@ order by
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF001 -> ps_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: value DESC
 |
@@ -2153,6 +2217,8 @@ order by
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF001 -> ps_suppkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: value DESC
 |
@@ -2281,6 +2347,8 @@ group by
 order by
   l_shipmode
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: l_shipmode ASC
 |
@@ -2300,6 +2368,8 @@ order by
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> o_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: l_shipmode ASC
 |
@@ -2332,6 +2402,8 @@ order by
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> o_orderkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: l_shipmode ASC
 |
@@ -2391,6 +2463,8 @@ order by
   custdist desc,
   c_count desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SORT
 |  order by: count(*) DESC, c_count DESC
 |
@@ -2414,6 +2488,8 @@ order by
    predicates: NOT o_comment LIKE '%special%requests%'
    runtime filters: RF000 -> o_custkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC, c_count DESC
 |
@@ -2450,6 +2526,8 @@ order by
    predicates: NOT o_comment LIKE '%special%requests%'
    runtime filters: RF000 -> o_custkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC, c_count DESC
 |
@@ -2506,6 +2584,8 @@ where
   and l_shipdate >= '1995-09-01'
   and l_shipdate < '1995-10-01'
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
 |
@@ -2521,6 +2601,8 @@ where
    predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum:merge(l_extendedprice * (1 - l_discount))
 |
@@ -2545,6 +2627,8 @@ where
    predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 07:AGGREGATE [FINALIZE]
 |  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum:merge(l_extendedprice * (1 - l_discount))
 |
@@ -2606,6 +2690,8 @@ where
 order by
   s_suppkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:SORT
 |  order by: s_suppkey ASC
 |
@@ -2639,6 +2725,8 @@ order by
    partitions=1/1 files=1 size=1.33MB
    runtime filters: RF000 -> s_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_suppkey ASC
 |
@@ -2696,6 +2784,8 @@ order by
    partitions=1/1 files=1 size=1.33MB
    runtime filters: RF000 -> s_suppkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_suppkey ASC
 |
@@ -2794,6 +2884,8 @@ order by
   p_type,
   p_size
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -2823,6 +2915,8 @@ order by
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF000 -> ps_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -2864,6 +2958,8 @@ order by
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF000 -> ps_partkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 12:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -2933,6 +3029,8 @@ where
       l_partkey = p_partkey
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice)
 |
@@ -2961,6 +3059,8 @@ where
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> tpch.lineitem.l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice)
 |
@@ -3004,6 +3104,8 @@ where
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> tpch.lineitem.l_partkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 12:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice)
 |
@@ -3092,6 +3194,8 @@ order by
   o_orderdate
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:TOP-N [LIMIT=100]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |
@@ -3130,6 +3234,8 @@ limit 100
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> tpch.lineitem.l_orderkey, RF002 -> l_orderkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |  limit: 100
@@ -3190,6 +3296,8 @@ limit 100
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> tpch.lineitem.l_orderkey, RF002 -> l_orderkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |  limit: 100
@@ -3300,6 +3408,8 @@ where
     )
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice * (1 - l_discount))
 |
@@ -3315,6 +3425,8 @@ where
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * (1 - l_discount))
 |
@@ -3337,6 +3449,8 @@ where
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> l_partkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * (1 - l_discount))
 |
@@ -3403,6 +3517,8 @@ where
 order by
   s_name
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:SORT
 |  order by: s_name ASC
 |
@@ -3448,6 +3564,8 @@ order by
    predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
    runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_name ASC
 |
@@ -3512,6 +3630,8 @@ order by
    predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
    runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF001 -> tpch.lineitem.l_partkey, RF002 -> tpch.lineitem.l_suppkey
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 18:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_name ASC
 |
@@ -3635,6 +3755,8 @@ order by
   s_name
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:TOP-N [LIMIT=100]
 |  order by: count(*) DESC, s_name ASC
 |
@@ -3688,6 +3810,8 @@ limit 100
    partitions=1/1 files=1 size=718.94MB
    predicates: l3.l_receiptdate > l3.l_commitdate
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC, s_name ASC
 |  limit: 100
@@ -3763,6 +3887,8 @@ limit 100
    partitions=1/1 files=1 size=718.94MB
    predicates: l3.l_receiptdate > l3.l_commitdate
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC, s_name ASC
 |  limit: 100
@@ -3895,6 +4021,8 @@ group by
 order by
   cntrycode
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: cntrycode ASC
 |
@@ -3922,6 +4050,8 @@ order by
 03:SCAN HDFS [tpch.orders]
    partitions=1/1 files=1 size=162.56MB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 15:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: cntrycode ASC
 |
@@ -3969,6 +4099,8 @@ order by
 03:SCAN HDFS [tpch.orders]
    partitions=1/1 files=1 size=162.56MB
 ---- PARALLELPLANS
+PLAN-ROOT SINK
+|
 15:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: cntrycode ASC
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index 6c70c35..0b7f35d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -21,6 +21,8 @@ order by
   l_returnflag,
   l_linestatus
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -77,6 +79,8 @@ order by
   p_partkey
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 18:TOP-N [LIMIT=100]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
@@ -155,6 +159,8 @@ order by
   o_orderdate
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=10]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 2) DESC, o_orderdate ASC
 |
@@ -200,6 +206,8 @@ group by
 order by
   o_orderpriority
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: o_orderpriority ASC
 |
@@ -242,6 +250,8 @@ group by
 order by
   revenue desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 2) DESC
 |
@@ -289,6 +299,8 @@ where
   and l_discount between 0.05 and 0.07
   and l_quantity < 24
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice * l_discount)
 |
@@ -335,6 +347,8 @@ order by
   cust_nation,
   l_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -410,6 +424,8 @@ group by
 order by
   o_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:SORT
 |  order by: o_year ASC
 |
@@ -490,6 +506,8 @@ order by
   nation,
   o_year desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: nation ASC, o_year DESC
 |
@@ -560,6 +578,8 @@ order by
   revenue desc
 limit 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:TOP-N [LIMIT=20]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 1) DESC
 |
@@ -621,6 +641,8 @@ where
 order by
   value desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:SORT
 |  order by: value DESC
 |
@@ -690,6 +712,8 @@ group by
 order by
   l_shipmode
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: l_shipmode ASC
 |
@@ -728,6 +752,8 @@ order by
   custdist desc,
   c_count desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SORT
 |  order by: count(*) DESC, c_count DESC
 |
@@ -762,6 +788,8 @@ where
   and l_shipdate >= '1995-09-01'
   and l_shipdate < '1995-10-01'
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
 |
@@ -805,6 +833,8 @@ where
 order by
   s_suppkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:SORT
 |  order by: s_suppkey ASC
 |
@@ -865,6 +895,8 @@ order by
   p_type,
   p_size
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -908,6 +940,8 @@ where
       l_partkey = p_partkey
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice)
 |
@@ -965,6 +999,8 @@ order by
   o_orderdate
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:TOP-N [LIMIT=100]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |
@@ -1031,6 +1067,8 @@ where
     )
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice * (1 - l_discount))
 |
@@ -1080,6 +1118,8 @@ where
 order by
   s_name
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:SORT
 |  order by: s_name ASC
 |
@@ -1155,6 +1195,8 @@ order by
   s_name
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:TOP-N [LIMIT=100]
 |  order by: count(*) DESC, s_name ASC
 |
@@ -1231,6 +1273,8 @@ group by
 order by
   cntrycode
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: cntrycode ASC
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index caa3420..3ecf764 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -22,6 +22,8 @@ order by
   l_returnflag,
   l_linestatus
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -33,6 +35,8 @@ order by
    partitions=1/1 files=4 size=577.87MB
    predicates: l_shipdate <= '1998-09-02'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 05:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -96,6 +100,8 @@ order by
   p_partkey
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 26:TOP-N [LIMIT=100]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
@@ -173,6 +179,8 @@ limit 100
    predicates: !empty(s.s_partsupps)
    runtime filters: RF002 -> s_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 33:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |  limit: 100
@@ -292,6 +300,8 @@ order by
   o_orderdate
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=10]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
 |
@@ -321,6 +331,8 @@ limit 10
    predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
    predicates on l: l_shipdate > '1995-03-15'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 13:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
 |  limit: 10
@@ -384,6 +396,8 @@ group by
 order by
   o_orderpriority
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:SORT
 |  order by: o_orderpriority ASC
 |
@@ -413,6 +427,8 @@ order by
    predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
    predicates on o_lineitems: l_commitdate < l_receiptdate
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 13:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_orderpriority ASC
 |
@@ -475,6 +491,8 @@ group by
 order by
   revenue desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 18:SORT
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -528,6 +546,8 @@ order by
    predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
    runtime filters: RF000 -> c.c_nationkey, RF001 -> c_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 23:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -606,6 +626,8 @@ where
   and l_discount between 0.05 and 0.07
   and l_quantity < 24
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice * l_discount)
 |
@@ -613,6 +635,8 @@ where
    partitions=1/1 files=4 size=292.35MB
    predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * l_discount)
 |
@@ -664,6 +688,8 @@ order by
   cust_nation,
   l_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:SORT
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -716,6 +742,8 @@ order by
    predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
    runtime filters: RF000 -> c_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 22:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -820,6 +848,8 @@ group by
 order by
   o_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 22:SORT
 |  order by: o_year ASC
 |
@@ -886,6 +916,8 @@ order by
    predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
    runtime filters: RF001 -> c_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 29:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_year ASC
 |
@@ -1001,6 +1033,8 @@ order by
   nation,
   o_year desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:SORT
 |  order by: nation ASC, o_year DESC
 |
@@ -1050,6 +1084,8 @@ order by
    partitions=1/1 files=4 size=577.87MB
    predicates: !empty(o.o_lineitems)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 22:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: nation ASC, o_year DESC
 |
@@ -1148,6 +1184,8 @@ order by
   revenue desc
 limit 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:TOP-N [LIMIT=20]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -1185,6 +1223,8 @@ limit 20
    predicates on l: l_returnflag = 'R'
    runtime filters: RF000 -> c_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 16:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |  limit: 20
@@ -1268,6 +1308,8 @@ where
 order by
   value desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 17:SORT
 |  order by: value DESC
 |
@@ -1323,6 +1365,8 @@ order by
    predicates: !empty(s.s_partsupps)
    runtime filters: RF000 -> s_nationkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 25:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: value DESC
 |
@@ -1428,6 +1472,8 @@ group by
 order by
   l_shipmode
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:SORT
 |  order by: l_shipmode ASC
 |
@@ -1448,6 +1494,8 @@ order by
    predicates: !empty(o.o_lineitems)
    predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: l_shipmode ASC
 |
@@ -1499,6 +1547,8 @@ order by
   custdist desc,
   c_count desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: count(*) DESC, c_count DESC
 |
@@ -1522,6 +1572,8 @@ order by
    partitions=1/1 files=4 size=577.87MB
    predicates on c_orders: (NOT o_comment LIKE '%special%requests%')
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC, c_count DESC
 |
@@ -1576,6 +1628,8 @@ where
   and l_shipdate >= '1995-09-01'
   and l_shipdate < '1995-10-01'
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
 |
@@ -1591,6 +1645,8 @@ where
    predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
    runtime filters: RF000 -> l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum:merge(l_extendedprice * (1 - l_discount))
 |
@@ -1646,6 +1702,8 @@ where
 order by
   s_suppkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:SORT
 |  order by: s_suppkey ASC
 |
@@ -1679,6 +1737,8 @@ order by
    predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
    runtime filters: RF000 -> l.l_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_suppkey ASC
 |
@@ -1763,6 +1823,8 @@ order by
   p_type,
   p_size
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:SORT
 |  order by: count(s_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -1792,6 +1854,8 @@ order by
    partitions=1/1 files=1 size=111.08MB
    predicates: NOT s_comment LIKE '%Customer%Complaints%', !empty(s.s_partsupps)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 13:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(s_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -1851,6 +1915,8 @@ where
       l_partkey = p_partkey
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice)
 |
@@ -1879,6 +1945,8 @@ where
    partitions=1/1 files=4 size=577.87MB
    runtime filters: RF000 -> l.l_partkey, RF001 -> l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice)
 |
@@ -1942,6 +2010,8 @@ order by
   o_orderdate
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:TOP-N [LIMIT=100]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |
@@ -1969,6 +2039,8 @@ limit 100
    partitions=1/1 files=4 size=577.87MB
    predicates: !empty(c.c_orders)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |  limit: 100
@@ -2038,6 +2110,8 @@ where
     )
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(l_extendedprice * (1 - l_discount))
 |
@@ -2053,6 +2127,8 @@ where
    partitions=1/1 files=4 size=577.87MB
    runtime filters: RF000 -> l_partkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum:merge(l_extendedprice * (1 - l_discount))
 |
@@ -2110,6 +2186,8 @@ where
 order by
   s_name
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:SORT
 |  order by: s_name ASC
 |
@@ -2158,6 +2236,8 @@ order by
    predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
    runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 21:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: s_name ASC
 |
@@ -2267,6 +2347,8 @@ order by
   s_name
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 20:TOP-N [LIMIT=100]
 |  order by: count(*) DESC, s_name ASC
 |
@@ -2326,6 +2408,8 @@ limit 100
    predicates on l1: l1.l_receiptdate > l1.l_commitdate
    predicates on l3: l3.l_receiptdate > l3.l_commitdate
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 25:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: count(*) DESC, s_name ASC
 |  limit: 100
@@ -2434,6 +2518,8 @@ group by
 order by
   cntrycode
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:SORT
 |  order by: cntrycode ASC
 |
@@ -2463,6 +2549,8 @@ order by
    partitions=1/1 files=4 size=577.87MB
    predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 15:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: cntrycode ASC
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index 96409e2..c4b23f5 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -22,6 +22,8 @@ order by
   l_returnflag,
   l_linestatus
 ---- PLAN
+PLAN-ROOT SINK
+|
 02:SORT
 |  order by: l_returnflag ASC, l_linestatus ASC
 |
@@ -80,6 +82,8 @@ order by
   p_partkey
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 18:TOP-N [LIMIT=100]
 |  order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
 |
@@ -181,6 +185,8 @@ order by
   o_orderdate
 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=10]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
 |
@@ -234,6 +240,8 @@ group by
 order by
   o_orderpriority
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: o_orderpriority ASC
 |
@@ -281,6 +289,8 @@ group by
 order by
   revenue desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -345,6 +355,8 @@ where
   and l_discount between 0.05 and 0.07
   and l_quantity < 24
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  output: sum(tpch.lineitem.l_extendedprice * tpch.lineitem.l_discount)
 |
@@ -393,6 +405,8 @@ order by
   cust_nation,
   l_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: supp_nation ASC, cust_nation ASC, l_year ASC
 |
@@ -484,6 +498,8 @@ group by
 order by
   o_year
 ---- PLAN
+PLAN-ROOT SINK
+|
 16:SORT
 |  order by: o_year ASC
 |
@@ -585,6 +601,8 @@ order by
   nation,
   o_year desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:SORT
 |  order by: nation ASC, o_year DESC
 |
@@ -670,6 +688,8 @@ order by
   revenue desc
 limit 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:TOP-N [LIMIT=20]
 |  order by: sum(l_extendedprice * (1 - l_discount)) DESC
 |
@@ -741,6 +761,8 @@ where
 order by
   value desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 13:SORT
 |  order by: value DESC
 |
@@ -825,6 +847,8 @@ group by
 order by
   l_shipmode
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:SORT
 |  order by: l_shipmode ASC
 |
@@ -867,6 +891,8 @@ order by
   custdist desc,
   c_count desc
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:SORT
 |  order by: count(*) DESC, c_count DESC
 |
@@ -906,6 +932,8 @@ where
   and l_shipdate >= '1995-09-01'
   and l_shipdate < '1995-10-01'
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(CASE WHEN tpch.part.p_type LIKE 'PROMO%' THEN tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount) ELSE 0.0 END), sum(tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount))
 |
@@ -954,6 +982,8 @@ where
 order by
   s_suppkey
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:SORT
 |  order by: s_suppkey ASC
 |
@@ -1020,6 +1050,8 @@ order by
   p_type,
   p_size
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
 |
@@ -1069,6 +1101,8 @@ where
       l_partkey = p_partkey
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  output: sum(tpch.lineitem.l_extendedprice)
 |
@@ -1134,6 +1168,8 @@ order by
   o_orderdate
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:TOP-N [LIMIT=100]
 |  order by: o_totalprice DESC, o_orderdate ASC
 |
@@ -1210,6 +1246,8 @@ where
     )
   )
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  output: sum(tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount))
 |
@@ -1265,6 +1303,8 @@ where
 order by
   s_name
 ---- PLAN
+PLAN-ROOT SINK
+|
 10:SORT
 |  order by: s_name ASC
 |
@@ -1353,6 +1393,8 @@ order by
   s_name
 limit 100
 ---- PLAN
+PLAN-ROOT SINK
+|
 12:TOP-N [LIMIT=100]
 |  order by: count(*) DESC, s_name ASC
 |
@@ -1443,6 +1485,8 @@ group by
 order by
   cntrycode
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:SORT
 |  order by: cntrycode ASC
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/union.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/union.test b/testdata/workloads/functional-planner/queries/PlannerTest/union.test
index 84053a9..4baf6fe 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/union.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/union.test
@@ -8,6 +8,8 @@ select * from
   on (a.month = b.month)
 where b.month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: month = b.month
 |  runtime filters: RF000 <- b.month
@@ -25,6 +27,8 @@ where b.month = 1
    partitions=2/24 files=2 size=40.32KB
    runtime filters: RF000 -> functional.alltypes.month
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
@@ -53,6 +57,8 @@ select * from functional.alltypestiny where year=2009 and month=1
 union all
 select * from functional.alltypestiny where year=2009 and month=2
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--03:SCAN HDFS [functional.alltypestiny]
@@ -71,6 +77,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -91,6 +99,8 @@ select * from functional.alltypestiny where year=2009 and month=1 order by int_c
 union all
 select * from functional.alltypestiny where year=2009 and month=2 limit 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--04:SCAN HDFS [functional.alltypestiny]
@@ -114,6 +124,8 @@ NODE 2:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--07:EXCHANGE [UNPARTITIONED]
@@ -147,6 +159,8 @@ select * from functional.alltypestiny where year=2009 and month=1
 union distinct
 select * from functional.alltypestiny where year=2009 and month=2
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -168,6 +182,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
@@ -198,6 +214,8 @@ select * from functional.alltypestiny where year=2009 and month=1
 union all
 select 1,false,1,1,1,10,1.1,10.1,'01/01/09','1',cast('2009-01-01 00:01:00' as timestamp),2009,1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=2
 |
@@ -212,6 +230,8 @@ NODE 1:
 NODE 2:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -232,6 +252,8 @@ select * from functional.alltypestiny where year=2009 and month=1
 union distinct
 select 1,false,1,1,1,10,1.1,10.1,'01/01/09','1',cast('2009-01-01 00:01:00' as timestamp),2009,1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -249,6 +271,8 @@ NODE 1:
 NODE 2:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 05:AGGREGATE [FINALIZE]
@@ -279,6 +303,8 @@ union distinct
 (select * from functional.alltypestiny where year=2009 and month=2)
 order by 3 limit 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:TOP-N [LIMIT=3]
 |  order by: tinyint_col ASC
 |
@@ -308,6 +334,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: tinyint_col ASC
 |  limit: 3
@@ -347,6 +375,8 @@ union all
 (select * from functional.alltypestiny where year=2009 and month=2)
 order by 3,4 limit 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:TOP-N [LIMIT=3]
 |  order by: tinyint_col ASC, smallint_col ASC
 |
@@ -378,6 +408,8 @@ NODE 5:
 NODE 6:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: tinyint_col ASC, smallint_col ASC
 |  limit: 3
@@ -419,6 +451,8 @@ union all
 (select * from functional.alltypestiny where year=2009 and month=2)
 order by 3,4 limit 4
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:TOP-N [LIMIT=4]
 |  order by: tinyint_col ASC, smallint_col ASC
 |
@@ -450,6 +484,8 @@ NODE 3:
 NODE 6:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: tinyint_col ASC, smallint_col ASC
 |  limit: 4
@@ -488,6 +524,8 @@ NODE 6:
 union all
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--03:SCAN HDFS [functional.alltypestiny]
@@ -506,6 +544,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -526,6 +566,8 @@ union all
    union all
    select * from functional.alltypestiny where year=2009 and month=2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--03:SCAN HDFS [functional.alltypestiny]
@@ -544,6 +586,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 04:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -564,6 +608,8 @@ NODE 3:
 union distinct
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -585,6 +631,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
@@ -613,6 +661,8 @@ union distinct
    union distinct
    select * from functional.alltypestiny where year=2009 and month=2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -634,6 +684,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
@@ -663,6 +715,8 @@ NODE 3:
 union all
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--05:SCAN HDFS [functional.alltypestiny]
@@ -686,6 +740,8 @@ NODE 3:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -717,6 +773,8 @@ union all
    union distinct
    select * from functional.alltypestiny where year=2009 and month=2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--05:AGGREGATE [FINALIZE]
@@ -740,6 +798,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -773,6 +833,8 @@ NODE 4:
 union all
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--06:SCAN HDFS [functional.alltypestiny]
@@ -801,6 +863,8 @@ NODE 4:
 NODE 6:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -837,6 +901,8 @@ union all
    union all
    select * from functional.alltypestiny where year=2009 and month=2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--06:AGGREGATE [FINALIZE]
@@ -865,6 +931,8 @@ NODE 4:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -902,6 +970,8 @@ union all
    (select * from functional.alltypestiny where year=2009 and month=2)
    limit 10)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--06:UNION
@@ -933,6 +1003,8 @@ NODE 4:
 NODE 7:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -976,6 +1048,8 @@ NODE 7:
 union all
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--05:SCAN HDFS [functional.alltypestiny]
@@ -999,6 +1073,8 @@ NODE 3:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -1032,6 +1108,8 @@ union all
    (select * from functional.alltypestiny where year=2009 and month=2)
    order by 3 limit 3)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--05:TOP-N [LIMIT=3]
@@ -1055,6 +1133,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -1087,6 +1167,8 @@ NODE 4:
 union distinct
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1108,6 +1190,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
@@ -1137,6 +1221,8 @@ union distinct
    union all
    select * from functional.alltypestiny where year=2009 and month=2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1158,6 +1244,8 @@ NODE 2:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
@@ -1189,6 +1277,8 @@ NODE 3:
 union distinct
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1215,6 +1305,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 07:AGGREGATE [FINALIZE]
@@ -1249,6 +1341,8 @@ union distinct
    union all
    select * from functional.alltypestiny where year=2009 and month=2)
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1275,6 +1369,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 07:AGGREGATE [FINALIZE]
@@ -1308,6 +1404,8 @@ NODE 4:
 union distinct
 select * from functional.alltypestiny where year=2009 and month=1
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1334,6 +1432,8 @@ NODE 3:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:EXCHANGE [UNPARTITIONED]
 |
 10:AGGREGATE [FINALIZE]
@@ -1375,6 +1475,8 @@ union distinct
    (select * from functional.alltypestiny where year=2009 and month=2)
    order by 3 limit 3)
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1401,6 +1503,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:EXCHANGE [UNPARTITIONED]
 |
 10:AGGREGATE [FINALIZE]
@@ -1444,6 +1548,8 @@ union all
          union all
          select * from functional.alltypestiny where year=2009 and month=3)))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--05:SCAN HDFS [functional.alltypestiny]
@@ -1472,6 +1578,8 @@ NODE 4:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=3/090301.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -1502,6 +1610,8 @@ union distinct
          union distinct
          select * from functional.alltypestiny where year=2009 and month=3)))
 ---- PLAN
+PLAN-ROOT SINK
+|
 06:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1533,6 +1643,8 @@ NODE 4:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=3/090301.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 08:AGGREGATE [FINALIZE]
@@ -1572,6 +1684,8 @@ union all
          (select * from functional.alltypestiny where year=2009 and month=3)
          order by 3 limit 3)))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--10:AGGREGATE [FINALIZE]
@@ -1613,6 +1727,8 @@ NODE 6:
 NODE 7:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=3/090301.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 17:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -1673,6 +1789,8 @@ union distinct
          (select * from functional.alltypestiny where year=2009 and month=3)
          order by 3 limit 3)))
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |
@@ -1712,6 +1830,8 @@ NODE 5:
 NODE 6:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=3/090301.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 16:EXCHANGE [UNPARTITIONED]
 |
 15:AGGREGATE [FINALIZE]
@@ -1779,6 +1899,8 @@ union all
    (select * from functional.alltypestiny where year=2009 and month=5)
    order by 3 limit 3)
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:UNION
 |
 |--08:AGGREGATE [FINALIZE]
@@ -1841,6 +1963,8 @@ NODE 13:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=4/090401.txt 0:115
 NODE 14:
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 22:EXCHANGE [UNPARTITIONED]
 |
 09:UNION
@@ -1913,6 +2037,8 @@ union all
 (select * from functional.alltypestiny where year=2009 and month=2)
 order by 3 limit 5
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:TOP-N [LIMIT=5]
 |  order by: tinyint_col ASC
 |
@@ -1936,6 +2062,8 @@ NODE 3:
 NODE 4:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 06:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: tinyint_col ASC
 |  limit: 5
@@ -1965,6 +2093,8 @@ union distinct
 (select * from functional.alltypestiny where year=2009 and month=2)
 order by 3 limit 3
 ---- PLAN
+PLAN-ROOT SINK
+|
 07:TOP-N [LIMIT=3]
 |  order by: tinyint_col ASC
 |
@@ -1994,6 +2124,8 @@ NODE 3:
 NODE 5:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=2/090201.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 12:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: tinyint_col ASC
 |  limit: 3
@@ -2039,6 +2171,8 @@ select x.* from
    from functional.alltypestiny where year=2009 and month=1 group by 1, 2) x
 where x.int_col < 5 and x.bool_col = false
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--04:AGGREGATE [FINALIZE]
@@ -2062,6 +2196,8 @@ NODE 1:
 NODE 3:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypestiny/year=2009/month=1/090101.txt 0:115
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 09:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -2101,9 +2237,13 @@ select 2, 'b', NULL, 20.f
 union all
 select 3, 'c', NULL, 30.f
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=3
 ====
@@ -2114,12 +2254,16 @@ select 2, 'b', NULL, 20.0f
 union distinct
 select 3, 'c', NULL, 30.0f
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: 1, 'a', null, f
 |
 00:UNION
    constant-operands=3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: 1, 'a', null, f
 |
@@ -2135,6 +2279,8 @@ select 3, 'c', NULL, 30.f
 union all
 select int_col, string_col, bool_col, float_col from functional.alltypestiny
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:UNION
 |
 |--02:AGGREGATE [FINALIZE]
@@ -2149,6 +2295,8 @@ select int_col, string_col, bool_col, float_col from functional.alltypestiny
 04:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:UNION
@@ -2183,6 +2331,8 @@ select int_col from functional.alltypestiny where year=2009 and month=1
 union all
 select 503
 ---- PLAN
+PLAN-ROOT SINK
+|
 05:UNION
 |  constant-operands=1
 |
@@ -2201,6 +2351,8 @@ select 503
 01:SCAN HDFS [functional.alltypestiny]
    partitions=1/4 files=1 size=115B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 10:EXCHANGE [UNPARTITIONED]
 |
 05:UNION
@@ -2239,9 +2391,13 @@ values(2, 'b', NULL, 20.f)
 union all
 values(3, 'c', NULL, 30.f)
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=3
 ====
@@ -2252,12 +2408,16 @@ values(2, 'b', NULL, 20.f)
 union distinct
 values(3, 'c', NULL, 30.f)
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: 1, 'a', null, f
 |
 00:UNION
    constant-operands=3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:AGGREGATE [FINALIZE]
 |  group by: 1, 'a', null, f
 |
@@ -2273,6 +2433,8 @@ values(3, 'c', NULL, 30.f)
 union all
 select int_col, string_col, bool_col, float_col from functional.alltypessmall
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:UNION
 |
 |--02:AGGREGATE [FINALIZE]
@@ -2287,6 +2449,8 @@ select int_col, string_col, bool_col, float_col from functional.alltypessmall
 04:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 07:EXCHANGE [UNPARTITIONED]
 |
 03:UNION
@@ -2329,6 +2493,8 @@ select count(*) from (
       cast('2009-01-01 00:02:00.10' as timestamp),2009,1
 ) x
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:AGGREGATE [FINALIZE]
 |  output: count(*)
 |
@@ -2344,6 +2510,8 @@ select count(*) from (
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(*)
 |
@@ -2377,6 +2545,8 @@ select x.int_col, x.bool_col, x.month from
    select * from functional.alltypestiny where year=2009) x
 where x.int_col < 5 and x.bool_col = false and x.month = 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HDFS [functional.alltypestiny]
@@ -2387,6 +2557,8 @@ where x.int_col < 5 and x.bool_col = false and x.month = 1
    partitions=1/4 files=1 size=115B
    predicates: functional.alltypestiny.int_col < 5, functional.alltypestiny.bool_col = FALSE
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -2409,6 +2581,8 @@ select 1 from
    select 1, tinyint_col, 3, bigint_col from functional.alltypessmall) t
 where a + b < 100 and c > 20 and d > 50
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HDFS [functional.alltypes]
@@ -2428,8 +2602,12 @@ select * from
    select 10, 20) t
 where a > b
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
 ====
 # Test union merging only partitioned inputs.
@@ -2445,6 +2623,8 @@ select a.id, a.bigint_col
 union all
 select 1000, 2000
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=1
 |
@@ -2469,6 +2649,8 @@ select 1000, 2000
 01:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 11:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -2512,6 +2694,8 @@ select id, bigint_col from functional.alltypessmall order by id limit 10
 union all
 select 1000, 2000
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=1
 |
@@ -2527,6 +2711,8 @@ select 1000, 2000
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=1
 |
@@ -2568,6 +2754,8 @@ select a.id, a.bigint_col
 union all
 select 1000, 2000
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  constant-operands=1
 |
@@ -2604,6 +2792,8 @@ select 1000, 2000
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 20:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -2670,6 +2860,8 @@ select 1000, 2000
 # have explain_level=1
 select * from tpch.lineitem UNION ALL (select * from tpch.lineitem) LIMIT 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |  limit: 1
 |
@@ -2681,6 +2873,8 @@ select * from tpch.lineitem UNION ALL (select * from tpch.lineitem) LIMIT 1
 ====
 select l_orderkey from tpch.lineitem UNION DISTINCT (select l_orderkey from tpch.lineitem) LIMIT 1
 ---- PLAN
+PLAN-ROOT SINK
+|
 03:AGGREGATE [FINALIZE]
 |  group by: l_orderkey
 |  limit: 1
@@ -2705,6 +2899,8 @@ select * from
    (select * from functional.alltypestiny order by id limit 20 offset 10)) v
 where v.id < 10 and v.int_col > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--08:SELECT
@@ -2744,6 +2940,8 @@ select * from
    (select * from functional.alltypestiny order by id limit 20 offset 10)) v
 where v.id < 10 and v.int_col > 20
 ---- PLAN
+PLAN-ROOT SINK
+|
 09:AGGREGATE [FINALIZE]
 |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/values.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/values.test b/testdata/workloads/functional-planner/queries/PlannerTest/values.test
index 2eec954..a27c793 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/values.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/values.test
@@ -1,19 +1,27 @@
 values(1+1, 2, 5.0, 'a')
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=1
 ====
 values(1+1, 2, 5.0, 'a') order by 1 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=10]
 |  order by: 1 + 1 ASC
 |
 00:UNION
    constant-operands=1
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=10]
 |  order by: 1 + 1 ASC
 |
@@ -22,20 +30,28 @@ values(1+1, 2, 5.0, 'a') order by 1 limit 10
 ====
 values((1+1, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c'))
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 00:UNION
    constant-operands=3
 ====
 values((1+1, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c')) order by 1 limit 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=10]
 |  order by: 1 + 1 ASC
 |
 00:UNION
    constant-operands=3
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:TOP-N [LIMIT=10]
 |  order by: 1 + 1 ASC
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f61397f/testdata/workloads/functional-planner/queries/PlannerTest/views.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/views.test b/testdata/workloads/functional-planner/queries/PlannerTest/views.test
index 50bee61..a6322e6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/views.test
@@ -1,9 +1,13 @@
 # Basic test with a view.
 select int_col, string_col from functional.alltypes_view
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -12,6 +16,8 @@ select int_col, string_col from functional.alltypes_view
 # Basic test with a complex view.
 select * from functional.complex_view
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:TOP-N [LIMIT=100]
 |  order by: b.string_col ASC
 |
@@ -32,6 +38,8 @@ select * from functional.complex_view
    predicates: a.bigint_col < 50
    runtime filters: RF000 -> a.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: b.string_col ASC
 |  limit: 100
@@ -67,9 +75,13 @@ select * from functional.complex_view
 # Basic test with a view on a view
 select int_col, string_col from functional.view_view
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -79,6 +91,8 @@ select int_col, string_col from functional.view_view
 select * from functional.alltypes_view union all
 select * from functional.alltypes_view where id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:UNION
 |
 |--02:SCAN HDFS [functional.alltypes]
@@ -88,6 +102,8 @@ select * from functional.alltypes_view where id < 10
 01:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 03:EXCHANGE [UNPARTITIONED]
 |
 00:UNION
@@ -103,10 +119,14 @@ select * from functional.alltypes_view where id < 10
 select t.id from (select id from functional.alltypes_view) t
 where t.id < 10
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
    predicates: functional.alltypes.id < 10
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -117,6 +137,8 @@ where t.id < 10
 select * from functional.alltypes_view t1, functional.alltypes_view_sub t2,
 functional.complex_view t3 where t1.id = t2.x and t2.x = t3.abc
 ---- PLAN
+PLAN-ROOT SINK
+|
 08:HASH JOIN [INNER JOIN]
 |  hash predicates: int_col = count(a.bigint_col)
 |  runtime filters: RF000 <- count(a.bigint_col)
@@ -155,6 +177,8 @@ functional.complex_view t3 where t1.id = t2.x and t2.x = t3.abc
    predicates: functional.alltypes.id > 1
    runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 16:EXCHANGE [UNPARTITIONED]
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
@@ -219,6 +243,8 @@ select * from functional.alltypes_view t1
 inner join functional.alltypes_view t2 on (t1.id = t2.id)
 inner join functional.alltypes_view t3 on (t2.id = t3.id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = functional.alltypes.id
 |  runtime filters: RF000 <- functional.alltypes.id
@@ -238,6 +264,8 @@ inner join functional.alltypes_view t3 on (t2.id = t3.id)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -271,6 +299,8 @@ select * from functional.alltypes_view t1
 inner join functional.alltypes_view t2 using(id)
 inner join functional.alltypes_view t3 using(id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = functional.alltypes.id
 |  runtime filters: RF000 <- functional.alltypes.id
@@ -290,6 +320,8 @@ inner join functional.alltypes_view t3 using(id)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -323,6 +355,8 @@ select * from functional.alltypes_view t1
 left outer join functional.alltypes_view t2 using(id)
 full outer join functional.alltypes_view t3 using(id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: functional.alltypes.id = functional.alltypes.id
 |
@@ -338,6 +372,8 @@ full outer join functional.alltypes_view t3 using(id)
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
@@ -369,6 +405,8 @@ select * from functional.alltypes_view t1
 inner join [broadcast] functional.alltypes_view t2 using(id)
 inner join [shuffle] functional.alltypes_view t3 using(id)
 ---- PLAN
+PLAN-ROOT SINK
+|
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: functional.alltypes.id = functional.alltypes.id
 |  runtime filters: RF000 <- functional.alltypes.id
@@ -388,6 +426,8 @@ inner join [shuffle] functional.alltypes_view t3 using(id)
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.id
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -419,10 +459,14 @@ inner join [shuffle] functional.alltypes_view t3 using(id)
 # enabling proper partition pruning for this particular view.
 select * from functional.alltypes_parens
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes]
    partitions=1/24 files=1 size=19.95KB
    predicates: (int_col < 100 OR bool_col = FALSE)
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes]
@@ -433,10 +477,14 @@ select * from functional.alltypes_parens
 # possible (see IMPALA-923)
 select bool_col FROM ( SELECT bool_col FROM functional.alltypes t ) t WHERE t.bool_col
 ---- PLAN
+PLAN-ROOT SINK
+|
 00:SCAN HDFS [functional.alltypes t]
    partitions=24/24 files=24 size=478.45KB
    predicates: bool_col
 ---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HDFS [functional.alltypes t]



[11/32] incubator-impala git commit: IMPALA-3002/IMPALA-1473: Cardinality observability cleanup

Posted by ta...@apache.org.
IMPALA-3002/IMPALA-1473: Cardinality observability cleanup

IMPALA-3002:
The shell prints an incorrect value for '#Rows' in the exec
summary for broadcast nodes due to incorrect logic around
whether to use max or agg stats. This patch makes the behavior
consistent with the way the backend treats exec summaries in
summary-util.cc. This incorrect logic was also duplicated in
the impala_beeswax test framework.
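
For intuition, a minimal C++ sketch of the rule being made consistent
(names are illustrative, not the summary-util.cc API): for a broadcast
exchange every instance receives the full row set, so the reported
cardinality is the max over instances; otherwise instances process
disjoint partitions, so it is the sum.

  #include <algorithm>
  #include <cstdint>
  #include <numeric>
  #include <vector>

  // Per-instance row counts for one exec node; hypothetical helper.
  int64_t ReportedCardinality(const std::vector<int64_t>& per_instance_rows,
                              bool is_broadcast) {
    if (per_instance_rows.empty()) return 0;
    if (is_broadcast) {
      // Every instance saw the same (full) row set: report the max.
      return *std::max_element(per_instance_rows.begin(), per_instance_rows.end());
    }
    // Instances processed disjoint partitions: report the sum.
    return std::accumulate(per_instance_rows.begin(), per_instance_rows.end(),
                           static_cast<int64_t>(0));
  }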

IMPALA-1473:
When there is a merging exchange with a limit, we may copy rows
into the output batch beyond the limit. In this case, we currently
update the output batch's size to reflect the limit, but we also
need to update ExecNode::num_rows_returned_, or the exec summary
may show that the exchange node returned more rows than it really
did.
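
As a minimal sketch (assumed names, not the actual ExchangeNode code),
the pattern is: after trimming the rows copied past the limit, clamp the
running counter as well so both stay consistent with what was really
returned.

  #include <cassert>
  #include <cstdint>

  struct Batch { int num_rows = 0; };  // stand-in for RowBatch

  // Returns true (eos) once the limit is reached; trims 'batch' in place.
  bool ApplyLimit(Batch* batch, int64_t limit, int64_t* num_rows_returned) {
    *num_rows_returned += batch->num_rows;
    if (*num_rows_returned < limit) return false;
    // Drop the rows copied beyond the limit ...
    batch->num_rows -= static_cast<int>(*num_rows_returned - limit);
    // ... and clamp the counter, or later reporting overstates rows returned.
    *num_rows_returned = limit;
    return true;
  }

  int main() {
    Batch b; b.num_rows = 8;
    int64_t returned = 3;  // rows already returned by earlier batches
    bool eos = ApplyLimit(&b, /*limit=*/5, &returned);
    assert(eos && b.num_rows == 2 && returned == 5);
    return 0;
  }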

Additionally, PlanFragmentExecutor::GetNext does not update
rows_produced_counter_ in some cases, leading the runtime profile
to display an incorrect value for 'RowsProduced'.

Change-Id: I386719370386c9cff09b8b35d15dc712dc6480aa
Reviewed-on: http://gerrit.cloudera.org:8080/4679
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/7fad3e5d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/7fad3e5d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/7fad3e5d

Branch: refs/heads/hadoop-next
Commit: 7fad3e5dc38c1097db6be24da0cda6941f554150
Parents: a1c9cb3
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Mon Oct 10 10:32:55 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Sat Oct 15 01:25:51 2016 +0000

----------------------------------------------------------------------
 be/src/exec/exchange-node.cc             |  1 +
 be/src/runtime/plan-fragment-executor.cc |  2 +-
 shell/impala_client.py                   |  5 ++-
 tests/beeswax/impala_beeswax.py          | 10 +++---
 tests/query_test/test_observability.py   | 52 +++++++++++++++++++++++++++
 5 files changed, 63 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7fad3e5d/be/src/exec/exchange-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/exchange-node.cc b/be/src/exec/exchange-node.cc
index 22dfe40..833949b 100644
--- a/be/src/exec/exchange-node.cc
+++ b/be/src/exec/exchange-node.cc
@@ -207,6 +207,7 @@ Status ExchangeNode::GetNextMerging(RuntimeState* state, RowBatch* output_batch,
   num_rows_returned_ += output_batch->num_rows();
   if (ReachedLimit()) {
     output_batch->set_num_rows(output_batch->num_rows() - (num_rows_returned_ - limit_));
+    num_rows_returned_ = limit_;
     *eos = true;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7fad3e5d/be/src/runtime/plan-fragment-executor.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/plan-fragment-executor.cc b/be/src/runtime/plan-fragment-executor.cc
index e0d314b..aba4a26 100644
--- a/be/src/runtime/plan-fragment-executor.cc
+++ b/be/src/runtime/plan-fragment-executor.cc
@@ -410,6 +410,7 @@ Status PlanFragmentExecutor::GetNext(RowBatch** batch) {
     row_batch_->Reset();
   }
   UpdateStatus(status);
+  COUNTER_ADD(rows_produced_counter_, row_batch_->num_rows());
 
   if (done_) {
     VLOG_QUERY << "Finished executing fragment query_id=" << PrintId(query_id_)
@@ -421,7 +422,6 @@ Status PlanFragmentExecutor::GetNext(RowBatch** batch) {
   }
 
   *batch = row_batch_.get();
-  COUNTER_ADD(rows_produced_counter_, row_batch_->num_rows());
   return status;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7fad3e5d/shell/impala_client.py
----------------------------------------------------------------------
diff --git a/shell/impala_client.py b/shell/impala_client.py
index bc20b09..0d1c835 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -115,6 +115,9 @@ class ImpalaClient(object):
 
     Returns the index of the next exec node in summary.exec_nodes that should be
     processed, used internally to this method only.
+
+    NOTE: This is duplicated in impala_beeswax.py, and changes made here should also be
+    made there.
     """
     attrs = ["latency_ns", "cpu_time_ns", "cardinality", "memory_used"]
 
@@ -142,7 +145,7 @@ class ImpalaClient(object):
     # is the max over all instances (which should all have received the same number of
     # rows). Otherwise, the cardinality is the sum over all instances which process
     # disjoint partitions.
-    if node.is_broadcast and is_fragment_root:
+    if node.is_broadcast:
       cardinality = max_stats.cardinality
     else:
       cardinality = agg_stats.cardinality

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7fad3e5d/tests/beeswax/impala_beeswax.py
----------------------------------------------------------------------
diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index 79a106f..e0f5d55 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py
@@ -30,15 +30,15 @@ import shlex
 import getpass
 import re
 
-from impala._thrift_gen.beeswax import BeeswaxService
-from impala._thrift_gen.beeswax.BeeswaxService import QueryState
+from beeswaxd import BeeswaxService
+from beeswaxd.BeeswaxService import QueryState
 from datetime import datetime
 try:
   # If Exec Summary is not implemented in Impala, this cannot be imported
-  from impala._thrift_gen.ExecStats.ttypes import TExecStats
+  from ExecStats.ttypes import TExecStats
 except ImportError:
   pass
-from impala._thrift_gen.ImpalaService import ImpalaService
+from ImpalaService import ImpalaService
 from tests.util.thrift_util import create_transport
 from thrift.transport.TTransport import TTransportException
 from thrift.protocol import TBinaryProtocol
@@ -265,7 +265,7 @@ class ImpalaBeeswaxClient(object):
     # is the max over all instances (which should all have received the same number of
     # rows). Otherwise, the cardinality is the sum over all instances which process
     # disjoint partitions.
-    if node.is_broadcast and is_fragment_root:
+    if node.is_broadcast:
       cardinality = max_stats.cardinality
     else:
       cardinality = agg_stats.cardinality

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7fad3e5d/tests/query_test/test_observability.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
new file mode 100644
index 0000000..59e6a73
--- /dev/null
+++ b/tests/query_test/test_observability.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tests.common.impala_test_suite import ImpalaTestSuite
+
+class TestObservability(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  def test_merge_exchange_num_rows(self):
+    """Regression test for IMPALA-1473 - checks that the exec summary for a merging
+    exchange with a limit reports the number of rows returned as equal to the limit,
+    and that the coordinator fragment portion of the runtime profile reports the number
+    of rows returned correctly."""
+    query = """select tinyint_col, count(*) from functional.alltypes
+        group by tinyint_col order by tinyint_col limit 5"""
+    result = self.execute_query(query)
+    assert result.exec_summary[0]['operator'] == '05:MERGING-EXCHANGE'
+    assert result.exec_summary[0]['num_rows'] == 5
+    assert result.exec_summary[0]['est_num_rows'] == 5
+
+    for line in result.runtime_profile.split('\n'):
+      # The first 'RowsProduced' we find is for the coordinator fragment.
+      if 'RowsProduced' in line:
+        assert '(5)' in line
+        break
+
+  def test_broadcast_num_rows(self):
+    """Regression test for IMPALA-3002 - checks that the num_rows for a broadcast node
+    in the exec summary is correctly set as the max over all instances, not the sum."""
+    query = """select distinct a.int_col, a.string_col from functional.alltypes a
+        inner join functional.alltypessmall b on (a.id = b.id)
+        where a.year = 2009 and b.month = 2"""
+    result = self.execute_query(query)
+    assert result.exec_summary[5]['operator'] == '04:EXCHANGE'
+    assert result.exec_summary[5]['num_rows'] == 25
+    assert result.exec_summary[5]['est_num_rows'] == 25


[24/32] incubator-impala git commit: IMPALA-4123: Fast bit unpacking

Posted by ta...@apache.org.
IMPALA-4123: Fast bit unpacking

Adds utility functions for fast unpacking of batches of bit-packed
values. These support reading batches of any number of values provided
that the start of the batch is aligned to a byte boundary. Callers that
want to read smaller batches that don't align to byte boundaries will
need to implement their own buffering.

The unpacking code uses only portable C++ and no SIMD intrinsics, but is
fairly efficient because unpacking a full batch of 32 values compiles
down to 32-bit loads, shifts by constants, masks by constants, bitwise
ORs when a value straddles 32-bit words, and stores. Further speedups
should be possible using SIMD intrinsics.
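
A simplified, per-value C++ sketch of the same shift/mask idea (the
committed code instead unrolls whole batches of 32 values so the shift
and mask amounts become compile-time constants; names here are
illustrative, not the new BitPacking API):

  #include <cstdint>
  #include <cstring>
  #include <vector>

  // Unpack 'num_values' integers of 'bit_width' bits (1..32) from a
  // byte-aligned buffer. Assumes little-endian and that 'in' is padded so an
  // 8-byte load at the last value's byte offset stays in bounds.
  std::vector<uint32_t> UnpackSketch(const uint8_t* in, int bit_width,
                                     int num_values) {
    std::vector<uint32_t> out(num_values);
    const uint32_t mask = bit_width == 32 ? ~0u : (1u << bit_width) - 1;
    int64_t bit_offset = 0;
    for (int i = 0; i < num_values; ++i) {
      uint64_t word = 0;
      std::memcpy(&word, in + bit_offset / 8, sizeof(word));  // load
      const int shift = static_cast<int>(bit_offset % 8);     // shift ...
      out[i] = static_cast<uint32_t>(word >> shift) & mask;   // ... and mask
      bit_offset += bit_width;
    }
    return out;
  }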

Testing:
Added unit tests for unpacking, exhaustively covering different
bitwidths with additional test dimensions (memory alignment, various
input sizes, etc).

Tested under ASAN to ensure the bit unpacking doesn't read past the end
of buffers.

Perf:
Added microbenchmark that shows on average an 8-9x speedup over the
existing BitReader code.

Change-Id: I12db69409483d208cd4c0f41c27a78aeb6cd3622
Reviewed-on: http://gerrit.cloudera.org:8080/4494
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/07da7679
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/07da7679
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/07da7679

Branch: refs/heads/hadoop-next
Commit: 07da7679d1755ada836706f752d8078260a76244
Parents: ef762b7
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Sep 14 10:44:08 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 02:53:16 2016 +0000

----------------------------------------------------------------------
 be/src/benchmarks/CMakeLists.txt           |  31 ++-
 be/src/benchmarks/bit-packing-benchmark.cc | 347 ++++++++++++++++++++++++
 be/src/benchmarks/bswap-benchmark.cc       |  23 +-
 be/src/exprs/expr-test.cc                  |   5 +-
 be/src/testutil/mem-util.h                 |  57 ++++
 be/src/util/CMakeLists.txt                 |   1 +
 be/src/util/bit-packing-test.cc            | 159 +++++++++++
 be/src/util/bit-packing.h                  |  92 +++++++
 be/src/util/bit-packing.inline.h           | 202 ++++++++++++++
 be/src/util/bit-stream-utils.h             |  16 +-
 be/src/util/bit-stream-utils.inline.h      |   2 +-
 be/src/util/bit-util.h                     |  68 +++--
 be/src/util/openssl-util-test.cc           |  10 +-
 13 files changed, 929 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/benchmarks/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt
index 0a28dce..ba8bcfc 100644
--- a/be/src/benchmarks/CMakeLists.txt
+++ b/be/src/benchmarks/CMakeLists.txt
@@ -27,28 +27,29 @@ FUNCTION(ADD_BE_BENCHMARK BENCHMARK_NAME)
   TARGET_LINK_LIBRARIES(${BENCHMARK_NAME} ${IMPALA_LINK_LIBS})
 ENDFUNCTION()
 
-ADD_BE_BENCHMARK(parse-timestamp-benchmark)
-ADD_BE_BENCHMARK(string-search-benchmark)
 ADD_BE_BENCHMARK(atod-benchmark)
 ADD_BE_BENCHMARK(atof-benchmark)
 ADD_BE_BENCHMARK(atoi-benchmark)
-ADD_BE_BENCHMARK(lock-benchmark)
-ADD_BE_BENCHMARK(thread-create-benchmark)
-ADD_BE_BENCHMARK(tuple-layout-benchmark)
-ADD_BE_BENCHMARK(string-benchmark)
-ADD_BE_BENCHMARK(rle-benchmark)
-ADD_BE_BENCHMARK(string-compare-benchmark)
-ADD_BE_BENCHMARK(multiint-benchmark)
-ADD_BE_BENCHMARK(status-benchmark)
-ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
-ADD_BE_BENCHMARK(overflow-benchmark)
-ADD_BE_BENCHMARK(bloom-filter-benchmark)
-ADD_BE_BENCHMARK(int-hash-benchmark)
 ADD_BE_BENCHMARK(bitmap-benchmark)
+ADD_BE_BENCHMARK(bit-packing-benchmark)
+ADD_BE_BENCHMARK(bloom-filter-benchmark)
+ADD_BE_BENCHMARK(bswap-benchmark)
 ADD_BE_BENCHMARK(expr-benchmark)
 ADD_BE_BENCHMARK(hash-benchmark)
 ADD_BE_BENCHMARK(in-predicate-benchmark)
+ADD_BE_BENCHMARK(int-hash-benchmark)
+ADD_BE_BENCHMARK(lock-benchmark)
+ADD_BE_BENCHMARK(multiint-benchmark)
 ADD_BE_BENCHMARK(network-perf-benchmark)
-ADD_BE_BENCHMARK(bswap-benchmark)
+ADD_BE_BENCHMARK(overflow-benchmark)
+ADD_BE_BENCHMARK(parse-timestamp-benchmark)
+ADD_BE_BENCHMARK(rle-benchmark)
+ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
+ADD_BE_BENCHMARK(status-benchmark)
+ADD_BE_BENCHMARK(string-benchmark)
+ADD_BE_BENCHMARK(string-compare-benchmark)
+ADD_BE_BENCHMARK(string-search-benchmark)
+ADD_BE_BENCHMARK(thread-create-benchmark)
+ADD_BE_BENCHMARK(tuple-layout-benchmark)
 
 target_link_libraries(hash-benchmark Experiments)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/benchmarks/bit-packing-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bit-packing-benchmark.cc b/be/src/benchmarks/bit-packing-benchmark.cc
new file mode 100644
index 0000000..6e80d83
--- /dev/null
+++ b/be/src/benchmarks/bit-packing-benchmark.cc
@@ -0,0 +1,347 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test bit packing performance when unpacking data for all supported bit-widths.
+// This compares:
+// * BitReader - the original bit reader that unpacks a value at a time.
+// * Unpack32Scalar - a batched implementation using scalar operations to unpack batches
+//    of 32 values.
+// * UnpackScalar - an implementation that can unpack a variable number of values, using
+//   Unpack32Scalar internally.
+//
+//
+// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
+// Unpack32Values bit_width 0:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.57e+04 1.59e+04  1.6e+04         1X         1X         1X
+//                      Unpack32Scalar           1.34e+05 1.35e+05 1.36e+05      8.51X      8.49X      8.51X
+//                        UnpackScalar           2.08e+05  2.1e+05 2.12e+05      13.3X      13.2X      13.2X
+//
+// Unpack32Values bit_width 1:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.19e+04  1.2e+04  1.2e+04         1X         1X         1X
+//                      Unpack32Scalar           8.89e+04 8.94e+04 9.04e+04      7.48X      7.46X      7.51X
+//                        UnpackScalar           9.72e+04  9.8e+04 9.86e+04      8.18X      8.18X      8.19X
+//
+// Unpack32Values bit_width 2:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.18e+04 1.19e+04  1.2e+04         1X         1X         1X
+//                      Unpack32Scalar           8.84e+04 8.91e+04 8.99e+04      7.49X      7.48X       7.5X
+//                        UnpackScalar           9.68e+04 9.76e+04 9.84e+04       8.2X      8.19X      8.21X
+//
+// Unpack32Values bit_width 3:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.16e+04 1.17e+04 1.18e+04         1X         1X         1X
+//                      Unpack32Scalar           8.67e+04 8.72e+04 8.79e+04      7.45X      7.42X      7.43X
+//                        UnpackScalar            9.6e+04 9.66e+04 9.74e+04      8.25X      8.22X      8.24X
+//
+// Unpack32Values bit_width 4:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.08e+04 1.09e+04  1.1e+04         1X         1X         1X
+//                      Unpack32Scalar           9.13e+04 9.19e+04 9.25e+04      8.44X      8.43X      8.42X
+//                        UnpackScalar           9.65e+04 9.69e+04 9.78e+04      8.91X      8.89X       8.9X
+//
+// Unpack32Values bit_width 5:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.14e+04 1.15e+04 1.16e+04         1X         1X         1X
+//                      Unpack32Scalar           8.35e+04 8.42e+04 8.49e+04       7.3X      7.31X      7.31X
+//                        UnpackScalar           9.41e+04 9.48e+04 9.56e+04      8.22X      8.22X      8.24X
+//
+// Unpack32Values bit_width 6:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.14e+04 1.15e+04 1.16e+04         1X         1X         1X
+//                      Unpack32Scalar           8.46e+04 8.53e+04  8.6e+04       7.4X      7.41X      7.41X
+//                        UnpackScalar           9.35e+04 9.41e+04 9.51e+04      8.18X      8.16X       8.2X
+//
+// Unpack32Values bit_width 7:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.09e+04  1.1e+04 1.11e+04         1X         1X         1X
+//                      Unpack32Scalar           8.11e+04 8.16e+04 8.25e+04      7.44X      7.44X      7.45X
+//                        UnpackScalar           9.16e+04 9.21e+04  9.3e+04       8.4X       8.4X      8.39X
+//
+// Unpack32Values bit_width 8:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.14e+04 1.15e+04 1.16e+04         1X         1X         1X
+//                      Unpack32Scalar           9.02e+04 9.07e+04 9.14e+04       7.9X       7.9X      7.91X
+//                        UnpackScalar           9.48e+04 9.55e+04 9.63e+04      8.31X      8.33X      8.33X
+//
+// Unpack32Values bit_width 9:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.11e+04 1.12e+04 1.13e+04         1X         1X         1X
+//                      Unpack32Scalar           7.94e+04 7.97e+04 8.06e+04      7.14X      7.12X      7.14X
+//                        UnpackScalar           8.78e+04 8.83e+04  8.9e+04      7.89X      7.88X      7.89X
+//
+// Unpack32Values bit_width 10:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader            1.1e+04 1.11e+04 1.12e+04         1X         1X         1X
+//                      Unpack32Scalar           8.07e+04 8.14e+04 8.21e+04      7.31X      7.32X      7.34X
+//                        UnpackScalar           8.95e+04 9.02e+04 9.09e+04      8.11X      8.12X      8.12X
+//
+// Unpack32Values bit_width 11:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.09e+04  1.1e+04 1.11e+04         1X         1X         1X
+//                      Unpack32Scalar           7.63e+04 7.69e+04 7.75e+04      6.99X      6.99X      6.99X
+//                        UnpackScalar           8.55e+04 8.61e+04 8.69e+04      7.83X      7.83X      7.84X
+//
+// Unpack32Values bit_width 12:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.09e+04  1.1e+04  1.1e+04         1X         1X         1X
+//                      Unpack32Scalar           8.23e+04 8.29e+04 8.35e+04      7.55X      7.56X      7.57X
+//                        UnpackScalar           9.06e+04 9.12e+04 9.19e+04      8.31X      8.31X      8.33X
+//
+// Unpack32Values bit_width 13:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.07e+04 1.08e+04 1.09e+04         1X         1X         1X
+//                      Unpack32Scalar           7.42e+04 7.47e+04 7.55e+04      6.92X       6.9X      6.92X
+//                        UnpackScalar           8.16e+04 8.23e+04 8.29e+04       7.6X       7.6X      7.61X
+//
+// Unpack32Values bit_width 14:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.07e+04 1.08e+04 1.09e+04         1X         1X         1X
+//                      Unpack32Scalar           7.58e+04 7.62e+04 7.68e+04      7.08X      7.08X      7.08X
+//                        UnpackScalar           8.33e+04 8.38e+04 8.46e+04      7.78X      7.78X      7.79X
+//
+// Unpack32Values bit_width 15:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.06e+04 1.06e+04 1.07e+04         1X         1X         1X
+//                      Unpack32Scalar           7.16e+04 7.22e+04 7.29e+04      6.78X      6.79X      6.79X
+//                        UnpackScalar           7.96e+04 8.05e+04 8.09e+04      7.54X      7.57X      7.54X
+//
+// Unpack32Values bit_width 16:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.08e+04 1.08e+04 1.09e+04         1X         1X         1X
+//                      Unpack32Scalar           8.71e+04 8.76e+04 8.83e+04      8.09X      8.09X      8.08X
+//                        UnpackScalar           9.22e+04  9.3e+04 9.37e+04      8.56X      8.58X      8.57X
+//
+// Unpack32Values bit_width 17:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.04e+04 1.04e+04 1.05e+04         1X         1X         1X
+//                      Unpack32Scalar           6.98e+04 7.04e+04 7.09e+04      6.73X      6.74X      6.74X
+//                        UnpackScalar           7.73e+04 7.78e+04 7.85e+04      7.45X      7.45X      7.47X
+//
+// Unpack32Values bit_width 18:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.03e+04 1.04e+04 1.05e+04         1X         1X         1X
+//                      Unpack32Scalar            7.1e+04 7.17e+04 7.22e+04      6.86X      6.88X      6.87X
+//                        UnpackScalar           7.77e+04 7.82e+04 7.89e+04      7.51X       7.5X      7.51X
+//
+// Unpack32Values bit_width 19:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.02e+04 1.03e+04 1.04e+04         1X         1X         1X
+//                      Unpack32Scalar           6.74e+04  6.8e+04 6.85e+04      6.59X       6.6X      6.61X
+//                        UnpackScalar           7.43e+04 7.49e+04 7.54e+04      7.26X      7.27X      7.28X
+//
+// Unpack32Values bit_width 20:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.02e+04 1.03e+04 1.03e+04         1X         1X         1X
+//                      Unpack32Scalar           7.28e+04 7.34e+04  7.4e+04      7.15X      7.15X      7.15X
+//                        UnpackScalar           7.94e+04 8.02e+04 8.07e+04       7.8X      7.81X       7.8X
+//
+// Unpack32Values bit_width 21:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.01e+04 1.01e+04 1.02e+04         1X         1X         1X
+//                      Unpack32Scalar           6.56e+04 6.62e+04 6.67e+04      6.53X      6.54X      6.54X
+//                        UnpackScalar            7.1e+04 7.15e+04 7.19e+04      7.06X      7.06X      7.06X
+//
+// Unpack32Values bit_width 22:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader              1e+04 1.01e+04 1.02e+04         1X         1X         1X
+//                      Unpack32Scalar           6.68e+04 6.73e+04 6.79e+04      6.68X      6.68X      6.68X
+//                        UnpackScalar           7.35e+04 7.41e+04 7.46e+04      7.34X      7.35X      7.35X
+//
+// Unpack32Values bit_width 23:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.87e+03 9.95e+03    1e+04         1X         1X         1X
+//                      Unpack32Scalar           6.44e+04 6.48e+04 6.53e+04      6.52X      6.52X      6.51X
+//                        UnpackScalar           6.93e+04 6.97e+04 7.04e+04      7.03X      7.01X      7.02X
+//
+// Unpack32Values bit_width 24:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.93e+03    1e+04 1.01e+04         1X         1X         1X
+//                      Unpack32Scalar           7.44e+04 7.49e+04 7.55e+04      7.49X      7.49X      7.49X
+//                        UnpackScalar           8.12e+04 8.17e+04 8.27e+04      8.18X      8.17X       8.2X
+//
+// Unpack32Values bit_width 25:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.71e+03 9.79e+03 9.86e+03         1X         1X         1X
+//                      Unpack32Scalar           6.12e+04 6.16e+04 6.22e+04      6.31X      6.29X      6.31X
+//                        UnpackScalar           6.44e+04 6.48e+04 6.53e+04      6.64X      6.62X      6.62X
+//
+// Unpack32Values bit_width 26:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.67e+03 9.74e+03 9.81e+03         1X         1X         1X
+//                      Unpack32Scalar           6.21e+04 6.26e+04 6.31e+04      6.42X      6.42X      6.43X
+//                        UnpackScalar           6.53e+04 6.59e+04 6.64e+04      6.75X      6.77X      6.76X
+//
+// Unpack32Values bit_width 27:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.56e+03 9.62e+03  9.7e+03         1X         1X         1X
+//                      Unpack32Scalar           5.99e+04 6.03e+04 6.09e+04      6.27X      6.27X      6.28X
+//                        UnpackScalar           6.32e+04 6.35e+04 6.42e+04      6.61X       6.6X      6.62X
+//
+// Unpack32Values bit_width 28:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.53e+03 9.61e+03 9.66e+03         1X         1X         1X
+//                      Unpack32Scalar           6.37e+04 6.42e+04 6.47e+04      6.69X      6.68X       6.7X
+//                        UnpackScalar           6.68e+04 6.73e+04 6.77e+04      7.01X         7X      7.01X
+//
+// Unpack32Values bit_width 29:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.41e+03 9.46e+03 9.55e+03         1X         1X         1X
+//                      Unpack32Scalar           5.79e+04 5.82e+04 5.87e+04      6.15X      6.15X      6.14X
+//                        UnpackScalar           6.08e+04 6.11e+04 6.16e+04      6.46X      6.46X      6.46X
+//
+// Unpack32Values bit_width 30:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.37e+03 9.45e+03 9.52e+03         1X         1X         1X
+//                      Unpack32Scalar           5.87e+04 5.92e+04 5.96e+04      6.26X      6.27X      6.26X
+//                        UnpackScalar           6.16e+04  6.2e+04 6.26e+04      6.58X      6.56X      6.57X
+//
+// Unpack32Values bit_width 31:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.26e+03 9.33e+03 9.41e+03         1X         1X         1X
+//                      Unpack32Scalar           5.59e+04 5.63e+04 5.67e+04      6.03X      6.03X      6.03X
+//                        UnpackScalar           5.85e+04 5.89e+04 5.94e+04      6.31X      6.31X      6.31X
+//
+// Unpack32Values bit_width 32:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.89e+03 9.96e+03    1e+04         1X         1X         1X
+//                      Unpack32Scalar           9.83e+04 9.96e+04 1.01e+05      9.95X        10X        10X
+//                        UnpackScalar           8.24e+04 8.36e+04 8.44e+04      8.34X       8.4X      8.41X
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "gutil/strings/substitute.h"
+#include "util/benchmark.h"
+#include "util/bit-packing.inline.h"
+#include "util/bit-stream-utils.inline.h"
+#include "util/cpu-info.h"
+
+#include "common/names.h"
+
+using namespace impala;
+
+constexpr int NUM_OUT_VALUES = 1024 * 1024;
+static_assert(NUM_OUT_VALUES % 32 == 0, "NUM_OUT_VALUES must be divisible by 32");
+
+uint32_t out_buffer[NUM_OUT_VALUES];
+
+struct BenchmarkParams {
+  int bit_width;
+  const uint8_t* data;
+  int64_t data_len;
+};
+
+/// Benchmark calling BitReader::GetValue() in a loop to unpack 32 * 'batch_size' values.
+void BitReaderBenchmark(int batch_size, void* data) {
+  const BenchmarkParams* p = reinterpret_cast<BenchmarkParams*>(data);
+  BitReader reader(p->data, p->data_len);
+  for (int i = 0; i < batch_size; ++i) {
+    for (int j = 0; j < 32; ++j) {
+      const int64_t offset = (i * 32 + j) % NUM_OUT_VALUES;
+      if (UNLIKELY(!reader.GetValue<uint32_t>(p->bit_width, &out_buffer[offset]))) {
+        reader.Reset(p->data, p->data_len);
+        const bool success = reader.GetValue<uint32_t>(p->bit_width, &out_buffer[offset]);
+        DCHECK(success);
+      }
+    }
+  }
+}
+
+/// Benchmark calling Unpack32Values() in a loop to unpack 32 * 'batch_size' values.
+void Unpack32Benchmark(int batch_size, void* data) {
+  const BenchmarkParams* p = reinterpret_cast<BenchmarkParams*>(data);
+  const uint8_t* pos = reinterpret_cast<const uint8_t*>(p->data);
+  const uint8_t* const data_end = pos + p->data_len;
+  for (int i = 0; i < batch_size; ++i) {
+    if (UNLIKELY(pos >= data_end)) pos = reinterpret_cast<const uint8_t*>(p->data);
+    const int64_t offset = (i * 32) % NUM_OUT_VALUES;
+    pos = BitPacking::Unpack32Values(
+        p->bit_width, pos, data_end - pos, out_buffer + offset);
+  }
+}
+
+/// Benchmark calling UnpackValues() to unpack 32 * 'batch_size' values.
+void UnpackBenchmark(int batch_size, void* data) {
+  const BenchmarkParams* p = reinterpret_cast<BenchmarkParams*>(data);
+  const int64_t total_values_to_unpack = 32L * batch_size;
+  for (int64_t unpacked = 0; unpacked < total_values_to_unpack;
+       unpacked += NUM_OUT_VALUES) {
+    const int64_t unpack_batch =
+        min<int64_t>(NUM_OUT_VALUES, total_values_to_unpack - unpacked);
+    BitPacking::UnpackValues(
+        p->bit_width, p->data, p->data_len, unpack_batch, out_buffer);
+  }
+}
+
+int main(int argc, char **argv) {
+  CpuInfo::Init();
+  cout << endl << Benchmark::GetMachineInfo() << endl;
+
+  for (int bit_width = 0; bit_width <= 32; ++bit_width) {
+    Benchmark suite(Substitute("Unpack32Values bit_width $0", bit_width));
+    const int64_t data_len = NUM_OUT_VALUES * bit_width / 8;
+    vector<uint8_t> data(data_len);
+    std::iota(data.begin(), data.end(), 0);
+    BenchmarkParams params{bit_width, data.data(), data_len};
+    suite.AddBenchmark(Substitute("BitReader", bit_width), BitReaderBenchmark, &params);
+    suite.AddBenchmark(
+        Substitute("Unpack32Scalar", bit_width), Unpack32Benchmark, &params);
+    suite.AddBenchmark(Substitute("UnpackScalar", bit_width), UnpackBenchmark, &params);
+    cout << suite.Measure() << endl;
+  }
+  return 0;
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/benchmarks/bswap-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bswap-benchmark.cc b/be/src/benchmarks/bswap-benchmark.cc
index 6add717..f62d4fc 100644
--- a/be/src/benchmarks/bswap-benchmark.cc
+++ b/be/src/benchmarks/bswap-benchmark.cc
@@ -25,6 +25,7 @@
 #include "gutil/strings/substitute.h"
 #include "exec/parquet-common.h"
 #include "runtime/decimal-value.h"
+#include "testutil/mem-util.h"
 #include "util/benchmark.h"
 #include "util/cpu-info.h"
 #include "util/bit-util.h"
@@ -116,18 +117,6 @@ void TestSIMDSwap(int batch_size, void* d) {
   BitUtil::ByteSwap(data->outbuffer, data->inbuffer, data->num_values);
 }
 
-// Allocate 64-byte (an x86-64 cache line) aligned memory so it does not straddle cache
-// line boundaries. This is sufficient to meet alignment requirements for all SIMD
-// instructions, at least up to AVX-512.
-// Exit process if allocation fails.
-void* AllocateAligned(size_t size) {
-  void* ptr;
-  if (posix_memalign(&ptr, 64, size) != 0) {
-    LOG(FATAL) << "Failed to allocate " << size;
-  }
-  return ptr;
-}
-
 // Benchmark routine for FastScalar/"Pure" SSSE3/"Pure" AVX2/SIMD approaches
 void PerfBenchmark() {
   // Measure perf both when memory is perfectly aligned for SIMD and also misaligned.
@@ -135,18 +124,16 @@ void PerfBenchmark() {
   const vector<int> misalignments({0, 1, 4, max_misalignment});
   const int data_len = 1 << 20;
 
-  const unique_ptr<uint8_t, decltype(free)*> inbuffer(
-      reinterpret_cast<uint8_t*>(AllocateAligned(data_len + max_misalignment)), free);
-  const unique_ptr<uint8_t, decltype(free)*> outbuffer(
-      reinterpret_cast<uint8_t*>(AllocateAligned(data_len + max_misalignment)), free);
+  AlignedAllocation inbuffer(data_len + max_misalignment);
+  AlignedAllocation outbuffer(data_len + max_misalignment);
 
   for (const int misalign : misalignments) {
     Benchmark suite(Substitute("ByteSwap benchmark misalignment=$0", misalign));
     TestData data;
 
     data.num_values = data_len;
-    data.inbuffer = inbuffer.get() + misalign;
-    data.outbuffer = outbuffer.get() + misalign;
+    data.inbuffer = inbuffer.data() + misalign;
+    data.outbuffer = outbuffer.data() + misalign;
     InitData(data.inbuffer, data_len);
 
     const int baseline = suite.AddBenchmark("FastScalar", TestFastScalarSwap, &data, -1);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 88565e1..0a6b720 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -15,16 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <math.h>
+#include <time.h>
 #include <limits>
 #include <map>
-#include <math.h>
 #include <string>
-#include <time.h>
 
 #include <boost/date_time/c_local_time_adjustor.hpp>
 #include <boost/date_time/posix_time/posix_time.hpp>
 #include <boost/lexical_cast.hpp>
-#include <boost/random/mersenne_twister.hpp>
 #include <boost/regex.hpp>
 #include <boost/unordered_map.hpp>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/testutil/mem-util.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/mem-util.h b/be/src/testutil/mem-util.h
new file mode 100644
index 0000000..78b7b48
--- /dev/null
+++ b/be/src/testutil/mem-util.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_TESTUTIL_MEM_UTIL_H_
+#define IMPALA_TESTUTIL_MEM_UTIL_H_
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "gutil/macros.h"
+
+namespace impala {
+
+/// Allocate 64-byte (an x86-64 cache line) aligned memory so it does not straddle cache
+/// line boundaries. This is sufficient to meet alignment requirements for all SIMD
+/// instructions, at least up to AVX-512.
+/// Exits process if allocation fails so should be used for tests and benchmarks only.
+inline uint8_t* AllocateAligned(size_t size) {
+  void* ptr;
+  if (posix_memalign(&ptr, 64, size) != 0) {
+    LOG(FATAL) << "Failed to allocate " << size;
+  }
+  return reinterpret_cast<uint8_t*>(ptr);
+}
+
+/// Scoped allocation with 64-byte alignment.
+/// Exits process if allocation fails so should be used for tests and benchmarks only.
+class AlignedAllocation {
+ public:
+  AlignedAllocation(size_t bytes) : data_(AllocateAligned(bytes)) {}
+  ~AlignedAllocation() { free(data_); }
+
+  uint8_t* data() { return data_; }
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AlignedAllocation);
+
+  uint8_t* data_;
+};
+
+}
+
+#endif
+

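For reference, a minimal usage sketch of the new AlignedAllocation helper (not part of the patch above; the buffer size and memset fill are illustrative only):

    #include <cstring>
    #include "testutil/mem-util.h"

    void ExampleScratchBuffer() {
      // 64-byte aligned scratch buffer; the process exits if allocation fails.
      impala::AlignedAllocation buf(1024);
      memset(buf.data(), 0, 1024);  // data() returns the aligned uint8_t*
      // The memory is freed automatically when 'buf' goes out of scope.
    }
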
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 0dfd12e..ecc222a 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -105,6 +105,7 @@ target_link_libraries(loggingsupport ${IMPALA_LINK_LIBS_DYNAMIC_TARGETS})
 
 ADD_BE_TEST(benchmark-test)
 ADD_BE_TEST(bitmap-test)
+ADD_BE_TEST(bit-packing-test)
 ADD_BE_TEST(bit-util-test)
 ADD_BE_TEST(blocking-queue-test)
 ADD_BE_TEST(bloom-filter-test)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-packing-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bit-packing-test.cc b/be/src/util/bit-packing-test.cc
new file mode 100644
index 0000000..bedf178
--- /dev/null
+++ b/be/src/util/bit-packing-test.cc
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include "testutil/gtest-util.h"
+#include "testutil/mem-util.h"
+#include "util/bit-packing.inline.h"
+#include "util/bit-stream-utils.inline.h"
+
+#include "common/names.h"
+
+using std::uniform_int_distribution;
+using std::mt19937;
+
+namespace impala {
+
+namespace {
+uint32_t ComputeMask(int bit_width) {
+  return (bit_width < 32) ? ((1U << bit_width) - 1) : ~0U;
+}
+}
+
+/// Test unpacking a subarray of values to/from smaller buffers that are sized to exactly
+/// fit the input and output. 'in' is the original unpacked input, 'packed' is the
+/// bit-packed data. The test copies 'num_in_values' packed values to a smaller temporary
+/// buffer, then unpacks them to another temporary buffer. Both buffers are sized to the
+/// minimum number of bytes required to fit the packed/unpacked data.
+///
+/// This is to test that we do not overrun either the input or output buffer for smaller
+/// batch sizes.
+void UnpackSubset(const uint32_t* in, const uint8_t* packed, int num_in_values,
+    int bit_width, bool aligned);
+
+/// Test a packing/unpacking round-trip of the 'num_in_values' values in 'in',
+/// packed with 'bit_width'. If 'aligned' is true, buffers for packed and unpacked data
+/// are allocated at a 64-byte aligned address. Otherwise the buffers are misaligned
+/// by 1 byte from a 64-byte aligned address.
+void PackUnpack(const uint32_t* in, int num_in_values, int bit_width, bool aligned) {
+  LOG(INFO) << "num_in_values = " << num_in_values << " bit_width = " << bit_width
+            << " aligned = " << aligned;
+
+  // Mask out higher bits so that the values to pack are in range.
+  const uint32_t mask = ComputeMask(bit_width);
+  const int misalignment = aligned ? 0 : 1;
+
+  const int bytes_required = BitUtil::RoundUpNumBytes(bit_width * num_in_values);
+  AlignedAllocation storage(bytes_required + misalignment);
+  uint8_t* packed = storage.data() + misalignment;
+
+  BitWriter writer(packed, bytes_required);
+  if (bit_width > 0) {
+    for (int i = 0; i < num_in_values; ++i) {
+      ASSERT_TRUE(writer.PutValue(in[i] & mask, bit_width));
+    }
+  }
+  writer.Flush();
+  LOG(INFO) << "Wrote " << writer.bytes_written() << " bytes.";
+
+  // Test unpacking all the values. Trying to unpack extra values should produce the
+  // same result, because the size of the input buffer limits the number of values
+  // that can be returned.
+  for (const int num_to_unpack : {num_in_values, num_in_values + 1, num_in_values + 77}) {
+    LOG(INFO) << "Unpacking " << num_to_unpack;
+    // Size buffer exactly so that ASAN can detect reads/writes that overrun the buffer.
+    AlignedAllocation out_storage(num_to_unpack * sizeof(uint32_t) + misalignment);
+    uint32_t* out = reinterpret_cast<uint32_t*>(out_storage.data() + misalignment);
+    const auto result = BitPacking::UnpackValues(
+        bit_width, packed, writer.bytes_written(), num_to_unpack, out);
+    ASSERT_EQ(packed + writer.bytes_written(), result.first)
+        << "Unpacked different # of bytes from the # written";
+    if (bit_width == 0) {
+      // If no bits, we can get back as many as we ask for.
+      ASSERT_EQ(num_to_unpack, result.second) << "Unpacked wrong # of values";
+    } else if (bit_width < CHAR_BIT) {
+      // We may get back some garbage values that we didn't actually pack if we
+      // didn't use all of the trailing byte.
+      const int max_packed_values = writer.bytes_written() * CHAR_BIT / bit_width;
+      ASSERT_EQ(min(num_to_unpack, max_packed_values), result.second)
+          << "Unpacked wrong # of values";
+    } else {
+      ASSERT_EQ(num_in_values, result.second) << "Unpacked wrong # of values";
+    }
+
+    for (int i = 0; i < num_in_values; ++i) {
+      EXPECT_EQ(in[i] & mask, out[i]) << "Didn't get back input value " << i;
+    }
+  }
+  UnpackSubset(in, packed, num_in_values, bit_width, aligned);
+}
+
+void UnpackSubset(const uint32_t* in, const uint8_t* packed, int num_in_values,
+    int bit_width, bool aligned) {
+  const int misalignment = aligned ? 0 : 1;
+  for (int num_to_unpack : {1, 10, 77, num_in_values - 7}) {
+    if (num_to_unpack < 0 || num_to_unpack > num_in_values) continue;
+
+    // Size buffers exactly so that ASAN can detect buffer overruns.
+    const int64_t bytes_to_read = BitUtil::RoundUpNumBytes(num_to_unpack * bit_width);
+    AlignedAllocation packed_copy_storage(bytes_to_read + misalignment);
+    uint8_t* packed_copy = packed_copy_storage.data() + misalignment;
+    memcpy(packed_copy, packed, bytes_to_read);
+    AlignedAllocation out_storage(num_to_unpack * sizeof(uint32_t) + misalignment);
+    uint32_t* out = reinterpret_cast<uint32_t*>(out_storage.data() + misalignment);
+    const auto result = BitPacking::UnpackValues(
+        bit_width, packed_copy, bytes_to_read, num_to_unpack, out);
+    ASSERT_EQ(packed_copy + bytes_to_read, result.first) << "Read wrong # of bytes";
+    ASSERT_EQ(num_to_unpack, result.second) << "Unpacked wrong # of values";
+
+    for (int i = 0; i < num_to_unpack; ++i) {
+      ASSERT_EQ(in[i] & ComputeMask(bit_width), out[i]) << "Didn't get back input value "
+                                                         << i;
+    }
+  }
+}
+
+TEST(BitPackingTest, RandomUnpack) {
+  constexpr int NUM_IN_VALUES = 64 * 1024;
+  uint32_t in[NUM_IN_VALUES];
+  mt19937 rng;
+  uniform_int_distribution<uint32_t> dist;
+  std::generate(std::begin(in), std::end(in), [&rng, &dist] { return dist(rng); });
+
+  // Test various odd input lengths to exercise boundary cases for full and partial
+  // batches of 32.
+  vector<int> lengths{NUM_IN_VALUES, NUM_IN_VALUES - 1, NUM_IN_VALUES - 16,
+      NUM_IN_VALUES - 19, NUM_IN_VALUES - 31};
+  for (int i = 0; i < 32; ++i) {
+    lengths.push_back(i);
+  }
+
+  for (int bit_width = 0; bit_width <= 32; ++bit_width) {
+    for (const int length : lengths) {
+      // Test that unpacking to/from aligned and unaligned memory works.
+      for (const bool aligned : {true, false}) {
+        PackUnpack(in, length, bit_width, aligned);
+      }
+    }
+  }
+}
+}
+
+IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-packing.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-packing.h b/be/src/util/bit-packing.h
new file mode 100644
index 0000000..62e5e88
--- /dev/null
+++ b/be/src/util/bit-packing.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_UTIL_BIT_PACKING_H
+#define IMPALA_UTIL_BIT_PACKING_H
+
+#include <cstdint>
+
+#include <utility>
+
+namespace impala {
+
+/// Utilities for manipulating bit-packed values. Bit-packing is a technique for
+/// compressing integer values that do not use the full range of the integer type.
+/// E.g. an array of uint32_t values with range [0, 31] only uses the lower 5 bits
+/// of every uint32_t value, or an array of 0/1 booleans only uses the lowest bit
+/// of each integer.
+///
+/// Bit-packing always has a "bit width" parameter that determines the range of
+/// representable unsigned values: [0, 2^bit_width - 1]. The packed representation
+/// is logically the concatenatation of the lower bits of the input values (in
+/// little-endian order). E.g. the values 1, 2, 3, 4 packed with bit width 4 results
+/// in the two output bytes: [ 0 0 1 0 | 0 0 0 1 ] [ 0 1 0 0 | 0 0 1 1 ]
+///                               2         1           4         3
+///
+/// Packed values can be split across words, e.g. packing 1, 17 with bit_width 5 results
+/// in the two output bytes: [ 0 0 1 | 0 0 0 0 1 ] [ x x x x x x | 1 0 ]
+///            lower bits of 17--^         1         next value     ^--upper bits of 17
+///
+/// Bit widths from 0 to 32 are supported (0 bit width means that every value is 0).
+/// The batched unpacking functions operate on batches of 32 values. This batch size
+/// is convenient because for every supported bit width, the end of a 32 value batch
+/// falls on a byte boundary. It is also large enough to amortise loop overheads.
+class BitPacking {
+ public:
+  /// Unpack bit-packed values with 'bit_width' from 'in' to 'out'. Keeps unpacking until
+  /// either all 'in_bytes' are read or 'num_values' values are unpacked. 'out' must have
+  /// enough space for 'num_values'. 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits
+  /// in OutType. 'in' must point to 'in_bytes' of addressable memory.
+  ///
+  /// Returns a pointer to the byte after the last byte of 'in' that was read and also the
+  /// number of values that were read. If the caller wants to continue reading packed
+  /// values after the last one returned, it must ensure that the next value to unpack
+  /// starts at a byte boundary. This is true if 'num_values' is a multiple of 32, or
+  /// more generally if (bit_width * num_values) % 8 == 0.
+  template <typename OutType>
+  static std::pair<const uint8_t*, int64_t> UnpackValues(int bit_width,
+      const uint8_t* __restrict__ in, int64_t in_bytes, int64_t num_values,
+      OutType* __restrict__ out);
+
+  /// Unpack exactly 32 values of 'bit_width' from 'in' to 'out'. 'in' must point to
+  /// 'in_bytes' of addressable memory, and 'in_bytes' must be at least
+  /// (32 * bit_width / 8). 'out' must have space for 32 OutType values.
+  /// 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits in OutType.
+  template <typename OutType>
+  static const uint8_t* Unpack32Values(int bit_width, const uint8_t* __restrict__ in,
+      int64_t in_bytes, OutType* __restrict__ out);
+
+ private:
+  /// Implementation of Unpack32Values() that uses 32-bit integer loads to
+  /// unpack values with the given BIT_WIDTH from 'in' to 'out'.
+  template <typename OutType, int BIT_WIDTH>
+  static const uint8_t* Unpack32Values(
+      const uint8_t* __restrict__ in, int64_t in_bytes, OutType* __restrict__ out);
+
+  /// Function that unpacks 'num_values' values with the given BIT_WIDTH from 'in' to
+  /// 'out'. 'num_values' can be at most 31. The version with 'bit_width' as an argument
+  /// dispatches based on 'bit_width' to the appropriate templated implementation.
+  template <typename OutType, int BIT_WIDTH>
+  static const uint8_t* UnpackUpTo32Values(const uint8_t* __restrict__ in,
+      int64_t in_bytes, int num_values, OutType* __restrict__ out);
+  template <typename OutType>
+  static const uint8_t* UnpackUpTo32Values(int bit_width, const uint8_t* __restrict__ in,
+      int64_t in_bytes, int num_values, OutType* __restrict__ out);
+};
+}
+
+#endif

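For illustration only (not part of the patch): a round-trip sketch that reproduces the packed layout described in the header comment above, pairing the new BitPacking::UnpackValues() with the existing BitWriter from util/bit-stream-utils, as the new unit test does. The byte values in the comments follow the bit width 4 example (1, 2, 3, 4 -> 0x21, 0x43).

    #include <cstdint>
    #include "util/bit-packing.inline.h"
    #include "util/bit-stream-utils.inline.h"

    void RoundTripSketch() {
      const uint32_t in[4] = {1, 2, 3, 4};
      uint8_t packed[2];  // 4 values * 4 bits = 16 bits = 2 bytes
      impala::BitWriter writer(packed, sizeof(packed));
      for (uint32_t v : in) writer.PutValue(v, 4);
      writer.Flush();
      // packed[0] == 0x21 (values 1 and 2), packed[1] == 0x43 (values 3 and 4).

      uint32_t out[4];
      impala::BitPacking::UnpackValues(4, packed, sizeof(packed), 4, out);
      // 'out' now holds {1, 2, 3, 4}; the returned pair (unused here) reports the
      // end of the consumed input and the number of values unpacked.
    }
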
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-packing.inline.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-packing.inline.h b/be/src/util/bit-packing.inline.h
new file mode 100644
index 0000000..37d51ab
--- /dev/null
+++ b/be/src/util/bit-packing.inline.h
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_UTIL_BIT_PACKING_INLINE_H
+#define IMPALA_UTIL_BIT_PACKING_INLINE_H
+
+#include "util/bit-packing.h"
+
+#include <algorithm>
+#include <type_traits>
+
+#include <boost/preprocessor/repetition/repeat_from_to.hpp>
+
+#include "common/compiler-util.h"
+#include "common/logging.h"
+#include "util/bit-util.h"
+
+namespace impala {
+
+template <typename OutType>
+std::pair<const uint8_t*, int64_t> BitPacking::UnpackValues(int bit_width,
+    const uint8_t* __restrict__ in, int64_t in_bytes, int64_t num_values,
+    OutType* __restrict__ out) {
+  constexpr int BATCH_SIZE = 32;
+  const int64_t max_input_values =
+      bit_width ? (in_bytes * CHAR_BIT) / bit_width : num_values;
+  const int64_t values_to_read = std::min(num_values, max_input_values);
+  const int64_t batches_to_read = values_to_read / BATCH_SIZE;
+  const int64_t remainder_values = values_to_read % BATCH_SIZE;
+  const uint8_t* in_pos = in;
+  OutType* out_pos = out;
+  // First unpack as many full batches as possible.
+  for (int64_t i = 0; i < batches_to_read; ++i) {
+    in_pos = Unpack32Values<OutType>(bit_width, in_pos, in_bytes, out_pos);
+    out_pos += BATCH_SIZE;
+    in_bytes -= (BATCH_SIZE * bit_width) / CHAR_BIT;
+  }
+  // Then unpack the final partial batch.
+  if (remainder_values > 0) {
+    in_pos = UnpackUpTo32Values<OutType>(bit_width,
+        in_pos, in_bytes, remainder_values, out_pos);
+  }
+  return std::make_pair(in_pos, values_to_read);
+}
+
+// Loop body of unrolled loop that unpacks the value. BIT_WIDTH is the bit width of
+// the packed values. 'in_buf' is the start of the input buffer and 'out_vals' is the
+// start of the output values array. This function unpacks the VALUE_IDX'th packed value
+// from 'in_buf'.
+//
+// This implements essentially the same algorithm as the (Apache-licensed) code in
+// bpacking.c at https://github.com/lemire/FrameOfReference/, but is much more compact
+// because it uses templates rather than source-level unrolling of all combinations.
+//
+// After the template parameters is expanded and constants are propagated, all branches
+// and offset/shift calculations should be optimized out, leaving only shifts by constants
+// and bitmasks by constants. Calls to this must be stamped out manually or with
+// BOOST_PP_REPEAT_FROM_TO: experimentation revealed that the GCC 4.9.2 optimiser was
+// not able to fully propagate constants and remove branches when this was called from
+// inside a for loop with constant bounds with VALUE_IDX changed to a function argument.
+template <int BIT_WIDTH, int VALUE_IDX>
+inline uint32_t ALWAYS_INLINE UnpackValue(const uint8_t* __restrict__ in_buf) {
+  constexpr uint32_t LOAD_BIT_WIDTH = sizeof(uint32_t) * CHAR_BIT;
+  static_assert(BIT_WIDTH <= LOAD_BIT_WIDTH, "BIT_WIDTH > LOAD_BIT_WIDTH");
+  static_assert(VALUE_IDX >= 0 && VALUE_IDX < 32, "0 <= VALUE_IDX < 32");
+  // The index of the first bit of the value, relative to the start of 'in_buf'.
+  constexpr uint32_t FIRST_BIT = VALUE_IDX * BIT_WIDTH;
+  constexpr uint32_t IN_WORD_IDX = FIRST_BIT / LOAD_BIT_WIDTH;
+  constexpr uint32_t FIRST_BIT_OFFSET = FIRST_BIT % LOAD_BIT_WIDTH;
+  // Index of bit after last bit of this value, relative to start of IN_WORD_IDX.
+  constexpr uint32_t END_BIT_OFFSET = FIRST_BIT_OFFSET + BIT_WIDTH;
+
+  const uint32_t* in_words = reinterpret_cast<const uint32_t*>(in_buf);
+  // The lower bits of the value come from the first word.
+  const uint32_t lower_bits =
+      BIT_WIDTH > 0 ? in_words[IN_WORD_IDX] >> FIRST_BIT_OFFSET : 0U;
+  if (END_BIT_OFFSET < LOAD_BIT_WIDTH) {
+    // All bits of the value are in the first word, but we need to mask out upper bits
+    // that belong to the next value.
+    return lower_bits % (1UL << BIT_WIDTH);
+  } else if (END_BIT_OFFSET == LOAD_BIT_WIDTH) {
+    // This value was exactly the uppermost bits of the first word - no masking required.
+    return lower_bits;
+  } else {
+    DCHECK_GT(END_BIT_OFFSET, LOAD_BIT_WIDTH);
+    DCHECK_LT(VALUE_IDX, 31)
+        << "Should not go down this branch for last value with no trailing bits.";
+    // Value is split between words, so grab trailing bits from the next word.
+    // Force into [0, LOAD_BIT_WIDTH) to avoid spurious shift >= width of type warning.
+    constexpr uint32_t NUM_TRAILING_BITS =
+        END_BIT_OFFSET < LOAD_BIT_WIDTH ? 0 : END_BIT_OFFSET - LOAD_BIT_WIDTH;
+    const uint32_t trailing_bits = in_words[IN_WORD_IDX + 1] % (1UL << NUM_TRAILING_BITS);
+    // Force into [0, LOAD_BIT_WIDTH) to avoid spurious shift >= width of type warning.
+    constexpr uint32_t TRAILING_BITS_SHIFT =
+        BIT_WIDTH == 32 ? 0 : (BIT_WIDTH - NUM_TRAILING_BITS);
+    return lower_bits | (trailing_bits << TRAILING_BITS_SHIFT);
+  }
+}
+
+template <typename OutType, int BIT_WIDTH>
+const uint8_t* BitPacking::Unpack32Values(
+    const uint8_t* __restrict__ in, int64_t in_bytes, OutType* __restrict__ out) {
+  static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
+  static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
+  static_assert(
+      BIT_WIDTH <= sizeof(OutType) * CHAR_BIT, "BIT_WIDTH too high for output type");
+  constexpr int BYTES_TO_READ = BitUtil::RoundUpNumBytes(32 * BIT_WIDTH);
+  DCHECK_GE(in_bytes, BYTES_TO_READ);
+
+// Call UnpackValue for 0 <= i < 32.
+#pragma push_macro("UNPACK_VALUES_CALL")
+#define UNPACK_VALUE_CALL(ignore1, i, ignore2) \
+  out[i] = static_cast<OutType>(UnpackValue<BIT_WIDTH, i>(in));
+  BOOST_PP_REPEAT_FROM_TO(0, 32, UNPACK_VALUE_CALL, ignore);
+#pragma pop_macro("UNPACK_VALUES_CALL")
+  return in + BYTES_TO_READ;
+}
+
+template <typename OutType>
+const uint8_t* BitPacking::Unpack32Values(int bit_width, const uint8_t* __restrict__ in,
+    int64_t in_bytes, OutType* __restrict__ out) {
+  switch (bit_width) {
+    // Expand cases from 0 to 32.
+#pragma push_macro("UNPACK_VALUES_CASE")
+#define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
+    case i: return Unpack32Values<OutType, i>(in, in_bytes, out);
+    BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
+#pragma pop_macro("UNPACK_VALUES_CASE")
+    default: DCHECK(false); return in;
+  }
+}
+
+template <typename OutType>
+const uint8_t* BitPacking::UnpackUpTo32Values(int bit_width, const uint8_t* __restrict__ in,
+    int64_t in_bytes, int num_values, OutType* __restrict__ out) {
+  switch (bit_width) {
+    // Expand cases from 0 to 32.
+#pragma push_macro("UNPACK_VALUES_CASE")
+#define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
+    case i: return UnpackUpTo32Values<OutType, i>(in, in_bytes, num_values, out);
+    BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
+#pragma pop_macro("UNPACK_VALUES_CASE")
+    default: DCHECK(false); return in;
+  }
+}
+
+template <typename OutType, int BIT_WIDTH>
+const uint8_t* BitPacking::UnpackUpTo32Values(const uint8_t* __restrict__ in,
+    int64_t in_bytes, int num_values, OutType* __restrict__ out) {
+  static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
+  static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
+  static_assert(
+      BIT_WIDTH <= sizeof(OutType) * CHAR_BIT, "BIT_WIDTH too high for output type");
+  constexpr int MAX_BATCH_SIZE = 31;
+  const int BYTES_TO_READ = BitUtil::RoundUpNumBytes(num_values * BIT_WIDTH);
+  DCHECK_GE(in_bytes, BYTES_TO_READ);
+  DCHECK_LE(num_values, MAX_BATCH_SIZE);
+
+  // Make sure the buffer is at least 1 byte.
+  constexpr int TMP_BUFFER_SIZE = BIT_WIDTH ?
+    (BIT_WIDTH * (MAX_BATCH_SIZE + 1)) / CHAR_BIT : 1;
+  uint8_t tmp_buffer[TMP_BUFFER_SIZE];
+
+  const uint8_t* in_buffer = in;
+  // Copy into padded temporary buffer to avoid reading past the end of 'in' if the
+  // last 32-bit load would go past the end of the buffer.
+  if (BitUtil::RoundUp(BYTES_TO_READ, sizeof(uint32_t)) > in_bytes) {
+    memcpy(tmp_buffer, in, BYTES_TO_READ);
+    in_buffer = tmp_buffer;
+  }
+
+  // Use switch with fall-through cases to minimise branching.
+  switch (num_values) {
+// Expand cases from 31 down to 1.
+#pragma push_macro("UNPACK_VALUES_CASE")
+#define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
+  case 31 - i: out[30 - i] = \
+      static_cast<OutType>(UnpackValue<BIT_WIDTH, 30 - i>(in_buffer));
+    BOOST_PP_REPEAT_FROM_TO(0, 31, UNPACK_VALUES_CASE, ignore);
+#pragma pop_macro("UNPACK_VALUES_CASE")
+    case 0: break;
+    default: DCHECK(false);
+  }
+  return in + BYTES_TO_READ;
+}
+}
+
+#endif

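To make the constant-propagation claim in the UnpackValue() comment concrete, here is one hand-worked instantiation. This is a sketch of what the template collapses to once the compiler propagates the constants, not actual compiler output:

    // UnpackValue<BIT_WIDTH=5, VALUE_IDX=6>:
    //   FIRST_BIT           = 6 * 5   = 30
    //   IN_WORD_IDX         = 30 / 32 = 0
    //   FIRST_BIT_OFFSET    = 30 % 32 = 30
    //   END_BIT_OFFSET      = 30 + 5  = 35   (> 32: the value straddles two words)
    //   NUM_TRAILING_BITS   = 35 - 32 = 3
    //   TRAILING_BITS_SHIFT = 5 - 3   = 2
    // so the function body reduces to fixed shifts and masks:
    //   return (in_words[0] >> 30) | ((in_words[1] & 0x7U) << 2);
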
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-stream-utils.h b/be/src/util/bit-stream-utils.h
index ce159cb..5acdeee 100644
--- a/be/src/util/bit-stream-utils.h
+++ b/be/src/util/bit-stream-utils.h
@@ -98,13 +98,19 @@ class BitWriter {
 class BitReader {
  public:
   /// 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
-  BitReader(uint8_t* buffer, int buffer_len) {
-    Reset(buffer, buffer_len);
-  }
+  /// Does not take ownership of the buffer.
+  BitReader(const uint8_t* buffer, int buffer_len) { Reset(buffer, buffer_len); }
 
   BitReader() : buffer_(NULL), max_bytes_(0) {}
 
-  void Reset(uint8_t* buffer, int buffer_len) {
+  // The implicit copy constructor is left defined. If a BitReader is copied, the
+  // two copies do not share any state. Invoking functions on either copy continues
+  // reading from the current read position without modifying the state of the other
+  // copy.
+
+  /// Resets the read to start reading from the start of 'buffer'. The buffer's
+  /// length is 'buffer_len'. Does not take ownership of the buffer.
+  void Reset(const uint8_t* buffer, int buffer_len) {
     buffer_ = buffer;
     max_bytes_ = buffer_len;
     byte_offset_ = 0;
@@ -141,7 +147,7 @@ class BitReader {
   static const int MAX_BITWIDTH = 32;
 
  private:
-  uint8_t* buffer_;
+  const uint8_t* buffer_;
   int max_bytes_;
 
   /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is

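A small sketch of the copy behaviour documented above (illustrative only; the bytes follow the 4-bit packing example, so the reader yields 1, 2, 3, 4 in turn):

    const uint8_t buf[] = {0x21, 0x43};
    impala::BitReader r1(buf, sizeof(buf));
    uint32_t v;
    r1.GetValue<uint32_t>(4, &v);  // v == 1, advances r1
    impala::BitReader r2 = r1;     // r2 starts at r1's current read position
    r2.GetValue<uint32_t>(4, &v);  // v == 2, r1 is not affected
    r1.GetValue<uint32_t>(4, &v);  // v == 2 again, read from r1's own position
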
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-stream-utils.inline.h b/be/src/util/bit-stream-utils.inline.h
index fd77974..41648e3 100644
--- a/be/src/util/bit-stream-utils.inline.h
+++ b/be/src/util/bit-stream-utils.inline.h
@@ -86,7 +86,7 @@ inline bool BitWriter::PutVlqInt(int32_t v) {
 
 template<typename T>
 inline bool BitReader::GetValue(int num_bits, T* v) {
-  DCHECK(buffer_ != NULL);
+  DCHECK(num_bits == 0 || buffer_ != NULL);
   // TODO: revisit this limit if necessary
   DCHECK_LE(num_bits, MAX_BITWIDTH);
   DCHECK_LE(num_bits, sizeof(T) * 8);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index f947a17..33dd02b 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -41,17 +41,17 @@ using boost::make_unsigned;
 class BitUtil {
  public:
   /// Returns the ceil of value/divisor
-  static inline int64_t Ceil(int64_t value, int64_t divisor) {
+  constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) {
     return value / divisor + (value % divisor != 0);
   }
 
   /// Returns 'value' rounded up to the nearest multiple of 'factor'
-  static inline int64_t RoundUp(int64_t value, int64_t factor) {
+  constexpr static inline int64_t RoundUp(int64_t value, int64_t factor) {
     return (value + (factor - 1)) / factor * factor;
   }
 
   /// Returns 'value' rounded down to the nearest multiple of 'factor'
-  static inline int64_t RoundDown(int64_t value, int64_t factor) {
+  constexpr static inline int64_t RoundDown(int64_t value, int64_t factor) {
     return (value / factor) * factor;
   }
 
@@ -85,34 +85,28 @@ class BitUtil {
   /// Specialized round up and down functions for frequently used factors,
   /// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64).
   /// Returns the rounded up number of bytes that fit the number of bits.
-  static inline uint32_t RoundUpNumBytes(uint32_t bits) {
+  constexpr static inline uint32_t RoundUpNumBytes(uint32_t bits) {
     return (bits + 7) >> 3;
   }
 
   /// Returns the rounded down number of bytes that fit the number of bits.
-  static inline uint32_t RoundDownNumBytes(uint32_t bits) {
-    return bits >> 3;
-  }
+  constexpr static inline uint32_t RoundDownNumBytes(uint32_t bits) { return bits >> 3; }
 
   /// Returns the rounded up to 32 multiple. Used for conversions of bits to i32.
-  static inline uint32_t RoundUpNumi32(uint32_t bits) {
+  constexpr static inline uint32_t RoundUpNumi32(uint32_t bits) {
     return (bits + 31) >> 5;
   }
 
   /// Returns the rounded up 32 multiple.
-  static inline uint32_t RoundDownNumi32(uint32_t bits) {
-    return bits >> 5;
-  }
+  constexpr static inline uint32_t RoundDownNumi32(uint32_t bits) { return bits >> 5; }
 
   /// Returns the rounded up to 64 multiple. Used for conversions of bits to i64.
-  static inline uint32_t RoundUpNumi64(uint32_t bits) {
+  constexpr static inline uint32_t RoundUpNumi64(uint32_t bits) {
     return (bits + 63) >> 6;
   }
 
   /// Returns the rounded down to 64 multiple.
-  static inline uint32_t RoundDownNumi64(uint32_t bits) {
-    return bits >> 6;
-  }
+  constexpr static inline uint32_t RoundDownNumi64(uint32_t bits) { return bits >> 6; }
 
   /// Non hw accelerated pop count.
   /// TODO: we don't use this in any perf sensitive code paths currently.  There
@@ -172,51 +166,51 @@ class BitUtil {
   /// swap for len > 16.
   static void ByteSwap(void* dest, const void* source, int len);
 
-  /// Converts to big endian format (if not already in big endian) from the
-  /// machine's native endian format.
+/// Converts to big endian format (if not already in big endian) from the
+/// machine's native endian format.
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t  ToBigEndian(int64_t value)  { return ByteSwap(value); }
+  static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); }
   static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t  ToBigEndian(int32_t value)  { return ByteSwap(value); }
+  static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); }
   static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t  ToBigEndian(int16_t value)  { return ByteSwap(value); }
+  static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); }
   static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
 #else
-  static inline int64_t  ToBigEndian(int64_t val)  { return val; }
+  static inline int64_t ToBigEndian(int64_t val) { return val; }
   static inline uint64_t ToBigEndian(uint64_t val) { return val; }
-  static inline int32_t  ToBigEndian(int32_t val)  { return val; }
+  static inline int32_t ToBigEndian(int32_t val) { return val; }
   static inline uint32_t ToBigEndian(uint32_t val) { return val; }
-  static inline int16_t  ToBigEndian(int16_t val)  { return val; }
+  static inline int16_t ToBigEndian(int16_t val) { return val; }
   static inline uint16_t ToBigEndian(uint16_t val) { return val; }
 #endif
 
-  /// Converts from big endian format to the machine's native endian format.
+/// Converts from big endian format to the machine's native endian format.
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t  FromBigEndian(int64_t value)  { return ByteSwap(value); }
+  static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); }
   static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t  FromBigEndian(int32_t value)  { return ByteSwap(value); }
+  static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); }
   static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t  FromBigEndian(int16_t value)  { return ByteSwap(value); }
+  static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); }
   static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
 #else
-  static inline int64_t  FromBigEndian(int64_t val)  { return val; }
+  static inline int64_t FromBigEndian(int64_t val) { return val; }
   static inline uint64_t FromBigEndian(uint64_t val) { return val; }
-  static inline int32_t  FromBigEndian(int32_t val)  { return val; }
+  static inline int32_t FromBigEndian(int32_t val) { return val; }
   static inline uint32_t FromBigEndian(uint32_t val) { return val; }
-  static inline int16_t  FromBigEndian(int16_t val)  { return val; }
+  static inline int16_t FromBigEndian(int16_t val) { return val; }
   static inline uint16_t FromBigEndian(uint16_t val) { return val; }
 #endif
 
   /// Returns true if 'value' is a non-negative 32-bit integer.
-  static inline bool IsNonNegative32Bit(int64_t value) {
+  constexpr static inline bool IsNonNegative32Bit(int64_t value) {
     return static_cast<uint64_t>(value) <= std::numeric_limits<int32_t>::max();
   }
 
   /// Logical right shift for signed integer types
   /// This is needed because the C >> operator does arithmetic right shift
   /// Negative shift amounts lead to undefined behavior
-  template<typename T>
-  static T ShiftRightLogical(T v, int shift) {
+  template <typename T>
+  constexpr static T ShiftRightLogical(T v, int shift) {
     // Conversion to unsigned ensures most significant bits always filled with 0's
     return static_cast<typename make_unsigned<T>::type>(v) >> shift;
   }
@@ -230,15 +224,15 @@ class BitUtil {
 
   /// Set a specific bit to 1
   /// Behavior when bitpos is negative is undefined
-  template<typename T>
-  static T SetBit(T v, int bitpos) {
+  template <typename T>
+  constexpr static T SetBit(T v, int bitpos) {
     return v | (static_cast<T>(0x1) << bitpos);
   }
 
   /// Set a specific bit to 0
   /// Behavior when bitpos is negative is undefined
-  template<typename T>
-  static T UnsetBit(T v, int bitpos) {
+  template <typename T>
+  constexpr static T UnsetBit(T v, int bitpos) {
     return v & ~(static_cast<T>(0x1) << bitpos);
   }
 };
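
For readers skimming the hunk above: ToBigEndian/FromBigEndian are byte swaps on
little-endian hosts and no-ops on big-endian hosts, and ShiftRightLogical obtains a
zero-filling shift by casting the signed value to its unsigned counterpart. A minimal
standalone sketch of both ideas follows; the GCC/Clang byte-swap builtin stands in for
Impala's own ByteSwap, and the example values are illustrative only:

  #include <endian.h>   // __BYTE_ORDER, __LITTLE_ENDIAN (glibc/Linux)
  #include <cstdint>
  #include <iostream>
  #include <type_traits>

  // Big-endian conversion: swap bytes on little-endian hosts, identity otherwise.
  static inline uint64_t ToBigEndian(uint64_t v) {
  #if __BYTE_ORDER == __LITTLE_ENDIAN
    return __builtin_bswap64(v);  // GCC/Clang builtin used here instead of ByteSwap()
  #else
    return v;
  #endif
  }

  // Logical right shift for signed types: the cast to the unsigned counterpart
  // guarantees the vacated high bits are zero-filled rather than sign-extended.
  template <typename T>
  constexpr T ShiftRightLogical(T v, int shift) {
    return static_cast<typename std::make_unsigned<T>::type>(v) >> shift;
  }

  int main() {
    std::cout << std::hex
              << ToBigEndian(0x0102030405060708ULL) << "\n"      // 807060504030201
              << ShiftRightLogical(int32_t(-8), 1) << std::endl; // 7ffffffc, not fffffffc
    return 0;
  }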

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/openssl-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc
index b0238bf..ef1b28e 100644
--- a/be/src/util/openssl-util-test.cc
+++ b/be/src/util/openssl-util-test.cc
@@ -15,17 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <random>
+
 #include <gtest/gtest.h>
 #include <openssl/rand.h>
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/random/uniform_int.hpp>
 
 #include "common/init.h"
 #include "testutil/gtest-util.h"
 #include "util/openssl-util.h"
 
-using boost::uniform_int;
-using boost::mt19937_64;
+using std::uniform_int_distribution;
+using std::mt19937_64;
 
 namespace impala {
 
@@ -40,7 +40,7 @@ class OpenSSLUtilTest : public ::testing::Test {
     DCHECK_EQ(len % 8, 0);
     for (int64_t i = 0; i < len; i += sizeof(uint64_t)) {
       *(reinterpret_cast<uint64_t*>(&data[i])) =
-          uniform_int<uint64_t>(0, numeric_limits<uint64_t>::max())(rng_);
+          uniform_int_distribution<uint64_t>(0, numeric_limits<uint64_t>::max())(rng_);
     }
   }
 


[28/32] incubator-impala git commit: IMPALA-2916: Add warning to query profile if debug build

Posted by ta...@apache.org.
IMPALA-2916: Add warning to query profile if debug build

Change-Id: I85ce4d4a5624382203e6b2c8f5b96d04c4482f37
Reviewed-on: http://gerrit.cloudera.org:8080/4588
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0686cc4e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0686cc4e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0686cc4e

Branch: refs/heads/hadoop-next
Commit: 0686cc4e1f68b9cdacd6edfb4acb090a7ea0f865
Parents: df680cf
Author: Lars Volker <lv...@cloudera.com>
Authored: Sun Oct 2 22:05:19 2016 +0200
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 06:12:51 2016 +0000

----------------------------------------------------------------------
 be/src/service/query-exec-state.cc | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0686cc4e/be/src/service/query-exec-state.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-exec-state.cc b/be/src/service/query-exec-state.cc
index 1532ecf..7ec8c27 100644
--- a/be/src/service/query-exec-state.cc
+++ b/be/src/service/query-exec-state.cc
@@ -85,6 +85,10 @@ ImpalaServer::QueryExecState::QueryExecState(
     frontend_(frontend),
     parent_server_(server),
     start_time_(TimestampValue::LocalTime()) {
+#ifndef NDEBUG
+  profile_.AddInfoString("DEBUG MODE WARNING", "Query profile created while running a "
+      "DEBUG build of Impala. Use RELEASE builds to measure query performance.");
+#endif
   row_materialization_timer_ = ADD_TIMER(&server_profile_, "RowMaterializationTimer");
   client_wait_timer_ = ADD_TIMER(&server_profile_, "ClientFetchWaitTimer");
   query_events_ = summary_profile_.AddEventSequence("Query Timeline");
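
The added block relies on the standard NDEBUG convention: release builds define NDEBUG,
so the info string is compiled in only for debug builds. A tiny sketch of the pattern;
the Profile struct below is a made-up stand-in, not Impala's actual profile class:

  #include <iostream>
  #include <map>
  #include <string>

  // Stand-in for a query profile; only the info-string map matters here.
  struct Profile {
    std::map<std::string, std::string> info;
    void AddInfoString(const std::string& k, const std::string& v) { info[k] = v; }
  };

  int main() {
    Profile profile;
  #ifndef NDEBUG
    // Compiled in for debug builds only; release builds (-DNDEBUG) drop it entirely.
    profile.AddInfoString("DEBUG MODE WARNING",
        "Query profile created while running a DEBUG build of Impala.");
  #endif
    std::cout << profile.info.size() << " info string(s)" << std::endl;
    return 0;
  }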


[23/32] incubator-impala git commit: IMPALA-4299: add buildall.sh option to start test cluster

Posted by ta...@apache.org.
IMPALA-4299: add buildall.sh option to start test cluster

A previous commit "IMPALA-4259: build Impala without any test
cluster setup" altered some undocumented side-effects of
buildall.sh.

Previously the following commands reconfigured and restarted the test
cluster. It worked because buildall.sh unconditionally regenerated
the test cluster configs.

  ./buildall.sh -notests && ./testdata/bin/run-all.sh
  ./buildall.sh -noclean -notests && ./testdata/bin/run-all.sh

Instead of restoring the old behaviour and continuing to encourage
mixing low- and high-level scripts like testdata/bin/run-all.sh in the
"standard" workflow, this commit adds another high-level option to
buildall.sh, -start_minicluster, which restarts the minicluster with
fresh configs. The above commands can be replaced with:

  ./buildall.sh -notests -start_minicluster
  ./buildall.sh -notests -noclean -start_minicluster

Change-Id: I0ab3461f8ff3de49b3f28a0dc22fa0a6d5569da5
Reviewed-on: http://gerrit.cloudera.org:8080/4734
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ef762b73
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ef762b73
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ef762b73

Branch: refs/heads/hadoop-next
Commit: ef762b73a1ddb8842bb5bac5bfed733b9a71fc1b
Parents: 0480253
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Oct 17 10:04:39 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Oct 17 22:19:06 2016 +0000

----------------------------------------------------------------------
 buildall.sh | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ef762b73/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index ebd3eb3..a7858a3 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -46,6 +46,7 @@ TESTS_ACTION=1
 FORMAT_CLUSTER=0
 FORMAT_METASTORE=0
 FORMAT_SENTRY_POLICY_DB=0
+NEED_MINICLUSTER=0
 START_IMPALA_CLUSTER=0
 IMPALA_KERBERIZE=0
 SNAPSHOT_FILE=
@@ -133,6 +134,9 @@ do
       METASTORE_SNAPSHOT_FILE="$(readlink -f "$METASTORE_SNAPSHOT_FILE")"
       shift;
       ;;
+    -start_minicluster)
+      NEED_MINICLUSTER=1
+      ;;
     -start_impala_cluster)
       START_IMPALA_CLUSTER=1
       ;;
@@ -169,6 +173,10 @@ do
       echo "[-asan] : Address sanitizer build [Default: False]"
       echo "[-skiptests] : Skips execution of all tests"
       echo "[-notests] : Skips building and execution of all tests"
+      echo "[-start_minicluster] : Start test cluster including Impala and all"\
+            " its dependencies. If already running, all services are restarted."\
+            " Regenerates test cluster config files. [Default: True if running "\
+            " tests or loading data, False otherwise]"
       echo "[-start_impala_cluster] : Start Impala minicluster after build"\
            " [Default: False]"
       echo "[-testpairwise] : Run tests in 'pairwise' mode (increases"\
@@ -191,8 +199,12 @@ Examples of common tasks:
   # Build and skip tests
   ./buildall.sh -skiptests
 
-  # Incrementally rebuild and skip tests. Keeps existing Hadoop services running.
-  ./buildall.sh -skiptests -noclean
+  # Build, then restart the minicluster and Impala with fresh configs.
+  ./buildall.sh -notests -start_minicluster -start_impala_cluster
+
+  # Incrementally rebuild and skip tests. Keeps existing minicluster services running
+  # and restart Impala.
+  ./buildall.sh -skiptests -noclean -start_impala_cluster
 
   # Build, load a snapshot file, run tests
   ./buildall.sh -snapshot_file <file>
@@ -256,7 +268,6 @@ if [[ -z "$METASTORE_SNAPSHOT_FILE" && "${TARGET_FILESYSTEM}" != "hdfs" &&
   exit 1
 fi
 
-NEED_MINICLUSTER=0
 if [[ $TESTS_ACTION -eq 1 || $TESTDATA_ACTION -eq 1 || $FORMAT_CLUSTER -eq 1 ||
       $FORMAT_METASTORE -eq 1 || $FORMAT_SENTRY_POLICY_DB -eq 1 || -n "$SNAPSHOT_FILE" ||
       -n "$METASTORE_SNAPSHOT_FILE" ]]; then


[32/32] incubator-impala git commit: Remove Llama dependency

Posted by ta...@apache.org.
Remove Llama dependency

This change removes Impala's build-time dependency on LLAMA.

Note that the LLAMA MiniKDC is left in - it is a test
utility that does not depend on LLAMA itself.
IMPALA-4292 tracks cleaning this up.

Testing:
Ran a private build to verify that all tests pass.

Change-Id: If2e5e21d8047097d56062ded11b0832a1d397fe0
Reviewed-on: http://gerrit.cloudera.org:8080/4739
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ee2a06d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ee2a06d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ee2a06d8

Branch: refs/heads/hadoop-next
Commit: ee2a06d827a94e659e7d92fe027eeaab6a9ac680
Parents: 3f5380d
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Oct 13 13:24:43 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Tue Oct 18 16:35:58 2016 +0000

----------------------------------------------------------------------
 bin/bootstrap_toolchain.py                   |  2 +-
 bin/generate_minidump_collection_testdata.py |  1 -
 bin/impala-config.sh                         |  2 --
 bin/start-impala-cluster.py                  |  7 ++-----
 common/thrift/ImpalaInternalService.thrift   |  6 ++++++
 common/thrift/ImpalaService.thrift           |  5 ++++-
 common/thrift/generate_metrics.py            |  4 ----
 common/thrift/metrics.json                   | 20 --------------------
 infra/deploy/deploy.py                       |  9 ---------
 9 files changed, 13 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 6524e82..3f161c8 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -351,5 +351,5 @@ if __name__ == "__main__":
 
   # Download the CDH components if necessary.
   if os.getenv("DOWNLOAD_CDH_COMPONENTS", "false") == "true":
-    cdh_components = ["hadoop", "hbase", "hive", "llama", "llama-minikdc", "sentry"]
+    cdh_components = ["hadoop", "hbase", "hive", "llama-minikdc", "sentry"]
     download_cdh_components(toolchain_root, cdh_components)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/bin/generate_minidump_collection_testdata.py
----------------------------------------------------------------------
diff --git a/bin/generate_minidump_collection_testdata.py b/bin/generate_minidump_collection_testdata.py
index a408e05..350f0e3 100755
--- a/bin/generate_minidump_collection_testdata.py
+++ b/bin/generate_minidump_collection_testdata.py
@@ -50,7 +50,6 @@ options, args = parser.parse_args()
 CONFIG_FILE = '''-beeswax_port=21000
 -fe_port=21000
 -be_port=22000
--llama_callback_port=28000
 -hs2_port=21050
 -enable_webserver=true
 -mem_limit=108232130560

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2a25248..38bdc69 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -299,7 +299,6 @@ export IMPALA_HADOOP_VERSION=${IMPALA_HADOOP_VERSION:-2.6.0-cdh5.10.0-SNAPSHOT}
 export IMPALA_HBASE_VERSION=${IMPALA_HBASE_VERSION:-1.2.0-cdh5.10.0-SNAPSHOT}
 export IMPALA_HIVE_VERSION=${IMPALA_HIVE_VERSION:-1.1.0-cdh5.10.0-SNAPSHOT}
 export IMPALA_SENTRY_VERSION=${IMPALA_SENTRY_VERSION:-1.5.1-cdh5.10.0-SNAPSHOT}
-export IMPALA_LLAMA_VERSION=${IMPALA_LLAMA_VERSION:-1.0.0-cdh5.10.0-SNAPSHOT}
 export IMPALA_PARQUET_VERSION=${IMPALA_PARQUET_VERSION:-1.5.0-cdh5.10.0-SNAPSHOT}
 export IMPALA_LLAMA_MINIKDC_VERSION=${IMPALA_LLAMA_MINIKDC_VERSION:-1.0.0}
 
@@ -339,7 +338,6 @@ HADOOP_CLASSPATH+=":$LZO_JAR_PATH"
 export MINI_DFS_BASE_DATA_DIR="$IMPALA_HOME/cdh-${CDH_MAJOR_VERSION}-hdfs-data"
 export PATH="$HADOOP_HOME/bin:$PATH"
 
-export LLAMA_HOME="$CDH_COMPONENTS_HOME/llama-${IMPALA_LLAMA_VERSION}/"
 export MINIKDC_HOME="$CDH_COMPONENTS_HOME/llama-minikdc-${IMPALA_LLAMA_MINIKDC_VERSION}"
 export SENTRY_HOME="$CDH_COMPONENTS_HOME/sentry-${IMPALA_SENTRY_VERSION}"
 export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/bin/start-impala-cluster.py
----------------------------------------------------------------------
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index b92fcf1..3ea338a 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -85,8 +85,7 @@ MINI_IMPALA_CLUSTER_PATH = IMPALAD_PATH + " -in-process"
 
 IMPALA_SHELL = os.path.join(IMPALA_HOME, 'bin/impala-shell.sh')
 IMPALAD_PORTS = ("-beeswax_port=%d -hs2_port=%d  -be_port=%d "
-                 "-state_store_subscriber_port=%d -webserver_port=%d "
-                 "-llama_callback_port=%d")
+                 "-state_store_subscriber_port=%d -webserver_port=%d")
 JVM_ARGS = "-jvm_debug_port=%s -jvm_args=%s"
 BE_LOGGING_ARGS = "-log_filename=%s -log_dir=%s -v=%s -logbufsecs=5 -max_log_files=%s"
 CLUSTER_WAIT_TIMEOUT_IN_SECONDS = 240
@@ -188,12 +187,10 @@ def build_impalad_port_args(instance_num):
   BASE_BE_PORT = 22000
   BASE_STATE_STORE_SUBSCRIBER_PORT = 23000
   BASE_WEBSERVER_PORT = 25000
-  BASE_LLAMA_CALLBACK_PORT = 28000
   return IMPALAD_PORTS % (BASE_BEESWAX_PORT + instance_num, BASE_HS2_PORT + instance_num,
                           BASE_BE_PORT + instance_num,
                           BASE_STATE_STORE_SUBSCRIBER_PORT + instance_num,
-                          BASE_WEBSERVER_PORT + instance_num,
-                          BASE_LLAMA_CALLBACK_PORT + instance_num)
+                          BASE_WEBSERVER_PORT + instance_num)
 
 def build_impalad_logging_args(instance_num, service_name):
   log_file_path = os.path.join(options.log_dir, "%s.INFO" % service_name)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 3ee54ae..736de34 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -71,11 +71,14 @@ struct TQueryOptions {
   6: optional i64 max_scan_range_length = 0
   7: optional i32 num_scanner_threads = 0
 
+  // TODO: IMPALA-4306: retire at compatibility-breaking version
   8: optional i32 max_io_buffers = 0              // Deprecated in 1.1
   9: optional bool allow_unsupported_formats = 0
+  // TODO: IMPALA-4306: retire at compatibility-breaking version
   10: optional i64 default_order_by_limit = -1    // Deprecated in 1.4
   11: optional string debug_action = ""
   12: optional i64 mem_limit = 0
+  // TODO: IMPALA-4306: retire at compatibility-breaking version
   13: optional bool abort_on_default_limit_exceeded = 0 // Deprecated in 1.4
   14: optional CatalogObjects.THdfsCompression compression_codec
   15: optional i32 hbase_caching = 0
@@ -89,10 +92,12 @@ struct TQueryOptions {
   20: optional string request_pool
 
   // Per-host virtual CPU cores required for query (only relevant with RM).
+  // TODO: IMPALA-3271: retire at compatibility-breaking version
   21: optional i16 v_cpu_cores
 
   // Max time in milliseconds the resource broker should wait for
   // a resource request to be granted by Llama/Yarn (only relevant with RM).
+  // TODO: IMPALA-3271: retire at compatibility-breaking version
   22: optional i64 reservation_request_timeout
 
   // Disables taking advantage of HDFS caching. This has two parts:
@@ -104,6 +109,7 @@ struct TQueryOptions {
   24: optional bool disable_outermost_topn = 0
 
   // Override for initial memory reservation size if RM is enabled.
+  // TODO: IMPALA-3271: retire at compatibility-breaking version
   25: optional i64 rm_initial_mem = 0
 
   // Time, in s, before a query will be timed out if it is inactive. May not exceed

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 129be2d..794c140 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -134,21 +134,24 @@ enum TImpalaQueryOptions {
   REQUEST_POOL,
 
   // Per-host virtual CPU cores required for query (only relevant with RM).
+  // TODO: IMPALA-3271: retire at compatibility-breaking version
   V_CPU_CORES,
 
   // Max time in milliseconds the resource broker should wait for
   // a resource request to be granted by Llama/Yarn (only relevant with RM).
+  // TODO: IMPALA-3271: retire at compatibility-breaking version
   RESERVATION_REQUEST_TIMEOUT,
 
   // if true, disables cached reads. This option has no effect if REPLICA_PREFERENCE is
   // configured.
-  // TODO: Retire in C6
+  // TODO: IMPALA-4306: retire at compatibility-breaking version
   DISABLE_CACHED_READS,
 
   // Temporary testing flag
   DISABLE_OUTERMOST_TOPN,
 
   // Size of initial memory reservation when RM is enabled
+  // TODO: IMPALA-3271: retire at compatibility-breaking version
   RM_INITIAL_MEM,
 
   // Time, in s, before a query will be timed out if it is inactive. May not exceed

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/common/thrift/generate_metrics.py
----------------------------------------------------------------------
diff --git a/common/thrift/generate_metrics.py b/common/thrift/generate_metrics.py
index e878a29..f1f57cd 100755
--- a/common/thrift/generate_metrics.py
+++ b/common/thrift/generate_metrics.py
@@ -166,10 +166,6 @@ MDL_BASE = """
       "name" : "CATALOGSERVER",
       "nameForCrossEntityAggregateMetrics" : "catalogservers"
     },
-    {
-      "name" : "LLAMA",
-      "nameForCrossEntityAggregateMetrics" : "llamas"
-    }
   ],
   "metricEntityTypeDefinitions" : [
       {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/common/thrift/metrics.json
----------------------------------------------------------------------
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index c61c7fe..0829139 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -570,26 +570,6 @@
     "key": "impala-server.version"
   },
   {
-    "description": "The number of active connections to the Llama Callback Service.",
-    "contexts": [
-      "IMPALAD"
-    ],
-    "label": "Llama Callback Service Active Connections",
-    "units": "NONE",
-    "kind": "GAUGE",
-    "key": "impala.thrift-server.llama-callback.connections-in-use"
-  },
-  {
-    "description": "The total number of connections made to the Llama Callback Service over its lifetime.",
-    "contexts": [
-      "IMPALAD"
-    ],
-    "label": "Llama Callback Service Total Connections",
-    "units": "UNIT",
-    "kind": "COUNTER",
-    "key": "impala.thrift-server.llama-callback.total-connections"
-  },
-  {
     "description": "The number of active catalog service connections to this Catalog Server.",
     "contexts": [
       "CATALOGSERVER"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ee2a06d8/infra/deploy/deploy.py
----------------------------------------------------------------------
diff --git a/infra/deploy/deploy.py b/infra/deploy/deploy.py
index 56e3c46..0463111 100644
--- a/infra/deploy/deploy.py
+++ b/infra/deploy/deploy.py
@@ -314,8 +314,6 @@ def create_new_service(api, cluster, new_name, deps, scratch_dirs, master_host):
                 new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, md5.hexdigest())
                 new_service.create_role(new_role_name, rcg.roleType, h.hostId)
                 print "Created new role %s" % (new_role_name,)
-        elif rcg.roleType == "LLAMA":
-            continue
         else:
             md5 = hashlib.md5()
             md5.update(master_host.hostId)
@@ -367,19 +365,12 @@ def transform_rcg_config(rcg):
         new_config.update(transform_port(rcg.name, full, "beeswax_port"))
         new_config.update(transform_port(rcg.name, full, "hs2_port"))
         new_config.update(transform_port(rcg.name, full, "impalad_webserver_port"))
-        new_config.update(transform_port(rcg.name, full, "llama_callback_port"))
         new_config.update(transform_port(rcg.name, full, "state_store_subscriber_port"))
     elif rcg.roleType == "CATALOGSERVER":
         new_config.update(transform_path(rcg.name, full, "log_dir"))
 
         new_config.update(transform_port(rcg.name, full, "catalog_service_port"))
         new_config.update(transform_port(rcg.name, full, "catalogserver_webserver_port"))
-    elif rcg.roleType == "LLAMA":
-        new_config.update(transform_path(rcg.name, full, "llama_log_dir"))
-
-        new_config.update(transform_port(rcg.name, full, "llama_am_server_thrift_admin_address"))
-        new_config.update(transform_port(rcg.name, full, "llama_http_port"))
-        new_config.update(transform_port(rcg.name, full, "llama_port"))
     elif rcg.roleType == "STATESTORE":
         new_config.update(transform_path(rcg.name, full, "log_dir"))
 


[07/32] incubator-impala git commit: IMPALA-4188: Leopard: support external Docker volumes

Posted by ta...@apache.org.
IMPALA-4188: Leopard: support external Docker volumes

To run the Random Query Generator with Impala and Kudu, we need to
mount an external Docker volume as a workaround for KUDU-1419. This
patch introduces a set of environment variables a user may tweak for
that purpose. The patch assumes a working Docker container based on a
standard Linux distribution like Ubuntu 14.

To assist users, I've updated the Leopard README with an explanation of
each environment variable's meaning.

The gist is that the container is the source of truth: to create an
external volume, we need to copy the testdata off the container onto
the host running Docker Engine. To do that, we suggest using rsync
over a passwordless SSH key.

Testing:
I used a Cloudera Docker container that has Impala in /home/dev/Impala.
Before, Kudu would fail to start due to KUDU-1419. Now, we load testdata
into an external volume, build Impala, run the minicluster including
Kudu, and can access the tpch_kudu data.

I made flake8 fixes as well. flake8 on this file is now clean.

Change-Id: Ia7d9d9253fcd7e3905e389ddeb1438cee3e24480
Reviewed-on: http://gerrit.cloudera.org:8080/4678
Reviewed-by: Michael Brown <mi...@cloudera.com>
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/db5de41a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/db5de41a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/db5de41a

Branch: refs/heads/hadoop-next
Commit: db5de41a808d0e177ac0089ead2e420ab6043d1d
Parents: 784716f
Author: Michael Brown <mi...@cloudera.com>
Authored: Thu Sep 22 15:04:41 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 14 07:44:23 2016 +0000

----------------------------------------------------------------------
 tests/comparison/leopard/README               |  70 +++++++-
 tests/comparison/leopard/impala_docker_env.py | 199 +++++++++++++++------
 2 files changed, 209 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/db5de41a/tests/comparison/leopard/README
----------------------------------------------------------------------
diff --git a/tests/comparison/leopard/README b/tests/comparison/leopard/README
index 45f5ad8..a8144ac 100644
--- a/tests/comparison/leopard/README
+++ b/tests/comparison/leopard/README
@@ -1,13 +1,77 @@
+Summary
+-------
+
 This package runs the query generator continuously. Compares Impala and Postgres results
 for a randomly generated query and produces several reports per day. Reports are
 displayed on a web page which allows the user to conveniently examine the discovered
 issues. The user can also start a custom run against a private Impala branch using the
 web interface.
 
-Requirements:
+Requirements
+------------
 
-Docker -- A docker image with Impala and Postgres installed and functional database
-    loaded into Postgres.
+Docker -- A Docker image with Impala and PostgreSQL installed and at
+    least one reference database loaded into PostgreSQL. data_generator.py is a useful
+    tool to migrate data from Impala into PostgreSQL.
 
 To get started, run ./controller.py and ./front_end.py. You should be able to view the
 web page at http://localhost:5000. Results and logs are saved to /tmp/query_gen
+
+
+Basic Configuration
+-------------------
+
+The following are useful environment variables for running the
+controller and Docker images within it.
+
+DOCKER_USER - user *within* the Impala Docker container who owns the
+Impala source tree and test data.
+
+DOCKER_PASSWORD - password for the user *within* the Impala Docker
+container.
+
+TARGET_HOST - host system on which Docker Engine is running. This is the
+host that the controller will use to issue Docker commands like "docker
+run".
+
+TARGET_HOST_USERNAME - username for controller process to use to SSH
+into TARGET_HOST. Via Fabric, one can either type a password or use SSH
+keys.
+
+DOCKER_IMAGE_NAME - image to pull via "docker pull"
+
+
+External Volume Configuration
+-----------------------------
+
+To run Leopard against Impala with Kudu, we need to work around
+KUDU-1419. KUDU-1419 is likely to occur if your Docker Storage Engine
+is AUFS, and possibly others. The easiest way to overcome this is to
+mount an external Docker volume that contains the necessary test data.
+To try to handle this automatically, you can export any or all of the
+following environment variables, depending on your host and container
+setups:
+
+DOCKER_IMPALA_USER_UID, DOCKER_IMPALA_USER_GID - numeric UID and GID for
+the owner of the Impala test data (testdata/cluster from an Impala
+source checkout) within your Docker container. Numeric IDs are needed,
+because there is no guarantee the symbolic owner and group on the
+container match the IDs on the target host.
+
+HOST_TESTDATA_EXTERNAL_VOLUME_PATH - path on TARGET_HOST where the
+external volume will reside. This is the destination for rsync to warm
+the volume and the left-hand side of "docker run -v".
+
+DOCKER_TESTDATA_VOLUME_PATH - path on your Docker container to the
+testdata/cluster Impala directory. This is the source for rsync to warm
+the volume and the right-hand side of "docker run -v".
+
+HOST_TO_DOCKER_SSH_KEY - name of private key on TARGET_HOST for use with
+rsync so as to "warm" the external volume automatically.
+
+You are encouraged to configure your container so that rsync over
+passwordless SSH is possible, allowing the external volume to be
+created automatically using the environment variables above.
+
+This is a handy guide on how to use rsync with SSH keys:
+
+https://www.guyrutenberg.com/2014/01/14/restricting-ssh-access-to-rsync/

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/db5de41a/tests/comparison/leopard/impala_docker_env.py
----------------------------------------------------------------------
diff --git a/tests/comparison/leopard/impala_docker_env.py b/tests/comparison/leopard/impala_docker_env.py
index 715ad65..4ce12d8 100755
--- a/tests/comparison/leopard/impala_docker_env.py
+++ b/tests/comparison/leopard/impala_docker_env.py
@@ -20,7 +20,9 @@
 from __future__ import division
 from fabric.api import sudo, run, settings
 from logging import getLogger
-from os.path import join as join_path
+from os.path import (
+    join as join_path,
+    normpath)
 from time import sleep
 from tests.comparison.leopard.controller import (
     SHOULD_BUILD_IMPALA,
@@ -31,13 +33,43 @@ import os
 
 IMPALA_HOME = '/home/dev/Impala'
 CORE_PATH = '/tmp/core_files'
-DEFAULT_BRANCH_NAME = 'origin/cdh5-trunk'
+DEFAULT_BRANCH_NAME = os.environ.get('DEFAULT_BRANCH_NAME', 'origin/master')
 DEFAULT_DOCKER_IMAGE_NAME = 'cloudera/impala-dev'
-DOCKER_USER_NAME = 'dev'
+DOCKER_USER_NAME = os.environ.get('DOCKER_USER_NAME', 'dev')
+
+# Needed for ensuring the testdata volume is properly owned. The UID/GID from the
+# container must be used, not symbolic name.
+DOCKER_IMPALA_USER_UID = int(os.environ.get(
+    'DOCKER_IMPALA_USER_UID', 1234))
+DOCKER_IMPALA_USER_GID = int(os.environ.get(
+    'DOCKER_IMPALA_USER_GID', 1000))
+
+HOST_TESTDATA_EXTERNAL_VOLUME_PATH = normpath(os.environ.get(
+    'HOST_TESTDATA_EXTERNAL_VOLUME_PATH',
+    os.path.sep + join_path('data', '1', 'dockervols', 'cluster')))
+
+DEFAULT_DOCKER_TESTDATA_VOLUME_PATH = os.path.sep + join_path(
+    'home', DOCKER_USER_NAME, 'Impala', 'testdata', 'cluster')
+
+# This needs to have a trailing os.path.sep for rsync so that the contents of the rsync
+# source will be put directly into this directory. See man rsync to understand the
+# idiosyncrasies of a trailing / (or not) in paths.
+DOCKER_TESTDATA_VOLUME_PATH = normpath(
+    os.environ.get(
+        'DOCKER_TESTDATA_VOLUME_PATH',
+        DEFAULT_DOCKER_TESTDATA_VOLUME_PATH)
+) + os.path.sep
+
+HOST_TO_DOCKER_SSH_KEY = os.environ.get(
+    'HOST_TO_DOCKER_SSH_KEY',
+    join_path(os.environ['HOME'], '.ssh', 'ro-rsync_rsa'))
+
 NUM_START_ATTEMPTS = 50
 NUM_FABRIC_ATTEMPTS = 50
+
 LOG = getLogger('ImpalaDockerEnv')
 
+
 def retry(func):
   '''Retry decorator.'''
 
@@ -57,6 +89,7 @@ def retry(func):
 
   return wrapper
 
+
 class ImpalaDockerEnv(object):
   '''Represents an Impala environment inside a Docker container. Used for starting
   Impala, getting stack traces after a crash and keeping track of the ports on which SSH,
@@ -75,14 +108,19 @@ class ImpalaDockerEnv(object):
         'DOCKER_IMAGE_NAME', DEFAULT_DOCKER_IMAGE_NAME)
 
   def stop_docker(self):
-    with settings(warn_only = True, host_string = self.host, user = self.host_username):
+    with settings(warn_only=True, host_string=self.host, user=self.host_username):
       retry(sudo)('docker stop {0}'.format(self.container_id), pty=True)
       retry(sudo)('docker rm {0}'.format(self.container_id), pty=True)
 
-  def start_new_container(self):
-    '''Starts a container with port forwarding for ssh, impala and postgres. '''
+  def start_new_container(self, volume_map=None):
+    """
+    Starts a container with port forwarding for ssh, impala and postgres.
+
+    The optional volume_map is a dictionary for making use of Docker external volumes.
+    The keys are paths on the host, and the values are paths on the container.
+    """
     for _ in range(NUM_START_ATTEMPTS):
-      with settings(warn_only = True, host_string = self.host, user = self.host_username):
+      with settings(warn_only=True, host_string=self.host, user=self.host_username):
         set_core_dump_location_command = \
             "echo '/tmp/core_files/core.%e.%p' | sudo tee /proc/sys/kernel/core_pattern"
         sudo(set_core_dump_location_command, pty=True)
@@ -94,46 +132,59 @@ class ImpalaDockerEnv(object):
         start_command = ''
         if SHOULD_PULL_DOCKER_IMAGE:
           start_command = 'docker pull {docker_image_name} && '.format(
-              docker_image_name = self.docker_image_name)
+              docker_image_name=self.docker_image_name)
+        volume_ops = ''
+        if volume_map is not None:
+          volume_ops = ' '.join(
+              ['-v {host_path}:{container_path}'.format(host_path=host_path,
+                                                        container_path=container_path)
+               for host_path, container_path in volume_map.iteritems()])
         start_command += (
-            'docker run -d -t -p {postgres_port}:5432 -p {ssh_port}:22 '
+            'docker run -d -t {volume_ops} -p {postgres_port}:5432 -p {ssh_port}:22 '
             '-p {impala_port}:21050 {docker_image_name} /bin/docker-boot-daemon').format(
-                ssh_port = self.ssh_port,
-                impala_port = self.impala_port,
-                postgres_port = self.postgres_port,
-                docker_image_name = self.docker_image_name)
+                volume_ops=volume_ops,
+                ssh_port=self.ssh_port,
+                impala_port=self.impala_port,
+                postgres_port=self.postgres_port,
+                docker_image_name=self.docker_image_name)
 
         try:
           self.container_id = sudo(start_command, pty=True)
-        except:
-          LOG.exception('start_new_container')
+        except Exception as e:
+          LOG.exception('start_new_container:' + str(e))
       if self.container_id is not None:
         break
     else:
       LOG.error('Container failed to start after {0} attempts'.format(NUM_START_ATTEMPTS))
+    # Wait for the SSH service to start inside the docker instance. Usually takes 1
+    # second. This is simple and reliable. An alternative implementation would be to
+    # poll with a timeout until SSH has started.
+    sleep(10)
 
   def get_git_hash(self):
     '''Returns the Git hash of the current commit. '''
     with settings(
-        warn_only = True,
-        host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-        password = os.environ['DOCKER_PASSWORD']):
+        warn_only=True,
+        host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+        password=os.environ['DOCKER_PASSWORD']
+    ):
       git_hash = retry(run)('cd {IMPALA_HOME} && git rev-parse --short HEAD'.format(
-        IMPALA_HOME = IMPALA_HOME))
+          IMPALA_HOME=IMPALA_HOME))
       return git_hash
 
   def run_all(self):
     with settings(
-        warn_only = True,
-        host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-        password = os.environ['DOCKER_PASSWORD']):
+        warn_only=True,
+        host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+        password=os.environ['DOCKER_PASSWORD']
+    ):
       run_all_command = (
           'mkdir -p {CORE_PATH} && chmod 777 {CORE_PATH} && cd {IMPALA_HOME} '
           '&& source {IMPALA_HOME}/bin/impala-config.sh '
           '&& {IMPALA_HOME}/bin/create-test-configuration.sh '
           '&& {IMPALA_HOME}/testdata/bin/run-all.sh').format(
-              IMPALA_HOME = IMPALA_HOME,
-              CORE_PATH=CORE_PATH)
+              CORE_PATH=CORE_PATH,
+              IMPALA_HOME=IMPALA_HOME)
       retry(run)(run_all_command, pty=False)
 
   def build_impala(self):
@@ -146,33 +197,33 @@ class ImpalaDockerEnv(object):
           'docker-boot && cd {IMPALA_HOME} && {git_command} '
           '&& source {IMPALA_HOME}/bin/impala-config.sh '
           '&& {IMPALA_HOME}/buildall.sh -notests').format(
-              git_command = self.git_command,
-              IMPALA_HOME = IMPALA_HOME,
-              CORE_PATH = CORE_PATH)
+              git_command=self.git_command,
+              IMPALA_HOME=IMPALA_HOME)
     elif SHOULD_BUILD_IMPALA:
       build_command = (
           'docker-boot && cd {IMPALA_HOME} '
-          '&& git fetch --all && git checkout DEFAULT_BRANCH_NAME '
+          '&& git fetch --all && git checkout {DEFAULT_BRANCH_NAME} '
           '&& source {IMPALA_HOME}/bin/impala-config.sh '
           '&& {IMPALA_HOME}/buildall.sh -notests').format(
-              IMPALA_HOME = IMPALA_HOME,
-              DEFAULT_BRANCH_NAME = DEFAULT_BRANCH_NAME,
-              CORE_PATH = CORE_PATH)
+              IMPALA_HOME=IMPALA_HOME,
+              DEFAULT_BRANCH_NAME=DEFAULT_BRANCH_NAME)
 
     if build_command:
       with settings(
-          warn_only = True,
-          host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-          password = os.environ['DOCKER_PASSWORD']):
+          warn_only=True,
+          host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+          password=os.environ['DOCKER_PASSWORD']
+      ):
         result = retry(run)(build_command, pty=False)
         LOG.info('Build Complete, Result: {0}'.format(result))
 
   def load_data(self):
     if SHOULD_LOAD_DATA:
       with settings(
-          warn_only = True,
-          host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-          password = os.environ['DOCKER_PASSWORD']):
+          warn_only=True,
+          host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+          password=os.environ['DOCKER_PASSWORD']
+      ):
         self.start_impala()
         load_command = '''cd {IMPALA_HOME} \
             && source bin/impala-config.sh \
@@ -180,14 +231,16 @@ class ImpalaDockerEnv(object):
                 --use-postgresql --db-name=functional \
                 --migrate-table-names=alltypes,alltypestiny,alltypesagg migrate \
             && ./tests/comparison/data_generator.py --use-postgresql'''.format(
-                IMPALA_HOME=IMPALA_HOME)
+            IMPALA_HOME=IMPALA_HOME)
         result = retry(run)(load_command, pty=False)
+        return result
 
   def start_impala(self):
     with settings(
-        warn_only = True,
-        host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-        password = os.environ['DOCKER_PASSWORD']):
+        warn_only=True,
+        host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+        password=os.environ['DOCKER_PASSWORD']
+    ):
       impalad_args = [
           '-convert_legacy_hive_parquet_utc_timestamps=true',
       ]
@@ -202,46 +255,78 @@ class ImpalaDockerEnv(object):
   def is_impala_running(self):
     '''Check that exactly 3 impalads are running inside the docker instance.'''
     with settings(
-        warn_only = True,
-        host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-        password = os.environ['DOCKER_PASSWORD']):
+        warn_only=True,
+        host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+        password=os.environ['DOCKER_PASSWORD']
+    ):
       return retry(run)('ps aux | grep impalad').count('/service/impalad') == 3
 
   def get_stack(self):
     '''Finds the newest core file and extracts the stack trace from it using gdb. '''
     IMPALAD_PATH = '{IMPALA_HOME}/be/build/debug/service/impalad'.format(
-        IMPALA_HOME = IMPALA_HOME)
+        IMPALA_HOME=IMPALA_HOME)
     with settings(
-        warn_only = True,
-        host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-        password = os.environ['DOCKER_PASSWORD']):
+        warn_only=True,
+        host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+        password=os.environ['DOCKER_PASSWORD']
+    ):
       core_file_name = retry(run)('ls {0} -t1 | head -1'.format(CORE_PATH))
       LOG.info('Core File Name: {0}'.format(core_file_name))
       if 'core' not in core_file_name:
         return None
       core_full_path = join_path(CORE_PATH, core_file_name)
       stack_trace = retry(run)('gdb {0} {1} --batch --quiet --eval-command=bt'.format(
-        IMPALAD_PATH, core_full_path))
+          IMPALAD_PATH, core_full_path))
       self.delete_core_files()
       return stack_trace
 
   def delete_core_files(self):
     '''Delete all core files. This is usually done after the stack was extracted.'''
     with settings(
-        warn_only = True,
-        host_string = '{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
-        password = os.environ['DOCKER_PASSWORD']):
+        warn_only=True,
+        host_string='{0}@{1}:{2}'.format(DOCKER_USER_NAME, self.host, self.ssh_port),
+        password=os.environ['DOCKER_PASSWORD']
+    ):
       retry(run)('rm -f {0}/core.*'.format(CORE_PATH))
 
   def prepare(self):
     '''Create a new Impala Environment. Starts a docker container and builds Impala in it.
     '''
-    self.start_new_container()
+    # See KUDU-1419: If we expect to be running Kudu in the minicluster inside the
+    # Docker container, we have to protect against storage engines like AUFS and their
+    # incompatibility with Kudu. First we have to get test data off the container, store
+    # it somewhere, and then start another container using docker -v and mount the test
+    # data as a volume to bypass AUFS. See also the README for Leopard.
+    if os.environ.get('KUDU_IS_SUPPORTED') == 'true':
+      LOG.info('Warming testdata cluster external volume')
+      self.start_new_container()
+      with settings(
+          warn_only=True,
+          host_string=self.host,
+          user=self.host_username,
+      ):
+        sudo(
+            'mkdir -p {host_testdata_path} && '
+            'rsync -e "ssh -i {priv_key} -o StrictHostKeyChecking=no '
+            ''         '-o UserKnownHostsFile=/dev/null -p {ssh_port}" '
+            '--delete --archive --verbose --progress --chown={uid}:{gid} '
+            '{user}@127.0.0.1:{container_testdata_path} {host_testdata_path}'.format(
+                host_testdata_path=HOST_TESTDATA_EXTERNAL_VOLUME_PATH,
+                priv_key=HOST_TO_DOCKER_SSH_KEY,
+                ssh_port=self.ssh_port,
+                uid=DOCKER_IMPALA_USER_UID,
+                gid=DOCKER_IMPALA_USER_GID,
+                user=DOCKER_USER_NAME,
+                container_testdata_path=DOCKER_TESTDATA_VOLUME_PATH))
+      self.stop_docker()
+      volume_map = {
+          HOST_TESTDATA_EXTERNAL_VOLUME_PATH: DOCKER_TESTDATA_VOLUME_PATH,
+      }
+    else:
+      volume_map = None
+
+    self.start_new_container(volume_map=volume_map)
     LOG.info('Container Started')
-    # Wait for the SSH service to start inside the docker instance.  Usually takes 1
-    # second. This is simple and reliable. An alternative implementation is to poll with
-    # timeout if SSH was started.
-    sleep(10)
     self.build_impala()
     try:
       result = self.run_all()


[09/32] incubator-impala git commit: IMPALA-3644 Make predicate order deterministic

Posted by ta...@apache.org.
IMPALA-3644 Make predicate order deterministic

This adds a tie-break to make sure that we sort predicates in a
deterministic order on Java 7 and 8. This was suggested by Alex in
IMPALA-3644.

There are still three broken tests when run in Java 8, but it seems best
to address them in a subsequent change.

Change-Id: Id11010bfeaff368869e6d430eeb4773ddf41faff
Reviewed-on: http://gerrit.cloudera.org:8080/4671
Reviewed-by: Jim Apple <jb...@cloudera.com>
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1a5c43ef
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1a5c43ef
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1a5c43ef

Branch: refs/heads/hadoop-next
Commit: 1a5c43ef5e1f66a3b64fe2e2c382605337aaaab9
Parents: db5de41
Author: Lars Volker <lv...@cloudera.com>
Authored: Wed Oct 5 10:39:00 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 14 22:04:30 2016 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/PlanNode.java     |   6 +
 .../queries/PlannerTest/analytic-fns.test       |   8 +-
 .../queries/PlannerTest/conjunct-ordering.test  |   2 +-
 .../queries/PlannerTest/data-source-tables.test |   6 +-
 .../queries/PlannerTest/hbase.test              |   8 +-
 .../queries/PlannerTest/inline-view-limit.test  |  10 +-
 .../queries/PlannerTest/inline-view.test        |   4 +-
 .../queries/PlannerTest/join-order.test         |  28 +--
 .../queries/PlannerTest/joins.test              |  60 +++----
 .../queries/PlannerTest/kudu-update.test        |   4 +-
 .../queries/PlannerTest/kudu.test               |  10 +-
 .../queries/PlannerTest/lineage.test            |   2 +-
 .../queries/PlannerTest/nested-collections.test |   4 +-
 .../queries/PlannerTest/outer-joins.test        |  10 +-
 .../PlannerTest/predicate-propagation.test      | 160 ++++++++---------
 .../PlannerTest/runtime-filter-propagation.test |  42 ++---
 .../queries/PlannerTest/subquery-rewrite.test   |  38 ++--
 .../queries/PlannerTest/tpcds-all.test          |  60 +++----
 .../queries/PlannerTest/tpch-all.test           | 172 +++++++++----------
 .../queries/PlannerTest/tpch-kudu.test          |  28 +--
 .../queries/PlannerTest/tpch-nested.test        |  64 +++----
 .../queries/PlannerTest/tpch-views.test         |  52 +++---
 .../queries/PlannerTest/union.test              |   4 +-
 23 files changed, 394 insertions(+), 388 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/fe/src/main/java/org/apache/impala/planner/PlanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index 3350364..4686e2c 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -702,6 +702,12 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
         if (cost < smallestCost) {
           smallestCost = cost;
           bestConjunct = e;
+        } else if (cost == smallestCost) {
+          // Break ties based on toSql() to get a consistent display in explain plans.
+          if (e.toSql().compareTo(bestConjunct.toSql()) < 0) {
+            smallestCost = cost;
+            bestConjunct = e;
+          }
         }
       }
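
The fix keeps the lowest-cost conjunct but, when two conjuncts have equal cost, prefers
the one whose toSql() string compares smaller, so the chosen ordering no longer depends
on Java-version-specific iteration order. A hedged C++ sketch of the same selection
rule; the cost/SQL pairs are made up for illustration:

  #include <iostream>
  #include <string>
  #include <utility>
  #include <vector>

  // Pick the cheapest (cost, sql) candidate; on a cost tie, prefer the
  // lexicographically smaller SQL string so the result is deterministic
  // regardless of the order in which candidates are presented.
  std::pair<double, std::string> PickBestConjunct(
      const std::vector<std::pair<double, std::string>>& candidates) {
    std::pair<double, std::string> best = candidates.front();  // assumes non-empty input
    for (const auto& c : candidates) {
      if (c.first < best.first ||
          (c.first == best.first && c.second < best.second)) {
        best = c;
      }
    }
    return best;
  }

  int main() {
    // The first two candidates tie on cost; the tie-break picks "int_col <= 10".
    std::vector<std::pair<double, std::string>> candidates = {
        {1.0, "int_col >= 5"}, {1.0, "int_col <= 10"}, {2.0, "string_col != 'Foo'"}};
    std::cout << PickBestConjunct(candidates).second << std::endl;
    return 0;
  }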
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index a62e21d..0ef95b0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -1248,7 +1248,7 @@ where
   v.b != v.c
 ---- PLAN
 07:SELECT
-|  predicates: bigint_col > 10, min(int_col) < 1, max(int_col) < 2, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
+|  predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
 |
 06:ANALYTIC
 |  functions: min(int_col)
@@ -1277,10 +1277,10 @@ where
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
-   predicates: int_col >= 5, int_col <= 10
+   predicates: int_col <= 10, int_col >= 5
 ---- DISTRIBUTEDPLAN
 07:SELECT
-|  predicates: bigint_col > 10, min(int_col) < 1, max(int_col) < 2, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
+|  predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
 |
 06:ANALYTIC
 |  functions: min(int_col)
@@ -1315,7 +1315,7 @@ where
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
-   predicates: int_col >= 5, int_col <= 10
+   predicates: int_col <= 10, int_col >= 5
 ====
 # test predicate propagation onto and through analytic nodes
 # TODO: allow AnalyticEvalNode to apply a < 20

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test b/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
index 1dc6a3e..29e1864 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/conjunct-ordering.test
@@ -45,7 +45,7 @@ where a.int_col = a.tinyint_col and
 ---- PLAN
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
-   predicates: a.int_col = a.tinyint_col, (CASE a.tinyint_col WHEN 0 THEN TRUE WHEN 1 THEN TRUE WHEN 2 THEN TRUE ELSE FALSE END), (CASE a.int_col WHEN 0 THEN TRUE WHEN 1 THEN TRUE WHEN 2 THEN TRUE ELSE FALSE END)
+   predicates: a.int_col = a.tinyint_col, (CASE a.int_col WHEN 0 THEN TRUE WHEN 1 THEN TRUE WHEN 2 THEN TRUE ELSE FALSE END), (CASE a.tinyint_col WHEN 0 THEN TRUE WHEN 1 THEN TRUE WHEN 2 THEN TRUE ELSE FALSE END)
 ====
 # Check that a LIKE with only leading/trailing wildcards costs less then LIKE with
 # non-leading/trailing wildcards.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
index a7bdf36..5662f5d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
@@ -24,7 +24,7 @@ where 10 > int_col and
 ---- PLAN
 00:SCAN DATA SOURCE [functional.alltypes_datasource]
 data source predicates: 10 > int_col, string_col != 'Foo'
-predicates: 5 > double_col, NOT TRUE = bool_col, NOT 5.0 = double_col, string_col != 'Bar'
+predicates: 5 > double_col, NOT 5.0 = double_col, NOT TRUE = bool_col, string_col != 'Bar'
 ====
 # The 3rd predicate is not in a form that can be offered to the data source so
 # the 4th will be offered and accepted instead.
@@ -57,7 +57,7 @@ where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
 |--predicates: b.id = b.int_col, b.id = b.bigint_col
 |
 00:SCAN DATA SOURCE [functional.alltypes_datasource a]
-predicates: a.id = a.int_col, a.tinyint_col = a.smallint_col, a.int_col = a.bigint_col, a.id = a.tinyint_col
+predicates: a.id = a.int_col, a.id = a.tinyint_col, a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col
 ====
 # Tests that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be offered to the
 # data source.
@@ -71,5 +71,5 @@ and bigint_col is not distinct from 5
 ---- PLAN
 00:SCAN DATA SOURCE [functional.alltypes_datasource]
 data source predicates: id IS NOT DISTINCT FROM 1, tinyint_col IS DISTINCT FROM 2, int_col IS NOT DISTINCT FROM 4
-predicates: bool_col IS NOT DISTINCT FROM TRUE, smallint_col IS DISTINCT FROM 3, bigint_col IS NOT DISTINCT FROM 5
+predicates: bigint_col IS NOT DISTINCT FROM 5, bool_col IS NOT DISTINCT FROM TRUE, smallint_col IS DISTINCT FROM 3
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
index a8841be..57b2cce 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
@@ -287,19 +287,19 @@ where string_col >= '4' and string_col != '2' and date_string_col = '04/03/09'
 ---- PLAN
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters:
-  d:string_col GREATER_OR_EQUAL '4'
   d:string_col NOT_EQUAL '2'
+  d:string_col GREATER_OR_EQUAL '4'
   d:date_string_col EQUAL '04/03/09'
-   predicates: string_col >= '4', string_col != '2', date_string_col = '04/03/09'
+   predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
 ---- DISTRIBUTEDPLAN
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters:
-  d:string_col GREATER_OR_EQUAL '4'
   d:string_col NOT_EQUAL '2'
+  d:string_col GREATER_OR_EQUAL '4'
   d:date_string_col EQUAL '04/03/09'
-   predicates: string_col >= '4', string_col != '2', date_string_col = '04/03/09'
+   predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
 ====
 # mix of predicates and functional_hbase. filters
 select * from functional_hbase.alltypessmall where string_col = '4' and tinyint_col = 5

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
index 8d3a816..79f75b6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
@@ -530,7 +530,7 @@ where a.id > 10 and b.id > 20
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
-   predicates: id != 1, functional.alltypes.id > 10
+   predicates: functional.alltypes.id > 10, id != 1
 ====
 # Test value transfers for outer-joined inline views with a limit.
 # Value transfer a.id->b.id is legal.
@@ -557,7 +557,7 @@ where a.id > 10 and b.id > 20
 |
 02:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
-   predicates: functional.alltypessmall.id != 1, id != 2, functional.alltypessmall.id > 10, functional.alltypessmall.id > 20
+   predicates: functional.alltypessmall.id != 1, functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, id != 2
    runtime filters: RF000 -> id
 ====
 # Test value transfers for outer-joined inline views with a limit.
@@ -577,7 +577,7 @@ where a.id > 10 and b.id > 20
 |
 |--01:SCAN HDFS [functional.alltypessmall]
 |     partitions=4/4 files=4 size=6.32KB
-|     predicates: id != 2, functional.alltypessmall.id > 20
+|     predicates: functional.alltypessmall.id > 20, id != 2
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
@@ -610,7 +610,7 @@ where a.id > 10 and b.id > 20
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
-   predicates: id != 1, functional.alltypes.id != 2, functional.alltypes.id > 10, functional.alltypes.id > 20
+   predicates: functional.alltypes.id != 2, functional.alltypes.id > 10, functional.alltypes.id > 20, id != 1
    runtime filters: RF000 -> id
 ====
 # IMPALA-3450: limits on select nodes are reflected in cardinality estimates. The test for
@@ -625,4 +625,4 @@ select * from (select * from functional.alltypes limit 100) v where id < 10 limi
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
    limit: 100
-====
\ No newline at end of file
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
index 9eb32d4..fe6ade8 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
@@ -195,7 +195,7 @@ and b.id + 15 = 27
 |
 |--01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.17KB
-|     predicates: functional.alltypessmall.string_col = '15', functional.alltypessmall.id + 15 = 27
+|     predicates: functional.alltypessmall.id + 15 = 27, functional.alltypessmall.string_col = '15'
 |
 00:SCAN HDFS [functional.alltypesagg]
    partitions=5/11 files=5 size=372.38KB
@@ -213,7 +213,7 @@ and b.id + 15 = 27
 |  |
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.17KB
-|     predicates: functional.alltypessmall.string_col = '15', functional.alltypessmall.id + 15 = 27
+|     predicates: functional.alltypessmall.id + 15 = 27, functional.alltypessmall.string_col = '15'
 |
 03:EXCHANGE [HASH(id,int_col)]
 |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
index 44c2c24..e1951e0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
@@ -243,8 +243,8 @@ limit 100
 |     runtime filters: RF000 -> n_regionkey
 |
 08:HASH JOIN [INNER JOIN]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_suppkey, RF003 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF002 <- s_nationkey, RF003 <- s_suppkey
 |
 |--03:SCAN HDFS [tpch.supplier s]
 |     partitions=1/1 files=1 size=1.33MB
@@ -256,7 +256,7 @@ limit 100
 |
 |--00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
-|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF003 -> c_nationkey
+|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF002 -> c_nationkey
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: l_orderkey = o_orderkey
@@ -264,12 +264,12 @@ limit 100
 |
 |--01:SCAN HDFS [tpch.orders o]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
+|     predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
 |     runtime filters: RF004 -> o_custkey
 |
 02:SCAN HDFS [tpch.lineitem l]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF002 -> l_suppkey, RF005 -> l_orderkey
+   runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ---- DISTRIBUTEDPLAN
 20:MERGING-EXCHANGE [UNPARTITIONED]
 |  order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
@@ -309,8 +309,8 @@ limit 100
 |     runtime filters: RF000 -> n_regionkey
 |
 08:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: l_suppkey = s_suppkey, c_nationkey = s_nationkey
-|  runtime filters: RF002 <- s_suppkey, RF003 <- s_nationkey
+|  hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
+|  runtime filters: RF002 <- s_nationkey, RF003 <- s_suppkey
 |
 |--15:EXCHANGE [BROADCAST]
 |  |
@@ -326,7 +326,7 @@ limit 100
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
-|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF003 -> c_nationkey
+|     runtime filters: RF001 -> tpch.customer.c_nationkey, RF002 -> c_nationkey
 |
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: l_orderkey = o_orderkey
@@ -336,12 +336,12 @@ limit 100
 |  |
 |  01:SCAN HDFS [tpch.orders o]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
+|     predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
 |     runtime filters: RF004 -> o_custkey
 |
 02:SCAN HDFS [tpch.lineitem l]
    partitions=1/1 files=1 size=718.94MB
-   runtime filters: RF002 -> l_suppkey, RF005 -> l_orderkey
+   runtime filters: RF003 -> l_suppkey, RF005 -> l_orderkey
 ====
 # Q2 - Minimum Cost Supplier Query
 select
@@ -482,7 +482,7 @@ limit 10
 |
 |--00:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
+|     predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
 |
 01:SCAN HDFS [tpch.lineitem]
    partitions=1/1 files=1 size=718.94MB
@@ -514,7 +514,7 @@ limit 10
 |  |
 |  00:SCAN HDFS [tpch.orders]
 |     partitions=1/1 files=1 size=162.56MB
-|     predicates: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
+|     predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
 |
 05:EXCHANGE [HASH(l_orderkey)]
 |
@@ -1335,7 +1335,7 @@ where a.int_col = b.int_col and b.bigint_col < a.tinyint_col
 ---- PLAN
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = b.id
-|  other predicates: a.int_col = b.int_col, b.tinyint_col = c.tinyint_col, b.bool_col != c.bool_col, b.bigint_col < a.tinyint_col
+|  other predicates: a.int_col = b.int_col, b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col, b.bigint_col < a.tinyint_col
 |  runtime filters: RF000 <- b.tinyint_col
 |
 |--03:HASH JOIN [RIGHT OUTER JOIN]
@@ -1365,7 +1365,7 @@ where b.tinyint_col = c.tinyint_col and b.bool_col != c.bool_col
 ---- PLAN
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = b.id
-|  other predicates: b.tinyint_col = c.tinyint_col, b.bool_col != c.bool_col
+|  other predicates: b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col
 |  runtime filters: RF000 <- b.tinyint_col
 |
 |--03:HASH JOIN [INNER JOIN]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index b5b36a0..260ba21 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -173,7 +173,7 @@ and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 ---- PLAN
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = a.id, c.string_col = b.string_col
-|  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.tinyint_col + b.tinyint_col < 15, a.float_col - c.double_col < 0, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
+|  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
 |
 |--03:HASH JOIN [FULL OUTER JOIN]
 |  |  hash predicates: a.id = b.id, a.int_col = b.int_col
@@ -193,7 +193,7 @@ and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 |
 04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
 |  hash predicates: c.id = a.id, c.string_col = b.string_col
-|  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.tinyint_col + b.tinyint_col < 15, a.float_col - c.double_col < 0, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
+|  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
 |
 |--08:EXCHANGE [HASH(a.id,b.string_col)]
 |  |
@@ -681,8 +681,8 @@ inner join [shuffle]
 on (a.int_col = b.int_col and b.bool_col = a.bool_col)
 ---- PLAN
 03:HASH JOIN [INNER JOIN]
-|  hash predicates: a.int_col = int_col, a.bool_col = bool_col
-|  runtime filters: RF000 <- int_col, RF001 <- bool_col
+|  hash predicates: a.bool_col = bool_col, a.int_col = int_col
+|  runtime filters: RF000 <- bool_col, RF001 <- int_col
 |
 |--02:AGGREGATE [FINALIZE]
 |  |  output: count(*)
@@ -693,13 +693,13 @@ on (a.int_col = b.int_col and b.bool_col = a.bool_col)
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> a.int_col, RF001 -> a.bool_col
+   runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col
 ---- DISTRIBUTEDPLAN
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: a.int_col = int_col, a.bool_col = bool_col
-|  runtime filters: RF000 <- int_col, RF001 <- bool_col
+|  hash predicates: a.bool_col = bool_col, a.int_col = int_col
+|  runtime filters: RF000 <- bool_col, RF001 <- int_col
 |
 |--05:AGGREGATE [FINALIZE]
 |  |  output: count:merge(*)
@@ -718,7 +718,7 @@ on (a.int_col = b.int_col and b.bool_col = a.bool_col)
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> a.int_col, RF001 -> a.bool_col
+   runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col
 ====
 # Tests that the partitioned join between b and c exploits the existing
 # data partition of its lhs and rhs inputs.
@@ -731,12 +731,12 @@ inner join [shuffle]
 on (b.int_col = c.int_col and c.bool_col = b.bool_col)
 ---- PLAN
 05:HASH JOIN [INNER JOIN]
-|  hash predicates: b.int_col = a.int_col, b.bool_col = a.bool_col
-|  runtime filters: RF000 <- a.int_col, RF001 <- a.bool_col
+|  hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col
+|  runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col
 |
 |--04:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.int_col = int_col, a.bool_col = bool_col
-|  |  runtime filters: RF002 <- int_col, RF003 <- bool_col
+|  |  hash predicates: a.bool_col = bool_col, a.int_col = int_col
+|  |  runtime filters: RF002 <- bool_col, RF003 <- int_col
 |  |
 |  |--03:AGGREGATE [FINALIZE]
 |  |  |  output: count(*)
@@ -747,21 +747,21 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col)
 |  |
 |  00:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF002 -> a.int_col, RF003 -> a.bool_col
+|     runtime filters: RF002 -> a.bool_col, RF003 -> a.int_col
 |
 01:SCAN HDFS [functional.alltypes b]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> b.int_col, RF001 -> b.bool_col
+   runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col
 ---- DISTRIBUTEDPLAN
 10:EXCHANGE [UNPARTITIONED]
 |
 05:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: b.int_col = a.int_col, b.bool_col = a.bool_col
-|  runtime filters: RF000 <- a.int_col, RF001 <- a.bool_col
+|  hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col
+|  runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col
 |
 |--04:HASH JOIN [INNER JOIN, PARTITIONED]
-|  |  hash predicates: a.int_col = int_col, a.bool_col = bool_col
-|  |  runtime filters: RF002 <- int_col, RF003 <- bool_col
+|  |  hash predicates: a.bool_col = bool_col, a.int_col = int_col
+|  |  runtime filters: RF002 <- bool_col, RF003 <- int_col
 |  |
 |  |--07:AGGREGATE [FINALIZE]
 |  |  |  output: count:merge(*)
@@ -780,13 +780,13 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col)
 |  |
 |  00:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF002 -> a.int_col, RF003 -> a.bool_col
+|     runtime filters: RF002 -> a.bool_col, RF003 -> a.int_col
 |
 09:EXCHANGE [HASH(b.int_col,b.bool_col)]
 |
 01:SCAN HDFS [functional.alltypes b]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> b.int_col, RF001 -> b.bool_col
+   runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col
 ====
 # Tests that all predicates from the On-clause are applied (IMPALA-805)
 # and that slot equivalences are enforced at lowest possible plan node.
@@ -807,7 +807,7 @@ where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
 |
 |--00:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
-|     predicates: a.id = a.int_col, a.tinyint_col = a.smallint_col, a.int_col = a.bigint_col, a.id = a.tinyint_col
+|     predicates: a.id = a.int_col, a.id = a.tinyint_col, a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col
 |
 01:SCAN HDFS [functional.alltypes b]
    partitions=24/24 files=24 size=478.45KB
@@ -827,16 +827,16 @@ and b.string_col = a.string_col and b.date_string_col = a.string_col
 where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
 ---- PLAN
 02:HASH JOIN [RIGHT OUTER JOIN]
-|  hash predicates: b.id = a.id, b.int_col = a.id, b.id = a.int_col, b.bigint_col = a.id, b.id = a.tinyint_col, b.id = a.smallint_col, b.id = a.bigint_col, b.string_col = a.string_col, b.date_string_col = a.string_col
-|  runtime filters: RF000 <- a.id, RF001 <- a.id, RF002 <- a.int_col, RF003 <- a.id, RF004 <- a.tinyint_col, RF005 <- a.smallint_col, RF006 <- a.bigint_col, RF007 <- a.string_col, RF008 <- a.string_col
+|  hash predicates: b.id = a.id, b.int_col = a.id, b.id = a.int_col, b.id = a.bigint_col, b.bigint_col = a.id, b.id = a.smallint_col, b.string_col = a.string_col, b.id = a.tinyint_col, b.date_string_col = a.string_col
+|  runtime filters: RF000 <- a.id, RF001 <- a.id, RF002 <- a.int_col, RF003 <- a.bigint_col, RF004 <- a.id, RF005 <- a.smallint_col, RF006 <- a.string_col, RF007 <- a.tinyint_col, RF008 <- a.string_col
 |
 |--00:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
-|     predicates: a.tinyint_col = a.smallint_col, a.int_col = a.bigint_col
+|     predicates: a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col
 |
 01:SCAN HDFS [functional.alltypes b]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> b.id, RF001 -> b.int_col, RF002 -> b.id, RF003 -> b.bigint_col, RF004 -> b.id, RF005 -> b.id, RF006 -> b.id, RF007 -> b.string_col, RF008 -> b.date_string_col
+   runtime filters: RF000 -> b.id, RF001 -> b.int_col, RF002 -> b.id, RF003 -> b.id, RF004 -> b.bigint_col, RF005 -> b.id, RF006 -> b.string_col, RF007 -> b.id, RF008 -> b.date_string_col
 ====
 # Tests elimination of redundant join predicates (IMPALA-912).
 select * from
@@ -1099,8 +1099,8 @@ on a.id = b.x and a.id = b.tinyint_col and
    a.int_col = b.y and a.int_col = b.bigint_col
 ---- PLAN
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = x, a.id = tinyint_col, a.int_col = y, a.int_col = bigint_col
-|  runtime filters: RF000 <- x, RF001 <- tinyint_col, RF002 <- y, RF003 <- bigint_col
+|  hash predicates: a.id = tinyint_col, a.id = x, a.int_col = bigint_col, a.int_col = y
+|  runtime filters: RF000 <- tinyint_col, RF001 <- x, RF002 <- bigint_col, RF003 <- y
 |
 |--01:UNION
 |  |
@@ -1599,7 +1599,7 @@ on (v1.tinyint_col = v2.tinyint_col and
 |
 02:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
-   predicates: functional.alltypessmall.tinyint_col = functional.alltypessmall.int_col, functional.alltypessmall.tinyint_col = functional.alltypessmall.bigint_col
+   predicates: functional.alltypessmall.tinyint_col = functional.alltypessmall.bigint_col, functional.alltypessmall.tinyint_col = functional.alltypessmall.int_col
    runtime filters: RF000 -> functional.alltypessmall.tinyint_col
 ====
 # Same as above but with a full outer join.
@@ -1623,7 +1623,7 @@ on (v1.tinyint_col = v2.tinyint_col and
 10:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
-|  hash predicates: tinyint_col = tinyint_col, int_col = tinyint_col, bigint_col = tinyint_col
+|  hash predicates: tinyint_col = tinyint_col, bigint_col = tinyint_col, int_col = tinyint_col
 |
 |--09:EXCHANGE [HASH(tinyint_col,tinyint_col,tinyint_col)]
 |  |
@@ -2154,7 +2154,7 @@ and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
 ---- PLAN
 04:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: c.id = a.id, c.string_col IS NOT DISTINCT FROM b.string_col
-|  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.tinyint_col + b.tinyint_col < 15, a.float_col - c.double_col < 0, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
+|  other predicates: a.tinyint_col = 15, b.string_col = '15', a.day >= 6, b.month > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
 |
 |--03:HASH JOIN [FULL OUTER JOIN]
 |  |  hash predicates: a.id IS NOT DISTINCT FROM b.id, a.int_col = b.int_col

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/kudu-update.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-update.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-update.test
index 80ba800..a26ec40 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-update.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-update.test
@@ -19,13 +19,13 @@ UPDATE KUDU [functional_kudu.testtbl]
 |  check keys exist: false
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   kudu predicates: zip > 94549, id = 5
+   kudu predicates: id = 5, zip > 94549
 ---- DISTRIBUTEDPLAN
 UPDATE KUDU [functional_kudu.testtbl]
 |  check keys exist: false
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   kudu predicates: zip > 94549, id = 5
+   kudu predicates: id = 5, zip > 94549
 ====
 # Mixing predicate and value assignment
 update functional_kudu.testtbl set zip = 94546 where zip > 94549

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 4df133a..565f3a3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -96,7 +96,7 @@ where id >= 10 and zip <= 5 and 20 >= id and 'foo' = name and zip >= 0 and 30 >=
 and zip > 1 and zip < 50
 ---- PLAN
 00:SCAN KUDU [functional_kudu.testtbl]
-   kudu predicates: id >= 10, zip <= 5, id <= 20, zip >= 0, zip <= 30, zip > 1, zip < 50, name = 'foo'
+   kudu predicates: id <= 20, zip <= 30, id >= 10, zip < 50, zip <= 5, zip > 1, zip >= 0, name = 'foo'
 ---- SCANRANGELOCATIONS
 NODE 0:
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
@@ -104,7 +104,7 @@ NODE 0:
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   kudu predicates: id >= 10, zip <= 5, id <= 20, zip >= 0, zip <= 30, zip > 1, zip < 50, name = 'foo'
+   kudu predicates: id <= 20, zip <= 30, id >= 10, zip < 50, zip <= 5, zip > 1, zip >= 0, name = 'foo'
 ====
 # Test constant folding.
 select * from functional_kudu.testtbl
@@ -112,7 +112,7 @@ where id < 10 + 30  and cast(sin(id) as boolean) = true and 20 * 3 >= id and 10
 ---- PLAN
 00:SCAN KUDU [functional_kudu.testtbl]
    predicates: CAST(sin(id) AS BOOLEAN) = TRUE
-   kudu predicates: id < 40, id <= 60, id < 103
+   kudu predicates: id <= 60, id < 40, id < 103
 ---- SCANRANGELOCATIONS
 NODE 0:
   ScanToken{table=testtbl, range-partition: [<start>, (int64 id=1003))}
@@ -121,7 +121,7 @@ NODE 0:
 |
 00:SCAN KUDU [functional_kudu.testtbl]
    predicates: CAST(sin(id) AS BOOLEAN) = TRUE
-   kudu predicates: id < 40, id <= 60, id < 103
+   kudu predicates: id <= 60, id < 40, id < 103
 ====
 # Some predicates can be pushed down but others can't (predicate on an non-const value).
 select * from functional_kudu.testtbl
@@ -190,5 +190,5 @@ and 1475059765 + 100 < id
 |  output: count(*)
 |
 00:SCAN KUDU [functional_kudu.alltypes]
-   kudu predicates: id < 1475059775, id > 1475059865
+   kudu predicates: id > 1475059865, id < 1475059775
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test b/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test
index 20c117e..b18a044 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test
@@ -4452,4 +4452,4 @@ where not exists (select 1 from functional.alltypes a where v.id = a.id)
         }
     ]
 }
-====
\ No newline at end of file
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
index 0a8cd01..6270c11 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test
@@ -392,7 +392,7 @@ where b.item % 2 = 0
 01:SUBPLAN
 |
 |--04:NESTED LOOP JOIN [FULL OUTER JOIN]
-|  |  join predicates: a.id < b.item, a.id < 10
+|  |  join predicates: a.id < 10, a.id < b.item
 |  |  predicates: b.item % 2 = 0
 |  |
 |  |--02:SINGULAR ROW SRC
@@ -1357,7 +1357,7 @@ where a.item between 10 and 20
 |
 00:SCAN HDFS [functional.allcomplextypes t]
    partitions=0/0 files=0 size=0B
-   predicates: t.id < 200, !empty(t.int_array_col), !empty(t.complex_nested_struct_col.f2)
+   predicates: t.id < 200, !empty(t.complex_nested_struct_col.f2), !empty(t.int_array_col)
    predicates on a: a.item >= 10, a.item <= 20, a.item % 2 = 0
    predicates on m: m.key = 'test', m.value != 30
    predicates on c: c.f11 >= 10, c.f11 <= 20, c.f11 % 2 = 0

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
index 74002e0..c6ccc81 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
@@ -34,7 +34,7 @@ and t1.zip + t2.zip + t3.zip= 20
 |
 |--02:SCAN HDFS [functional.testtbl t3]
 |     partitions=1/1 files=0 size=0B
-|     predicates: t3.id IS NOT NULL, t3.zip = 94720, t3.id > 0
+|     predicates: t3.id IS NOT NULL, t3.id > 0, t3.zip = 94720
 |
 03:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: t1.id - 1 = t2.id + 1
@@ -60,7 +60,7 @@ and t1.zip + t2.zip + t3.zip= 20
 |  |
 |  02:SCAN HDFS [functional.testtbl t3]
 |     partitions=1/1 files=0 size=0B
-|     predicates: t3.id IS NOT NULL, t3.zip = 94720, t3.id > 0
+|     predicates: t3.id IS NOT NULL, t3.id > 0, t3.zip = 94720
 |
 03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
 |  hash predicates: t1.id - 1 = t2.id + 1
@@ -455,7 +455,7 @@ where a.smallint_col = 100 and a.float_col > b.float_col
 ---- PLAN
 04:HASH JOIN [FULL OUTER JOIN]
 |  hash predicates: c.int_col = a.int_col
-|  other join predicates: a.tinyint_col < b.tinyint_col, a.bigint_col < 10
+|  other join predicates: a.bigint_col < 10, a.tinyint_col < b.tinyint_col
 |  other predicates: a.smallint_col = 100, a.float_col > b.float_col
 |
 |--03:HASH JOIN [INNER JOIN]
@@ -775,7 +775,7 @@ inner join functional.alltypestiny c
 ---- PLAN
 05:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id = c.id
-|  other predicates: b.int_col < 0, a.int_col > 10
+|  other predicates: a.int_col > 10, b.int_col < 0
 |  runtime filters: RF000 <- c.id
 |
 |--02:SCAN HDFS [functional.alltypestiny c]
@@ -809,7 +809,7 @@ full outer join functional.alltypestiny e
 |  hash predicates: e.id = d.id
 |
 |--08:NESTED LOOP JOIN [INNER JOIN]
-|  |  predicates: b.int_col < 0, a.int_col > 10
+|  |  predicates: a.int_col > 10, b.int_col < 0
 |  |
 |  |--07:HASH JOIN [RIGHT OUTER JOIN]
 |  |  |  hash predicates: c.id = b.id

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
index 3ffdbd4..605f157 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
@@ -50,7 +50,7 @@ where month = id and id = int_col and tinyint_col = int_col and int_col < 2
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=2/24 files=2 size=40.32KB
-   predicates: month = id, id = int_col, functional.alltypes.id < 2, int_col < 2, functional.alltypes.tinyint_col < 2, tinyint_col = int_col
+   predicates: functional.alltypes.id < 2, functional.alltypes.tinyint_col < 2, id = int_col, int_col < 2, month = id, tinyint_col = int_col
 ====
 # all subquery results get materialized correctly;
 # a.string_col = 'a' needs to be evaluated by the join itself, not the scan
@@ -190,26 +190,26 @@ where a.year = 2009 and b.month + 2 <= 4 and b.id = 17
   and cast(sin(c.int_col) as boolean) = true
 ---- PLAN
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: b.id = c.id, b.year = c.year, b.month = c.month, b.smallint_col = c.int_col
-|  runtime filters: RF000 <- c.id, RF001 <- c.year, RF002 <- c.month, RF003 <- c.int_col
+|  hash predicates: b.id = c.id, b.month = c.month, b.year = c.year, b.smallint_col = c.int_col
+|  runtime filters: RF000 <- c.id, RF001 <- c.month, RF002 <- c.year, RF003 <- c.int_col
 |
 |--02:SCAN HDFS [functional.alltypestiny c]
 |     partitions=2/4 files=2 size=230B
 |     predicates: c.id = 17, CAST(sin(c.int_col) AS BOOLEAN) = TRUE
 |
 03:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = b.id, a.year = b.year, a.month = b.month, a.tinyint_col = b.smallint_col
-|  runtime filters: RF004 <- b.id, RF005 <- b.year, RF006 <- b.month, RF007 <- b.smallint_col
+|  hash predicates: a.id = b.id, a.month = b.month, a.year = b.year, a.tinyint_col = b.smallint_col
+|  runtime filters: RF004 <- b.id, RF005 <- b.month, RF006 <- b.year, RF007 <- b.smallint_col
 |
 |--01:SCAN HDFS [functional.alltypessmall b]
 |     partitions=2/4 files=2 size=3.16KB
 |     predicates: b.id = 17, CAST(sin(b.smallint_col) AS BOOLEAN) = TRUE
-|     runtime filters: RF000 -> b.id, RF001 -> b.year, RF002 -> b.month, RF003 -> b.smallint_col
+|     runtime filters: RF000 -> b.id, RF001 -> b.month, RF002 -> b.year, RF003 -> b.smallint_col
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id = 17, CAST(sin(a.tinyint_col) AS BOOLEAN) = TRUE
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col, RF004 -> a.id, RF005 -> a.year, RF006 -> a.month, RF007 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col, RF004 -> a.id, RF005 -> a.month, RF006 -> a.year, RF007 -> a.tinyint_col
 ---- SCANRANGELOCATIONS
 NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
@@ -224,32 +224,32 @@ NODE 2:
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: b.id = c.id, b.year = c.year, b.month = c.month, b.smallint_col = c.int_col
-|  runtime filters: RF000 <- c.id, RF001 <- c.year, RF002 <- c.month, RF003 <- c.int_col
+|  hash predicates: b.id = c.id, b.month = c.month, b.year = c.year, b.smallint_col = c.int_col
+|  runtime filters: RF000 <- c.id, RF001 <- c.month, RF002 <- c.year, RF003 <- c.int_col
 |
-|--07:EXCHANGE [HASH(c.id,c.year,c.month,c.int_col)]
+|--07:EXCHANGE [HASH(c.id,c.month,c.year,c.int_col)]
 |  |
 |  02:SCAN HDFS [functional.alltypestiny c]
 |     partitions=2/4 files=2 size=230B
 |     predicates: c.id = 17, CAST(sin(c.int_col) AS BOOLEAN) = TRUE
 |
 03:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: a.id = b.id, a.year = b.year, a.month = b.month, a.tinyint_col = b.smallint_col
-|  runtime filters: RF004 <- b.id, RF005 <- b.year, RF006 <- b.month, RF007 <- b.smallint_col
+|  hash predicates: a.id = b.id, a.month = b.month, a.year = b.year, a.tinyint_col = b.smallint_col
+|  runtime filters: RF004 <- b.id, RF005 <- b.month, RF006 <- b.year, RF007 <- b.smallint_col
 |
-|--06:EXCHANGE [HASH(b.id,b.year,b.month,b.smallint_col)]
+|--06:EXCHANGE [HASH(b.id,b.month,b.year,b.smallint_col)]
 |  |
 |  01:SCAN HDFS [functional.alltypessmall b]
 |     partitions=2/4 files=2 size=3.16KB
 |     predicates: b.id = 17, CAST(sin(b.smallint_col) AS BOOLEAN) = TRUE
-|     runtime filters: RF000 -> b.id, RF001 -> b.year, RF002 -> b.month, RF003 -> b.smallint_col
+|     runtime filters: RF000 -> b.id, RF001 -> b.month, RF002 -> b.year, RF003 -> b.smallint_col
 |
-05:EXCHANGE [HASH(a.id,a.year,a.month,a.tinyint_col)]
+05:EXCHANGE [HASH(a.id,a.month,a.year,a.tinyint_col)]
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id = 17, CAST(sin(a.tinyint_col) AS BOOLEAN) = TRUE
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col, RF004 -> a.id, RF005 -> a.year, RF006 -> a.month, RF007 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col, RF004 -> a.id, RF005 -> a.month, RF006 -> a.year, RF007 -> a.tinyint_col
 ====
 # basic propagation between equivalence classes, with partition pruning;
 # variation with inline views
@@ -263,26 +263,26 @@ where a.year = 2009 and b.month + 2 <= 4 and b.id = 17
   and cast(sin(c.int_col) as boolean) = true
 ---- PLAN
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: functional.alltypessmall.id = functional.alltypestiny.id, functional.alltypessmall.year = functional.alltypestiny.year, functional.alltypessmall.month = functional.alltypestiny.month, functional.alltypessmall.smallint_col = functional.alltypestiny.int_col
-|  runtime filters: RF000 <- functional.alltypestiny.id, RF001 <- functional.alltypestiny.year, RF002 <- functional.alltypestiny.month, RF003 <- functional.alltypestiny.int_col
+|  hash predicates: functional.alltypessmall.id = functional.alltypestiny.id, functional.alltypessmall.month = functional.alltypestiny.month, functional.alltypessmall.year = functional.alltypestiny.year, functional.alltypessmall.smallint_col = functional.alltypestiny.int_col
+|  runtime filters: RF000 <- functional.alltypestiny.id, RF001 <- functional.alltypestiny.month, RF002 <- functional.alltypestiny.year, RF003 <- functional.alltypestiny.int_col
 |
 |--02:SCAN HDFS [functional.alltypestiny]
 |     partitions=2/4 files=2 size=230B
 |     predicates: functional.alltypestiny.id = 17, CAST(sin(functional.alltypestiny.int_col) AS BOOLEAN) = TRUE
 |
 03:HASH JOIN [INNER JOIN]
-|  hash predicates: functional.alltypes.id = functional.alltypessmall.id, functional.alltypes.year = functional.alltypessmall.year, functional.alltypes.month = functional.alltypessmall.month, functional.alltypes.tinyint_col = functional.alltypessmall.smallint_col
-|  runtime filters: RF004 <- functional.alltypessmall.id, RF005 <- functional.alltypessmall.year, RF006 <- functional.alltypessmall.month, RF007 <- functional.alltypessmall.smallint_col
+|  hash predicates: functional.alltypes.id = functional.alltypessmall.id, functional.alltypes.month = functional.alltypessmall.month, functional.alltypes.year = functional.alltypessmall.year, functional.alltypes.tinyint_col = functional.alltypessmall.smallint_col
+|  runtime filters: RF004 <- functional.alltypessmall.id, RF005 <- functional.alltypessmall.month, RF006 <- functional.alltypessmall.year, RF007 <- functional.alltypessmall.smallint_col
 |
 |--01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.16KB
 |     predicates: functional.alltypessmall.id = 17, CAST(sin(functional.alltypessmall.smallint_col) AS BOOLEAN) = TRUE
-|     runtime filters: RF000 -> functional.alltypessmall.id, RF001 -> functional.alltypessmall.year, RF002 -> functional.alltypessmall.month, RF003 -> functional.alltypessmall.smallint_col
+|     runtime filters: RF000 -> functional.alltypessmall.id, RF001 -> functional.alltypessmall.month, RF002 -> functional.alltypessmall.year, RF003 -> functional.alltypessmall.smallint_col
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=2/24 files=2 size=38.07KB
    predicates: functional.alltypes.id = 17, CAST(sin(functional.alltypes.tinyint_col) AS BOOLEAN) = TRUE
-   runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.year, RF002 -> functional.alltypes.month, RF003 -> functional.alltypes.tinyint_col, RF004 -> functional.alltypes.id, RF005 -> functional.alltypes.year, RF006 -> functional.alltypes.month, RF007 -> functional.alltypes.tinyint_col
+   runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.month, RF002 -> functional.alltypes.year, RF003 -> functional.alltypes.tinyint_col, RF004 -> functional.alltypes.id, RF005 -> functional.alltypes.month, RF006 -> functional.alltypes.year, RF007 -> functional.alltypes.tinyint_col
 ---- SCANRANGELOCATIONS
 NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
@@ -297,32 +297,32 @@ NODE 2:
 08:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: functional.alltypessmall.id = functional.alltypestiny.id, functional.alltypessmall.year = functional.alltypestiny.year, functional.alltypessmall.month = functional.alltypestiny.month, functional.alltypessmall.smallint_col = functional.alltypestiny.int_col
-|  runtime filters: RF000 <- functional.alltypestiny.id, RF001 <- functional.alltypestiny.year, RF002 <- functional.alltypestiny.month, RF003 <- functional.alltypestiny.int_col
+|  hash predicates: functional.alltypessmall.id = functional.alltypestiny.id, functional.alltypessmall.month = functional.alltypestiny.month, functional.alltypessmall.year = functional.alltypestiny.year, functional.alltypessmall.smallint_col = functional.alltypestiny.int_col
+|  runtime filters: RF000 <- functional.alltypestiny.id, RF001 <- functional.alltypestiny.month, RF002 <- functional.alltypestiny.year, RF003 <- functional.alltypestiny.int_col
 |
-|--07:EXCHANGE [HASH(functional.alltypestiny.id,functional.alltypestiny.year,functional.alltypestiny.month,functional.alltypestiny.int_col)]
+|--07:EXCHANGE [HASH(functional.alltypestiny.id,functional.alltypestiny.month,functional.alltypestiny.year,functional.alltypestiny.int_col)]
 |  |
 |  02:SCAN HDFS [functional.alltypestiny]
 |     partitions=2/4 files=2 size=230B
 |     predicates: functional.alltypestiny.id = 17, CAST(sin(functional.alltypestiny.int_col) AS BOOLEAN) = TRUE
 |
 03:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: functional.alltypes.id = functional.alltypessmall.id, functional.alltypes.year = functional.alltypessmall.year, functional.alltypes.month = functional.alltypessmall.month, functional.alltypes.tinyint_col = functional.alltypessmall.smallint_col
-|  runtime filters: RF004 <- functional.alltypessmall.id, RF005 <- functional.alltypessmall.year, RF006 <- functional.alltypessmall.month, RF007 <- functional.alltypessmall.smallint_col
+|  hash predicates: functional.alltypes.id = functional.alltypessmall.id, functional.alltypes.month = functional.alltypessmall.month, functional.alltypes.year = functional.alltypessmall.year, functional.alltypes.tinyint_col = functional.alltypessmall.smallint_col
+|  runtime filters: RF004 <- functional.alltypessmall.id, RF005 <- functional.alltypessmall.month, RF006 <- functional.alltypessmall.year, RF007 <- functional.alltypessmall.smallint_col
 |
-|--06:EXCHANGE [HASH(functional.alltypessmall.id,functional.alltypessmall.year,functional.alltypessmall.month,functional.alltypessmall.smallint_col)]
+|--06:EXCHANGE [HASH(functional.alltypessmall.id,functional.alltypessmall.month,functional.alltypessmall.year,functional.alltypessmall.smallint_col)]
 |  |
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.16KB
 |     predicates: functional.alltypessmall.id = 17, CAST(sin(functional.alltypessmall.smallint_col) AS BOOLEAN) = TRUE
-|     runtime filters: RF000 -> functional.alltypessmall.id, RF001 -> functional.alltypessmall.year, RF002 -> functional.alltypessmall.month, RF003 -> functional.alltypessmall.smallint_col
+|     runtime filters: RF000 -> functional.alltypessmall.id, RF001 -> functional.alltypessmall.month, RF002 -> functional.alltypessmall.year, RF003 -> functional.alltypessmall.smallint_col
 |
-05:EXCHANGE [HASH(functional.alltypes.id,functional.alltypes.year,functional.alltypes.month,functional.alltypes.tinyint_col)]
+05:EXCHANGE [HASH(functional.alltypes.id,functional.alltypes.month,functional.alltypes.year,functional.alltypes.tinyint_col)]
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=2/24 files=2 size=38.07KB
    predicates: functional.alltypes.id = 17, CAST(sin(functional.alltypes.tinyint_col) AS BOOLEAN) = TRUE
-   runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.year, RF002 -> functional.alltypes.month, RF003 -> functional.alltypes.tinyint_col, RF004 -> functional.alltypes.id, RF005 -> functional.alltypes.year, RF006 -> functional.alltypes.month, RF007 -> functional.alltypes.tinyint_col
+   runtime filters: RF000 -> functional.alltypes.id, RF001 -> functional.alltypes.month, RF002 -> functional.alltypes.year, RF003 -> functional.alltypes.tinyint_col, RF004 -> functional.alltypes.id, RF005 -> functional.alltypes.month, RF006 -> functional.alltypes.year, RF007 -> functional.alltypes.tinyint_col
 ====
 # propagation between outer-joined tables only goes in one direction:
 # - predicates on a.year and a.tinyint_col are propagated to b
@@ -341,7 +341,7 @@ from functional.alltypes a
 where a.year = 2009 and a.tinyint_col = 7 and a.id is null and b.id = 17 and b.int_col is null
 ---- PLAN
 02:HASH JOIN [LEFT OUTER JOIN]
-|  hash predicates: a.tinyint_col = b.tinyint_col, a.id = b.id, a.year = b.year, a.month = b.month
+|  hash predicates: a.id = b.id, a.month = b.month, a.tinyint_col = b.tinyint_col, a.year = b.year
 |  other predicates: b.int_col IS NULL, b.id = 17
 |
 |--01:SCAN HDFS [functional.alltypessmall b]
@@ -371,7 +371,7 @@ NODE 1:
 04:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
-|  hash predicates: a.tinyint_col = b.tinyint_col, a.id = b.id, a.year = b.year, a.month = b.month
+|  hash predicates: a.id = b.id, a.month = b.month, a.tinyint_col = b.tinyint_col, a.year = b.year
 |  other predicates: b.int_col IS NULL, b.id = 17
 |
 |--03:EXCHANGE [BROADCAST]
@@ -401,9 +401,9 @@ from functional.alltypessmall a
 where b.year = 2009 and b.tinyint_col = 7 and b.id is null and a.id = 17 and a.int_col is null
 ---- PLAN
 02:HASH JOIN [RIGHT OUTER JOIN]
-|  hash predicates: a.tinyint_col = b.tinyint_col, a.id = b.id, a.year = b.year, a.month = b.month
+|  hash predicates: a.id = b.id, a.month = b.month, a.tinyint_col = b.tinyint_col, a.year = b.year
 |  other predicates: a.int_col IS NULL, a.id = 17
-|  runtime filters: RF000 <- b.tinyint_col, RF001 <- b.id, RF002 <- b.year, RF003 <- b.month
+|  runtime filters: RF000 <- b.id, RF001 <- b.month, RF002 <- b.tinyint_col, RF003 <- b.year
 |
 |--01:SCAN HDFS [functional.alltypes b]
 |     partitions=12/24 files=12 size=238.68KB
@@ -412,27 +412,27 @@ where b.year = 2009 and b.tinyint_col = 7 and b.id is null and a.id = 17 and a.i
 00:SCAN HDFS [functional.alltypessmall a]
    partitions=1/4 files=1 size=1.57KB
    predicates: a.id = 17, a.tinyint_col = 7
-   runtime filters: RF000 -> a.tinyint_col, RF001 -> a.id, RF002 -> a.year, RF003 -> a.month
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.tinyint_col, RF003 -> a.year
 ---- DISTRIBUTEDPLAN
 05:EXCHANGE [UNPARTITIONED]
 |
 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
-|  hash predicates: a.tinyint_col = b.tinyint_col, a.id = b.id, a.year = b.year, a.month = b.month
+|  hash predicates: a.id = b.id, a.month = b.month, a.tinyint_col = b.tinyint_col, a.year = b.year
 |  other predicates: a.int_col IS NULL, a.id = 17
-|  runtime filters: RF000 <- b.tinyint_col, RF001 <- b.id, RF002 <- b.year, RF003 <- b.month
+|  runtime filters: RF000 <- b.id, RF001 <- b.month, RF002 <- b.tinyint_col, RF003 <- b.year
 |
-|--04:EXCHANGE [HASH(b.tinyint_col,b.id,b.year,b.month)]
+|--04:EXCHANGE [HASH(b.id,b.month,b.tinyint_col,b.year)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     partitions=12/24 files=12 size=238.68KB
 |     predicates: b.id IS NULL, b.tinyint_col = 7
 |
-03:EXCHANGE [HASH(a.tinyint_col,a.id,a.year,a.month)]
+03:EXCHANGE [HASH(a.id,a.month,a.tinyint_col,a.year)]
 |
 00:SCAN HDFS [functional.alltypessmall a]
    partitions=1/4 files=1 size=1.57KB
    predicates: a.id = 17, a.tinyint_col = 7
-   runtime filters: RF000 -> a.tinyint_col, RF001 -> a.id, RF002 -> a.year, RF003 -> a.month
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.tinyint_col, RF003 -> a.year
 ====
 # propagation into inline view with aggregation:
 # - predicates from enclosing scope applied to grouping exprs; with partition pruning
@@ -449,8 +449,8 @@ from functional.alltypes a
 where a.year = 2009 and b.month <= 2 and b.count_col + 1 = 17 and a.tinyint_col != 5
 ---- PLAN
 03:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--02:AGGREGATE [FINALIZE]
 |  |  output: count(*)
@@ -459,12 +459,12 @@ where a.year = 2009 and b.month <= 2 and b.count_col + 1 = 17 and a.tinyint_col
 |  |
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.16KB
-|     predicates: id > 11, functional.alltypessmall.int_col != 5
+|     predicates: functional.alltypessmall.int_col != 5, id > 11
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ---- SCANRANGELOCATIONS
 NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
@@ -476,8 +476,8 @@ NODE 1:
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--06:EXCHANGE [BROADCAST]
 |  |
@@ -494,12 +494,12 @@ NODE 1:
 |  |
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.16KB
-|     predicates: id > 11, functional.alltypessmall.int_col != 5
+|     predicates: functional.alltypessmall.int_col != 5, id > 11
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ====
 # Same as above but with cross join
 select straight_join a.id, b.id
@@ -519,8 +519,8 @@ where a.id = b.id and
       a.tinyint_col != 5
 ---- PLAN
 03:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--02:AGGREGATE [FINALIZE]
 |  |  output: count(*)
@@ -529,18 +529,18 @@ where a.id = b.id and
 |  |
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.16KB
-|     predicates: id > 11, functional.alltypessmall.int_col != 5
+|     predicates: functional.alltypessmall.int_col != 5, id > 11
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ---- DISTRIBUTEDPLAN
 07:EXCHANGE [UNPARTITIONED]
 |
 03:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--06:EXCHANGE [BROADCAST]
 |  |
@@ -557,12 +557,12 @@ where a.id = b.id and
 |  |
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partitions=2/4 files=2 size=3.16KB
-|     predicates: id > 11, functional.alltypessmall.int_col != 5
+|     predicates: functional.alltypessmall.int_col != 5, id > 11
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ====
 # no propagation into select block with limit;
 # propagation out of that block is okay;
@@ -580,8 +580,8 @@ from functional.alltypes a
 where a.year = 2009 and b.month <= 2 and b.count_col + 1 = 17 and a.tinyint_col != 5
 ---- PLAN
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--03:SELECT
 |  |  predicates: count(*) + 1 = 17
@@ -598,7 +598,7 @@ where a.year = 2009 and b.month <= 2 and b.count_col + 1 = 17 and a.tinyint_col
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ---- SCANRANGELOCATIONS
 NODE 0:
   HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
@@ -612,8 +612,8 @@ NODE 1:
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--08:EXCHANGE [BROADCAST]
 |  |
@@ -641,7 +641,7 @@ NODE 1:
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ====
 # Similar to the above, converts the cross join to a hash join
 select straight_join a.id, b.id
@@ -662,8 +662,8 @@ where a.year = 2009 and
       a.month = b.month
 ---- PLAN
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--03:SELECT
 |  |  predicates: count(*) + 1 = 17
@@ -680,13 +680,13 @@ where a.year = 2009 and
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ---- DISTRIBUTEDPLAN
 09:EXCHANGE [UNPARTITIONED]
 |
 04:HASH JOIN [INNER JOIN, BROADCAST]
-|  hash predicates: a.id = id, a.year = year, a.month = month, a.tinyint_col = int_col
-|  runtime filters: RF000 <- id, RF001 <- year, RF002 <- month, RF003 <- int_col
+|  hash predicates: a.id = id, a.month = month, a.year = year, a.tinyint_col = int_col
+|  runtime filters: RF000 <- id, RF001 <- month, RF002 <- year, RF003 <- int_col
 |
 |--08:EXCHANGE [BROADCAST]
 |  |
@@ -714,7 +714,7 @@ where a.year = 2009 and
 00:SCAN HDFS [functional.alltypes a]
    partitions=2/24 files=2 size=38.07KB
    predicates: a.id > 11, a.tinyint_col != 5
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.month, RF003 -> a.tinyint_col
+   runtime filters: RF000 -> a.id, RF001 -> a.month, RF002 -> a.year, RF003 -> a.tinyint_col
 ====
 # propagation of z.month=1 to alltypesagg is prevented
 select straight_join x.int_col, z.int_col
@@ -768,16 +768,16 @@ and x.id + x.b_id = 17
 |     limit: 10
 |
 02:HASH JOIN [INNER JOIN]
-|  hash predicates: a.year = b.year, a.int_col = b.int_col
+|  hash predicates: a.int_col = b.int_col, a.year = b.year
 |  other predicates: a.id + b.id = 17
-|  runtime filters: RF001 <- b.year, RF002 <- b.int_col
+|  runtime filters: RF001 <- b.int_col, RF002 <- b.year
 |
 |--01:SCAN HDFS [functional.alltypessmall b]
 |     partitions=4/4 files=4 size=6.32KB
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=12/24 files=12 size=238.68KB
-   runtime filters: RF000 -> a.id, RF001 -> a.year, RF002 -> a.int_col
+   runtime filters: RF000 -> a.id, RF001 -> a.int_col, RF002 -> a.year
 ====
 # correct placement of predicates in the presence of aggregation in an inline view
 select straight_join a.id, b.id
@@ -969,13 +969,13 @@ and t2.id + t2.smallint_col + t2.bigint_col > 30
 and t2.id + t3.int_col > 40
 ---- PLAN
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: t2.id = functional.alltypestiny.id, t2.bigint_col = functional.alltypestiny.bigint_col, t2.smallint_col = functional.alltypestiny.int_col
+|  hash predicates: t2.bigint_col = functional.alltypestiny.bigint_col, t2.id = functional.alltypestiny.id, t2.smallint_col = functional.alltypestiny.int_col
 |  other predicates: t2.id + functional.alltypestiny.int_col > 40
-|  runtime filters: RF000 <- functional.alltypestiny.id, RF001 <- functional.alltypestiny.bigint_col, RF002 <- functional.alltypestiny.int_col
+|  runtime filters: RF000 <- functional.alltypestiny.bigint_col, RF001 <- functional.alltypestiny.id, RF002 <- functional.alltypestiny.int_col
 |
 |--02:SCAN HDFS [functional.alltypestiny]
 |     partitions=4/4 files=4 size=460B
-|     predicates: functional.alltypestiny.id + functional.alltypestiny.int_col > 10, functional.alltypestiny.id + functional.alltypestiny.bigint_col > 20, functional.alltypestiny.id + functional.alltypestiny.int_col + functional.alltypestiny.bigint_col > 30
+|     predicates: functional.alltypestiny.id + functional.alltypestiny.bigint_col > 20, functional.alltypestiny.id + functional.alltypestiny.int_col > 10, functional.alltypestiny.id + functional.alltypestiny.int_col + functional.alltypestiny.bigint_col > 30
 |
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id, t1.tinyint_col = t2.smallint_col
@@ -983,13 +983,13 @@ and t2.id + t3.int_col > 40
 |
 |--01:SCAN HDFS [functional.alltypessmall t2]
 |     partitions=4/4 files=4 size=6.32KB
-|     predicates: t2.id + t2.smallint_col > 10, t2.id + t2.bigint_col > 20, t2.id + t2.smallint_col + t2.bigint_col > 30
-|     runtime filters: RF000 -> t2.id, RF001 -> t2.bigint_col, RF002 -> t2.smallint_col
+|     predicates: t2.id + t2.bigint_col > 20, t2.id + t2.smallint_col > 10, t2.id + t2.smallint_col + t2.bigint_col > 30
+|     runtime filters: RF000 -> t2.bigint_col, RF001 -> t2.id, RF002 -> t2.smallint_col
 |
 00:SCAN HDFS [functional.alltypes t1]
    partitions=24/24 files=24 size=478.45KB
    predicates: t1.id + t1.tinyint_col > 10
-   runtime filters: RF000 -> t1.id, RF002 -> t1.tinyint_col, RF003 -> t1.id, RF004 -> t1.tinyint_col
+   runtime filters: RF001 -> t1.id, RF002 -> t1.tinyint_col, RF003 -> t1.id, RF004 -> t1.tinyint_col
 ====
 # basic propagation of multi-slot, single-tuple predicates with aggregates
 select straight_join 1 from
@@ -1162,7 +1162,7 @@ where a.id < 10
 |
 |--01:SCAN HDFS [functional.alltypestiny]
 |     partitions=4/4 files=4 size=460B
-|     predicates: id > 20, functional.alltypestiny.id < 10
+|     predicates: functional.alltypestiny.id < 10, id > 20
 |
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
@@ -1185,7 +1185,7 @@ where b.id < 10
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
-   predicates: id > 20, functional.alltypes.id < 10
+   predicates: functional.alltypes.id < 10, id > 20
 ====
 # Test proper predicate assignment with predicate propagation when the
 # generated predicate is bound by an outer joined tuple (IMPALA-2018)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
index 499910b..f1abd1d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test
@@ -143,7 +143,7 @@ and t1.int_col = 1 and 1 = t2.bigint_col
 ---- PLAN
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.id = t2.id
-|  other predicates: t1.year = t1.month + t2.int_col, t1.year + t2.smallint_col = t2.tinyint_col, t1.year + t2.int_col = t1.month + t2.tinyint_col
+|  other predicates: t1.year + t2.smallint_col = t2.tinyint_col, t1.year = t1.month + t2.int_col, t1.year + t2.int_col = t1.month + t2.tinyint_col
 |  runtime filters: RF000 <- t2.id
 |
 |--01:SCAN HDFS [functional.alltypesnopart t2]
@@ -262,8 +262,8 @@ where t1.year = v.int_col and t1.year = v.id and t1.month = v.tinyint_col
 |  runtime filters: RF000 <- t2.int_col, RF001 <- t4.tinyint_col
 |
 |--05:HASH JOIN [INNER JOIN]
-|  |  hash predicates: t3.int_col = t4.int_col, t2.tinyint_col = t4.tinyint_col
-|  |  runtime filters: RF002 <- t4.int_col, RF003 <- t4.tinyint_col
+|  |  hash predicates: t2.tinyint_col = t4.tinyint_col, t3.int_col = t4.int_col
+|  |  runtime filters: RF002 <- t4.tinyint_col, RF003 <- t4.int_col
 |  |
 |  |--03:SCAN HDFS [functional.alltypesnopart t4]
 |  |     partitions=1/1 files=0 size=0B
@@ -274,12 +274,12 @@ where t1.year = v.int_col and t1.year = v.id and t1.month = v.tinyint_col
 |  |
 |  |--02:SCAN HDFS [functional.alltypesnopart t3]
 |  |     partitions=1/1 files=0 size=0B
-|  |     runtime filters: RF002 -> t3.int_col
+|  |     runtime filters: RF003 -> t3.int_col
 |  |
 |  01:SCAN HDFS [functional.alltypesnopart t2]
 |     partitions=1/1 files=0 size=0B
 |     predicates: t2.int_col = t2.id
-|     runtime filters: RF003 -> t2.tinyint_col, RF004 -> t2.id
+|     runtime filters: RF002 -> t2.tinyint_col, RF004 -> t2.id
 |
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=11/11 files=11 size=814.73KB
@@ -328,8 +328,8 @@ where t1.year = t2.id and t2.int_col = t3.tinyint_col and t3.month = t4.bigint_c
   and t4.smallint_col = t5.smallint_col and t5.id = t1.month
 ---- PLAN
 08:HASH JOIN [INNER JOIN]
-|  hash predicates: t4.smallint_col = t5.smallint_col, t1.month = t5.id
-|  runtime filters: RF000 <- t5.smallint_col, RF001 <- t5.id
+|  hash predicates: t1.month = t5.id, t4.smallint_col = t5.smallint_col
+|  runtime filters: RF000 <- t5.id, RF001 <- t5.smallint_col
 |
 |--04:SCAN HDFS [functional.alltypesnopart t5]
 |     partitions=1/1 files=0 size=0B
@@ -340,7 +340,7 @@ where t1.year = t2.id and t2.int_col = t3.tinyint_col and t3.month = t4.bigint_c
 |
 |--03:SCAN HDFS [functional.alltypesnopart t4]
 |     partitions=1/1 files=0 size=0B
-|     runtime filters: RF000 -> t4.smallint_col
+|     runtime filters: RF001 -> t4.smallint_col
 |
 06:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.int_col = t3.tinyint_col
@@ -360,7 +360,7 @@ where t1.year = t2.id and t2.int_col = t3.tinyint_col and t3.month = t4.bigint_c
 |
 00:SCAN HDFS [functional.alltypesagg t1]
    partitions=11/11 files=11 size=814.73KB
-   runtime filters: RF001 -> t1.month, RF004 -> t1.year
+   runtime filters: RF000 -> t1.month, RF004 -> t1.year
 ====
 # Two-way left outer join query; no runtime filters should be generated from the
 # ON-clause equi-join predicate
@@ -632,7 +632,7 @@ select straight_join * from
 where v1.year = t2.id and v1.int_col = t2.int_col and t2.smallint_col = 1
 ---- PLAN
 03:HASH JOIN [INNER JOIN]
-|  hash predicates: year = t2.id, int_col = t2.int_col
+|  hash predicates: int_col = t2.int_col, year = t2.id
 |
 |--02:SCAN HDFS [functional.alltypesnopart t2]
 |     partitions=1/1 files=0 size=0B
@@ -1213,7 +1213,7 @@ from big_six
 36:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--28:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.double_col = b.double_col, a.float_col = b.float_col, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
+|  |  hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.double_col = b.double_col, a.float_col = b.float_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
 |  |
 |  |--27:SCAN HDFS [functional.alltypestiny b]
 |  |     partitions=4/4 files=4 size=460B
@@ -1224,7 +1224,7 @@ from big_six
 35:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--25:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.id = b.id, a.bool_col = b.bool_col, a.tinyint_col = b.tinyint_col
+|  |  hash predicates: a.bool_col = b.bool_col, a.id = b.id, a.tinyint_col = b.tinyint_col
 |  |
 |  |--24:SCAN HDFS [functional.alltypes b]
 |  |     partitions=24/24 files=24 size=478.45KB
@@ -1235,7 +1235,7 @@ from big_six
 34:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--22:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
+|  |  hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
 |  |
 |  |--21:SCAN HDFS [functional.alltypestiny b]
 |  |     partitions=4/4 files=4 size=460B
@@ -1273,20 +1273,20 @@ from big_six
 31:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--11:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.double_col = b.double_col, a.float_col = b.float_col, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
-|  |  runtime filters: RF017 <- b.bigint_col, RF016 <- b.id, RF019 <- b.double_col, RF018 <- b.bool_col, RF021 <- b.int_col, RF020 <- b.float_col, RF023 <- b.tinyint_col, RF022 <- b.smallint_col
+|  |  hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.double_col = b.double_col, a.float_col = b.float_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
+|  |  runtime filters: RF017 <- b.bool_col, RF016 <- b.bigint_col, RF019 <- b.float_col, RF018 <- b.double_col, RF021 <- b.int_col, RF020 <- b.id, RF023 <- b.tinyint_col, RF022 <- b.smallint_col
 |  |
 |  |--10:SCAN HDFS [functional.alltypestiny b]
 |  |     partitions=4/4 files=4 size=460B
 |  |
 |  09:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF017 -> a.bigint_col, RF016 -> a.id, RF019 -> a.double_col, RF018 -> a.bool_col, RF021 -> a.int_col, RF020 -> a.float_col, RF023 -> a.tinyint_col, RF022 -> a.smallint_col
+|     runtime filters: RF017 -> a.bool_col, RF016 -> a.bigint_col, RF019 -> a.float_col, RF018 -> a.double_col, RF021 -> a.int_col, RF020 -> a.id, RF023 -> a.tinyint_col, RF022 -> a.smallint_col
 |
 30:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--08:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.id = b.id, a.bool_col = b.bool_col, a.double_col = b.double_col, a.smallint_col = b.smallint_col, a.timestamp_col = b.timestamp_col, a.tinyint_col = b.tinyint_col, a.string_col = b.string_col, a.date_string_col = b.date_string_col
+|  |  hash predicates: a.bool_col = b.bool_col, a.double_col = b.double_col, a.id = b.id, a.smallint_col = b.smallint_col, a.timestamp_col = b.timestamp_col, a.tinyint_col = b.tinyint_col, a.string_col = b.string_col, a.date_string_col = b.date_string_col
 |  |
 |  |--07:SCAN HDFS [functional.alltypes b]
 |  |     partitions=24/24 files=24 size=478.45KB
@@ -1297,18 +1297,18 @@ from big_six
 29:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--05:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.id = b.id, a.bool_col = b.bool_col
-|  |  runtime filters: RF006 <- b.id, RF007 <- b.bool_col
+|  |  hash predicates: a.bool_col = b.bool_col, a.id = b.id
+|  |  runtime filters: RF006 <- b.bool_col, RF007 <- b.id
 |  |
 |  |--04:SCAN HDFS [functional.alltypestiny b]
 |  |     partitions=4/4 files=4 size=460B
 |  |
 |  03:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF006 -> a.id, RF007 -> a.bool_col
+|     runtime filters: RF006 -> a.bool_col, RF007 -> a.id
 |
 02:HASH JOIN [INNER JOIN]
-|  hash predicates: a.id = b.id, a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
+|  hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
 |
 |--01:SCAN HDFS [functional.alltypes b]
 |     partitions=24/24 files=24 size=478.45KB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1a5c43ef/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index a04f723..21776ba 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -41,7 +41,7 @@ and a.int_col < 100
 ---- PLAN
 02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: a.int_col = int_col
-|  other join predicates: g.bigint_col < a.bigint_col, a.id = g.id
+|  other join predicates: a.id = g.id, g.bigint_col < a.bigint_col
 |
 |--01:SCAN HDFS [functional.alltypesagg g]
 |     partitions=11/11 files=11 size=814.73KB
@@ -77,8 +77,8 @@ and bool_col = false
 |  output: count(*)
 |
 02:HASH JOIN [LEFT SEMI JOIN]
-|  hash predicates: int_col = int_col, a.id = g.id
-|  runtime filters: RF000 <- int_col, RF001 <- g.id
+|  hash predicates: a.id = g.id, int_col = int_col
+|  runtime filters: RF000 <- g.id, RF001 <- int_col
 |
 |--01:SCAN HDFS [functional.alltypesagg g]
 |     partitions=11/11 files=11 size=814.73KB
@@ -87,7 +87,7 @@ and bool_col = false
 00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
    predicates: bool_col = FALSE
-   runtime filters: RF000 -> int_col, RF001 -> a.id
+   runtime filters: RF000 -> a.id, RF001 -> int_col
 ====
 # Complex expression in the IN predicate
 select *
@@ -174,8 +174,8 @@ and a.int_col < 10
 |  output: count(*)
 |
 04:HASH JOIN [LEFT SEMI JOIN]
-|  hash predicates: a.id = s.id, a.bool_col = s.bool_col
-|  runtime filters: RF000 <- s.id, RF001 <- s.bool_col
+|  hash predicates: a.bool_col = s.bool_col, a.id = s.id
+|  runtime filters: RF000 <- s.bool_col, RF001 <- s.id
 |
 |--03:HASH JOIN [INNER JOIN]
 |  |  hash predicates: s.int_col = t.int_col
@@ -191,7 +191,7 @@ and a.int_col < 10
 00:SCAN HDFS [functional.alltypesagg a]
    partitions=11/11 files=11 size=814.73KB
    predicates: a.int_col < 10
-   runtime filters: RF000 -> a.id, RF001 -> a.bool_col
+   runtime filters: RF000 -> a.bool_col, RF001 -> a.id
 ====
 # Outer join between the tables in the outer query block
 select count(*)
@@ -451,15 +451,15 @@ where id in
 |  runtime filters: RF000 <- id, RF001 <- a.int_col
 |
 |--03:HASH JOIN [LEFT SEMI JOIN]
-|  |  hash predicates: a.tinyint_col = tinyint_col, a.bigint_col = s.bigint_col
-|  |  runtime filters: RF002 <- tinyint_col, RF003 <- s.bigint_col
+|  |  hash predicates: a.bigint_col = s.bigint_col, a.tinyint_col = tinyint_col
+|  |  runtime filters: RF002 <- s.bigint_col, RF003 <- tinyint_col
 |  |
 |  |--02:SCAN HDFS [functional.alltypestiny s]
 |  |     partitions=4/4 files=4 size=460B
 |  |
 |  01:SCAN HDFS [functional.alltypesagg a]
 |     partitions=11/11 files=11 size=814.73KB
-|     runtime filters: RF002 -> a.tinyint_col, RF003 -> a.bigint_col
+|     runtime filters: RF002 -> a.bigint_col, RF003 -> a.tinyint_col
 |
 00:SCAN HDFS [functional.alltypes t]
    partitions=24/24 files=24 size=478.45KB
@@ -477,15 +477,15 @@ where id in
 |  runtime filters: RF000 <- id
 |
 |--03:HASH JOIN [LEFT SEMI JOIN]
-|  |  hash predicates: a.int_col = int_col, a.bigint_col = s.bigint_col
-|  |  runtime filters: RF001 <- int_col, RF002 <- s.bigint_col
+|  |  hash predicates: a.bigint_col = s.bigint_col, a.int_col = int_col
+|  |  runtime filters: RF001 <- s.bigint_col, RF002 <- int_col
 |  |
 |  |--02:SCAN HDFS [functional.alltypestiny s]
 |  |     partitions=4/4 files=4 size=460B
 |  |
 |  01:SCAN HDFS [functional.alltypesagg a]
 |     partitions=11/11 files=11 size=814.73KB
-|     runtime filters: RF001 -> a.int_col, RF002 -> a.bigint_col
+|     runtime filters: RF001 -> a.bigint_col, RF002 -> a.int_col
 |
 00:SCAN HDFS [functional.alltypes t]
    partitions=24/24 files=24 size=478.45KB
@@ -517,8 +517,8 @@ where t.int_col < 10 and t.int_col in
   (select int_col from functional.alltypessmall s where s.id = t.id)
 ---- PLAN
 04:HASH JOIN [LEFT SEMI JOIN]
-|  hash predicates: t.int_col = int_col, t.id = s.id
-|  runtime filters: RF000 <- int_col, RF001 <- s.id
+|  hash predicates: t.id = s.id, t.int_col = int_col
+|  runtime filters: RF000 <- s.id, RF001 <- int_col
 |
 |--02:SCAN HDFS [functional.alltypessmall s]
 |     partitions=4/4 files=4 size=6.32KB
@@ -531,11 +531,11 @@ where t.int_col < 10 and t.int_col in
 |--01:SCAN HDFS [functional.alltypes t]
 |     partitions=24/24 files=24 size=478.45KB
 |     predicates: t.int_col < 10
-|     runtime filters: RF000 -> t.int_col, RF001 -> t.id
+|     runtime filters: RF000 -> t.id, RF001 -> t.int_col
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partitions=11/11 files=11 size=814.73KB
-   runtime filters: RF001 -> a.id, RF002 -> a.id
+   runtime filters: RF000 -> a.id, RF002 -> a.id
 ====
 # Correlated EXISTS
 select count(*)
@@ -1852,12 +1852,12 @@ select 1 from functional.alltypes t where id in
 ---- PLAN
 02:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = id
-|  other join predicates: t.float_col >= a.float_col, a.tinyint_col >= t.tinyint_col, t.float_col <= a.double_col, a.smallint_col <= t.int_col, a.tinyint_col <= t.smallint_col, a.double_col <= CAST(t.string_col AS INT), t.string_col >= a.string_col, a.double_col >= round(acos(t.float_col), 2)
+|  other join predicates: a.tinyint_col >= t.tinyint_col, t.float_col >= a.float_col, a.smallint_col <= t.int_col, a.tinyint_col <= t.smallint_col, t.float_col <= a.double_col, a.double_col <= CAST(t.string_col AS INT), t.string_col >= a.string_col, a.double_col >= round(acos(t.float_col), 2)
 |  runtime filters: RF000 <- id
 |
 |--01:SCAN HDFS [functional.alltypesagg a]
 |     partitions=11/11 files=11 size=814.73KB
-|     predicates: a.smallint_col >= 10, 20 <= a.int_col
+|     predicates: 20 <= a.int_col, a.smallint_col >= 10
 |
 00:SCAN HDFS [functional.alltypes t]
    partitions=24/24 files=24 size=478.45KB


[02/32] incubator-impala git commit: Match .clang-format more closely to actual practice.

Posted by ta...@apache.org.
Match .clang-format more closely to actual practice.

In an attempt to get code like

    double VeryLongFunctionNames(double x1, double x2, double x3,
        double x4) {
      return 1.0;
    }

rather than

    double VeryLongFunctionNames(
        double x1, double x2, double x3, double x4) {
      return 1.0;
    }

I wrote a small set of programs to infer which .clang-format params
fit the current Impala codebase most closely; this patch is the
result.

This patch is the best configuration the inferencer found (while holding
certain enforced parameters fixed, like 90-character lines). It is about
10% closer to Impala's current codebase than the .clang-format currently
checked in, as measured by the number of lines in the diff.
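
The inference programs themselves are not part of this patch, but the core
scoring loop can be sketched roughly as follows. This is a minimal sketch,
not the actual tooling: the file list, the helper names (changed_lines,
score), and the parameter being swept are illustrative assumptions.

    #!/usr/bin/env python
    # Sketch: score a candidate .clang-format configuration by reformatting
    # each file with the candidate settings and counting how many lines
    # would change relative to the code as it exists today.
    import difflib
    import subprocess

    def changed_lines(path, style):
        """Lines clang-format would change in `path` under inline style `style`."""
        original = open(path).read().splitlines()
        formatted = subprocess.check_output(
            ['clang-format', '-style=' + style, path]).decode().splitlines()
        diff = difflib.unified_diff(original, formatted, n=0)
        # Count added/removed lines only, skipping the '---'/'+++' file headers.
        return sum(1 for line in diff
                   if line[:1] in '+-' and not line.startswith(('+++', '---')))

    def score(files, params):
        """Total changed lines over `files` for a candidate parameter dict."""
        style = '{' + ', '.join('%s: %s' % kv for kv in params.items()) + '}'
        return sum(changed_lines(f, style) for f in files)

    if __name__ == '__main__':
        # Hypothetical inputs: a few representative source files, and a sweep
        # over one parameter while the enforced settings stay fixed.
        sources = ['be/src/exec/hash-join-node.cc']
        base = {'BasedOnStyle': 'Google', 'ColumnLimit': 90}
        for value in ('Align', 'DontAlign', 'AlwaysBreak'):
            print(value, score(sources, dict(base, AlignAfterOpenBracket=value)))

The candidate value with the smallest total disturbs the existing code the
least, which is the selection criterion described above.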

Change-Id: Iccaec6c1673c3e08d2c39200b0c84437af629aed
Reviewed-on: http://gerrit.cloudera.org:8080/4590
Reviewed-by: Jim Apple <jb...@cloudera.com>
Tested-by: Jim Apple <jb...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/89b41c68
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/89b41c68
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/89b41c68

Branch: refs/heads/hadoop-next
Commit: 89b41c68c180a96c4e34abfbe12f686d2461a7a3
Parents: 67a0451
Author: Jim Apple <jb...@cloudera.com>
Authored: Sun Oct 2 06:06:03 2016 -0700
Committer: Jim Apple <jb...@cloudera.com>
Committed: Fri Oct 14 00:08:17 2016 +0000

----------------------------------------------------------------------
 .clang-format | 49 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/89b41c68/.clang-format
----------------------------------------------------------------------
diff --git a/.clang-format b/.clang-format
index c0b9030..7c25a43 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,15 +1,36 @@
+---
+Language: Cpp
 BasedOnStyle: Google
-AlignAfterOpenBracket: 'false'
-AlignOperands: 'false'
-AllowShortCaseLabelsOnASingleLine: 'true'
-AllowShortFunctionsOnASingleLine: 'Inline'
-AllowShortIfStatementsOnASingleLine: 'true'
-BreakBeforeBinaryOperators: 'NonAssignment'
-BreakBeforeTernaryOperators: 'false'
-ColumnLimit: '90'
-ConstructorInitializerIndentWidth: '2'
-ContinuationIndentWidth: '4'
-DerivePointerAlignment: 'false'
-PenaltyBreakBeforeFirstCallParameter: '99999999'
-SpacesBeforeTrailingComments: '1'
-Standard: 'Cpp11'
+AlignAfterOpenBracket: DontAlign
+AlignOperands: false
+AlignTrailingComments: false
+AllowShortFunctionsOnASingleLine: Inline
+AlwaysBreakBeforeMultilineStrings: false
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeTernaryOperators: false
+ColumnLimit: 90
+ConstructorInitializerIndentWidth: 2
+DerivePointerAlignment: false
+IncludeCategories:
+- Priority: 3
+  Regex: '^<.*/' # Like <boost/lexical_cast.hpp>
+- Priority: 1
+  Regex: '^<.*\.' # Like <fcntl.h>
+- Priority: 2
+  Regex: '^<' # Like <vector>
+- Priority: 4
+  Regex: '^"' # Like "util/auth-util.h"
+SpacesBeforeTrailingComments: 1
+Standard: Cpp11
+---
+Language: Java
+BasedOnStyle: Chromium
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AlwaysBreakBeforeMultilineStrings: true
+BreakBeforeTernaryOperators: false
+ColumnLimit: 90
+ContinuationIndentWidth: 4
+IndentWidth: 2
+IndentWrappedFunctionNames: true
+SortIncludes: true