You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/01/12 18:19:19 UTC
[20/26] impala git commit: IMPALA-8021: Add estimated cardinality to
EXPLAIN output
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
index e46bf94..2e85c6a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
@@ -9,7 +9,9 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 5
partitions=1/24 files=1 size=20.36KB
+ row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
@@ -18,7 +20,9 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 5
partitions=1/24 files=1 size=20.36KB
+ row-size=81B cardinality=310
====
# insert into a static partition
insert into table functional.alltypessmall
@@ -32,7 +36,9 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,4
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 5
partitions=1/24 files=1 size=20.36KB
+ row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
@@ -41,7 +47,9 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,4
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 5
partitions=1/24 files=1 size=20.36KB
+ row-size=81B cardinality=310
====
# overwrite a static partition
insert overwrite table functional.alltypessmall
@@ -55,7 +63,9 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(2009,4)
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 5
partitions=1/24 files=1 size=20.36KB
+ row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
@@ -64,7 +74,9 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(2009,4)
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 5
partitions=1/24 files=1 size=20.36KB
+ row-size=81B cardinality=310
====
# insert into fully dynamic partitions
insert into table functional.alltypessmall
@@ -79,9 +91,12 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,m
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=610
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
@@ -92,11 +107,14 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,m
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=610
|
01:EXCHANGE [HASH(year,month)]
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=89B cardinality=610
====
# IMPALA-5293: noclustered hint prevents adding sort node
insert into table functional.alltypessmall
@@ -110,7 +128,9 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,m
| partitions=24
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
@@ -122,7 +142,9 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,m
01:EXCHANGE [HASH(year,month)]
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=89B cardinality=610
====
# insert into fully dynamic partitions. The source table has no stats and the insert
# statement has a partition clause, so hash partition before the sink.
@@ -138,20 +160,26 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(int_co
|
01:SORT
| order by: int_col ASC NULLS LAST, int_col ASC NULLS LAST
+| row-size=72B cardinality=unavailable
|
00:SCAN HDFS [functional_seq_snap.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=11.34KB
+ row-size=72B cardinality=unavailable
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(int_col,int_col)]
| partitions=unavailable
|
02:SORT
| order by: int_col ASC NULLS LAST, int_col ASC NULLS LAST
+| row-size=72B cardinality=unavailable
|
01:EXCHANGE [HASH(int_col,int_col)]
|
00:SCAN HDFS [functional_seq_snap.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=11.34KB
+ row-size=72B cardinality=unavailable
====
# insert into fully dynamic partitions;
# partitioned output doesn't require repartitioning
@@ -169,13 +197,17 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,m
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=80B cardinality=24
|
01:AGGREGATE [FINALIZE]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: year, month
+| row-size=80B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
@@ -186,19 +218,24 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,m
|
04:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=80B cardinality=24
|
03:AGGREGATE [FINALIZE]
| output: min:merge(id), min:merge(bool_col), min:merge(tinyint_col), min:merge(smallint_col), min:merge(int_col), min:merge(bigint_col), min:merge(float_col), min:merge(double_col), min:merge(date_string_col), min:merge(string_col), min:merge(timestamp_col)
| group by: year, month
+| row-size=80B cardinality=24
|
02:EXCHANGE [HASH(year,month)]
|
01:AGGREGATE [STREAMING]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: year, month
+| row-size=80B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=89B cardinality=610
====
# insert into a partially dynamic partition
insert into table functional.alltypessmall
@@ -213,9 +250,12 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,m
|
01:SORT
| order by: month ASC NULLS LAST
+| row-size=85B cardinality=610
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=85B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
@@ -226,11 +266,14 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,m
|
02:SORT
| order by: month ASC NULLS LAST
+| row-size=85B cardinality=610
|
01:EXCHANGE [HASH(month)]
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=85B cardinality=610
====
# insert into a partially dynamic partition
# partitioned output doesn't require repartitioning
@@ -248,13 +291,17 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,m
|
02:SORT
| order by: month ASC NULLS LAST
+| row-size=76B cardinality=12
|
01:AGGREGATE [FINALIZE]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: month
+| row-size=76B cardinality=12
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=85B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
@@ -265,19 +312,24 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,m
|
04:SORT
| order by: month ASC NULLS LAST
+| row-size=76B cardinality=12
|
03:AGGREGATE [FINALIZE]
| output: min:merge(id), min:merge(bool_col), min:merge(tinyint_col), min:merge(smallint_col), min:merge(int_col), min:merge(bigint_col), min:merge(float_col), min:merge(double_col), min:merge(date_string_col), min:merge(string_col), min:merge(timestamp_col)
| group by: month
+| row-size=76B cardinality=12
|
02:EXCHANGE [HASH(month)]
|
01:AGGREGATE [STREAMING]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: month
+| row-size=76B cardinality=12
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month > 10
partitions=2/24 files=2 size=40.07KB
+ row-size=85B cardinality=610
====
# insert into a partially dynamic partition
insert into table functional.alltypessmall
@@ -292,9 +344,12 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,4
|
01:SORT
| order by: year ASC NULLS LAST
+| row-size=85B cardinality=300
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year > 2009, month = 4
partitions=1/24 files=1 size=19.71KB
+ row-size=85B cardinality=300
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=4/100401.txt 0:20179
@@ -304,9 +359,12 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,4
|
01:SORT
| order by: year ASC NULLS LAST
+| row-size=85B cardinality=300
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year > 2009, month = 4
partitions=1/24 files=1 size=19.71KB
+ row-size=85B cardinality=300
====
# insert with limit from partitioned table.
insert into table functional.alltypesnopart
@@ -318,8 +376,10 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 1
partitions=1/24 files=1 size=19.95KB
limit: 10
+ row-size=81B cardinality=10
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
@@ -331,8 +391,10 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| limit: 10
|
00:SCAN HDFS [functional.alltypes]
+ partition predicates: year = 2009, month = 1
partitions=1/24 files=1 size=19.95KB
limit: 10
+ row-size=81B cardinality=10
====
# static partition insert from a constant select
insert into table functional.alltypessmall
@@ -345,12 +407,14 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4
|
00:UNION
constant-operands=1
+ row-size=54B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=1
+ row-size=54B cardinality=1
====
# dynamic partition insert from a constant select
insert into table functional.alltypessmall
@@ -363,12 +427,14 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4
|
00:UNION
constant-operands=1
+ row-size=57B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=1
+ row-size=57B cardinality=1
====
# static partition insert from values statement
insert into table functional.alltypessmall
@@ -382,12 +448,14 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4
|
00:UNION
constant-operands=3
+ row-size=55B cardinality=3
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=3
+ row-size=55B cardinality=3
====
# dynamic partition insert from values statement
insert into table functional.alltypessmall
@@ -401,18 +469,22 @@ WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4
|
01:SORT
| order by: 2010 ASC NULLS LAST, 4 ASC NULLS LAST
+| row-size=58B cardinality=3
|
00:UNION
constant-operands=3
+ row-size=58B cardinality=3
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=9
|
01:SORT
| order by: 2010 ASC NULLS LAST, 4 ASC NULLS LAST
+| row-size=58B cardinality=3
|
00:UNION
constant-operands=3
+ row-size=58B cardinality=3
====
# test static partition insert from a query with grouped aggregation
# we expect the insert fragment to be partitioned by the grouping exprs of the query stmt
@@ -427,9 +499,11 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
01:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: string_col
+| row-size=21B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
| partitions=1
@@ -437,15 +511,18 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
03:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| group by: string_col
+| row-size=21B cardinality=10
|
02:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: string_col
+| row-size=21B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=17B cardinality=7.30K
====
# test static partition insert from a query with distinct grouped aggregation
# we expect the insert fragment to be partitioned by the grouping exprs of the query stmt
@@ -460,12 +537,15 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
02:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: string_col
+| row-size=21B cardinality=10
|
01:AGGREGATE
| group by: string_col, int_col
+| row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
| partitions=1
@@ -473,23 +553,28 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| group by: string_col
+| row-size=21B cardinality=10
|
05:EXCHANGE [HASH(string_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: string_col
+| row-size=21B cardinality=10
|
04:AGGREGATE
| group by: string_col, int_col
+| row-size=17B cardinality=100
|
03:EXCHANGE [HASH(string_col,int_col)]
|
01:AGGREGATE [STREAMING]
| group by: string_col, int_col
+| row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=17B cardinality=7.30K
====
# test that the planner chooses to repartition before the table sink
# alltypes has column stats and based on the product of the NDVs of year and month
@@ -502,11 +587,13 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# test noshuffle hint to prevent repartitioning (same query as above with hint)
insert into table functional.alltypes partition(year, month) [noshuffle]
@@ -517,9 +604,11 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# same as above but with traditional commented hint at default hint location
insert into table functional.alltypes partition(year, month) /* +noshuffle */
@@ -530,9 +619,11 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# same as above but with traditional commented hint at Oracle hint location
insert /* +noshuffle */ into table functional.alltypes partition(year, month)
@@ -543,9 +634,11 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# same as above but with end-of-line commented hint
insert into table functional.alltypes partition(year, month)
@@ -557,9 +650,11 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# test that the planner does not repartition before the table sink
# alltypes has column stats and since year only has 2 distinct values the planner
@@ -574,9 +669,11 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,1)]
|
01:SORT
| order by: year ASC NULLS LAST
+| row-size=85B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=85B cardinality=7.30K
====
# test shuffle hint to force repartitioning (same query as above with hint)
insert into table functional.alltypes partition(year, month=1) [shuffle]
@@ -589,11 +686,13 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,1)]
|
02:SORT
| order by: year ASC NULLS LAST
+| row-size=85B cardinality=7.30K
|
01:EXCHANGE [HASH(year)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=85B cardinality=7.30K
====
# test insert/select stmt that contains an analytic function (IMPALA-1400)
insert into table functional.alltypestiny partition(year=2009, month=1)
@@ -610,12 +709,15 @@ WRITE TO HDFS [functional.alltypestiny, OVERWRITE=false, PARTITION-KEYS=(2009,1)
| partition by: id
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
+| row-size=97B cardinality=8
|
01:SORT
| order by: id ASC NULLS FIRST
+| row-size=81B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=81B cardinality=8
====
# IMPALA-3930: Test insert with shuffle hint on constant partition exprs. The table sink
# is executed at the coordinator.
@@ -631,6 +733,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=81B cardinality=7.30K
====
# IMPALA-3930: Same as above but with a dynamic partition insert.
insert into table functional.alltypes partition(year, month) /* +shuffle */
@@ -645,6 +748,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=81B cardinality=7.30K
====
# IMPALA-3930: Same as above but with a mix of static/dynamic partition exprs, and
# with more complex constant exprs.
@@ -660,6 +764,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,5)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=81B cardinality=7.30K
====
# Test insert into an unpartitioned table with shuffle hint.
insert into table functional.alltypesnopart /* +shuffle */
@@ -674,6 +779,7 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=81B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
insert into table functional.alltypes partition(year, month)
@@ -684,20 +790,24 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
insert into table functional.alltypes partition(year, month) /*+ noshuffle */
@@ -708,18 +818,22 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
# Subquery in WHERE-clause exercises the reset() + analyze() path during rewrite.
@@ -732,49 +846,60 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
04:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=730
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = max(int_col)
| runtime filters: RF000 <- max(int_col)
+| row-size=89B cardinality=730
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
+| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
+| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> int_col
+ row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
08:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=730
|
07:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: int_col = max(int_col)
| runtime filters: RF000 <- max(int_col)
+| row-size=89B cardinality=730
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: max:merge(int_col)
+| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: max(int_col)
+| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
+| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> int_col
+ row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into non-partitioned table does not add sort node.
insert into table functional.alltypesnopart
@@ -785,12 +910,14 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|
00:SCAN HDFS [functional.alltypesnopart]
partitions=1/1 files=0 size=0B
+ row-size=72B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
partitions=1/1 files=0 size=0B
+ row-size=72B cardinality=0
====
# IMPALA-5293: ensure insert into non-partitioned table does not add sort node.
insert into table functional.alltypesnopart /*+ shuffle */
@@ -801,6 +928,7 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|
00:SCAN HDFS [functional.alltypesnopart]
partitions=1/1 files=0 size=0B
+ row-size=72B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
@@ -809,4 +937,5 @@ WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|
00:SCAN HDFS [functional.alltypesnopart]
partitions=1/1 files=0 size=0B
+ row-size=72B cardinality=0
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
index fc6abeb..01ff807 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test
@@ -27,32 +27,39 @@ PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
+| row-size=50B cardinality=10
|
05:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
+| row-size=50B cardinality=17.56K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: o.o_custkey = c.c_custkey
| runtime filters: RF000 <- c.c_custkey
+| row-size=117B cardinality=17.56K
|
|--00:SCAN HDFS [tpch.customer c]
| partitions=1/1 files=1 size=23.08MB
| predicates: c.c_mktsegment = 'BUILDING'
+| row-size=29B cardinality=30.00K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: l.l_orderkey = o.o_orderkey
| runtime filters: RF002 <- o.o_orderkey
+| row-size=88B cardinality=57.58K
|
|--01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_custkey
+| row-size=42B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
predicates: l_shipdate > '1995-03-15'
runtime filters: RF002 -> l.l_orderkey
+ row-size=46B cardinality=600.12K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -62,30 +69,36 @@ PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
+| row-size=50B cardinality=10
|
10:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
+| row-size=50B cardinality=17.56K
|
09:EXCHANGE [HASH(l_orderkey,o_orderdate,o_shippriority)]
|
05:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
+| row-size=50B cardinality=17.56K
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o.o_custkey = c.c_custkey
| runtime filters: RF000 <- c.c_custkey
+| row-size=117B cardinality=17.56K
|
|--08:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [tpch.customer c]
| partitions=1/1 files=1 size=23.08MB
| predicates: c.c_mktsegment = 'BUILDING'
+| row-size=29B cardinality=30.00K
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l.l_orderkey = o.o_orderkey
| runtime filters: RF002 <- o.o_orderkey
+| row-size=88B cardinality=57.58K
|
|--07:EXCHANGE [BROADCAST]
| |
@@ -93,11 +106,13 @@ PLAN-ROOT SINK
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_custkey
+| row-size=42B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
predicates: l_shipdate > '1995-03-15'
runtime filters: RF002 -> l.l_orderkey
+ row-size=46B cardinality=600.12K
====
# Q3 - Shipping Priority Query
# straight_join prevents join order optimization
@@ -128,32 +143,39 @@ PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
+| row-size=50B cardinality=10
|
05:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
+| row-size=50B cardinality=575.77K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: o.o_orderkey = l.l_orderkey
| runtime filters: RF000 <- l.l_orderkey
+| row-size=117B cardinality=575.77K
|
|--02:SCAN HDFS [tpch.lineitem l]
| partitions=1/1 files=1 size=718.94MB
| predicates: l_shipdate > '1995-03-15'
+| row-size=46B cardinality=600.12K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: c.c_custkey = o.o_custkey
| runtime filters: RF002 <- o.o_custkey
+| row-size=71B cardinality=150.00K
|
|--01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_orderkey
+| row-size=42B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer c]
partitions=1/1 files=1 size=23.08MB
predicates: c.c_mktsegment = 'BUILDING'
runtime filters: RF002 -> c.c_custkey
+ row-size=29B cardinality=30.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -163,30 +185,36 @@ PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
+| row-size=50B cardinality=10
|
10:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
+| row-size=50B cardinality=575.77K
|
09:EXCHANGE [HASH(l_orderkey,o_orderdate,o_shippriority)]
|
05:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
+| row-size=50B cardinality=575.77K
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o.o_orderkey = l.l_orderkey
| runtime filters: RF000 <- l.l_orderkey
+| row-size=117B cardinality=575.77K
|
|--08:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch.lineitem l]
| partitions=1/1 files=1 size=718.94MB
| predicates: l_shipdate > '1995-03-15'
+| row-size=46B cardinality=600.12K
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c.c_custkey = o.o_custkey
| runtime filters: RF002 <- o.o_custkey
+| row-size=71B cardinality=150.00K
|
|--07:EXCHANGE [BROADCAST]
| |
@@ -194,11 +222,13 @@ PLAN-ROOT SINK
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_orderkey
+| row-size=42B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer c]
partitions=1/1 files=1 size=23.08MB
predicates: c.c_mktsegment = 'BUILDING'
runtime filters: RF002 -> c.c_custkey
+ row-size=29B cardinality=30.00K
====
# Q5 - Local Supplier Volume Query
# Modifications: Added round() call, converted selects from multiple tables
@@ -231,55 +261,68 @@ PLAN-ROOT SINK
|
12:TOP-N [LIMIT=100]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
+| row-size=35B cardinality=25
|
11:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n_name
+| row-size=35B cardinality=25
|
10:HASH JOIN [INNER JOIN]
| hash predicates: n_regionkey = r_regionkey
| runtime filters: RF000 <- r_regionkey
+| row-size=134B cardinality=115.16K
|
|--05:SCAN HDFS [tpch.region]
| partitions=1/1 files=1 size=384B
| predicates: r_name = 'ASIA'
+| row-size=21B cardinality=1
|
09:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF002 <- n_nationkey
+| row-size=113B cardinality=575.77K
|
|--04:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n_regionkey
+| row-size=23B cardinality=25
|
08:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
| runtime filters: RF004 <- s_nationkey, RF005 <- s_suppkey
+| row-size=90B cardinality=575.77K
|
|--03:SCAN HDFS [tpch.supplier s]
| partitions=1/1 files=1 size=1.33MB
| runtime filters: RF002 -> s_nationkey
+| row-size=10B cardinality=10.00K
|
07:HASH JOIN [INNER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF008 <- c_custkey
+| row-size=80B cardinality=575.77K
|
|--00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF002 -> tpch.customer.c_nationkey, RF004 -> c_nationkey
+| row-size=10B cardinality=150.00K
|
06:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF010 <- o_orderkey
+| row-size=70B cardinality=575.77K
|
|--01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
| runtime filters: RF008 -> o_custkey
+| row-size=38B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
runtime filters: RF005 -> l_suppkey, RF010 -> l_orderkey
+ row-size=32B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -289,60 +332,72 @@ PLAN-ROOT SINK
|
12:TOP-N [LIMIT=100]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
+| row-size=35B cardinality=25
|
19:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: n_name
+| row-size=35B cardinality=25
|
18:EXCHANGE [HASH(n_name)]
|
11:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n_name
+| row-size=35B cardinality=25
|
10:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: n_regionkey = r_regionkey
| runtime filters: RF000 <- r_regionkey
+| row-size=134B cardinality=115.16K
|
|--17:EXCHANGE [BROADCAST]
| |
| 05:SCAN HDFS [tpch.region]
| partitions=1/1 files=1 size=384B
| predicates: r_name = 'ASIA'
+| row-size=21B cardinality=1
|
09:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF002 <- n_nationkey
+| row-size=113B cardinality=575.77K
|
|--16:EXCHANGE [BROADCAST]
| |
| 04:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n_regionkey
+| row-size=23B cardinality=25
|
08:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
| runtime filters: RF004 <- s_nationkey, RF005 <- s_suppkey
+| row-size=90B cardinality=575.77K
|
|--15:EXCHANGE [BROADCAST]
| |
| 03:SCAN HDFS [tpch.supplier s]
| partitions=1/1 files=1 size=1.33MB
| runtime filters: RF002 -> s_nationkey
+| row-size=10B cardinality=10.00K
|
07:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF008 <- c_custkey
+| row-size=80B cardinality=575.77K
|
|--14:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF002 -> tpch.customer.c_nationkey, RF004 -> c_nationkey
+| row-size=10B cardinality=150.00K
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF010 <- o_orderkey
+| row-size=70B cardinality=575.77K
|
|--13:EXCHANGE [BROADCAST]
| |
@@ -350,10 +405,12 @@ PLAN-ROOT SINK
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
| runtime filters: RF008 -> o_custkey
+| row-size=38B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
runtime filters: RF005 -> l_suppkey, RF010 -> l_orderkey
+ row-size=32B cardinality=6.00M
====
# Q2 - Minimum Cost Supplier Query
select
@@ -386,38 +443,47 @@ PLAN-ROOT SINK
08:HASH JOIN [INNER JOIN]
| hash predicates: n.n_regionkey = r.r_regionkey
| runtime filters: RF000 <- r.r_regionkey
+| row-size=325B cardinality=1.01K
|
|--04:SCAN HDFS [tpch.region r]
| partitions=1/1 files=1 size=384B
| predicates: r.r_name = 'EUROPE'
+| row-size=21B cardinality=1
|
07:HASH JOIN [INNER JOIN]
| hash predicates: s.s_nationkey = n.n_nationkey
| runtime filters: RF002 <- n.n_nationkey
+| row-size=304B cardinality=5.05K
|
|--03:SCAN HDFS [tpch.nation n]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n.n_regionkey
+| row-size=23B cardinality=25
|
06:HASH JOIN [INNER JOIN]
| hash predicates: s.s_suppkey = ps.ps_suppkey
| runtime filters: RF004 <- ps.ps_suppkey
+| row-size=281B cardinality=5.05K
|
|--05:HASH JOIN [INNER JOIN]
| | hash predicates: ps.ps_partkey = p.p_partkey
| | runtime filters: RF006 <- p.p_partkey
+| | row-size=95B cardinality=5.05K
| |
| |--00:SCAN HDFS [tpch.part p]
| | partitions=1/1 files=1 size=22.83MB
| | predicates: p.p_size = 15, p.p_type LIKE '%BRASS'
+| | row-size=71B cardinality=1.26K
| |
| 02:SCAN HDFS [tpch.partsupp ps]
| partitions=1/1 files=1 size=112.71MB
| runtime filters: RF006 -> ps.ps_partkey
+| row-size=24B cardinality=800.00K
|
01:SCAN HDFS [tpch.supplier s]
partitions=1/1 files=1 size=1.33MB
runtime filters: RF002 -> s.s_nationkey, RF004 -> s.s_suppkey
+ row-size=187B cardinality=10.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -426,46 +492,55 @@ PLAN-ROOT SINK
08:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: n.n_regionkey = r.r_regionkey
| runtime filters: RF000 <- r.r_regionkey
+| row-size=325B cardinality=1.01K
|
|--12:EXCHANGE [BROADCAST]
| |
| 04:SCAN HDFS [tpch.region r]
| partitions=1/1 files=1 size=384B
| predicates: r.r_name = 'EUROPE'
+| row-size=21B cardinality=1
|
07:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s.s_nationkey = n.n_nationkey
| runtime filters: RF002 <- n.n_nationkey
+| row-size=304B cardinality=5.05K
|
|--11:EXCHANGE [BROADCAST]
| |
| 03:SCAN HDFS [tpch.nation n]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n.n_regionkey
+| row-size=23B cardinality=25
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s.s_suppkey = ps.ps_suppkey
| runtime filters: RF004 <- ps.ps_suppkey
+| row-size=281B cardinality=5.05K
|
|--10:EXCHANGE [BROADCAST]
| |
| 05:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: ps.ps_partkey = p.p_partkey
| | runtime filters: RF006 <- p.p_partkey
+| | row-size=95B cardinality=5.05K
| |
| |--09:EXCHANGE [BROADCAST]
| | |
| | 00:SCAN HDFS [tpch.part p]
| | partitions=1/1 files=1 size=22.83MB
| | predicates: p.p_size = 15, p.p_type LIKE '%BRASS'
+| | row-size=71B cardinality=1.26K
| |
| 02:SCAN HDFS [tpch.partsupp ps]
| partitions=1/1 files=1 size=112.71MB
| runtime filters: RF006 -> ps.ps_partkey
+| row-size=24B cardinality=800.00K
|
01:SCAN HDFS [tpch.supplier s]
partitions=1/1 files=1 size=1.33MB
runtime filters: RF002 -> s.s_nationkey, RF004 -> s.s_suppkey
+ row-size=187B cardinality=10.00K
====
# Q4 - Order Priority Checking Query
# the largest input is prevented from becoming the leftmost input by the semi-join
@@ -489,23 +564,28 @@ PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
+| row-size=28B cardinality=5
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF000 <- o_orderkey
+| row-size=50B cardinality=150.00K
|
|--00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
+| row-size=50B cardinality=150.00K
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
predicates: l_commitdate < l_receiptdate
runtime filters: RF000 -> l_orderkey
+ row-size=52B cardinality=600.12K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -515,26 +595,31 @@ PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
+| row-size=28B cardinality=5
|
08:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
07:EXCHANGE [HASH(o_orderpriority)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
02:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF000 <- o_orderkey
+| row-size=50B cardinality=150.00K
|
|--06:EXCHANGE [HASH(o_orderkey)]
| |
| 00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
+| row-size=50B cardinality=150.00K
|
05:EXCHANGE [HASH(l_orderkey)]
|
@@ -542,6 +627,7 @@ PLAN-ROOT SINK
partitions=1/1 files=1 size=718.94MB
predicates: l_commitdate < l_receiptdate
runtime filters: RF000 -> l_orderkey
+ row-size=52B cardinality=600.12K
====
select o_orderpriority, count(*) as order_count
from tpch.orders
@@ -554,19 +640,24 @@ PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
+| row-size=28B cardinality=5
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: l_orderkey = o_orderkey
+| row-size=36B cardinality=7.50M
|
|--00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
+| row-size=28B cardinality=1.50M
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -576,29 +667,35 @@ PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
+| row-size=28B cardinality=5
|
08:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
07:EXCHANGE [HASH(o_orderpriority)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey
+| row-size=36B cardinality=7.50M
|
|--06:EXCHANGE [HASH(o_orderkey)]
| |
| 00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
+| row-size=28B cardinality=1.50M
|
05:EXCHANGE [HASH(l_orderkey)]
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
====
select o_orderpriority, count(*) as order_count
from tpch.orders
@@ -611,19 +708,24 @@ PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
+| row-size=28B cardinality=5
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: l_orderkey = o_orderkey
+| row-size=36B cardinality=6.00M
|
|--00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
+| row-size=28B cardinality=1.50M
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -633,29 +735,35 @@ PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
+| row-size=28B cardinality=5
|
08:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
07:EXCHANGE [HASH(o_orderpriority)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
+| row-size=28B cardinality=5
|
02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey
+| row-size=36B cardinality=6.00M
|
|--06:EXCHANGE [HASH(o_orderkey)]
| |
| 00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
+| row-size=28B cardinality=1.50M
|
05:EXCHANGE [HASH(l_orderkey)]
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
====
# order does not become the leftmost input because of the outer join;
# the join with nation is done first because it reduces the intermediate output
@@ -669,62 +777,75 @@ PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
+| row-size=39B cardinality=60.00K
|
|--02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
+| row-size=21B cardinality=1
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF002 <- c_custkey
+| row-size=18B cardinality=1.50M
|
|--00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF000 -> c_nationkey
+| row-size=10B cardinality=150.00K
|
01:SCAN HDFS [tpch.orders]
partitions=1/1 files=1 size=162.56MB
runtime filters: RF002 -> o_custkey
+ row-size=8B cardinality=1.50M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
09:EXCHANGE [UNPARTITIONED]
|
05:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
+| row-size=39B cardinality=60.00K
|
|--08:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
+| row-size=21B cardinality=1
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF002 <- c_custkey
+| row-size=18B cardinality=1.50M
|
|--07:EXCHANGE [HASH(c_custkey)]
| |
| 00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF000 -> c_nationkey
+| row-size=10B cardinality=150.00K
|
06:EXCHANGE [HASH(o_custkey)]
|
01:SCAN HDFS [tpch.orders]
partitions=1/1 files=1 size=162.56MB
runtime filters: RF002 -> o_custkey
+ row-size=8B cardinality=1.50M
====
# order does not become the leftmost input because of the cross join;
# the join with nation is done first because it reduces the intermediate output
@@ -738,54 +859,67 @@ PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=23B cardinality=9.00G
|
|--01:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
+| row-size=0B cardinality=1.50M
|
03:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
+| row-size=23B cardinality=6.00K
|
|--02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
+| row-size=21B cardinality=1
|
00:SCAN HDFS [tpch.customer]
partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> c_nationkey
+ row-size=2B cardinality=150.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
08:EXCHANGE [UNPARTITIONED]
|
05:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=23B cardinality=9.00G
|
|--07:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
+| row-size=0B cardinality=1.50M
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
+| row-size=23B cardinality=6.00K
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
+| row-size=21B cardinality=1
|
00:SCAN HDFS [tpch.customer]
partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> c_nationkey
+ row-size=2B cardinality=150.00K
====
# Do not consider 'c' a candidate for the leftmost table (IMPALA-1281),
# because doing so requires careful consideration of the joinOps of
@@ -798,17 +932,22 @@ cross join functional.alltypes c
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=4B cardinality=467.20K
|
|--03:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=0B cardinality=64
| |
| |--01:SCAN HDFS [functional.alltypestiny b]
| | partitions=4/4 files=4 size=460B
+| | row-size=0B cardinality=8
| |
| 00:SCAN HDFS [functional.alltypestiny a]
| partitions=4/4 files=4 size=460B
+| row-size=0B cardinality=8
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
+ row-size=4B cardinality=7.30K
====
# Test that tables are not re-ordered across outer/semi joins (IMPALA-860),
# but the tables to the left/right of outer/semi joins are still re-ordered.
@@ -824,48 +963,60 @@ PLAN-ROOT SINK
|
11:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
10:HASH JOIN [INNER JOIN]
| hash predicates: t5.id = t4.id
| runtime filters: RF000 <- t4.id
+| row-size=24B cardinality=7
|
|--09:HASH JOIN [INNER JOIN]
| | hash predicates: t4.id = t6.id
| | runtime filters: RF002 <- t6.id
+| | row-size=20B cardinality=9
| |
| |--05:SCAN HDFS [functional.alltypestiny t6]
| | partitions=4/4 files=4 size=460B
+| | row-size=4B cardinality=8
| |
| 08:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: t4.id = t3.id
+| | row-size=16B cardinality=11.00K
| |
| |--07:HASH JOIN [INNER JOIN]
| | | hash predicates: t3.id = t2.id
| | | runtime filters: RF004 <- t2.id
+| | | row-size=12B cardinality=1
| | |
| | |--06:HASH JOIN [INNER JOIN]
| | | | hash predicates: t2.id = t1.id
| | | | runtime filters: RF006 <- t1.id
+| | | | row-size=8B cardinality=8
| | | |
| | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | partitions=4/4 files=4 size=460B
| | | | runtime filters: RF002 -> t1.id
+| | | | row-size=4B cardinality=8
| | | |
| | | 01:SCAN HDFS [functional.alltypes t2]
| | | partitions=24/24 files=24 size=478.45KB
| | | runtime filters: RF002 -> t2.id, RF006 -> t2.id
+| | | row-size=4B cardinality=7.30K
| | |
| | 02:SCAN HDFS [functional.alltypessmall t3]
| | partitions=4/4 files=4 size=6.32KB
| | runtime filters: RF002 -> t3.id, RF004 -> t3.id
+| | row-size=4B cardinality=100
| |
| 03:SCAN HDFS [functional.alltypesagg t4]
| partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> t4.id
+| row-size=4B cardinality=11.00K
|
04:SCAN HDFS [functional.alltypes t5]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t5.id
+ row-size=4B cardinality=7.30K
====
# No tables can be re-ordered because of semi and outer joins that must
# remain at a fixed position in the plan (IMPALA-860).
@@ -881,47 +1032,59 @@ PLAN-ROOT SINK
|
13:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
12:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t6.id = t5.id
+| row-size=20B cardinality=8
|
|--11:HASH JOIN [INNER JOIN]
| | hash predicates: t5.id = t3.id
| | runtime filters: RF000 <- t3.id
+| | row-size=16B cardinality=1
| |
| |--10:HASH JOIN [RIGHT SEMI JOIN]
| | | hash predicates: t4.id = t3.id
| | | runtime filters: RF002 <- t3.id
+| | | row-size=12B cardinality=1
| | |
| | |--09:HASH JOIN [INNER JOIN]
| | | | hash predicates: t3.id = t2.id
| | | | runtime filters: RF004 <- t2.id
+| | | | row-size=12B cardinality=1
| | | |
| | | |--08:HASH JOIN [RIGHT OUTER JOIN]
| | | | | hash predicates: t2.id = t1.id
| | | | | runtime filters: RF006 <- t1.id
+| | | | | row-size=8B cardinality=8
| | | | |
| | | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | | partitions=4/4 files=4 size=460B
+| | | | | row-size=4B cardinality=8
| | | | |
| | | | 01:SCAN HDFS [functional.alltypes t2]
| | | | partitions=24/24 files=24 size=478.45KB
| | | | runtime filters: RF006 -> t2.id
+| | | | row-size=4B cardinality=7.30K
| | | |
| | | 02:SCAN HDFS [functional.alltypessmall t3]
| | | partitions=4/4 files=4 size=6.32KB
| | | runtime filters: RF004 -> t3.id
+| | | row-size=4B cardinality=100
| | |
| | 03:SCAN HDFS [functional.alltypesagg t4]
| | partitions=11/11 files=11 size=814.73KB
| | runtime filters: RF002 -> t4.id
+| | row-size=4B cardinality=11.00K
| |
| 04:SCAN HDFS [functional.alltypes t5]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> t5.id
+| row-size=4B cardinality=7.30K
|
05:SCAN HDFS [functional.alltypestiny t6]
partitions=4/4 files=4 size=460B
+ row-size=4B cardinality=8
====
# Check that a join in between outer/semi joins is re-ordered correctly.
# We expect t4 is joined before t3.
@@ -937,49 +1100,61 @@ PLAN-ROOT SINK
|
13:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
12:HASH JOIN [INNER JOIN]
| hash predicates: t6.id = t3.id
| runtime filters: RF000 <- t3.id
+| row-size=20B cardinality=1
|
|--11:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: t5.id = t4.id
| | runtime filters: RF002 <- t4.id
+| | row-size=16B cardinality=1
| |
| |--10:HASH JOIN [INNER JOIN]
| | | hash predicates: t3.id = t2.id
| | | runtime filters: RF004 <- t2.id
+| | | row-size=16B cardinality=1
| | |
| | |--09:HASH JOIN [INNER JOIN]
| | | | hash predicates: t4.id = t2.id
| | | | runtime filters: RF006 <- t2.id
+| | | | row-size=12B cardinality=1
| | | |
| | | |--08:HASH JOIN [RIGHT OUTER JOIN]
| | | | | hash predicates: t2.id = t1.id
| | | | | runtime filters: RF008 <- t1.id
+| | | | | row-size=8B cardinality=8
| | | | |
| | | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | | partitions=4/4 files=4 size=460B
+| | | | | row-size=4B cardinality=8
| | | | |
| | | | 01:SCAN HDFS [functional.alltypes t2]
| | | | partitions=24/24 files=24 size=478.45KB
| | | | runtime filters: RF008 -> t2.id
+| | | | row-size=4B cardinality=7.30K
| | | |
| | | 03:SCAN HDFS [functional.alltypessmall t4]
| | | partitions=4/4 files=4 size=6.32KB
| | | runtime filters: RF006 -> t4.id
+| | | row-size=4B cardinality=100
| | |
| | 02:SCAN HDFS [functional.alltypesagg t3]
| | partitions=11/11 files=11 size=814.73KB
| | runtime filters: RF004 -> t3.id
+| | row-size=4B cardinality=11.00K
| |
| 04:SCAN HDFS [functional.alltypes t5]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF002 -> t5.id
+| row-size=4B cardinality=7.30K
|
05:SCAN HDFS [functional.alltypestiny t6]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t6.id
+ row-size=4B cardinality=8
====
# Same above but using an anti join instead of a semi join.
select count(*) from
@@ -994,47 +1169,59 @@ PLAN-ROOT SINK
|
13:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
12:HASH JOIN [INNER JOIN]
| hash predicates: t6.id = t3.id
| runtime filters: RF000 <- t3.id
+| row-size=20B cardinality=1
|
|--11:HASH JOIN [RIGHT ANTI JOIN]
| | hash predicates: t5.id = t4.id
+| | row-size=16B cardinality=1
| |
| |--10:HASH JOIN [INNER JOIN]
| | | hash predicates: t3.id = t2.id
| | | runtime filters: RF002 <- t2.id
+| | | row-size=16B cardinality=1
| | |
| | |--09:HASH JOIN [INNER JOIN]
| | | | hash predicates: t4.id = t2.id
| | | | runtime filters: RF004 <- t2.id
+| | | | row-size=12B cardinality=1
| | | |
| | | |--08:HASH JOIN [RIGHT OUTER JOIN]
| | | | | hash predicates: t2.id = t1.id
| | | | | runtime filters: RF006 <- t1.id
+| | | | | row-size=8B cardinality=8
| | | | |
| | | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | | partitions=4/4 files=4 size=460B
+| | | | | row-size=4B cardinality=8
| | | | |
| | | | 01:SCAN HDFS [functional.alltypes t2]
| | | | partitions=24/24 files=24 size=478.45KB
| | | | runtime filters: RF006 -> t2.id
+| | | | row-size=4B cardinality=7.30K
| | | |
| | | 03:SCAN HDFS [functional.alltypessmall t4]
| | | partitions=4/4 files=4 size=6.32KB
| | | runtime filters: RF004 -> t4.id
+| | | row-size=4B cardinality=100
| | |
| | 02:SCAN HDFS [functional.alltypesagg t3]
| | partitions=11/11 files=11 size=814.73KB
| | runtime filters: RF002 -> t3.id
+| | row-size=4B cardinality=11.00K
| |
| 04:SCAN HDFS [functional.alltypes t5]
| partitions=24/24 files=24 size=478.45KB
+| row-size=4B cardinality=7.30K
|
05:SCAN HDFS [functional.alltypestiny t6]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t6.id
+ row-size=4B cardinality=8
====
# Test inverting outer joins in a complex query plan.
select 1 from
@@ -1057,49 +1244,63 @@ PLAN-ROOT SINK
12:HASH JOIN [INNER JOIN]
| hash predicates: t3.id = t4.id
| runtime filters: RF000 <- t4.id
+| row-size=28B cardinality=1
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
11:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t3.id = a.id
| runtime filters: RF002 <- a.id
+| row-size=24B cardinality=8
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = count(a.id)
| | runtime filters: RF004 <- count(a.id)
+| | row-size=20B cardinality=8
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
+| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
+| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF006 <- b.id
+| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
+| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF006 -> a.id
+| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: b.id = a.id
| | other predicates: a.year < 10
+| | row-size=12B cardinality=8
| |
| |--05:SCAN HDFS [functional.alltypes a]
+| | partition predicates: a.year < 10
| | partitions=0/24 files=0 size=0B
| | runtime filters: RF004 -> a.id
+| | row-size=8B cardinality=0
| |
| 06:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id, RF002 -> t3.id
+ row-size=4B cardinality=7.30K
====
# Same as above but with full outer joins.
select 1 from
@@ -1122,48 +1323,62 @@ PLAN-ROOT SINK
12:HASH JOIN [INNER JOIN]
| hash predicates: t3.id = t4.id
| runtime filters: RF000 <- t4.id
+| row-size=28B cardinality=9
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
11:HASH JOIN [FULL OUTER JOIN]
| hash predicates: t3.id = a.id
+| row-size=24B cardinality=7.31K
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = count(a.id)
| | runtime filters: RF002 <- count(a.id)
+| | row-size=20B cardinality=8
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
+| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
+| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF004 <- b.id
+| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
+| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF004 -> a.id
+| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [FULL OUTER JOIN]
| | hash predicates: b.id = a.id
| | other predicates: a.year < 10
+| | row-size=12B cardinality=8
| |
| |--05:SCAN HDFS [functional.alltypes a]
+| | partition predicates: a.year < 10
| | partitions=0/24 files=0 size=0B
| | runtime filters: RF002 -> a.id
+| | row-size=8B cardinality=0
| |
| 06:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id
+ row-size=4B cardinality=7.30K
====
# Test inverting semi joins in a complex query plan.
select 1 from
@@ -1186,50 +1401,64 @@ PLAN-ROOT SINK
12:HASH JOIN [INNER JOIN]
| hash predicates: b.id = t4.id
| runtime filters: RF000 <- t4.id
+| row-size=16B cardinality=1
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
11:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t3.id = b.id
| runtime filters: RF002 <- b.id
+| row-size=12B cardinality=8
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = count(a.id)
| | runtime filters: RF004 <- count(a.id)
+| | row-size=12B cardinality=8
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
+| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
+| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF008 <- b.id
+| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
+| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF008 -> a.id
+| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: b.id = a.id
| | runtime filters: RF006 <- a.id
+| | row-size=4B cardinality=8
| |
| |--05:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF000 -> a.id, RF004 -> a.id
+| | row-size=4B cardinality=8
| |
| 06:SCAN HDFS [functional.alltypes b]
+| partition predicates: b.month = 1
| partitions=2/24 files=2 size=40.32KB
| runtime filters: RF000 -> b.id, RF004 -> b.id, RF006 -> b.id
+| row-size=4B cardinality=620
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id, RF002 -> t3.id
+ row-size=4B cardinality=7.30K
====
# Same as above but with anti joins.
select 1 from
@@ -1252,48 +1481,62 @@ PLAN-ROOT SINK
12:HASH JOIN [INNER JOIN]
| hash predicates: b.id = t4.id
| runtime filters: RF000 <- t4.id
+| row-size=16B cardinality=1
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
11:HASH JOIN [RIGHT ANTI JOIN]
| hash predicates: t3.id = b.id
+| row-size=12B cardinality=620
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = count(a.id)
| | runtime filters: RF002 <- count(a.id)
+| | row-size=12B cardinality=620
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
+| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
+| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF004 <- b.id
+| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
+| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF004 -> a.id
+| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [LEFT ANTI JOIN]
| | hash predicates: b.id = a.id
+| | row-size=4B cardinality=620
| |
| |--05:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF000 -> a.id, RF002 -> a.id
+| | row-size=4B cardinality=8
| |
| 06:SCAN HDFS [functional.alltypes b]
+| partition predicates: b.month = 1
| partitions=2/24 files=2 size=40.32KB
| runtime filters: RF000 -> b.id, RF002 -> b.id
+| row-size=4B cardinality=620
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id
+ row-size=4B cardinality=7.30K
====
# Regression test for IMPALA-1343.
SELECT sum(t4.tinyint_col)
@@ -1316,41 +1559,52 @@ PLAN-ROOT SINK
|
10:AGGREGATE [FINALIZE]
| output: sum(t4.tinyint_col)
+| row-size=8B cardinality=1
|
09:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t4.bigint_col = tt1.int_col
| runtime filters: RF000 <- tt1.int_col
+| row-size=31B cardinality=8
|
|--06:SCAN HDFS [functional.alltypestiny tt1]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
08:NESTED LOOP JOIN [INNER JOIN]
| predicates: sum(t1.int_col) > t4.id
+| row-size=31B cardinality=8
|
|--05:AGGREGATE [FINALIZE]
| | output: sum(t1.int_col)
| | limit: 1
+| | row-size=8B cardinality=1
| |
| 04:SCAN HDFS [functional.alltypesagg t1]
| partitions=11/11 files=11 size=814.73KB
+| row-size=4B cardinality=11.00K
|
07:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=23B cardinality=8
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: t1.bigint_col = t2.smallint_col
| | runtime filters: RF002 <- t2.smallint_col
| | limit: 1
+| | row-size=10B cardinality=1
| |
| |--02:SCAN HDFS [functional.alltypestiny t2]
| | partitions=4/4 files=4 size=460B
+| | row-size=2B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypes t1]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF002 -> t1.bigint_col
+| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny t4]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t4.bigint_col
+ row-size=13B cardinality=8
====
# Tests assignment of conjuncts to inverted outer joins (IMPALA-1342).
select 1
@@ -1366,13 +1620,16 @@ PLAN-ROOT SINK
| hash predicates: b.id = a.id
| other predicates: a.int_col = b.int_col, b.bigint_col < a.tinyint_col
| runtime filters: RF000 <- a.id, RF001 <- a.int_col
+| row-size=25B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny a]
| partitions=4/4 files=4 size=460B
+| row-size=9B cardinality=8
|
01:SCAN HDFS [functional.alltypessmall b]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> b.id, RF001 -> b.int_col
+ row-size=16B cardinality=100
====
# Tests assignment of conjuncts to inverted outer joins (IMPALA-1342).
select 1
@@ -1391,21 +1648,26 @@ PLAN-ROOT SINK
| hash predicates: c.id = b.id
| other predicates: a.int_col = b.int_col, b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col, b.bigint_col < a.tinyint_col
| runtime filters: RF000 <- b.tinyint_col
+| row-size=33B cardinality=7.30K
|
|--03:HASH JOIN [RIGHT OUTER JOIN]
| | hash predicates: b.id = a.id
| | runtime filters: RF002 <- a.id
+| | row-size=27B cardinality=8
| |
| |--00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
+| | row-size=9B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| runtime filters: RF002 -> b.id
+| row-size=18B cardinality=100
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> c.tinyint_col
+ row-size=6B cardinality=7.30K
====
# Tests assignment of conjuncts to inverted outer joins (IMPALA-1342).
select 1
@@ -1423,21 +1685,26 @@ PLAN-ROOT SINK
| hash predicates: c.id = b.id
| other predicates: b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col
| runtime filters: RF000 <- b.tinyint_col
+| row-size=16B cardinality=7.30K
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = a.id
| | runtime filters: RF002 <- a.id
+| | row-size=10B cardinality=9
| |
| |--00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
+| | row-size=4B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| runtime filters: RF002 -> b.id
+| row-size=6B cardinality=100
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> c.tinyint_col
+ row-size=6B cardinality=7.30K
====
# Regression test for IMPALA-1342.
select count(1) from
@@ -1453,24 +1720,30 @@ PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t2.string_col = t3.string_col, t1.string_col = t3.date_string_col
+| row-size=81B cardinality=83.39K
|
|--03:SCAN HDFS [functional.alltypestiny t3]
| partitions=4/4 files=4 size=460B
+| row-size=33B cardinality=8
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t2.date_string_col = t1.string_col
| other predicates: t2.date_string_col = t1.string_col
| runtime filters: RF000 <- t1.string_col, RF001 <- t1.string_col
+| row-size=48B cardinality=83.39K
|
|--00:SCAN HDFS [functional.alltypesagg t1]
| partitions=11/11 files=11 size=814.73KB
+| row-size=15B cardinality=11.00K
|
01:SCAN HDFS [functional.alltypes t2]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t2.date_string_col, RF001 -> t2.date_string_col
+ row-size=33B cardinality=7.30K
====
# Test that filtering with "<=>" sets selectivity, just as "=" does. First, the
# base case: functional.alltypes.timestamp_col has more distinct vals than
@@ -1490,15 +1763,18 @@ PLAN-ROOT SINK
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = functional.alltypes.id
| runtime filters: RF000 <- functional.alltypes.id
+| row-size=89B cardinality=1
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
| predicates: timestamp_col = TIMESTAMP '2016-11-20 00:00:00'
+| row-size=20B cardinality=1
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.date_string_col = ''
runtime filters: RF000 -> a.id
+ row-size=89B cardinality=10
====
select * from functional.alltypes a
left semi join
@@ -1512,15 +1788,18 @@ PLAN-ROOT SINK
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: functional.alltypes.id = a.id
| runtime filters: RF000 <- a.id
+| row-size=89B cardinality=1
|
|--00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
| predicates: a.timestamp_col = TIMESTAMP '2016-11-20 00:00:00'
+| row-size=89B cardinality=1
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: date_string_col = ''
runtime filters: RF000 -> functional.alltypes.id
+ row-size=24B cardinality=10
====
# The same should hold true when the filtering is done with "<=>" rather than "=".
select * from functional.alltypes a
@@ -1535,15 +1814,18 @@ PLAN-ROOT SINK
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = functional.alltypes.id
| runtime filters: RF000 <- functional.alltypes.id
+| row-size=89B cardinality=1
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
| predicates: timestamp_col IS NOT DISTINCT FROM TIMESTAMP '2016-11-20 00:00:00'
+| row-size=20B cardinality=1
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.date_string_col IS NOT DISTINCT FROM ''
runtime filters: RF000 -> a.id
+ row-size=89B cardinality=10
====
select * from functional.alltypes a
left semi join
@@ -1557,13 +1839,16 @@ PLAN-ROOT SINK
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: functional.alltypes.id = a.id
| runtime filters: RF000 <- a.id
+| row-size=89B cardinality=1
|
|--00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
| predicates: a.timestamp_col IS NOT DISTINCT FROM TIMESTAMP '2016-11-20 00:00:00'
+| row-size=89B cardinality=1
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: date_string_col IS NOT DISTINCT FROM ''
runtime filters: RF000 -> functional.alltypes.id
+ row-size=24B cardinality=10
====