You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/01/12 18:19:22 UTC
[23/26] impala git commit: IMPALA-8021: Add estimated cardinality to
EXPLAIN output
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
index 067698a..617eb3a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
@@ -19,6 +19,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.widetable_250_cols a]
partitions=1/1 files=1 size=28.69KB
predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 = 110
+ row-size=1.21KB cardinality=unavailable
====
# Test multiple forward propagation
select * from functional.widetable_250_cols a
@@ -30,6 +31,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.widetable_250_cols a]
partitions=1/1 files=1 size=28.69KB
predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 = -385
+ row-size=1.21KB cardinality=unavailable
====
# Test multiple forward propagation
select * from functional.widetable_250_cols a
@@ -41,6 +43,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.widetable_250_cols a]
partitions=1/1 files=1 size=28.69KB
predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 = -495
+ row-size=1.21KB cardinality=unavailable
====
# Test multiple forward propagation, and a reversed propagation
# (which fails as we can't rewrite 55 = a.int_col4 / 10)
@@ -53,6 +56,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.widetable_250_cols a]
partitions=1/1 files=1 size=28.69KB
predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 / 10 = 55
+ row-size=1.21KB cardinality=unavailable
====
# Another impossibility (a.int_col3 = a.int_col2 * 5 = a.int_col2 * -7)
select * from functional.widetable_250_cols a
@@ -72,6 +76,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -84,6 +89,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -96,6 +102,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: CAST(a.int_col AS STRING) = 'abc', CAST(int_col AS STRING) > 'xyz'
+ row-size=89B cardinality=231
====
# Implicit casts are considered for propagation
select * from functional.alltypes a
@@ -115,17 +122,21 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
02:SELECT
| predicates: int_col = 12, int_col > 1
+| row-size=4B cardinality=0
|
01:AGGREGATE [FINALIZE]
| group by: int_col
| limit: 10
+| row-size=4B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col = 10
+ row-size=4B cardinality=730
====
# Many constant predicates removed
select count(*) from
@@ -137,10 +148,12 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col = 10
+ row-size=4B cardinality=730
====
# All true predicates elided
select count(*) from
@@ -151,9 +164,11 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# Many redundant / duplicate predicates
select count(*) from
@@ -204,10 +219,12 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col = 10
+ row-size=4B cardinality=730
====
# Non-trivial expr substitution (const false)
select count(*) from
@@ -219,6 +236,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -233,10 +251,12 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col = 10
+ row-size=4B cardinality=730
====
# Non-trivial expr substitution (non-constant)
select count(*) from
@@ -249,10 +269,12 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col = 10, TRUE OR 10 + random() * functional.alltypes.tinyint_col = 100
+ row-size=5B cardinality=231
====
# Collection predicates within HDFS scan nodes get optimized
select 1
@@ -262,26 +284,35 @@ where l_partkey < l_suppkey and c.c_nationkey = 10 and o_orderkey = 4 and l_supp
PLAN-ROOT SINK
|
01:SUBPLAN
+| row-size=50B cardinality=600.00K
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=50B cardinality=100
| |
| |--02:SINGULAR ROW SRC
+| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
+| | row-size=36B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
+| | | row-size=36B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
+| | | row-size=20B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems]
+| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
+| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
- partitions=1/1 files=4 size=292.36MB
+ partitions=1/1 files=4 size=288.99MB
predicates: c.c_nationkey = 10, !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderkey = 4
predicates on o_lineitems: l_partkey < 10, l_suppkey = 10
+ row-size=14B cardinality=6.00K
====
# Nested predicates also get propagated
select 1
@@ -292,27 +323,36 @@ where l_partkey < l_suppkey and c.c_nationkey = 10 and o_orderkey = o_shippriori
PLAN-ROOT SINK
|
01:SUBPLAN
+| row-size=54B cardinality=600.00K
|
|--08:NESTED LOOP JOIN [INNER JOIN]
| | join predicates: o_shippriority = c_nationkey
+| | row-size=54B cardinality=100
| |
| |--02:SINGULAR ROW SRC
+| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
+| | row-size=40B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
+| | | row-size=40B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
+| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems]
+| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
+| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
- partitions=1/1 files=4 size=292.36MB
+ partitions=1/1 files=4 size=288.99MB
predicates: c.c_nationkey = 10, !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o.o_orderkey = 10, o.o_shippriority = 10
predicates on o_lineitems: l_partkey < 10, l_suppkey = 10
+ row-size=14B cardinality=6.00K
====
# Using IS NULL
select count(*) from functional.alltypes where id = 10 and bool_col is null
@@ -322,6 +362,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -332,6 +373,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -343,10 +385,12 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: bool_col IS NULL, functional.alltypes.id IS NULL, id > 0, functional.alltypes.bool_col > 0, id = bool_col
+ row-size=5B cardinality=730
====
# = NULL and >
select count(*) from functional.alltypes where id > 0 and bool_col = null
@@ -356,6 +400,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
index 3c03cd5..5cfa415 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant.test
@@ -4,9 +4,11 @@ PLAN-ROOT SINK
|
00:UNION
constant-operands=1
+ row-size=2B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=1
+ row-size=2B cardinality=1
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
index ce4dbd7..3368cd7 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test
@@ -10,6 +10,7 @@ PLAN-ROOT SINK
00:SCAN DATA SOURCE [functional.alltypes_datasource]
data source predicates: tinyint_col < 256
predicates: float_col != 0, CAST(int_col AS BIGINT) < 10
+ row-size=112B cardinality=500
====
# The first four predicates are in a form that can be offered to the data source
# and the first and third will be accepted (it accepts every other conjunct).
@@ -29,6 +30,7 @@ PLAN-ROOT SINK
00:SCAN DATA SOURCE [functional.alltypes_datasource]
data source predicates: int_col < 10, string_col != 'Foo'
predicates: double_col < 5, NOT bool_col = TRUE, NOT double_col = 5.0, string_col != 'Bar'
+ row-size=112B cardinality=500
====
# The 3rd predicate is not in a form that can be offered to the data source so
# the 4th will be offered and accepted instead.
@@ -43,6 +45,7 @@ PLAN-ROOT SINK
00:SCAN DATA SOURCE [functional.alltypes_datasource]
data source predicates: int_col < 10, bool_col != FALSE
predicates: double_col > 5, string_col IN ('Foo', 'Bar')
+ row-size=112B cardinality=500
====
# Tests that all predicates from the On-clause are applied (IMPALA-805)
# and that slot equivalences are enforced at lowest possible plan node
@@ -60,12 +63,15 @@ PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
+| row-size=35B cardinality=500
|
|--01:SCAN DATA SOURCE [functional.alltypes_datasource b]
|--predicates: b.id = b.int_col, b.id = b.bigint_col
+| row-size=0B cardinality=500
|
00:SCAN DATA SOURCE [functional.alltypes_datasource a]
predicates: a.id = a.int_col, a.id = a.tinyint_col, a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col
+ row-size=0B cardinality=500
====
# Tests that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be offered to the
# data source.
@@ -82,6 +88,7 @@ PLAN-ROOT SINK
00:SCAN DATA SOURCE [functional.alltypes_datasource]
data source predicates: id IS NOT DISTINCT FROM 1, tinyint_col IS DISTINCT FROM 2, int_col IS NOT DISTINCT FROM 4
predicates: bigint_col IS NOT DISTINCT FROM 5, bool_col IS NOT DISTINCT FROM TRUE, smallint_col IS DISTINCT FROM 3
+ row-size=112B cardinality=500
====
# EmptySet datasource
select * from functional.alltypes_datasource
@@ -96,7 +103,6 @@ PLAN-ROOT SINK
|
00:EMPTYSET
====
----- QUERY
# IMPALA-5602: If a query contains predicates that are all pushed to the datasource and
# there is a limit, then the query should not incorrectly run with 'small query'
# optimization.
@@ -110,4 +116,5 @@ PLAN-ROOT SINK
00:SCAN DATA SOURCE [functional.alltypes_datasource]
data source predicates: id = 1
limit: 15
+ row-size=112B cardinality=15
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
index 74c7e5f..ce495b3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test
@@ -5,12 +5,14 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
create table t as
select distinct a.int_col, a.string_col from functional.alltypes a
@@ -22,41 +24,54 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
03:AGGREGATE [FINALIZE]
| group by: a.int_col, a.string_col
+| row-size=17B cardinality=13
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
+| row-size=25B cardinality=13
|
|--01:SCAN HDFS [functional.alltypessmall b]
+| partition predicates: b.month = 2
| partitions=1/4 files=1 size=1.58KB
+| row-size=4B cardinality=25
|
00:SCAN HDFS [functional.alltypes a]
+ partition predicates: a.year = 2009
partitions=12/24 files=12 size=238.68KB
runtime filters: RF000 -> a.id
+ row-size=21B cardinality=3.65K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
|
06:AGGREGATE [FINALIZE]
| group by: a.int_col, a.string_col
+| row-size=17B cardinality=13
|
05:EXCHANGE [HASH(a.int_col,a.string_col)]
|
03:AGGREGATE [STREAMING]
| group by: a.int_col, a.string_col
+| row-size=17B cardinality=13
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
+| row-size=25B cardinality=13
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
+| partition predicates: b.month = 2
| partitions=1/4 files=1 size=1.58KB
+| row-size=4B cardinality=25
|
00:SCAN HDFS [functional.alltypes a]
+ partition predicates: a.year = 2009
partitions=12/24 files=12 size=238.68KB
runtime filters: RF000 -> a.id
+ row-size=21B cardinality=3.65K
====
# CTAS with a view that has a limit clause (IMPALA-1411)
create table t as
@@ -68,6 +83,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
limit: 1
+ row-size=0B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
@@ -78,6 +94,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
limit: 1
+ row-size=0B cardinality=1
====
# CTAS with multiple nested inline views that have a limit clause (IMPALA-1411)
create table t as
@@ -91,6 +108,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
limit: 1
+ row-size=4B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
@@ -101,6 +119,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
limit: 1
+ row-size=4B cardinality=1
====
# CTAS with a select statement that has a limit and offset clause (IMPALA-1411)
@@ -112,9 +131,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
01:TOP-N [LIMIT=1 OFFSET=5]
| order by: id ASC
+| row-size=89B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=89B cardinality=8
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
@@ -126,9 +147,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
01:TOP-N [LIMIT=6]
| order by: id ASC
+| row-size=89B cardinality=6
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=89B cardinality=8
====
# CTAS with an inline view that has a limit and offset clause (IMPALA-1411)
create table t as
@@ -140,9 +163,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
01:TOP-N [LIMIT=2 OFFSET=5]
| order by: id ASC
+| row-size=8B cardinality=2
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=8B cardinality=8
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
@@ -154,9 +179,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
01:TOP-N [LIMIT=7]
| order by: id ASC
+| row-size=8B cardinality=7
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=8B cardinality=8
====
# CTAS with sort columns
create table t sort by (int_col, bool_col) as
@@ -167,18 +194,22 @@ WRITE TO HDFS [default.t, OVERWRITE=false]
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# CTAS with partitions and sort columns
create table t partitioned by (year, month) sort by (int_col, bool_col) as
@@ -189,20 +220,24 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
+| row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# IMPALA-4167: if no (no)shuffle hint is given for CTAS into partitioned HDFS table, then
# Impala is free to decide whether to add an exchange node or not. In this example, the
@@ -220,6 +255,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(functional.alltypes.y
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# IMPALA-4167: non-shuffled CTAS into partitioned table has no exchange node before write.
# Note that plan hint tests for CTAS are minimal by design, as this logic is covered well
@@ -234,6 +270,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(functional.alltypes.y
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# CTAS with more complex select query
create table t partitioned by (c_nationkey) sort by (c_custkey) as
@@ -247,52 +284,63 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)]
|
04:SORT
| order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST
+| row-size=18B cardinality=228.68K
|
03:AGGREGATE [FINALIZE]
| output: max(o_totalprice)
| group by: c_custkey, c_nationkey
+| row-size=18B cardinality=228.68K
|
02:HASH JOIN [INNER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
+| row-size=26B cardinality=228.68K
|
|--01:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| predicates: c_nationkey < 10
+| row-size=10B cardinality=15.00K
|
00:SCAN HDFS [tpch.orders]
partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
+ row-size=16B cardinality=1.50M
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)]
| partitions=25
|
08:SORT
| order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST
+| row-size=18B cardinality=228.68K
|
07:EXCHANGE [HASH(c_nationkey)]
|
06:AGGREGATE [FINALIZE]
| output: max:merge(o_totalprice)
| group by: c_custkey, c_nationkey
+| row-size=18B cardinality=228.68K
|
05:EXCHANGE [HASH(c_custkey,c_nationkey)]
|
03:AGGREGATE [STREAMING]
| output: max(o_totalprice)
| group by: c_custkey, c_nationkey
+| row-size=18B cardinality=228.68K
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
+| row-size=26B cardinality=228.68K
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| predicates: c_nationkey < 10
+| row-size=10B cardinality=15.00K
|
00:SCAN HDFS [tpch.orders]
partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
+ row-size=16B cardinality=1.50M
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test
index 8735f97..3ff9e66 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test
@@ -9,15 +9,18 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: x.a = y.a
| runtime filters: RF000 <- y.a
+| row-size=48B cardinality=unavailable
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.tinytable y]
| partitions=1/1 files=1 size=38B
+| row-size=24B cardinality=unavailable
|
00:SCAN HDFS [functional.tinytable x]
partitions=1/1 files=1 size=38B
runtime filters: RF000 -> x.a
+ row-size=24B cardinality=unavailable
====
# Left join input has an unknown cardinality.
select /* +straight_join */ * from
@@ -30,15 +33,18 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: x.a = y.string_col
| runtime filters: RF000 <- y.string_col
+| row-size=113B cardinality=unavailable
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes y]
-| partitions=24/24 files=24 size=469.90KB
+| partitions=24/24 files=24 size=478.45KB
+| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.tinytable x]
partitions=1/1 files=1 size=38B
runtime filters: RF000 -> x.a
+ row-size=24B cardinality=unavailable
====
# Right join input has an unknown cardinality.
select /* +straight_join */ * from
@@ -51,13 +57,16 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: x.string_col = y.a
| runtime filters: RF000 <- y.a
+| row-size=113B cardinality=7.30K
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.tinytable y]
| partitions=1/1 files=1 size=38B
+| row-size=24B cardinality=unavailable
|
00:SCAN HDFS [functional.alltypes x]
- partitions=24/24 files=24 size=469.90KB
+ partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> x.string_col
+ row-size=89B cardinality=7.30K
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test
index 59e60c9..7065371 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test
@@ -9,17 +9,20 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: x.a = y.a
| runtime filters: RF000 <- y.a
+| row-size=48B cardinality=unavailable
|
|--04:EXCHANGE [HASH(y.a)]
| |
| 01:SCAN HDFS [functional.tinytable y]
| partitions=1/1 files=1 size=38B
+| row-size=24B cardinality=unavailable
|
03:EXCHANGE [HASH(x.a)]
|
00:SCAN HDFS [functional.tinytable x]
partitions=1/1 files=1 size=38B
runtime filters: RF000 -> x.a
+ row-size=24B cardinality=unavailable
====
# Left join input has an unknown cardinality.
select /* +straight_join */ * from
@@ -32,17 +35,20 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: x.a = y.string_col
| runtime filters: RF000 <- y.string_col
+| row-size=113B cardinality=unavailable
|
|--04:EXCHANGE [HASH(y.string_col)]
| |
| 01:SCAN HDFS [functional.alltypes y]
-| partitions=24/24 files=24 size=469.90KB
+| partitions=24/24 files=24 size=478.45KB
+| row-size=89B cardinality=7.30K
|
03:EXCHANGE [HASH(x.a)]
|
00:SCAN HDFS [functional.tinytable x]
partitions=1/1 files=1 size=38B
runtime filters: RF000 -> x.a
+ row-size=24B cardinality=unavailable
====
# Right join input has an unknown cardinality.
select /* +straight_join */ * from
@@ -55,15 +61,18 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: x.string_col = y.a
| runtime filters: RF000 <- y.a
+| row-size=113B cardinality=7.30K
|
|--04:EXCHANGE [HASH(y.a)]
| |
| 01:SCAN HDFS [functional.tinytable y]
| partitions=1/1 files=1 size=38B
+| row-size=24B cardinality=unavailable
|
03:EXCHANGE [HASH(x.string_col)]
|
00:SCAN HDFS [functional.alltypes x]
- partitions=24/24 files=24 size=469.90KB
+ partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> x.string_col
+ row-size=89B cardinality=7.30K
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
index 3987410..3d2702b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
@@ -9,14 +9,17 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# Rows per node is > 3000: codegen should be enabled.
select count(*) from functional.alltypesagg
@@ -28,14 +31,17 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=0B cardinality=11.00K
====
# No stats on functional_parquet: codegen should be disabled.
select count(*) from functional_parquet.alltypes
@@ -49,14 +55,17 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: sum_init_zero(functional_parquet.alltypes.parquet-stats: num_rows)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=188.29KB
+ partitions=24/24 files=24 size=189.28KB
+ row-size=8B cardinality=unavailable
====
# > 3000 rows returned to coordinator: codegen should be enabled
select * from functional_parquet.alltypes
@@ -71,7 +80,8 @@ PLAN-ROOT SINK
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=188.29KB
+ partitions=24/24 files=24 size=189.28KB
+ row-size=80B cardinality=unavailable
====
# Optimisation is enabled for join producing < 3000 rows
select count(*)
@@ -86,24 +96,29 @@ PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
+| row-size=8B cardinality=8
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 files=4 size=460B
+| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t1.id
+ row-size=4B cardinality=7.30K
====
# Optimisation is disabled by cross join producing > 3000 rows
select count(*) from functional.alltypes t1, functional.alltypes t2
@@ -115,21 +130,26 @@ PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+| row-size=0B cardinality=53.29M
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
+| row-size=0B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# Optimisation is enabled for union producing < 3000 rows
select count(*) from (
@@ -145,20 +165,25 @@ PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
00:UNION
| pass-through-operands: all
+| row-size=0B cardinality=7.31K
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
+| row-size=0B cardinality=8
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# Optimisation is disabled by union producing > 3000 rows
select count(*) from (
@@ -173,20 +198,25 @@ PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count:merge(*)
+| row-size=8B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: count(*)
+| row-size=8B cardinality=1
|
00:UNION
| pass-through-operands: all
+| row-size=0B cardinality=14.60K
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
+| row-size=0B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# Scan with limit on large table: the number of rows scanned is bounded,
# codegen should be disabled
@@ -201,6 +231,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(tpch.lineitem.l_discount)
+| row-size=16B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
| limit: 1000
@@ -208,6 +239,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
limit: 1000
+ row-size=8B cardinality=1.00K
====
# Scan with limit and predicates on large table: any number of rows could be scanned:
# codegen should be enabled
@@ -221,6 +253,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(tpch.lineitem.l_discount)
+| row-size=16B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
| limit: 1000
@@ -229,4 +262,5 @@ PLAN-ROOT SINK
partitions=1/1 files=1 size=718.94MB
predicates: l_orderkey > 100
limit: 1000
+ row-size=16B cardinality=1.00K
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
index 07726c9..ac2b6e4 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test
@@ -9,15 +9,18 @@ PLAN-ROOT SINK
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: tinyint_col
+| row-size=9B cardinality=9
|
02:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE
| output: count(*)
| group by: tinyint_col
+| row-size=9B cardinality=9
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=1B cardinality=11.00K
====
select count(distinct id)
from functional.alltypesagg
@@ -26,20 +29,25 @@ PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(id)
+| row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(id)
+| row-size=8B cardinality=1
|
04:AGGREGATE
| group by: id
+| row-size=4B cardinality=10.28K
|
03:EXCHANGE [HASH(id)]
|
01:AGGREGATE
| group by: id
+| row-size=4B cardinality=10.28K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=4B cardinality=11.00K
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test b/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
index b895ad0..cb8a4e0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test
@@ -5,22 +5,27 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: distinctpc(l_orderkey)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: distinctpc:merge(l_orderkey)
+| row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: distinctpc(l_orderkey)
+| row-size=8B cardinality=1
|
00:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
====
# Distinct estimate with distinct
select count(distinct l_orderkey), distinctpc(l_orderkey) from tpch.lineitem
@@ -29,34 +34,42 @@ PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(l_orderkey), distinctpc:merge(l_orderkey)
+| row-size=16B cardinality=1
|
01:AGGREGATE
| output: distinctpc(l_orderkey)
| group by: l_orderkey
+| row-size=16B cardinality=1.56M
|
00:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(l_orderkey), distinctpc:merge(l_orderkey)
+| row-size=16B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(l_orderkey), distinctpc:merge(l_orderkey)
+| row-size=16B cardinality=1
|
04:AGGREGATE
| output: distinctpc:merge(l_orderkey)
| group by: l_orderkey
+| row-size=16B cardinality=1.56M
|
03:EXCHANGE [HASH(l_orderkey)]
|
01:AGGREGATE [STREAMING]
| output: distinctpc(l_orderkey)
| group by: l_orderkey
+| row-size=16B cardinality=1.56M
|
00:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
+ row-size=8B cardinality=6.00M
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test b/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
index 6e66e30..3e671da 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
@@ -6,9 +6,11 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
+| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
+ row-size=24B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -16,14 +18,17 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
+| row-size=24B cardinality=0
|
02:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
01:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
+| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
+ row-size=24B cardinality=0
====
# distinct w/ explicit select list
select distinct id, zip
@@ -33,9 +38,11 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: id, zip
+| row-size=12B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
+ row-size=12B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -43,14 +50,17 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| group by: id, zip
+| row-size=12B cardinality=0
|
02:EXCHANGE [HASH(id,zip)]
|
01:AGGREGATE [STREAMING]
| group by: id, zip
+| row-size=12B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
+ row-size=12B cardinality=0
====
# count(distinct)
select count(distinct id, zip)
@@ -60,33 +70,41 @@ PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(if(id IS NULL, NULL, zip))
+| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: id, zip
+| row-size=12B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
+ row-size=12B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(if(id IS NULL, NULL, zip))
+| row-size=8B cardinality=0
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(if(id IS NULL, NULL, zip))
+| row-size=8B cardinality=0
|
04:AGGREGATE
| group by: id, zip
+| row-size=12B cardinality=0
|
03:EXCHANGE [HASH(id,zip)]
|
01:AGGREGATE [STREAMING]
| group by: id, zip
+| row-size=12B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
+ row-size=12B cardinality=0
====
# count(distinct) w/ grouping
select tinyint_col, count(distinct int_col, bigint_col)
@@ -98,12 +116,15 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(if(int_col IS NULL, NULL, bigint_col))
| group by: tinyint_col
+| row-size=9B cardinality=9
|
01:AGGREGATE
| group by: tinyint_col, int_col, bigint_col
+| row-size=13B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=13B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -112,23 +133,28 @@ PLAN-ROOT SINK
06:AGGREGATE [FINALIZE]
| output: count:merge(if(int_col IS NULL, NULL, bigint_col))
| group by: tinyint_col
+| row-size=9B cardinality=9
|
05:EXCHANGE [HASH(tinyint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(if(int_col IS NULL, NULL, bigint_col))
| group by: tinyint_col
+| row-size=9B cardinality=9
|
04:AGGREGATE
| group by: tinyint_col, int_col, bigint_col
+| row-size=13B cardinality=11.00K
|
03:EXCHANGE [HASH(tinyint_col,int_col,bigint_col)]
|
01:AGGREGATE [STREAMING]
| group by: tinyint_col, int_col, bigint_col
+| row-size=13B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=13B cardinality=11.00K
====
# count(distinct) and sum(distinct) w/ grouping
select tinyint_col, count(distinct int_col), sum(distinct int_col)
@@ -140,12 +166,15 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(int_col), sum(int_col)
| group by: tinyint_col
+| row-size=17B cardinality=9
|
01:AGGREGATE
| group by: tinyint_col, int_col
+| row-size=5B cardinality=8.61K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=5B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -154,23 +183,28 @@ PLAN-ROOT SINK
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col), sum:merge(int_col)
| group by: tinyint_col
+| row-size=17B cardinality=9
|
05:EXCHANGE [HASH(tinyint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), sum(int_col)
| group by: tinyint_col
+| row-size=17B cardinality=9
|
04:AGGREGATE
| group by: tinyint_col, int_col
+| row-size=5B cardinality=8.61K
|
03:EXCHANGE [HASH(tinyint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| group by: tinyint_col, int_col
+| row-size=5B cardinality=8.61K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=5B cardinality=11.00K
====
# sum(distinct) w/o grouping
select sum(distinct int_col)
@@ -180,33 +214,41 @@ PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: sum(int_col)
+| row-size=8B cardinality=1
|
01:AGGREGATE
| group by: int_col
+| row-size=4B cardinality=957
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=4B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: sum:merge(int_col)
+| row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: sum(int_col)
+| row-size=8B cardinality=1
|
04:AGGREGATE
| group by: int_col
+| row-size=4B cardinality=957
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col
+| row-size=4B cardinality=957
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=4B cardinality=11.00K
====
# count(distinct) and sum(distinct) w/ grouping; distinct in min() and max()
# is ignored
@@ -219,13 +261,16 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(int_col), min:merge(smallint_col), max:merge(string_col)
| group by: tinyint_col
+| row-size=23B cardinality=9
|
01:AGGREGATE
| output: min(smallint_col), max(string_col)
| group by: tinyint_col, int_col
+| row-size=19B cardinality=8.61K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=22B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -234,25 +279,30 @@ PLAN-ROOT SINK
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col), min:merge(smallint_col), max:merge(string_col)
| group by: tinyint_col
+| row-size=23B cardinality=9
|
05:EXCHANGE [HASH(tinyint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), min:merge(smallint_col), max:merge(string_col)
| group by: tinyint_col
+| row-size=23B cardinality=9
|
04:AGGREGATE
| output: min:merge(smallint_col), max:merge(string_col)
| group by: tinyint_col, int_col
+| row-size=19B cardinality=8.61K
|
03:EXCHANGE [HASH(tinyint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: min(smallint_col), max(string_col)
| group by: tinyint_col, int_col
+| row-size=19B cardinality=8.61K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=22B cardinality=11.00K
====
# aggregate fns with and without distinct
select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col),
@@ -264,13 +314,16 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col)
| group by: tinyint_col
+| row-size=43B cardinality=9
|
01:AGGREGATE
| output: count(*), sum(int_col), min(smallint_col), max(bigint_col)
| group by: tinyint_col, int_col
+| row-size=31B cardinality=8.61K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=15B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -279,25 +332,30 @@ PLAN-ROOT SINK
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col), sum:merge(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col)
| group by: tinyint_col
+| row-size=43B cardinality=9
|
05:EXCHANGE [HASH(tinyint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col)
| group by: tinyint_col
+| row-size=43B cardinality=9
|
04:AGGREGATE
| output: count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col)
| group by: tinyint_col, int_col
+| row-size=31B cardinality=8.61K
|
03:EXCHANGE [HASH(tinyint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*), sum(int_col), min(smallint_col), max(bigint_col)
| group by: tinyint_col, int_col
+| row-size=31B cardinality=8.61K
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
+ row-size=15B cardinality=11.00K
====
# test join on inline views containing distinct aggregates to make sure
# the aggregation info reports the correct tuple ids (from the 2nd phase
@@ -310,69 +368,87 @@ PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: count(int_col) = count(bigint_col)
+| row-size=16B cardinality=1
|
|--05:AGGREGATE [FINALIZE]
| | output: count(bigint_col)
+| | row-size=8B cardinality=1
| |
| 04:AGGREGATE
| | group by: bigint_col
+| | row-size=8B cardinality=2
| |
| 03:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
+| row-size=8B cardinality=8
|
02:AGGREGATE [FINALIZE]
| output: count(int_col)
+| row-size=8B cardinality=1
|
01:AGGREGATE
| group by: int_col
+| row-size=4B cardinality=2
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: count(int_col) = count(bigint_col)
+| row-size=16B cardinality=1
|
|--15:EXCHANGE [UNPARTITIONED]
| |
| 14:AGGREGATE [FINALIZE]
| | output: count:merge(bigint_col)
+| | row-size=8B cardinality=1
| |
| 13:EXCHANGE [UNPARTITIONED]
| |
| 05:AGGREGATE
| | output: count(bigint_col)
+| | row-size=8B cardinality=1
| |
| 12:AGGREGATE
| | group by: bigint_col
+| | row-size=8B cardinality=2
| |
| 11:EXCHANGE [HASH(bigint_col)]
| |
| 04:AGGREGATE [STREAMING]
| | group by: bigint_col
+| | row-size=8B cardinality=2
| |
| 03:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
+| row-size=8B cardinality=8
|
10:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
+| row-size=8B cardinality=1
|
09:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(int_col)
+| row-size=8B cardinality=1
|
08:AGGREGATE
| group by: int_col
+| row-size=4B cardinality=2
|
07:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col
+| row-size=4B cardinality=2
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
+ row-size=4B cardinality=8
====
# Test placement of having predicate into 2nd phase merge agg for
# distinct + non-distinct aggregates without group by (IMPALA-845).
@@ -388,37 +464,45 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col), count:merge(bigint_col)
| having: zeroifnull(count(bigint_col)) > 0
+| row-size=16B cardinality=0
|
01:AGGREGATE
| output: count(bigint_col)
| group by: tinyint_col
+| row-size=9B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col), count:merge(bigint_col)
| having: zeroifnull(count(bigint_col)) > 0
+| row-size=16B cardinality=0
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col), count:merge(bigint_col)
+| row-size=16B cardinality=0
|
04:AGGREGATE
| output: count:merge(bigint_col)
| group by: tinyint_col
+| row-size=9B cardinality=10
|
03:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: count(bigint_col)
| group by: tinyint_col
+| row-size=9B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=9B cardinality=7.30K
====
# test slot materialization on a distinct agg inside an inline view
# triggered by a predicate in an outer query block (IMPALA-861)
@@ -431,34 +515,42 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(1)
| having: count(1) IS NOT NULL
+| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: 1
+| row-size=1B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(1)
| having: count(1) IS NOT NULL
+| row-size=8B cardinality=0
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(1)
+| row-size=8B cardinality=0
|
04:AGGREGATE
| group by: 1
+| row-size=1B cardinality=1
|
03:EXCHANGE [HASH(1)]
|
01:AGGREGATE [STREAMING]
| group by: 1
+| row-size=1B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# test slot materialization on a distinct agg inside an inline view
# triggered by a predicate in an outer query block (IMPALA-861)
@@ -471,37 +563,45 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: count(1), count:merge(*)
| having: count(1) > 0, zeroifnull(count(*)) > 1, count(1) + zeroifnull(count(*)) > 10
+| row-size=16B cardinality=0
|
01:AGGREGATE
| output: count(*)
| group by: 1
+| row-size=9B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(1), count:merge(*)
| having: count(1) > 0, zeroifnull(count(*)) > 1, count(1) + zeroifnull(count(*)) > 10
+| row-size=16B cardinality=0
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(1), count:merge(*)
+| row-size=16B cardinality=0
|
04:AGGREGATE
| output: count:merge(*)
| group by: 1
+| row-size=9B cardinality=1
|
03:EXCHANGE [HASH(1)]
|
01:AGGREGATE [STREAMING]
| output: count(*)
| group by: 1
+| row-size=9B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# IMPALA-2266: Test non-grouping distinct aggregation inside an inline view.
select * from (select count(distinct int_col) cd from functional.alltypes) v
@@ -510,22 +610,27 @@ PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
+| row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(int_col)
+| row-size=8B cardinality=1
|
04:AGGREGATE
| group by: int_col
+| row-size=4B cardinality=10
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col
+| row-size=4B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=4B cardinality=7.30K
====
# IMPALA-2266: Test grouping distinct aggregation inside an inline view.
select * from (select count(distinct int_col) cd from functional.alltypes group by bool_col) v
@@ -537,23 +642,28 @@ PLAN-ROOT SINK
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| group by: bool_col
+| row-size=9B cardinality=2
|
05:EXCHANGE [HASH(bool_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: bool_col
+| row-size=9B cardinality=2
|
04:AGGREGATE
| group by: bool_col, int_col
+| row-size=5B cardinality=20
|
03:EXCHANGE [HASH(bool_col,int_col)]
|
01:AGGREGATE [STREAMING]
| group by: bool_col, int_col
+| row-size=5B cardinality=20
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=5B cardinality=7.30K
====
# IMPALA-4042: count(distinct NULL) fails on a view
select count(distinct null) from functional.alltypes_view
@@ -562,22 +672,27 @@ PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(NULL)
+| row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(NULL)
+| row-size=8B cardinality=1
|
04:AGGREGATE
| group by: NULL
+| row-size=1B cardinality=1
|
03:EXCHANGE [HASH(NULL)]
|
01:AGGREGATE [STREAMING]
| group by: NULL
+| row-size=1B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
# Query block with a single distinct and multiple non-distinct aggs simplifies to a
# non-grouping aggregation plan.
@@ -592,35 +707,43 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: min:merge(string_col), max:merge(string_col)
| having: min(string_col) < '9', min(string_col) < max(string_col)
+| row-size=24B cardinality=0
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| group by: smallint_col
+| row-size=26B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=15B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: min:merge(string_col), max:merge(string_col)
| having: min(string_col) < '9', min(string_col) < max(string_col)
+| row-size=24B cardinality=0
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: min:merge(string_col), max:merge(string_col)
+| row-size=24B cardinality=0
|
04:AGGREGATE
| output: min:merge(string_col), max:merge(string_col)
| group by: smallint_col
+| row-size=26B cardinality=10
|
03:EXCHANGE [HASH(smallint_col)]
|
01:AGGREGATE [STREAMING]
| output: min(string_col), max(string_col)
| group by: smallint_col
+| row-size=26B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
+ row-size=15B cardinality=7.30K
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
index 43d1fcf..964b7f9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
@@ -47,6 +47,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(int_col), avg(double_col), count(*)
+| row-size=24B cardinality=0
|
00:EMPTYSET
====
@@ -61,6 +62,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -93,12 +95,14 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN]
| hash predicates: f.id = t1.id
| runtime filters: RF000 <- t1.id
+| row-size=8B cardinality=0
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypessmall f]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> f.id
+ row-size=4B cardinality=100
====
# Constant conjunct causes union operand to be dropped.
select * from functional.alltypessmall
@@ -111,12 +115,15 @@ PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
+| row-size=89B cardinality=108
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
+| row-size=89B cardinality=8
|
01:SCAN HDFS [functional.alltypessmall]
partitions=4/4 files=4 size=6.32KB
+ row-size=89B cardinality=100
====
# Constant conjunct turns union into an empty-set node.
select *
@@ -133,11 +140,13 @@ PLAN-ROOT SINK
|
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.id = id
+| row-size=178B cardinality=7.30K
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
+ row-size=89B cardinality=7.30K
====
# Constant conjunct in the ON-clause of an outer join is
# assigned to the join.
@@ -151,12 +160,15 @@ PLAN-ROOT SINK
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = b.id
| other join predicates: FALSE
+| row-size=178B cardinality=100
|
|--01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
+| row-size=89B cardinality=8
|
00:SCAN HDFS [functional.alltypessmall a]
partitions=4/4 files=4 size=6.32KB
+ row-size=89B cardinality=100
====
# Constant conjunct in the ON-clause of an outer join is
# assigned to the join.
@@ -171,13 +183,16 @@ PLAN-ROOT SINK
| hash predicates: a.id = b.id
| other join predicates: FALSE
| runtime filters: RF000 <- b.id
+| row-size=178B cardinality=9
|
|--01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
+| row-size=89B cardinality=8
|
00:SCAN HDFS [functional.alltypessmall a]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> a.id
+ row-size=89B cardinality=100
====
# Constant conjunct in the ON-clause of an outer join is
# assigned to the join.
@@ -191,12 +206,15 @@ PLAN-ROOT SINK
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.id = b.id
| other join predicates: NULL
+| row-size=178B cardinality=108
|
|--01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
+| row-size=89B cardinality=8
|
00:SCAN HDFS [functional.alltypessmall a]
partitions=4/4 files=4 size=6.32KB
+ row-size=89B cardinality=100
====
# Limit 0 turns query block into an empty-set node.
select t1.id, t2.id
@@ -234,12 +252,14 @@ PLAN-ROOT SINK
02:HASH JOIN [INNER JOIN]
| hash predicates: f.id = t1.id
| runtime filters: RF000 <- t1.id
+| row-size=8B cardinality=0
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypessmall f]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> f.id
+ row-size=4B cardinality=100
====
# Limit 0 causes union operand to be dropped.
select * from functional.alltypessmall
@@ -252,12 +272,15 @@ PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
+| row-size=89B cardinality=108
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
+| row-size=89B cardinality=8
|
01:SCAN HDFS [functional.alltypessmall]
partitions=4/4 files=4 size=6.32KB
+ row-size=89B cardinality=100
====
# Limit 0 causes empty-set union.
select * from functional.alltypessmall
@@ -282,12 +305,15 @@ select int_col from functional.alltypesagg
PLAN-ROOT SINK
|
00:UNION
+| row-size=8B cardinality=11.00K
|
|--03:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
+| row-size=4B cardinality=11.00K
|
02:AGGREGATE [FINALIZE]
| output: count(1)
+| row-size=8B cardinality=0
|
01:EMPTYSET
====
@@ -307,6 +333,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=0
|
00:EMPTYSET
====
@@ -320,6 +347,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
+| row-size=89B cardinality=0
|
00:EMPTYSET
====
@@ -343,6 +371,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(id), count(int_col)
+| row-size=16B cardinality=0
|
00:EMPTYSET
====
@@ -358,6 +387,7 @@ PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(id + int_col)
+| row-size=8B cardinality=0
|
00:EMPTYSET
====
@@ -373,9 +403,11 @@ PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(T1.int_col)
+| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: int_col
+| row-size=4B cardinality=0
|
00:EMPTYSET
====
@@ -396,11 +428,14 @@ PLAN-ROOT SINK
|
02:UNION
| pass-through-operands: all
+| row-size=2B cardinality=0
|
01:AGGREGATE [FINALIZE]
| group by: lead(-496, 81, NULL) OVER(...)
+| row-size=2B cardinality=0
|
00:UNION
+ row-size=2B cardinality=0
====
# IMPALA-2088: Test empty union operands with analytic functions.
select lead(-496, 81) over (order by t1.double_col desc, t1.id asc)
@@ -421,14 +456,18 @@ PLAN-ROOT SINK
02:UNION
| constant-operands=1
| pass-through-operands: 01
+| row-size=2B cardinality=9
|
|--03:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
+| row-size=8B cardinality=8
|
01:AGGREGATE [FINALIZE]
| group by: lead(-496, 81, NULL) OVER(...)
+| row-size=2B cardinality=0
|
00:UNION
+ row-size=2B cardinality=0
====
# IMPALA-2216: Make sure the final output exprs are substituted, even
# if the resulting plan is an EmptySetNode.
@@ -471,42 +510,55 @@ where c_custkey < 10
PLAN-ROOT SINK
|
01:SUBPLAN
+| row-size=48B cardinality=15.00K
|
|--16:NESTED LOOP JOIN [LEFT OUTER JOIN]
+| | row-size=48B cardinality=1
| |
| |--12:AGGREGATE [FINALIZE]
| | | output: count(*)
+| | | row-size=8B cardinality=1
| | |
| | 08:SUBPLAN
+| | | row-size=0B cardinality=10
| | |
| | |--11:NESTED LOOP JOIN [RIGHT OUTER JOIN]
+| | | | row-size=0B cardinality=1
| | | |
| | | |--09:SINGULAR ROW SRC
+| | | | row-size=12B cardinality=1
| | | |
| | | 10:EMPTYSET
| | |
| | 07:UNNEST [c.c_orders o]
+| | row-size=0B cardinality=10
| |
| 15:NESTED LOOP JOIN [LEFT OUTER JOIN]
+| | row-size=40B cardinality=1
| |
| |--06:EMPTYSET
| |
| 14:NESTED LOOP JOIN [LEFT OUTER JOIN]
+| | row-size=36B cardinality=1
| |
| |--05:EMPTYSET
| |
| 13:NESTED LOOP JOIN [RIGHT OUTER JOIN]
+| | row-size=28B cardinality=1
| |
| |--02:SINGULAR ROW SRC
+| | row-size=56B cardinality=1
| |
| 04:AGGREGATE [FINALIZE]
| | output: count(*)
+| | row-size=8B cardinality=0
| |
| 03:EMPTYSET
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
- partitions=1/1 files=4 size=292.36MB
+ partitions=1/1 files=4 size=288.99MB
predicates: c_custkey < 10
+ row-size=56B cardinality=15.00K
====
# IMPALA-2539: Test empty union operands containing relative table refs.
select c_custkey, o_orderkey
@@ -524,23 +576,31 @@ where c_custkey = 1
PLAN-ROOT SINK
|
01:SUBPLAN
+| row-size=28B cardinality=10
|
|--07:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=28B cardinality=10
| |
| |--02:SINGULAR ROW SRC
+| | row-size=44B cardinality=1
| |
| 06:UNION
+| | row-size=8B cardinality=10
| |
| 05:AGGREGATE [FINALIZE]
| | group by: o_orderkey
+| | row-size=8B cardinality=10
| |
| 03:UNION
+| | row-size=8B cardinality=10
| |
| 04:UNNEST [c.c_orders o1]
+| row-size=8B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
- partitions=1/1 files=4 size=292.36MB
+ partitions=1/1 files=4 size=288.99MB
predicates: c_custkey = 1
+ row-size=44B cardinality=1
====
# IMPALA-2215: Having clause without aggregation.
select 1 from (select 1) v having 1 > 1
@@ -564,11 +624,14 @@ PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
+| row-size=8B cardinality=0
|
02:NESTED LOOP JOIN [CROSS JOIN]
+| row-size=1B cardinality=0
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypes x]
partitions=24/24 files=24 size=478.45KB
+ row-size=0B cardinality=7.30K
====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
index 8b71f11..5383858 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
@@ -14,7 +14,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: ss_customer_sk = c_customer_sk
| runtime filters: RF000[bloom] <- c_customer_sk
| mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0
-| tuple-ids=0,1 row-size=319B cardinality=529700
+| tuple-ids=0,1 row-size=319B cardinality=529.70K
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds.customer]
@@ -25,7 +25,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100000
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=219B cardinality=16667
+| tuple-ids=1 row-size=219B cardinality=16.67K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -37,7 +37,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# Single-column FK/PK join detection on left outer join. The join cardinality
@@ -57,7 +57,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: ss_customer_sk = c_customer_sk
| other predicates: c_salutation = 'Mrs.'
| mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0
-| tuple-ids=0,1N row-size=319B cardinality=2880404
+| tuple-ids=0,1N row-size=319B cardinality=2.88M
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds.customer]
@@ -68,7 +68,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100000
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=219B cardinality=16667
+| tuple-ids=1 row-size=219B cardinality=16.67K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -79,7 +79,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# Single-column FK/PK join detection on right outer join. The join cardinality
@@ -99,7 +99,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: ss_customer_sk = c_customer_sk
| runtime filters: RF000[bloom] <- c_customer_sk
| mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0
-| tuple-ids=0N,1 row-size=319B cardinality=529700
+| tuple-ids=0N,1 row-size=319B cardinality=529.70K
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds.customer]
@@ -110,7 +110,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100000
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=219B cardinality=16667
+| tuple-ids=1 row-size=219B cardinality=16.67K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -122,7 +122,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# Multi-column FK/PK join detection
@@ -141,7 +141,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: ss_item_sk = sr_item_sk, ss_ticket_number = sr_ticket_number
| runtime filters: RF000[bloom] <- sr_item_sk, RF001[bloom] <- sr_ticket_number
| mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0
-| tuple-ids=0,1 row-size=188B cardinality=211838
+| tuple-ids=0,1 row-size=188B cardinality=211.84K
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds.store_returns]
@@ -152,7 +152,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=287514
| mem-estimate=80.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=88B cardinality=28751
+| tuple-ids=1 row-size=88B cardinality=28.75K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -164,7 +164,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# Many-to-many join detection.
@@ -182,7 +182,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: none
| runtime filters: RF000[bloom] <- ws_sold_time_sk
| mem-estimate=108.67MB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0
-| tuple-ids=0,1 row-size=244B cardinality=44136418
+| tuple-ids=0,1 row-size=244B cardinality=44.14M
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds.web_sales]
@@ -192,7 +192,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=657377
| mem-estimate=160.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=144B cardinality=719384
+| tuple-ids=1 row-size=144B cardinality=719.38K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -204,7 +204,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# PK/PK join is detected as FK/PK.
@@ -223,7 +223,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: b.d_date_sk = a.d_date_sk
| runtime filters: RF000[bloom] <- a.d_date_sk
| mem-estimate=17.00MB mem-reservation=17.00MB spill-buffer=1.00MB thread-reservation=0
-| tuple-ids=1,0 row-size=510B cardinality=36525
+| tuple-ids=1,0 row-size=510B cardinality=36.52K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
|--00:SCAN HDFS [tpcds.date_dim a]
@@ -234,7 +234,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=73049
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=0 row-size=255B cardinality=36525
+| tuple-ids=0 row-size=255B cardinality=36.52K
| in pipelines: 00(GETNEXT)
|
01:SCAN HDFS [tpcds.date_dim b]
@@ -245,7 +245,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=73049
mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=1 row-size=255B cardinality=73049
+ tuple-ids=1 row-size=255B cardinality=73.05K
in pipelines: 01(GETNEXT)
====
# Single query with various join types combined.
@@ -268,7 +268,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: none
| runtime filters: RF000[bloom] <- c_current_addr_sk
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
-| tuple-ids=1,0,3,4,2 row-size=60B cardinality=19358
+| tuple-ids=1,0,3,4,2 row-size=60B cardinality=19.36K
| in pipelines: 01(GETNEXT), 02(OPEN)
|
|--02:SCAN HDFS [tpcds.customer]
@@ -278,7 +278,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100000
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=2 row-size=4B cardinality=100000
+| tuple-ids=2 row-size=4B cardinality=100.00K
| in pipelines: 02(GETNEXT)
|
07:HASH JOIN [INNER JOIN]
@@ -286,7 +286,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: sr_returned_date_sk = d2.d_date_sk
| runtime filters: RF002[bloom] <- d2.d_date_sk
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
-| tuple-ids=1,0,3,4 row-size=56B cardinality=8131
+| tuple-ids=1,0,3,4 row-size=56B cardinality=8.13K
| in pipelines: 01(GETNEXT), 04(OPEN)
|
|--04:SCAN HDFS [tpcds.date_dim d2]
@@ -296,7 +296,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=73049
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=4 row-size=4B cardinality=73049
+| tuple-ids=4 row-size=4B cardinality=73.05K
| in pipelines: 04(GETNEXT)
|
06:HASH JOIN [INNER JOIN]
@@ -304,7 +304,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: sr_item_sk = ss_item_sk, sr_ticket_number = ss_ticket_number
| runtime filters: RF004[bloom] <- ss_item_sk, RF005[bloom] <- ss_ticket_number
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
-| tuple-ids=1,0,3 row-size=52B cardinality=8131
+| tuple-ids=1,0,3 row-size=52B cardinality=8.13K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
|--05:HASH JOIN [INNER JOIN]
@@ -312,7 +312,7 @@ PLAN-ROOT SINK
| | fk/pk conjuncts: ss_sold_date_sk = d1.d_date_sk
| | runtime filters: RF008[bloom] <- d1.d_date_sk
| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
-| | tuple-ids=0,3 row-size=32B cardinality=11055
+| | tuple-ids=0,3 row-size=32B cardinality=11.05K
| | in pipelines: 00(GETNEXT), 03(OPEN)
| |
| |--03:SCAN HDFS [tpcds.date_dim d1]
@@ -335,7 +335,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=130093
| mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=0 row-size=24B cardinality=2880404
+| tuple-ids=0 row-size=24B cardinality=2.88M
| in pipelines: 00(GETNEXT)
|
01:SCAN HDFS [tpcds.store_returns]
@@ -346,7 +346,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=287514
mem-estimate=80.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=1 row-size=20B cardinality=287514
+ tuple-ids=1 row-size=20B cardinality=287.51K
in pipelines: 01(GETNEXT)
====
# Assumed FK/PK join because of non-trivial equi-join exprs.
@@ -364,7 +364,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: assumed fk/pk
| runtime filters: RF000[bloom] <- c_customer_sk / 100
| mem-estimate=34.00MB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0
-| tuple-ids=0,1 row-size=319B cardinality=2880404
+| tuple-ids=0,1 row-size=319B cardinality=2.88M
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds.customer]
@@ -374,7 +374,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100000
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=219B cardinality=100000
+| tuple-ids=1 row-size=219B cardinality=100.00K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -386,7 +386,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# Assumed FK/PK join due to missing stats on the rhs. Join cardinality is equal to
@@ -405,7 +405,7 @@ PLAN-ROOT SINK
| fk/pk conjuncts: assumed fk/pk
| runtime filters: RF000[bloom] <- c_customer_sk
| mem-estimate=2.00GB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0
-| tuple-ids=0,1 row-size=8B cardinality=2880404
+| tuple-ids=0,1 row-size=8B cardinality=2.88M
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [tpcds_seq_snap.customer]
@@ -427,7 +427,7 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=4B cardinality=2880404
+ tuple-ids=0 row-size=4B cardinality=2.88M
in pipelines: 00(GETNEXT)
====
# Assumed FK/PK join due to missing stats on the lhs. Join cardinality is unknown.
@@ -455,7 +455,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100000
| mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=4B cardinality=100000
+| tuple-ids=1 row-size=4B cardinality=100.00K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds_seq_snap.store_sales]
@@ -487,13 +487,13 @@ PLAN-ROOT SINK
| fk/pk conjuncts: none
| runtime filters: RF000[bloom] <- ws_sold_time_sk
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
-| tuple-ids=0,2 row-size=104B cardinality=2440073
+| tuple-ids=0,2 row-size=104B cardinality=2.44M
| in pipelines: 00(GETNEXT), 02(OPEN)
|
|--02:AGGREGATE [FINALIZE]
| | group by: ws_sold_time_sk
| | mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
-| | tuple-ids=2 row-size=4B cardinality=39771
+| | tuple-ids=2 row-size=4B cardinality=39.77K
| | in pipelines: 02(GETNEXT), 01(OPEN)
| |
| 01:SCAN HDFS [tpcds.web_sales]
@@ -503,7 +503,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=657377
| mem-estimate=160.00MB mem-reservation=8.00MB thread-reservation=1
-| tuple-ids=1 row-size=4B cardinality=719384
+| tuple-ids=1 row-size=4B cardinality=719.38K
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [tpcds.store_sales]
@@ -515,6 +515,6 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled max-scan-range-rows=130093
mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1
- tuple-ids=0 row-size=100B cardinality=2880404
+ tuple-ids=0 row-size=100B cardinality=2.88M
in pipelines: 00(GETNEXT)
====