You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/11/17 02:08:41 UTC
[3/7] impala git commit: IMPALA-5821: Add query with implicit casts
to extended explain output.
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/fe/src/test/java/org/apache/impala/util/PrintUtilsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/util/PrintUtilsTest.java b/fe/src/test/java/org/apache/impala/util/PrintUtilsTest.java
index 8f55c7d..550578f 100644
--- a/fe/src/test/java/org/apache/impala/util/PrintUtilsTest.java
+++ b/fe/src/test/java/org/apache/impala/util/PrintUtilsTest.java
@@ -78,4 +78,83 @@ public class PrintUtilsTest {
assertEquals("-10B", PrintUtils.printBytesRoundedToMb(-10L));
assertEquals("-123456789B", PrintUtils.printBytesRoundedToMb(-123456789L));
}
+
+ /**
+ * Wrap length for testWrapText() - less than 80 to make test layout nicer.
+ */
+ private static final int WRAP_LENGTH = 60;
+
+ /**
+ * Test for PrintUtils.wrapString().
+ */
+ @Test
+ public void testWrapText() {
+ // Simple query wrapping.
+ assertWrap(
+ "Analyzed query: SELECT * FROM functional_kudu.alltypestiny WHERE CAST(bigint_col"
+ + " AS DOUBLE) < CAST(10 AS DOUBLE)",
+ "Analyzed query: SELECT * FROM functional_kudu.alltypestiny\n"
+ + "WHERE CAST(bigint_col AS DOUBLE) < CAST(10 AS DOUBLE)");
+ // Simple query with a hint retains newlines surrounding hint.
+ assertWrap("SELECT \n"
+ + "-- +straight_join\n"
+ + " * FROM tpch_parquet.orders INNER JOIN \n"
+ + "-- +shuffle\n"
+ + " tpch_parquet.customer ON o_custkey = c_custkey",
+ "SELECT \n"
+ + "-- +straight_join\n"
+ + "* FROM tpch_parquet.orders INNER JOIN \n"
+ + "-- +shuffle\n"
+ + "tpch_parquet.customer ON o_custkey = c_custkey");
+ // test that a long string of blanks prints OK, some may be lost for clarity
+ assertWrap("insert into foo values (' "
+ + " "
+ + " ')",
+ "insert into foo values (' \n"
+ + "')");
+ // test that long words are broken up for clarity
+ assertWrap("select xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+ "select\n"
+ + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"
+ + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"
+ + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"
+ + "xxxxxxxxxxxxxxxxxxxxxxxxx");
+ }
+
+ /**
+ * Wrap the input to WRAP_LENGTH and check that the result is correctly formatted.
+ * @param expected what it should be
+ */
+ private void assertWrap(String input, String expected) {
+ String actual = PrintUtils.wrapString(input, WRAP_LENGTH);
+ assertEquals(expected, actual);
+ assertNoBlankLines(actual);
+ assertNoTerminatingNewline(actual);
+ assertNoLongLines(actual);
+ }
+
+ /**
+ * Assert that all lines of wrapped output are WRAP_LENGTH chars or less.
+ */
+ private void assertNoLongLines(String s) {
+ for (String line : s.split("\n")) {
+ assertTrue("line too long: " + line, line.length() <= WRAP_LENGTH);
+ }
+ }
+
+ /**
+ * Assert that the wrapped output does not end in a newline.
+ */
+ private void assertNoTerminatingNewline(String s) {
+ assertFalse("wrapped string ends in newline: " + s, s.endsWith("\n"));
+ }
+
+ /**
+ * Assert that there are no blank lines embedded in the wrapped output.
+ */
+ private void assertNoBlankLines(String s) {
+ assertFalse("output contains blank line " + s, s.contains("\n\n"));
+ }
}
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
index 181d866..88d23d4 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
@@ -4,6 +4,10 @@ from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
where 5 + 5 < c_custkey and o_orderkey = (2 + 2)
and (coalesce(2, 3, 4) * 10) + l_linenumber < (0 * 1)
---- PLAN
+Analyzed query: SELECT CAST(1 AS TINYINT) FROM tpch_nested_parquet.customer c,
+c.c_orders o, o.o_lineitems WHERE c_custkey > CAST(10 AS BIGINT) AND o_orderkey
+= CAST(4 AS BIGINT) AND CAST(20 AS BIGINT) + CAST(l_linenumber AS BIGINT) <
+CAST(0 AS BIGINT)
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=2
PLAN-ROOT SINK
@@ -55,18 +59,18 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=289.00MB
- predicates: c_custkey > 10, !empty(c.c_orders)
- predicates on o: !empty(o.o_lineitems), o_orderkey = 4
- predicates on o_lineitems: 20 + l_linenumber < 0
+ predicates: c_custkey > CAST(10 AS BIGINT), !empty(c.c_orders)
+ predicates on o: !empty(o.o_lineitems), o_orderkey = CAST(4 AS BIGINT)
+ predicates on o_lineitems: CAST(20 AS BIGINT) + CAST(l_linenumber AS BIGINT) < CAST(0 AS BIGINT)
stored statistics:
table: rows=150000 size=289.00MB
columns missing stats: c_orders
extrapolated-rows=disabled max-scan-range-rows=44227
- parquet statistics predicates: c_custkey > 10
- parquet statistics predicates on o: o_orderkey = 4
- parquet dictionary predicates: c_custkey > 10
- parquet dictionary predicates on o: o_orderkey = 4
- parquet dictionary predicates on o_lineitems: 20 + l_linenumber < 0
+ parquet statistics predicates: c_custkey > CAST(10 AS BIGINT)
+ parquet statistics predicates on o: o_orderkey = CAST(4 AS BIGINT)
+ parquet dictionary predicates: c_custkey > CAST(10 AS BIGINT)
+ parquet dictionary predicates on o: o_orderkey = CAST(4 AS BIGINT)
+ parquet dictionary predicates on o_lineitems: CAST(20 AS BIGINT) + CAST(l_linenumber AS BIGINT) < CAST(0 AS BIGINT)
mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1
tuple-ids=0 row-size=24B cardinality=15000
in pipelines: 00(GETNEXT)
@@ -76,6 +80,8 @@ select * from functional_hbase.stringids
where string_col = cast(4 as string) and 2 + 3 = tinyint_col
and id between concat('1', '0') and upper('20')
---- PLAN
+Analyzed query: SELECT * FROM functional_hbase.stringids WHERE string_col = '4'
+AND tinyint_col = CAST(5 AS TINYINT) AND id >= '10' AND id <= '20'
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=4.00KB mem-reservation=0B thread-reservation=1
PLAN-ROOT SINK
@@ -85,7 +91,7 @@ PLAN-ROOT SINK
start key: 10
stop key: 20\0
hbase filters: d:string_col EQUAL '4'
- predicates: tinyint_col = 5, string_col = '4'
+ predicates: tinyint_col = CAST(5 AS TINYINT), string_col = '4'
stored statistics:
table: rows=10000
columns: all
@@ -97,14 +103,17 @@ PLAN-ROOT SINK
select * from functional.alltypes_datasource
where tinyint_col < (pow(2, 8)) and float_col != 0 and 1 + 1 > int_col
---- PLAN
+Analyzed query: SELECT * FROM functional.alltypes_datasource WHERE
+CAST(tinyint_col AS DOUBLE) < CAST(256 AS DOUBLE) AND float_col != CAST(0 AS
+FLOAT) AND int_col < CAST(2 AS INT)
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=1.00GB mem-reservation=0B thread-reservation=1
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN DATA SOURCE [functional.alltypes_datasource]
-data source predicates: tinyint_col < 256, int_col < 2
-predicates: float_col != 0
+data source predicates: CAST(tinyint_col AS DOUBLE) < CAST(256 AS DOUBLE), int_col < CAST(2 AS INT)
+predicates: float_col != CAST(0 AS FLOAT)
mem-estimate=1.00GB mem-reservation=0B thread-reservation=0
tuple-ids=0 row-size=116B cardinality=500
in pipelines: 00(GETNEXT)
@@ -117,15 +126,21 @@ having 1024 * 1024 * count(*) % 2 = 0
and (sum(1 + 1 + id) > 1 or sum(1 + 1 + id) > 1)
and (sum(1 + 1 + id) between 5 and 10)
---- PLAN
+Analyzed query: SELECT sum(CAST(2 AS BIGINT) + CAST(id AS BIGINT)) FROM
+functional.alltypes GROUP BY timestamp_col = TIMESTAMP '2016-11-15 00:00:00'
+HAVING CAST(1048576 AS BIGINT) * count(*) % CAST(2 AS BIGINT) = CAST(0 AS
+BIGINT) AND sum(CAST(2 AS BIGINT) + CAST(id AS BIGINT)) > CAST(1 AS BIGINT) AND
+sum(CAST(2 AS BIGINT) + CAST(id AS BIGINT)) >= CAST(5 AS BIGINT) AND sum(CAST(2
+AS BIGINT) + CAST(id AS BIGINT)) <= CAST(10 AS BIGINT)
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.97MB thread-reservation=2
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
-| output: sum(2 + id), count(*)
+| output: sum(CAST(2 AS BIGINT) + CAST(id AS BIGINT)), count(*)
| group by: timestamp_col = TIMESTAMP '2016-11-15 00:00:00'
-| having: sum(2 + id) <= 10, sum(2 + id) > 1, sum(2 + id) >= 5, 1048576 * count(*) % 2 = 0
+| having: sum(2 + id) <= CAST(10 AS BIGINT), sum(2 + id) > CAST(1 AS BIGINT), sum(2 + id) >= CAST(5 AS BIGINT), CAST(1048576 AS BIGINT) * count(*) % CAST(2 AS BIGINT) = CAST(0 AS BIGINT)
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
| tuple-ids=1 row-size=17B cardinality=0
| in pipelines: 01(GETNEXT), 00(OPEN)
@@ -148,6 +163,12 @@ left outer join functional.alltypes b
a.int_col between 0 + 0 + 0 + b.bigint_col and b.bigint_col + ascii('a'))
where round(1.11 + 2.22 + 3.33 + 4.44, 1) < cast(b.double_col as decimal(3, 2))
---- PLAN
+Analyzed query: SELECT CAST(1 AS TINYINT) FROM functional.alltypes a LEFT OUTER
+JOIN functional.alltypes b ON (CAST(2 AS BIGINT) + CAST(a.id AS BIGINT) =
+CAST(b.id AS BIGINT) - CAST(2 AS BIGINT) AND CAST(a.int_col AS BIGINT) >= CAST(0
+AS BIGINT) + b.bigint_col AND CAST(a.int_col AS BIGINT) <= b.bigint_col +
+CAST(97 AS BIGINT)) WHERE CAST(b.double_col AS DECIMAL(3,2)) > CAST(11.1 AS
+DECIMAL(6,1))
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=257.94MB mem-reservation=2.00MB thread-reservation=3
PLAN-ROOT SINK
@@ -156,15 +177,15 @@ PLAN-ROOT SINK
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: 2 + a.id = b.id - 2
| fk/pk conjuncts: assumed fk/pk
-| other join predicates: a.int_col <= b.bigint_col + 97, a.int_col >= 0 + b.bigint_col
-| other predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
+| other join predicates: CAST(a.int_col AS BIGINT) <= b.bigint_col + CAST(97 AS BIGINT), CAST(a.int_col AS BIGINT) >= CAST(0 AS BIGINT) + b.bigint_col
+| other predicates: CAST(b.double_col AS DECIMAL(3,2)) > CAST(11.1 AS DECIMAL(6,1))
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
| tuple-ids=0,1N row-size=28B cardinality=7300
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
-| predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
+| predicates: CAST(b.double_col AS DECIMAL(3,2)) > CAST(11.1 AS DECIMAL(6,1))
| stored statistics:
| table: rows=7300 size=478.45KB
| partitions: 24/24 rows=7300
@@ -193,21 +214,27 @@ left outer join functional.alltypes b
a.int_col between 0 + 0 + 0 + b.bigint_col and b.bigint_col + ascii('a'))
where cast(b.double_col as decimal(3, 2)) > round(1.11 + 2.22 + 3.33 + 4.44, 1)
---- PLAN
+Analyzed query: SELECT CAST(1 AS TINYINT) FROM functional.alltypes a LEFT OUTER
+JOIN functional.alltypes b ON (CAST(2 AS BIGINT) + CAST(a.id AS BIGINT) =
+CAST(b.id AS BIGINT) - CAST(2 AS BIGINT) OR CAST(a.int_col AS BIGINT) >= CAST(0
+AS BIGINT) + b.bigint_col AND CAST(a.int_col AS BIGINT) <= b.bigint_col +
+CAST(97 AS BIGINT)) WHERE CAST(b.double_col AS DECIMAL(3,2)) > CAST(11.1 AS
+DECIMAL(6,1))
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=256.01MB mem-reservation=64.00KB thread-reservation=3
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
02:NESTED LOOP JOIN [LEFT OUTER JOIN]
-| join predicates: (2 + a.id = b.id - 2 OR a.int_col >= 0 + b.bigint_col AND a.int_col <= b.bigint_col + 97)
-| predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
+| join predicates: (CAST(2 AS BIGINT) + CAST(a.id AS BIGINT) = CAST(b.id AS BIGINT) - CAST(2 AS BIGINT) OR CAST(a.int_col AS BIGINT) >= CAST(0 AS BIGINT) + b.bigint_col AND CAST(a.int_col AS BIGINT) <= b.bigint_col + CAST(97 AS BIGINT))
+| predicates: CAST(b.double_col AS DECIMAL(3,2)) > CAST(11.1 AS DECIMAL(6,1))
| mem-estimate=14.26KB mem-reservation=0B thread-reservation=0
| tuple-ids=0,1N row-size=28B cardinality=7300
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
-| predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
+| predicates: CAST(b.double_col AS DECIMAL(3,2)) > CAST(11.1 AS DECIMAL(6,1))
| stored statistics:
| table: rows=7300 size=478.45KB
| partitions: 24/24 rows=7300
@@ -234,6 +261,10 @@ from functional.alltypes
group by timestamp_col = cast('2015-11-15' as timestamp) + interval 1 year
having 1024 * 1024 * count(*) % 2 = 0
---- PLAN
+Analyzed query: SELECT sum(DISTINCT CAST(2 AS BIGINT) + CAST(id AS BIGINT)) FROM
+functional.alltypes GROUP BY timestamp_col = TIMESTAMP '2016-11-15 00:00:00'
+HAVING CAST(1048576 AS BIGINT) * count(*) % CAST(2 AS BIGINT) = CAST(0 AS
+BIGINT)
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=3.88MB thread-reservation=2
PLAN-ROOT SINK
@@ -242,14 +273,14 @@ PLAN-ROOT SINK
02:AGGREGATE [FINALIZE]
| output: sum(2 + id), count:merge(*)
| group by: timestamp_col = TIMESTAMP '2016-11-15 00:00:00'
-| having: 1048576 * count(*) % 2 = 0
+| having: CAST(1048576 AS BIGINT) * count(*) % CAST(2 AS BIGINT) = CAST(0 AS BIGINT)
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
| tuple-ids=2 row-size=17B cardinality=0
| in pipelines: 02(GETNEXT), 01(OPEN)
|
01:AGGREGATE
| output: count(*)
-| group by: timestamp_col = TIMESTAMP '2016-11-15 00:00:00', 2 + id
+| group by: timestamp_col = TIMESTAMP '2016-11-15 00:00:00', CAST(2 AS BIGINT) + CAST(id AS BIGINT)
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
| tuple-ids=1 row-size=17B cardinality=7300
| in pipelines: 01(GETNEXT), 00(OPEN)
@@ -270,6 +301,9 @@ select sum(distinct 1 + 1 + id)
from functional.alltypes
having 1024 * 1024 * count(*) % 2 = 0
---- PLAN
+Analyzed query: SELECT sum(DISTINCT CAST(2 AS BIGINT) + CAST(id AS BIGINT)) FROM
+functional.alltypes HAVING CAST(1048576 AS BIGINT) * count(*) % CAST(2 AS
+BIGINT) = CAST(0 AS BIGINT)
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.97MB thread-reservation=2
PLAN-ROOT SINK
@@ -277,14 +311,14 @@ PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: sum(2 + id), count:merge(*)
-| having: 1048576 * zeroifnull(count(*)) % 2 = 0
+| having: CAST(1048576 AS BIGINT) * zeroifnull(count(*)) % CAST(2 AS BIGINT) = CAST(0 AS BIGINT)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=2 row-size=16B cardinality=0
| in pipelines: 02(GETNEXT), 01(OPEN)
|
01:AGGREGATE
| output: count(*)
-| group by: 2 + id
+| group by: CAST(2 AS BIGINT) + CAST(id AS BIGINT)
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
| tuple-ids=1 row-size=16B cardinality=7300
| in pipelines: 01(GETNEXT), 00(OPEN)
@@ -306,6 +340,9 @@ select first_value(1 + 1 + int_col - (1 - 1)) over
order by greatest(greatest(10, 20), bigint_col))
from functional.alltypes
---- PLAN
+Analyzed query: SELECT first_value(1 + 1 + int_col - (1 - 1)) OVER (PARTITION BY
+concat(concat('a', 'b'), string_col) ORDER BY greatest(greatest(10, 20),
+bigint_col) ASC) FROM functional.alltypes
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=144.00MB mem-reservation=16.03MB thread-reservation=2
PLAN-ROOT SINK
@@ -342,6 +379,8 @@ PLAN-ROOT SINK
select int_col from functional.alltypes
order by id * abs((factorial(5) / power(2, 4)))
---- PLAN
+Analyzed query: SELECT int_col FROM functional.alltypes ORDER BY id * CAST(7.5
+AS DOUBLE) ASC
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=134.00MB mem-reservation=6.03MB thread-reservation=2
PLAN-ROOT SINK
@@ -369,6 +408,9 @@ insert into functional.alltypes (id, int_col) partition(year,month)
select id, int_col, cast(1 + 1 + 1 + year as int), cast(month - (1 - 1 - 1) as int)
from functional.alltypessmall
---- PLAN
+Analyzed query: SELECT id, int_col, CAST(CAST(3 AS BIGINT) + CAST(year AS
+BIGINT) AS INT), CAST(CAST(month AS BIGINT) - CAST(-1 AS BIGINT) AS INT) FROM
+functional.alltypessmall
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=38.00MB mem-reservation=6.01MB thread-reservation=2
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(CAST(3 + year AS INT),CAST(month - -1 AS INT))]
@@ -400,13 +442,17 @@ select sum(id + c3) from
) v2
) v3
---- PLAN
+Analyzed query: SELECT sum(CAST(id AS BIGINT) + CAST(c3 AS BIGINT)) FROM (SELECT
+id, CAST(10 AS INT) + CAST(c2 AS INT) c3 FROM (SELECT id, CAST(20 AS SMALLINT) +
+CAST(c1 AS SMALLINT) c2 FROM (SELECT id, CAST(30 AS TINYINT) c1 FROM
+functional.alltypes LIMIT CAST(2 AS TINYINT)) v1) v2) v3
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
-| output: sum(id + 10 + 20 + 30)
+| output: sum(CAST(id AS BIGINT) + CAST(CAST(10 AS INT) + CAST(CAST(20 AS SMALLINT) + CAST(30 AS SMALLINT) AS INT) AS BIGINT))
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=4 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
index 6de7b49..881f2da 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
@@ -146,7 +146,7 @@ PLAN-ROOT SINK
|
|--01:SCAN HDFS [tpcds.store_returns]
| partitions=1/1 files=1 size=31.19MB
-| predicates: sr_return_quantity < 10
+| predicates: sr_return_quantity < CAST(10 AS INT)
| stored statistics:
| table: rows=287514 size=31.19MB
| columns: all
@@ -317,7 +317,7 @@ PLAN-ROOT SINK
| |
| |--03:SCAN HDFS [tpcds.date_dim d1]
| | partitions=1/1 files=1 size=9.84MB
-| | predicates: d1.d_fy_week_seq = 1000
+| | predicates: d1.d_fy_week_seq = CAST(1000 AS INT)
| | stored statistics:
| | table: rows=73049 size=9.84MB
| | columns: all
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index 05b1c92..dfe8241 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -164,8 +164,8 @@ Per-Host Resources: mem-estimate=9.75MB mem-reservation=0B thread-reservation=2
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN KUDU [functional_kudu.alltypes]
- predicates: id IN (int_col), bigint_col IN (9999999999999999999), double_col IN (CAST('inf' AS DOUBLE)), float_col IN (CAST('NaN' AS FLOAT)), int_col IN (9999999999), smallint_col IN (99999, 2), tinyint_col IN (1, 999), bool_col IN (1), string_col NOT IN ('bar')
- kudu predicates: double_col IN (0), float_col IN (0), bigint_col IN (1, 2), int_col IN (1, 2), smallint_col IN (0, 2), string_col IN ('foo', 'foo '), tinyint_col IN (1, 2), bool_col IN (TRUE)
+ predicates: id IN (int_col), CAST(bigint_col AS DECIMAL(19,0)) IN (CAST(9999999999999999999 AS DECIMAL(19,0))), double_col IN (CAST('inf' AS DOUBLE)), float_col IN (CAST('NaN' AS FLOAT)), CAST(int_col AS BIGINT) IN (CAST(9999999999 AS BIGINT)), CAST(smallint_col AS INT) IN (CAST(99999 AS INT), CAST(2 AS INT)), CAST(tinyint_col AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST(999 AS SMALLINT)), CAST(bool_col AS TINYINT) IN (CAST(1 AS TINYINT)), string_col NOT IN ('bar')
+ kudu predicates: double_col IN (CAST(0 AS DOUBLE)), float_col IN (CAST(0 AS FLOAT)), bigint_col IN (CAST(1 AS BIGINT), CAST(2 AS BIGINT)), int_col IN (CAST(1 AS INT), CAST(2 AS INT)), smallint_col IN (CAST(0 AS SMALLINT), CAST(2 AS SMALLINT)), string_col IN ('foo', 'foo '), tinyint_col IN (CAST(1 AS TINYINT), CAST(2 AS TINYINT)), bool_col IN (TRUE)
mem-estimate=9.75MB mem-reservation=0B thread-reservation=1
tuple-ids=0 row-size=97B cardinality=5
in pipelines: 00(GETNEXT)
@@ -179,7 +179,7 @@ Per-Host Resources: mem-estimate=4.50MB mem-reservation=0B thread-reservation=2
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN KUDU [functional_kudu.decimal_tbl]
- kudu predicates: d1 IN (1234, 12345)
+ kudu predicates: d1 IN (CAST(1234 AS DECIMAL(9,0)), CAST(12345 AS DECIMAL(9,0)))
mem-estimate=4.50MB mem-reservation=0B thread-reservation=1
tuple-ids=0 row-size=56B cardinality=2
in pipelines: 00(GETNEXT)
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test b/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
index bc06135..3ef04a7 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
@@ -6,6 +6,10 @@ from tpch_parquet.customer
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=33.97MB Threads=5
Per-Host Resource Estimates: Memory=68MB
+Analyzed query: SELECT
+-- +straight_join
+* FROM tpch_parquet.customer INNER JOIN tpch_parquet.nation ON c_nationkey =
+n_nationkey
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=10.35MB mem-reservation=0B thread-reservation=1
@@ -63,6 +67,10 @@ from tpch_parquet.lineitem
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=110.00MB Threads=5
Per-Host Resource Estimates: Memory=442MB
+Analyzed query: SELECT
+-- +straight_join
+* FROM tpch_parquet.lineitem LEFT OUTER JOIN tpch_parquet.orders ON l_orderkey =
+o_orderkey
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=11.35MB mem-reservation=0B thread-reservation=1
@@ -117,6 +125,9 @@ where l_orderkey not in (select o_orderkey from tpch_parquet.orders)
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=78.00MB Threads=5
Per-Host Resource Estimates: Memory=175MB
+Analyzed query: SELECT * FROM tpch_parquet.lineitem NULL AWARE LEFT ANTI JOIN
+(SELECT o_orderkey FROM tpch_parquet.orders) `$a$1` (`$c$1`) ON l_orderkey =
+`$a$1`.`$c$1`
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=10.78MB mem-reservation=0B thread-reservation=1
@@ -173,6 +184,11 @@ having count(*) = 1
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=125.00MB Threads=7
Per-Host Resource Estimates: Memory=293MB
+Analyzed query: SELECT
+-- +straight_join
+l_orderkey, o_orderstatus, count(*) FROM tpch_parquet.lineitem INNER JOIN
+tpch_parquet.orders ON o_orderkey = l_orderkey GROUP BY CAST(1 AS INVALID_TYPE),
+CAST(2 AS INVALID_TYPE) HAVING count(*) = CAST(1 AS BIGINT)
F04:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=10.11MB mem-reservation=0B thread-reservation=1
@@ -189,7 +205,7 @@ Per-Host Resources: mem-estimate=56.11MB mem-reservation=46.00MB thread-reservat
07:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: l_orderkey, o_orderstatus
-| having: count(*) = 1
+| having: count(*) = CAST(1 AS BIGINT)
| mem-estimate=46.00MB mem-reservation=46.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=2 row-size=33B cardinality=4690314
| in pipelines: 07(GETNEXT), 00(OPEN)
@@ -258,6 +274,7 @@ from tpch_parquet.lineitem
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=120.00MB Threads=4
Per-Host Resource Estimates: Memory=1.71GB
+Analyzed query: SELECT DISTINCT * FROM tpch_parquet.lineitem
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=10.78MB mem-reservation=0B thread-reservation=1
@@ -308,6 +325,9 @@ group by 1, 2
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=98.00MB Threads=4
Per-Host Resource Estimates: Memory=302MB
+Analyzed query: SELECT l_orderkey, l_partkey, group_concat(l_linestatus, ',')
+FROM tpch_parquet.lineitem GROUP BY CAST(1 AS INVALID_TYPE), CAST(2 AS
+INVALID_TYPE)
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=10.11MB mem-reservation=0B thread-reservation=1
@@ -360,6 +380,8 @@ from functional.alltypes
Max Per-Host Resource Reservation: Memory=40.03MB Threads=4
Per-Host Resource Estimates: Memory=56MB
Codegen disabled by planner
+Analyzed query: SELECT max(tinyint_col) OVER (PARTITION BY int_col) FROM
+functional.alltypes
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=56.26KB mem-reservation=0B thread-reservation=1
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
index 864fc61..c01e8a0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
@@ -59,14 +59,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=24/24 files=24 size=188.92KB
- predicates: id < 10
+ predicates: id < CAST(10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: id < 10
- parquet dictionary predicates: id < 10
+ parquet statistics predicates: id < CAST(10 AS INT)
+ parquet dictionary predicates: id < CAST(10 AS INT)
mem-estimate=16.00MB mem-reservation=24.00KB thread-reservation=0
tuple-ids=0 row-size=16B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -114,14 +114,14 @@ Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.07MB thread-reserv
|
00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
partitions=24/24 files=24 size=188.92KB
- predicates: id < 10
+ predicates: id < CAST(10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: id < 10
- parquet dictionary predicates: id < 10
+ parquet statistics predicates: id < CAST(10 AS INT)
+ parquet dictionary predicates: id < CAST(10 AS INT)
mem-estimate=16.00MB mem-reservation=24.00KB thread-reservation=0
tuple-ids=0 row-size=16B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -153,14 +153,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=24/24 files=24 size=188.92KB
- predicates: id < 10
+ predicates: id < CAST(10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: id < 10
- parquet dictionary predicates: id < 10
+ parquet statistics predicates: id < CAST(10 AS INT)
+ parquet dictionary predicates: id < CAST(10 AS INT)
mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=0
tuple-ids=0 row-size=8B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -201,14 +201,14 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9
Per-Host Resources: mem-estimate=48.00MB mem-reservation=48.00KB thread-reservation=3
00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
partitions=24/24 files=24 size=188.92KB
- predicates: id < 10
+ predicates: id < CAST(10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: id < 10
- parquet dictionary predicates: id < 10
+ parquet statistics predicates: id < CAST(10 AS INT)
+ parquet dictionary predicates: id < CAST(10 AS INT)
mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=0
tuple-ids=0 row-size=8B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -269,19 +269,19 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=288.98MB
- predicates: c_custkey < 10, !empty(c.c_orders)
- predicates on o: !empty(o.o_lineitems), o_orderkey < 5
- predicates on o_lineitems: l_linenumber < 3
+ predicates: c_custkey < CAST(10 AS BIGINT), !empty(c.c_orders)
+ predicates on o: !empty(o.o_lineitems), o_orderkey < CAST(5 AS BIGINT)
+ predicates on o_lineitems: l_linenumber < CAST(3 AS INT)
stored statistics:
table: rows=150000 size=288.98MB
columns missing stats: c_orders
- extrapolated-rows=disabled max-scan-range-rows=44229
- parquet statistics predicates: c_custkey < 10
- parquet statistics predicates on o: o_orderkey < 5
- parquet statistics predicates on o_lineitems: l_linenumber < 3
- parquet dictionary predicates: c_custkey < 10
- parquet dictionary predicates on o: o_orderkey < 5
- parquet dictionary predicates on o_lineitems: l_linenumber < 3
+ extrapolated-rows=disabled max-scan-range-rows=44227
+ parquet statistics predicates: c_custkey < CAST(10 AS BIGINT)
+ parquet statistics predicates on o: o_orderkey < CAST(5 AS BIGINT)
+ parquet statistics predicates on o_lineitems: l_linenumber < CAST(3 AS INT)
+ parquet dictionary predicates: c_custkey < CAST(10 AS BIGINT)
+ parquet dictionary predicates on o: o_orderkey < CAST(5 AS BIGINT)
+ parquet dictionary predicates on o_lineitems: l_linenumber < CAST(3 AS INT)
mem-estimate=88.00MB mem-reservation=88.00MB thread-reservation=0
tuple-ids=0 row-size=254B cardinality=15000
in pipelines: 00(GETNEXT)
@@ -344,19 +344,19 @@ Per-Host Resources: mem-estimate=264.00MB mem-reservation=264.00MB thread-reserv
|
00:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM]
partitions=1/1 files=4 size=288.98MB
- predicates: c_custkey < 10, !empty(c.c_orders)
- predicates on o: !empty(o.o_lineitems), o_orderkey < 5
- predicates on o_lineitems: l_linenumber < 3
+ predicates: c_custkey < CAST(10 AS BIGINT), !empty(c.c_orders)
+ predicates on o: !empty(o.o_lineitems), o_orderkey < CAST(5 AS BIGINT)
+ predicates on o_lineitems: l_linenumber < CAST(3 AS INT)
stored statistics:
table: rows=150000 size=288.98MB
columns missing stats: c_orders
extrapolated-rows=disabled max-scan-range-rows=44229
- parquet statistics predicates: c_custkey < 10
- parquet statistics predicates on o: o_orderkey < 5
- parquet statistics predicates on o_lineitems: l_linenumber < 3
- parquet dictionary predicates: c_custkey < 10
- parquet dictionary predicates on o: o_orderkey < 5
- parquet dictionary predicates on o_lineitems: l_linenumber < 3
+ parquet statistics predicates: c_custkey < CAST(10 AS BIGINT)
+ parquet statistics predicates on o: o_orderkey < CAST(5 AS BIGINT)
+ parquet statistics predicates on o_lineitems: l_linenumber < CAST(3 AS INT)
+ parquet dictionary predicates: c_custkey < CAST(10 AS BIGINT)
+ parquet dictionary predicates on o: o_orderkey < CAST(5 AS BIGINT)
+ parquet dictionary predicates on o_lineitems: l_linenumber < CAST(3 AS INT)
mem-estimate=88.00MB mem-reservation=88.00MB thread-reservation=0
tuple-ids=0 row-size=254B cardinality=15000
in pipelines: 00(GETNEXT)
@@ -409,13 +409,13 @@ PLAN-ROOT SINK
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=288.98MB
predicates: !empty(c.c_orders), !empty(c.c_orders)
- predicates on o1: o1.o_orderkey < 5
+ predicates on o1: o1.o_orderkey < CAST(5 AS BIGINT)
stored statistics:
table: rows=150000 size=288.98MB
columns missing stats: c_orders, c_orders
extrapolated-rows=disabled max-scan-range-rows=44229
- parquet statistics predicates on o1: o1.o_orderkey < 5
- parquet dictionary predicates on o1: o1.o_orderkey < 5
+ parquet statistics predicates on o1: o1.o_orderkey < CAST(5 AS BIGINT)
+ parquet dictionary predicates on o1: o1.o_orderkey < CAST(5 AS BIGINT)
mem-estimate=88.00MB mem-reservation=16.00MB thread-reservation=0
tuple-ids=0 row-size=270B cardinality=150000
in pipelines: 00(GETNEXT)
@@ -470,13 +470,13 @@ Per-Host Resources: mem-estimate=269.81MB mem-reservation=53.81MB thread-reserva
00:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM]
partitions=1/1 files=4 size=288.98MB
predicates: !empty(c.c_orders), !empty(c.c_orders)
- predicates on o1: o1.o_orderkey < 5
+ predicates on o1: o1.o_orderkey < CAST(5 AS BIGINT)
stored statistics:
table: rows=150000 size=288.98MB
columns missing stats: c_orders, c_orders
extrapolated-rows=disabled max-scan-range-rows=44229
- parquet statistics predicates on o1: o1.o_orderkey < 5
- parquet dictionary predicates on o1: o1.o_orderkey < 5
+ parquet statistics predicates on o1: o1.o_orderkey < CAST(5 AS BIGINT)
+ parquet dictionary predicates on o1: o1.o_orderkey < CAST(5 AS BIGINT)
mem-estimate=88.00MB mem-reservation=16.00MB thread-reservation=0
tuple-ids=0 row-size=270B cardinality=150000
in pipelines: 00(GETNEXT)
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering-disabled.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering-disabled.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering-disabled.test
index 426ef38..b744547 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering-disabled.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering-disabled.test
@@ -21,7 +21,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=24/24 files=24 size=188.29KB
- predicates: int_col IS NULL, int_col > 1, int_col > tinyint_col, int_col * rand() > 50
+ predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
@@ -58,7 +58,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=22/24 files=22 size=172.28KB
- predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/22 rows=unavailable
@@ -88,7 +88,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=24/24 files=24 size=188.29KB
- predicates: id IN (int_col), id NOT IN (0, 1, 2), int_col % 50 IN (0, 1), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
+ predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
@@ -157,7 +157,7 @@ PLAN-ROOT SINK
partitions=1/1 files=2 size=6.92KB
predicates: !empty(c.nested_struct.c.d)
predicates on cn: !empty(cn.item)
- predicates on a: a.item.e < -10
+ predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
@@ -223,9 +223,9 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=288.98MB
- predicates: c_custkey > 0, !empty(c.c_orders)
- predicates on o: !empty(o.o_lineitems), o.o_orderkey > 0
- predicates on l: l.l_partkey > 0
+ predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders)
+ predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT)
+ predicates on l: l.l_partkey > CAST(0 AS BIGINT)
stored statistics:
table: rows=150000 size=288.98MB
columns missing stats: c_orders
@@ -334,7 +334,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypesmixedformat]
partitions=4/4 files=4 size=66.61KB
- predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/4 rows=unavailable
http://git-wip-us.apache.org/repos/asf/impala/blob/d3948d9a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
index 247edb3..a373b96 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
@@ -20,14 +20,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=24/24 files=24 size=188.92KB
- predicates: int_col IS NULL, int_col > 1, int_col > tinyint_col, int_col * rand() > 50
+ predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: int_col > 1
- parquet dictionary predicates: int_col > 1
+ parquet statistics predicates: int_col > CAST(1 AS INT)
+ parquet dictionary predicates: int_col > CAST(1 AS INT)
mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
tuple-ids=0 row-size=5B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -50,7 +50,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
- predicates: int_col IS NULL, int_col > 1, int_col > tinyint_col, int_col * rand() > 50
+ predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
stored statistics:
table: rows=7300 size=478.45KB
partitions: 24/24 rows=7300
@@ -82,14 +82,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=22/24 files=22 size=172.93KB
- predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/22 rows=unavailable
columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), date_string_col > '1993-10-01'
- parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01'
+ parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
tuple-ids=0 row-size=80B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -116,7 +116,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes]
partitions=22/24 files=22 size=437.72KB
- predicates: id = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, tinyint_col < 50, int_col % 2 = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ predicates: id = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=7300 size=478.45KB
partitions: 22/22 rows=6680
@@ -149,13 +149,13 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=24/24 files=24 size=188.92KB
- predicates: id IN (int_col), id NOT IN (0, 1, 2), int_col % 50 IN (0, 1), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
+ predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet dictionary predicates: id NOT IN (0, 1, 2), int_col % 50 IN (0, 1), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
+ parquet dictionary predicates: id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
tuple-ids=0 row-size=24B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -219,13 +219,13 @@ PLAN-ROOT SINK
partitions=1/1 files=2 size=6.92KB
predicates: !empty(c.nested_struct.c.d)
predicates on cn: !empty(cn.item)
- predicates on a: a.item.e < -10
+ predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates on a: a.item.e < -10
- parquet dictionary predicates on a: a.item.e < -10
+ parquet statistics predicates on a: a.item.e < CAST(-10 AS INT)
+ parquet dictionary predicates on a: a.item.e < CAST(-10 AS INT)
mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
tuple-ids=0 row-size=24B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -254,7 +254,7 @@ PLAN-ROOT SINK
| | in pipelines: 00(GETNEXT)
| |
| |--06:NESTED LOOP JOIN [RIGHT OUTER JOIN]
-| | | predicates: a.item.e < -10
+| | | predicates: a.item.e < CAST(-10 AS INT)
| | | mem-estimate=40B mem-reservation=0B thread-reservation=0
| | | tuple-ids=2N,1,0 row-size=44B cardinality=1
| | | in pipelines: 00(GETNEXT)
@@ -291,7 +291,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional_parquet.complextypestbl c]
partitions=1/1 files=2 size=6.92KB
predicates: !empty(c.nested_struct.c.d)
- predicates on a: a.item.e < -10
+ predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
@@ -357,7 +357,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.complextypestbl c]
partitions=1/1 files=2 size=6.92KB
- predicates on a: a.item.e < -10
+ predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
@@ -421,19 +421,19 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=288.98MB
- predicates: c_custkey > 0, !empty(c.c_orders)
- predicates on o: !empty(o.o_lineitems), o.o_orderkey > 0
- predicates on l: l.l_partkey > 0
+ predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders)
+ predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT)
+ predicates on l: l.l_partkey > CAST(0 AS BIGINT)
stored statistics:
table: rows=150000 size=288.98MB
columns missing stats: c_orders
extrapolated-rows=disabled max-scan-range-rows=44229
- parquet statistics predicates: c_custkey > 0
- parquet statistics predicates on o: o.o_orderkey > 0
- parquet statistics predicates on l: l.l_partkey > 0
- parquet dictionary predicates: c_custkey > 0
- parquet dictionary predicates on o: o.o_orderkey > 0
- parquet dictionary predicates on l: l.l_partkey > 0
+ parquet statistics predicates: c_custkey > CAST(0 AS BIGINT)
+ parquet statistics predicates on o: o.o_orderkey > CAST(0 AS BIGINT)
+ parquet statistics predicates on l: l.l_partkey > CAST(0 AS BIGINT)
+ parquet dictionary predicates: c_custkey > CAST(0 AS BIGINT)
+ parquet dictionary predicates on o: o.o_orderkey > CAST(0 AS BIGINT)
+ parquet dictionary predicates on l: l.l_partkey > CAST(0 AS BIGINT)
mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1
tuple-ids=0 row-size=24B cardinality=15000
in pipelines: 00(GETNEXT)
@@ -478,7 +478,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.complextypestbl c]
partitions=1/1 files=2 size=6.92KB
- predicates on int_array: item > 10
+ predicates on int_array: item > CAST(10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns: unavailable
@@ -582,14 +582,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypesmixedformat]
partitions=4/4 files=4 size=66.61KB
- predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/4 rows=unavailable
columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
extrapolated-rows=disabled max-scan-range-rows=unavailable
- parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), date_string_col > '1993-10-01'
- parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01'
+ parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
tuple-ids=0 row-size=80B cardinality=unavailable
in pipelines: 00(GETNEXT)
@@ -617,7 +617,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypesmixedformat]
partitions=0/4 files=0 size=0B
- predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, int_col % 2 = 1, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+ predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/0 rows=unavailable