You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/05/12 22:09:52 UTC
[17/50] [abbrv] incubator-impala git commit: IMPALA-2345,2991: test coverage for spilling and sorts
IMPALA-2345,2991: test coverage for spilling and sorts
Add missing coverage for sorting by CHAR and VARCHAR.
Add more coverage for spilling sorts.
Fix spilling tests: ensure that they actually reliably spill (many of
them had memory limits high enough that they could run entirely in
memory).
I ran this in a loop for a while to flush out flaky tests. The tests
should be fairly predictable given that they're not run concurrently
with other tests and we allocate enough block manager memory so that
each operator can obtain its reservation.
Change-Id: Ia2d2627a2c327dcdf269ea3216385b1af9dfa305
Reviewed-on: http://gerrit.cloudera.org:8080/2877
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/34c95c95
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/34c95c95
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/34c95c95
Branch: refs/heads/master
Commit: 34c95c95901ba3d81a2b30f17ebf194cec4ef1d1
Parents: a805e10
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri Apr 22 11:14:02 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Thu May 12 14:17:55 2016 -0700
----------------------------------------------------------------------
.../queries/QueryTest/spilling.test | 273 +++++++++++++++++--
tests/custom_cluster/test_spilling.py | 4 +-
2 files changed, 257 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34c95c95/testdata/workloads/functional-query/queries/QueryTest/spilling.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
index 1db90d3..a29c6c7 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
@@ -1,7 +1,6 @@
====
---- QUERY
-set num_nodes=1;
-set max_block_mgr_memory=265m;
+set max_block_mgr_memory=25m;
select l_orderkey, count(*)
from lineitem
group by 1
@@ -19,11 +18,18 @@ order by 1 limit 10
34,3
---- TYPES
BIGINT, BIGINT
+---- RUNTIME_PROFILE
+# Verify that spilling and passthrough were activated.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
+row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column and string agg columns
+# Could only get it to spill reliably with num_nodes=1.
+# TODO: revisit with new buffer pool.
set num_nodes=1;
-set max_block_mgr_memory=275m;
+set max_block_mgr_memory=25m;
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
from lineitem
group by 1,2
@@ -34,9 +40,12 @@ order by 1,2 limit 3
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
-set max_block_mgr_memory=275m;
+set max_block_mgr_memory=25m;
select l_orderkey, count(*)
from lineitem
group by 1
@@ -54,11 +63,17 @@ order by 1 limit 10;
34,3
---- TYPES
BIGINT, BIGINT
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
+row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column
-set num_nodes=0;
-set max_block_mgr_memory=275m;
+# Could only get it to spill reliably with num_nodes=1.
+# TODO: revisit with new buffer pool.
+set num_nodes=1;
+set max_block_mgr_memory=25m;
select l_comment, count(*)
from lineitem
group by 1
@@ -71,11 +86,16 @@ order by count(*) desc limit 5
' furiously ',845
---- TYPES
STRING, BIGINT
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column and string agg columns
-set num_nodes=0;
-set max_block_mgr_memory=80m;
+# Could only get it to spill reliably with num_nodes=1.
+# TODO: revisit with new buffer pool.
+set num_nodes=1;
+set max_block_mgr_memory=25m;
select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
from lineitem
group by 1,2
@@ -87,15 +107,13 @@ order by 1,2 limit 3;
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
-# Verify that passthrough and spilling happened in the pre and merge agg.
-# TODO: reenable if we can get it to spill in a non-flaky way
-# row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-# row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
+# Verify that spilling happened in the agg.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
-# Test query with string intermediate state.
-set num_nodes=0;
-set max_block_mgr_memory=275m;
+# Test with string intermediate state (avg() uses string intermediate value).
+set max_block_mgr_memory=25m;
select l_orderkey, avg(l_orderkey)
from lineitem
group by 1
@@ -108,6 +126,11 @@ order by 1 limit 5
5,5
---- TYPES
BIGINT, DOUBLE
+---- RUNTIME_PROFILE
+# Verify that passthrough and spilling happened in the pre and merge agg.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
+row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
set num_nodes=0;
@@ -128,6 +151,9 @@ l1.l_shipdate = l3.l_shipdate
1846743
---- TYPES
BIGINT
+---- RUNTIME_PROFILE
+# Verify that at least one of the joins was spilled.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
set num_nodes=0;
@@ -138,6 +164,10 @@ select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
---- TYPES
BIGINT, STRING, STRING
+---- RUNTIME_PROFILE
+# Indirectly verify that the analytic spilled: if it spills a block, it must repin it.
+row_regex: .*PinTime: [1-9][0-9]*.*
+====
---- QUERY
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
# using buffers much smaller than the io buffer.
@@ -158,13 +188,16 @@ where a.id = b.id and b.id = c.id group by a.int_col
9,8
---- TYPES
INT, BIGINT
+---- RUNTIME_PROFILE
+# This query is not meant to spill.
+row_regex: .*SpilledPartitions: 0 .*
====
---- QUERY: TPCH-Q21
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
# and left outer joins were returning wrong results).
# Q21 - Suppliers Who Kept Orders Waiting Query
set num_nodes=0;
-set max_block_mgr_memory=100m;
+set max_block_mgr_memory=65m;
select
s_name,
count(*) as numwait
@@ -308,12 +341,14 @@ limit 100
'Supplier#000002483',12
---- TYPES
string, bigint
+---- RUNTIME_PROFILE
+# Verify that at least one of the joins was spilled.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test aggregation spill with group_concat distinct
-# TODO: get this to spill.
set num_nodes=1;
-set max_block_mgr_memory=265m;
+set max_block_mgr_memory=100m;
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
from lineitem
group by 1
@@ -331,6 +366,9 @@ order by 1 limit 10
34,3,'O'
---- TYPES
BIGINT, BIGINT, STRING
+---- RUNTIME_PROFILE
+# Verify that at least one of the aggs spilled.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Regression test for IMPALA-2612. The following query will cause CastToChar
@@ -348,4 +386,203 @@ from lineitem
4502054
---- TYPES
BIGINT
+---- RUNTIME_PROFILE
+# Verify that the agg spilled.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test sort with inlined char column materialized by exprs.
+# Set low memory limit to force spilling.
+set num_nodes=0;
+set max_block_mgr_memory=4m;
+# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
+# When IMPALA-3332 is fixed, can reenable this memory limit.
+#set mem_limit=200m;
+set disable_outermost_topn=1;
+select cast(l_comment as char(50))
+from lineitem
+order by 1
+limit 20;
+---- RESULTS
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias about the en '
+' Tiresias about the slyly ironic dinos ca '
+' Tiresias about the slyly unus '
+' Tiresias above '
+' Tiresias above the fox '
+' Tiresias above the furiously final th '
+' Tiresias above the slyly expr '
+' Tiresias above the stealthily p '
+---- TYPES
+CHAR
+---- RUNTIME_PROFILE
+# Verify that the sort actually spilled
+row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test sort with input inlined char column materialized before sort.
+set num_nodes=0;
+set mem_limit=200m;
+set max_block_mgr_memory=4m;
+set disable_outermost_topn=1;
+select char_col
+from (select cast(l_comment as char(50)) char_col
+ from lineitem) subquery
+order by 1
+limit 20;
+---- RESULTS
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias about the en '
+' Tiresias about the slyly ironic dinos ca '
+' Tiresias about the slyly unus '
+' Tiresias above '
+' Tiresias above the fox '
+' Tiresias above the furiously final th '
+' Tiresias above the slyly expr '
+' Tiresias above the stealthily p '
+---- TYPES
+CHAR
+---- RUNTIME_PROFILE
+# Verify that the sort actually spilled
+row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test sort with input non-inlined char column materialized before sort.
+# Set low memory limit to force spilling.
+set num_nodes=0;
+set mem_limit=200m;
+set max_block_mgr_memory=4m;
+set disable_outermost_topn=1;
+select char_col
+from (select cast(l_comment as char(200)) char_col
+ from lineitem) subquery
+order by 1
+limit 20;
+---- RESULTS
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias about the en '
+' Tiresias about the slyly ironic dinos ca '
+' Tiresias about the slyly unus '
+' Tiresias above '
+' Tiresias above the fox '
+' Tiresias above the furiously final th '
+' Tiresias above the slyly expr '
+' Tiresias above the stealthily p '
+---- TYPES
+CHAR
+---- RUNTIME_PROFILE
+# Verify that the sort actually spilled
+row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test sort with varchar column materialized by exprs.
+# Set low memory limit to force spilling.
+set num_nodes=0;
+set max_block_mgr_memory=4m;
+# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
+# When IMPALA-3332 is fixed, can reenable this memory limit.
+#set mem_limit=200m;
+set disable_outermost_topn=1;
+select cast(l_comment as varchar(50))
+from lineitem
+order by 1
+limit 20;
+---- RESULTS
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias about the en'
+' Tiresias about the slyly ironic dinos ca'
+' Tiresias about the slyly unus'
+' Tiresias above'
+' Tiresias above the fox'
+' Tiresias above the furiously final th'
+' Tiresias above the slyly expr'
+' Tiresias above the stealthily p'
+---- TYPES
+STRING
+---- RUNTIME_PROFILE
+# Verify that the sort actually spilled
+row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test sort with input varchar column materialized before sort.
+# Set low memory limit to force spilling.
+set num_nodes=0;
+set mem_limit=200m;
+set max_block_mgr_memory=4m;
+set disable_outermost_topn=1;
+select char_col
+from (select cast(l_comment as varchar(50)) char_col
+ from lineitem) subquery
+order by 1
+limit 20;
+---- RESULTS
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias '
+' Tiresias about the en'
+' Tiresias about the slyly ironic dinos ca'
+' Tiresias about the slyly unus'
+' Tiresias above'
+' Tiresias above the fox'
+' Tiresias above the furiously final th'
+' Tiresias above the slyly expr'
+' Tiresias above the stealthily p'
+---- TYPES
+STRING
+---- RUNTIME_PROFILE
+# Verify that the sort actually spilled
+row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/34c95c95/tests/custom_cluster/test_spilling.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_spilling.py b/tests/custom_cluster/test_spilling.py
index 8e17e92..541352f 100644
--- a/tests/custom_cluster/test_spilling.py
+++ b/tests/custom_cluster/test_spilling.py
@@ -27,8 +27,8 @@ class TestSpillStress(CustomClusterTestSuite):
@classmethod
def setup_class(cls):
- #start impala with args
- cls._start_impala_cluster(['--impalad_args=--"read_size=200000"',
+ # Start with 256KB buffers, to reduce data size required to force spilling.
+ cls._start_impala_cluster(['--impalad_args=--"read_size=262144"',
'catalogd_args="--load_catalog_in_background=false"'])
super(CustomClusterTestSuite, cls).setup_class()