Posted to commits@impala.apache.org by jr...@apache.org on 2017/09/29 19:14:31 UTC

[1/6] incubator-impala git commit: IMPALA-5975: Work around broken beeline clients

Repository: incubator-impala
Updated Branches:
  refs/heads/master dd340b881 -> 64d0dd93e


IMPALA-5975: Work around broken beeline clients

Some beeline clients will not execute a statement unless it ends with a
semicolon. The workaround is simply to always append one.
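
For illustration only, a minimal Python sketch of the convention (not part of
the patch; the helper name, user, and JDBC URL are made up, and the real fix is
the one-character change in the diff below):

  # Hypothetical wrapper: append the trailing semicolon that some broken
  # beeline clients need before they will execute a statement.
  import subprocess

  def run_beeline(jdbc_url, user, statement):
      statement = statement.rstrip()
      if not statement.endswith(";"):
          statement += ";"
      subprocess.check_call(
          ["beeline", "-n", user, "-u", jdbc_url, "-e", statement])

  run_beeline("jdbc:hive2://localhost:11050/default", "impala",
              "DROP TABLE IF EXISTS functional_hbase.internal_hbase_table")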

Change-Id: Id8b9f3dde4445513f1f389785a002c6cc6b3dada
Reviewed-on: http://gerrit.cloudera.org:8080/8132
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/439f245d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/439f245d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/439f245d

Branch: refs/heads/master
Commit: 439f245d34fa7a76508a5a620a0d69e7bed947a4
Parents: dd340b8
Author: Zachary Amsden <za...@cloudera.com>
Authored: Fri Sep 22 18:38:58 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Sep 27 03:27:45 2017 +0000

----------------------------------------------------------------------
 testdata/bin/create-load-data.sh         | 2 +-
 testdata/bin/create-table-many-blocks.sh | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/439f245d/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 6e43380..97787c2 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -311,7 +311,7 @@ function create-internal-hbase-table {
   # Need to investigate this more, but this works around the problem to unblock automation.
   set +o errexit
   beeline -n $USER -u "${JDBC_URL}" -e\
-    "DROP TABLE IF EXISTS functional_hbase.internal_hbase_table"
+    "DROP TABLE IF EXISTS functional_hbase.internal_hbase_table;"
   echo "disable 'functional_hbase.internal_hbase_table'" | hbase shell
   echo "drop 'functional_hbase.internal_hbase_table'" | hbase shell
   set -e

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/439f245d/testdata/bin/create-table-many-blocks.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-table-many-blocks.sh b/testdata/bin/create-table-many-blocks.sh
index abdf94f..4c0a57d 100755
--- a/testdata/bin/create-table-many-blocks.sh
+++ b/testdata/bin/create-table-many-blocks.sh
@@ -72,9 +72,9 @@ HDFS_PATH=/test-warehouse/many_blocks_num_blocks_per_partition_${BLOCKS_PER_PART
 DB_NAME=scale_db
 TBL_NAME=num_partitions_${NUM_PARTITIONS}_blocks_per_partition_${BLOCKS_PER_PARTITION}
 
-$HIVE_CMD -e "create database if not exists scale_db"
-$HIVE_CMD -e "drop table if exists ${DB_NAME}.${TBL_NAME}"
-$HIVE_CMD -e "create external table ${DB_NAME}.${TBL_NAME} (i int) partitioned by (j int)"
+$HIVE_CMD -e "create database if not exists scale_db;"
+$HIVE_CMD -e "drop table if exists ${DB_NAME}.${TBL_NAME};"
+$HIVE_CMD -e "create external table ${DB_NAME}.${TBL_NAME} (i int) partitioned by (j int);"
 
 # Generate many (small) files. Each file will be assigned a unique block.
 echo "Generating ${BLOCKS_PER_PARTITION} files"


[3/6] incubator-impala git commit: Move tests related to the old join node.

Posted by jr...@apache.org.
Move tests related to the old join node.

No tests were added, dropped, or modified; they are only consolidated
into fewer .test files.

Change-Id: Idda4b34b5e6e9b5012b177a4c00077aa7fec394c
Reviewed-on: http://gerrit.cloudera.org:8080/8153
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c1781b73
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c1781b73
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c1781b73

Branch: refs/heads/master
Commit: c1781b73b35899ed9374befa2e04e14747224be7
Parents: 4d49099
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Sep 26 22:47:55 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 28 18:36:17 2017 +0000

----------------------------------------------------------------------
 .../queries/QueryTest/exprs.test                | 12 +++
 .../queries/QueryTest/inline-view.test          | 19 ++++
 .../queries/QueryTest/joins-partitioned.test    | 36 --------
 .../queries/QueryTest/runtime_row_filters.test  | 95 ++++++++++++++++++++
 .../QueryTest/runtime_row_filters_phj.test      | 94 -------------------
 tests/query_test/test_join_queries.py           |  3 -
 tests/query_test/test_runtime_filters.py        |  3 -
 7 files changed, 126 insertions(+), 136 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 811a169..552dea2 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -876,6 +876,18 @@ order by int_col limit 20
 int,string
 ====
 ---- QUERY
+# IMPALA-2495: In predicate containing a TupleIsNullPredicate
+select count(id) from functional.alltypestiny t1
+left join (select coalesce(1, 10) as int_col
+           from functional.alltypessmall) t2
+on t1.id = t2.int_col
+where t2.int_col in (t2.int_col, 10)
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
 select count(*) from alltypesagg
 where true in (bool_col, tinyint_col)
 ---- RESULTS

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/inline-view.test b/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
index 7c7616d..dd0094b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
@@ -499,3 +499,22 @@ true
 ---- TYPES
 boolean
 ====
+---- QUERY
+# IMPALA-1987: Equi-join predicates of outer joins contain TupleIsNullPredicate exprs.
+select t1.int_col, t2.int_col, t3.id
+from alltypestiny t1 left outer join
+  (select coalesce(int_col, 384) as int_col from alltypestiny) t2
+  on t1.int_col = t2.int_col
+left outer join
+  (select 0 as id from alltypestiny) t3
+  on t1.int_col = t3.id
+order by 1 limit 5
+---- RESULTS
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+---- TYPES
+INT,INT,TINYINT
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/testdata/workloads/functional-query/queries/QueryTest/joins-partitioned.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins-partitioned.test b/testdata/workloads/functional-query/queries/QueryTest/joins-partitioned.test
deleted file mode 100644
index d8acb9a..0000000
--- a/testdata/workloads/functional-query/queries/QueryTest/joins-partitioned.test
+++ /dev/null
@@ -1,36 +0,0 @@
-====
----- QUERY
-# Regression test for IMPALA-1987. Equi-join predicates of outer joins contain
-# TupleIsNullPredicate exprs.
-# TODO: Move back to inline-views.test when this works with the non-partitioned hash
-# join (IMPALA-2375).
-select t1.int_col, t2.int_col, t3.id
-from alltypestiny t1 left outer join
-  (select coalesce(int_col, 384) as int_col from alltypestiny) t2
-  on t1.int_col = t2.int_col
-left outer join
-  (select 0 as id from alltypestiny) t3
-  on t1.int_col = t3.id
-order by 1 limit 5
----- RESULTS
-0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
----- TYPES
-INT,INT,TINYINT
-====
----- QUERY
-# Regression test for IMPALA-2495: Crash: impala::InPredicate::SetLookupPrepare
-# TODO: Move back to exprs.test when this works with the non-partitioned hash join
-# (IMPALA-2375).
-select count(id) from functional.alltypestiny t1
-left join (select coalesce(1, 10) as int_col
-from functional.alltypessmall) t2 on t1.id = t2.int_col
-where t2.int_col in (t2.int_col, 10);
----- RESULTS
-100
----- TYPES
-BIGINT
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters.test b/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters.test
index 7cb5884..3af07af 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters.test
@@ -322,10 +322,12 @@ from tpch_parquet.lineitem l1 join tpch_parquet.lineitem l2
 
 
 ---- QUERY
+####################################################
 # Test case 15: filter with a predicate that has different decimal precision between
 # lhs expr and rhs expr.
 # IMPALA-5597: Runtime filter should be generated and assigned successfully when the
 # source expr and target expr have different decimal types.
+####################################################
 
 select count(*)
 from tpch_parquet.lineitem
@@ -334,3 +336,96 @@ where l_orderkey = 965 and l_extendedprice * l_tax = p_retailprice;
 ---- RESULTS
 1
 ====
+
+
+---- QUERY
+####################################################
+# Test case 16: PHJ nodes that spill should still produce filters.
+# Run this for Parquet only to avoid variable memory
+# consumption / spilling behaviour.
+####################################################
+
+SET BUFFER_POOL_LIMIT=40m;
+SET RUNTIME_FILTER_MODE=GLOBAL;
+SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
+SET RUNTIME_BLOOM_FILTER_SIZE=16M;
+select STRAIGHT_JOIN count(a.l_comment)
+    from tpch_parquet.lineitem a join tpch_parquet.lineitem b
+        on a.l_comment = b.l_comment;
+---- RESULTS
+51495713
+---- TYPES
+BIGINT
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: [1-9]\d* .*
+row_regex: .*Rows processed: 16.38K.*
+row_regex: .*Rows rejected: 0 .*
+row_regex: .*1 of 1 Runtime Filter Published.*
+====
+
+
+---- QUERY
+####################################################
+# Test case 17: Filters are still effective inside subplans
+# (in certain cases). The query has a HJ-scan pair inside a
+# subplan (on the LHS).
+####################################################
+SET RUNTIME_FILTER_MODE=GLOBAL;
+SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
+select straight_join count(1)
+    from alltypes a join complextypestbl b
+        # b.id + 10 confuses planner, so it doesn't think it
+        # can transitively push a.id < 20 to scan of a.
+        on a.id = b.id + 10 join b.int_array where b.id < 10
+---- RESULTS
+10
+---- RUNTIME_PROFILE
+row_regex: .*1 of 1 Runtime Filter Published.*
+row_regex: .*Rows rejected: 2.43K \(2432\).*
+====
+
+
+---- QUERY
+####################################################
+# Test case 18: Filters will not be used if they exceed
+# the configured memory limit on the coordinator.
+# To test this, we need to construct a query where memory
+# consumption on the coordinator exceeds MEM_LIMIT, but
+# not on the backends (because otherwise they will disable
+# the filters through another path). We set MEM_LIMIT to
+# the minimum possible then set filter size to be roughly
+# half that: since the coordinator must aggregate two of
+# these filters (and indeed must create one as well), it
+# will exceed the memory limit. This is checked for
+# indirectly by confirming that the filter had no effect
+# (when usually it would be selective).
+####################################################
+SET RUNTIME_FILTER_MODE=GLOBAL;
+SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
+SET RUNTIME_FILTER_MIN_SIZE=128MB;
+SET RUNTIME_FILTER_MAX_SIZE=500MB;
+SET MEM_LIMIT=140MB;
+select STRAIGHT_JOIN * from alltypes a join [SHUFFLE] alltypes b
+    on a.month = b.id and b.int_col = -3
+---- RESULTS
+---- RUNTIME_PROFILE
+row_regex: .*Filter 0 \(128.00 MB\).*
+row_regex: .*Files processed: 8.*
+row_regex: .*Files rejected: 0.*
+====
+---- QUERY
+# Confirm that with broadcast join, memory limit is not hit.
+SET RUNTIME_FILTER_MODE=GLOBAL;
+SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
+SET RUNTIME_FILTER_MIN_SIZE=128MB;
+SET RUNTIME_FILTER_MAX_SIZE=500MB;
+# Allocate enough memory for the join + filter + scan
+SET MEM_LIMIT=170MB;
+select STRAIGHT_JOIN * from alltypes a join [BROADCAST] alltypes b
+    on a.month = b.id and b.int_col = -3
+---- RESULTS
+---- RUNTIME_PROFILE
+row_regex: .*Filter 0 \(128.00 MB\).*
+row_regex: .*Files processed: 8.*
+row_regex: .*Files rejected: 8.*
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters_phj.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters_phj.test b/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters_phj.test
deleted file mode 100644
index 66391a5..0000000
--- a/testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters_phj.test
+++ /dev/null
@@ -1,94 +0,0 @@
-====
----- QUERY
-# Test cases that are only valid for PHJ joins are in this file.
-
-####################################################
-# Test case 1: PHJ nodes that spill should still produce filters.
-# Run this for Parquet only to avoid variable memory
-# consumption / spilling behaviour.
-####################################################
-
-SET BUFFER_POOL_LIMIT=40m;
-SET RUNTIME_FILTER_MODE=GLOBAL;
-SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
-SET RUNTIME_BLOOM_FILTER_SIZE=16M;
-select STRAIGHT_JOIN count(a.l_comment)
-    from tpch_parquet.lineitem a join tpch_parquet.lineitem b
-        on a.l_comment = b.l_comment;
----- RESULTS
-51495713
----- TYPES
-BIGINT
----- RUNTIME_PROFILE
-row_regex: .*SpilledPartitions: [1-9]\d* .*
-row_regex: .*Rows processed: 16.38K.*
-row_regex: .*Rows rejected: 0 .*
-row_regex: .*1 of 1 Runtime Filter Published.*
-====
-
-
----- QUERY
-####################################################
-# Test case 2: Filters are still effective inside subplans
-# (in certain cases). The query has a HJ-scan pair inside a
-# subplan (on the LHS).
-####################################################
-SET RUNTIME_FILTER_MODE=GLOBAL;
-SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
-select straight_join count(1)
-    from alltypes a join complextypestbl b
-        # b.id + 10 confuses planner, so it doesn't think it
-        # can transitively push a.id < 20 to scan of a.
-        on a.id = b.id + 10 join b.int_array where b.id < 10
----- RESULTS
-10
----- RUNTIME_PROFILE
-row_regex: .*1 of 1 Runtime Filter Published.*
-row_regex: .*Rows rejected: 2.43K \(2432\).*
-====
-
-
----- QUERY
-####################################################
-# Test case 3: Filters will not be used if they exceed
-# the configured memory limit on the coordinator.
-# To test this, we need to construct a query where memory
-# consumption on the coordinator exceeds MEM_LIMIT, but
-# not on the backends (because otherwise they will disable
-# the filters through another path). We set MEM_LIMIT to
-# the minimum possible then set filter size to be roughly
-# half that: since the coordinator must aggregate two of
-# these filters (and indeed must create one as well), it
-# will exceed the memory limit. This is checked for
-# indirectly by confirming that the filter had no effect
-# (when usually it would be selective).
-####################################################
-SET RUNTIME_FILTER_MODE=GLOBAL;
-SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
-SET RUNTIME_FILTER_MIN_SIZE=128MB;
-SET RUNTIME_FILTER_MAX_SIZE=500MB;
-SET MEM_LIMIT=140MB;
-select STRAIGHT_JOIN * from alltypes a join [SHUFFLE] alltypes b
-    on a.month = b.id and b.int_col = -3
----- RESULTS
----- RUNTIME_PROFILE
-row_regex: .*Filter 0 \(128.00 MB\).*
-row_regex: .*Files processed: 8.*
-row_regex: .*Files rejected: 0.*
-====
----- QUERY
-# Confirm that with broadcast join, memory limit is not hit.
-SET RUNTIME_FILTER_MODE=GLOBAL;
-SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
-SET RUNTIME_FILTER_MIN_SIZE=128MB;
-SET RUNTIME_FILTER_MAX_SIZE=500MB;
-# Allocate enough memory for the join + filter + scan
-SET MEM_LIMIT=170MB;
-select STRAIGHT_JOIN * from alltypes a join [BROADCAST] alltypes b
-    on a.month = b.id and b.int_col = -3
----- RESULTS
----- RUNTIME_PROFILE
-row_regex: .*Filter 0 \(128.00 MB\).*
-row_regex: .*Files processed: 8.*
-row_regex: .*Files rejected: 8.*
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/tests/query_test/test_join_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_join_queries.py b/tests/query_test/test_join_queries.py
index a8f2be0..30c659c 100644
--- a/tests/query_test/test_join_queries.py
+++ b/tests/query_test/test_join_queries.py
@@ -61,9 +61,6 @@ class TestJoinQueries(ImpalaTestSuite):
     new_vector.get_value('exec_option')['num_nodes'] = 1
     self.run_test_case('QueryTest/single-node-joins-with-limits-exhaustive', new_vector)
 
-  def test_partitioned_joins(self, vector):
-    self.run_test_case('QueryTest/joins-partitioned', vector)
-
   @SkipIfS3.hbase
   @SkipIfADLS.hbase
   @SkipIfIsilon.hbase

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c1781b73/tests/query_test/test_runtime_filters.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_runtime_filters.py b/tests/query_test/test_runtime_filters.py
index 7710f43..82b5792 100644
--- a/tests/query_test/test_runtime_filters.py
+++ b/tests/query_test/test_runtime_filters.py
@@ -62,6 +62,3 @@ class TestRuntimeRowFilters(ImpalaTestSuite):
 
   def test_row_filters(self, vector):
     self.run_test_case('QueryTest/runtime_row_filters', vector)
-
-  def test_row_filters_phj_only(self, vector):
-    self.run_test_case('QueryTest/runtime_row_filters_phj', vector)


[6/6] incubator-impala git commit: IMPALA-992: [DOCS] Document impala-shell 'rerun' command

Posted by jr...@apache.org.
IMPALA-992: [DOCS] Document impala-shell 'rerun' command

Change-Id: I78b353af3b3d8386c243f884b37442b5283a96a8
Reviewed-on: http://gerrit.cloudera.org:8080/8044
Reviewed-by: John Russell <jr...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/64d0dd93
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/64d0dd93
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/64d0dd93

Branch: refs/heads/master
Commit: 64d0dd93eca13ae27714f1aadf90bf9f6e2c9c15
Parents: 1969c56
Author: John Russell <jr...@cloudera.com>
Authored: Tue Sep 12 14:51:15 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Sep 29 18:38:03 2017 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap                   |  1 +
 docs/topics/impala_shell_commands.xml         | 24 ++++++++
 docs/topics/impala_shell_running_commands.xml | 65 ++++++++++++++++++++++
 3 files changed, 90 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64d0dd93/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 3c47329..3068ca4 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10894,6 +10894,7 @@ under the License.
   <keydef href="topics/impala_shell_options.xml#shell_config_file" keys="shell_config_file"/>
   <keydef href="topics/impala_connecting.xml" keys="connecting"/>
   <keydef href="topics/impala_shell_running_commands.xml" keys="shell_running_commands"/>
+  <keydef href="topics/impala_shell_running_commands.xml#rerun" keys="rerun"/>
   <keydef href="topics/impala_shell_commands.xml" keys="shell_commands"/>
 
   <keydef href="topics/impala_performance.xml" keys="performance"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64d0dd93/docs/topics/impala_shell_commands.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_shell_commands.xml b/docs/topics/impala_shell_commands.xml
index 3469c15..f9a48d9 100644
--- a/docs/topics/impala_shell_commands.xml
+++ b/docs/topics/impala_shell_commands.xml
@@ -245,6 +245,30 @@ under the License.
               </p>
             </entry>
           </row>
+          <row id="rerun_cmd" rev="2.10.0 IMPALA-992">
+            <entry>
+              <p>
+                <codeph>rerun</codeph> or <codeph>@</codeph>
+              </p>
+            </entry>
+            <entry>
+              <p>
+                Executes a previous <cmdname>impala-shell</cmdname> command again,
+                from the list of commands displayed by the <codeph>history</codeph>
+                command. These could be SQL statements, or commands specific to
+                <cmdname>impala-shell</cmdname> such as <codeph>quit</codeph>
+                or <codeph>profile</codeph>.
+              </p>
+              <p>
+                Specify an integer argument. A positive integer <codeph>N</codeph>
+                represents the command labelled <codeph>N</codeph> in the history list.
+                A negative integer <codeph>-N</codeph> represents the <codeph>N</codeph>th
+                command from the end of the list, such as -1 for the most recent command.
+                Commands that are executed again do not produce new entries in the
+                history list.
+              </p>
+            </entry>
+          </row>
           <row id="select_cmd">
             <entry>
               <p>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64d0dd93/docs/topics/impala_shell_running_commands.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_shell_running_commands.xml b/docs/topics/impala_shell_running_commands.xml
index e74d015..75a0758 100644
--- a/docs/topics/impala_shell_running_commands.xml
+++ b/docs/topics/impala_shell_running_commands.xml
@@ -280,4 +280,69 @@ Fetched 5 row(s) in 0.01s
 </codeblock>
 
   </conbody>
+
+  <concept id="rerun" rev="2.10.0 IMPALA-992">
+    <title>Rerunning impala-shell Commands</title>
+    <conbody>
+
+      <p>
+        In <keyword keyref="impala210_full"/> and higher, you can use the
+        <codeph>rerun</codeph> command, or its abbreviation <codeph>@</codeph>,
+        to re-execute commands from the history list. The argument can be
+        a positive integer (reflecting the number shown in <codeph>history</codeph>
+        output) or a negative integer (reflecting the Nth last command in the
+        <codeph>history</codeph> output). For example:
+      </p>
+
+<codeblock><![CDATA[
+[localhost:21000] > select * from p1 order by t limit 5;
+...
+[localhost:21000] > show table stats p1;
++-----------+--------+--------+------------------------------------------------------------+
+| #Rows     | #Files | Size   | Location                                                   |
++-----------+--------+--------+------------------------------------------------------------+
+| 134217728 | 50     | 4.66MB | hdfs://test.example.com:8020/user/hive/warehouse/jdr.db/p1 |
++-----------+--------+--------+------------------------------------------------------------+
+[localhost:21000] > compute stats p1;
++-----------------------------------------+
+| summary                                 |
++-----------------------------------------+
+| Updated 1 partition(s) and 3 column(s). |
++-----------------------------------------+
+[localhost:21000] > history;
+[1]: use jdr;
+[2]: history;
+[3]: show tables;
+[4]: select * from p1 order by t limit 5;
+[5]: show table stats p1;
+[6]: compute stats p1;
+[7]: history;
+[localhost:21000] > @-2; <- Rerun the 2nd last command in the history list
+Rerunning compute stats p1;
++-----------------------------------------+
+| summary                                 |
++-----------------------------------------+
+| Updated 1 partition(s) and 3 column(s). |
++-----------------------------------------+
+[localhost:21000] > history; <- History list is not updated by rerunning commands
+                                or by repeating the last command, in this case 'history'.
+[1]: use jdr;
+[2]: history;
+[3]: show tables;
+[4]: select * from p1 order by t limit 5;
+[5]: show table stats p1;
+[6]: compute stats p1;
+[7]: history;
+[localhost:21000] > @4; <- Rerun command #4 in the history list using short form '@'.
+Rerunning select * from p1 order by t limit 5;
+...
+[localhost:21000] > rerun 4; <- Rerun command #4 using long form 'rerun'.
+Rerunning select * from p1 order by t limit 5;
+...
+]]>
+</codeblock>
+
+    </conbody>
+  </concept>
+
 </concept>


[2/6] incubator-impala git commit: IMPALA-5870: Improve runtime profile for partial sort

Posted by jr...@apache.org.
IMPALA-5870: Improve runtime profile for partial sort

A recent change (IMPALA-5498) added the ability to do partial sorts,
which divide their input into runs, each of which is sorted individually,
avoiding the need to spill. Some of the debug output was not updated to
distinguish partial sorts from regular sorts, leading to confusion.

This patch removes the counters 'SpilledRuns' and 'TotalMergesPerformed',
since they will always be 0, and renames the 'InitialRunsCreated' counter
to 'RunsCreated', since the 'Initial' refers to the fact that in a regular
sort those runs may be spilled or merged.

It also adds a profile info string 'SortType' that can take the values
'Total', 'TopN', or 'Partial' to reflect the type of exec node being
used.

Example profile snippet for a partial sort:
SORT_NODE (id=2):(Total: 403.261us, non-child: 382.029us, % non-child: 94.73%)
 SortType: Partial
 ExecOption: Codegen Enabled
    - NumRowsPerRun: (Avg: 44 (44) ; Min: 44 (44) ; Max: 44 (44) ; Number of samples: 1)
    - InMemorySortTime: 34.201us
    - PeakMemoryUsage: 2.02 MB (2117632)
    - RowsReturned: 44 (44)
    - RowsReturnedRate: 109.11 K/sec
    - RunsCreated: 1 (1)
    - SortDataSize: 572.00 B (572)

Testing:
- Manually ran several sorting queries and inspected their profiles
- Updated a kudu_insert test that relied on the 'SpilledRuns' counter
  being 0 for a partial sort.

Change-Id: I2b15af78d8299db8edc44ff820c85db1cbe0be1b
Reviewed-on: http://gerrit.cloudera.org:8080/8123
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4d49099a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4d49099a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4d49099a

Branch: refs/heads/master
Commit: 4d49099a8bbea3f24f53272f321a19266dc932b8
Parents: 439f245
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu Sep 21 12:04:25 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Sep 27 18:55:26 2017 +0000

----------------------------------------------------------------------
 be/src/exec/partial-sort-node.cc                          |  1 +
 be/src/exec/sort-node.cc                                  |  1 +
 be/src/exec/topn-node.cc                                  |  1 +
 be/src/runtime/sorter.cc                                  | 10 +++++++---
 .../functional-query/queries/QueryTest/kudu_insert.test   |  2 +-
 5 files changed, 11 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/exec/partial-sort-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partial-sort-node.cc b/be/src/exec/partial-sort-node.cc
index 88b2f26..107c29f 100644
--- a/be/src/exec/partial-sort-node.cc
+++ b/be/src/exec/partial-sort-node.cc
@@ -50,6 +50,7 @@ Status PartialSortNode::Init(const TPlanNode& tnode, RuntimeState* state) {
       *child(0)->row_desc(), state, &sort_tuple_exprs_));
   is_asc_order_ = tnode.sort_node.sort_info.is_asc_order;
   nulls_first_ = tnode.sort_node.sort_info.nulls_first;
+  runtime_profile()->AddInfoString("SortType", "Partial");
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/exec/sort-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/sort-node.cc b/be/src/exec/sort-node.cc
index 80df214..33b3acb 100644
--- a/be/src/exec/sort-node.cc
+++ b/be/src/exec/sort-node.cc
@@ -45,6 +45,7 @@ Status SortNode::Init(const TPlanNode& tnode, RuntimeState* state) {
       *child(0)->row_desc(), state, &sort_tuple_exprs_));
   is_asc_order_ = tnode.sort_node.sort_info.is_asc_order;
   nulls_first_ = tnode.sort_node.sort_info.nulls_first;
+  runtime_profile()->AddInfoString("SortType", "Total");
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/exec/topn-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index 9b94fe7..5bba89d 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -65,6 +65,7 @@ Status TopNNode::Init(const TPlanNode& tnode, RuntimeState* state) {
   nulls_first_ = tnode.sort_node.sort_info.nulls_first;
   DCHECK_EQ(conjuncts_.size(), 0)
       << "TopNNode should never have predicates to evaluate.";
+  runtime_profile()->AddInfoString("SortType", "TopN");
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/be/src/runtime/sorter.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc
index d2e6bb2..16984ca 100644
--- a/be/src/runtime/sorter.cc
+++ b/be/src/runtime/sorter.cc
@@ -1516,9 +1516,13 @@ Status Sorter::Prepare(ObjectPool* obj_pool, MemPool* expr_mem_pool) {
   in_mem_tuple_sorter_.reset(new TupleSorter(compare_less_than_, page_len_,
       sort_tuple_desc->byte_size(), state_));
 
-  initial_runs_counter_ = ADD_COUNTER(profile_, "InitialRunsCreated", TUnit::UNIT);
-  spilled_runs_counter_ = ADD_COUNTER(profile_, "SpilledRuns", TUnit::UNIT);
-  num_merges_counter_ = ADD_COUNTER(profile_, "TotalMergesPerformed", TUnit::UNIT);
+  if (enable_spilling_) {
+    initial_runs_counter_ = ADD_COUNTER(profile_, "InitialRunsCreated", TUnit::UNIT);
+    spilled_runs_counter_ = ADD_COUNTER(profile_, "SpilledRuns", TUnit::UNIT);
+    num_merges_counter_ = ADD_COUNTER(profile_, "TotalMergesPerformed", TUnit::UNIT);
+  } else {
+    initial_runs_counter_ = ADD_COUNTER(profile_, "RunsCreated", TUnit::UNIT);
+  }
   in_mem_sort_timer_ = ADD_TIMER(profile_, "InMemorySortTime");
   sorted_data_size_ = ADD_COUNTER(profile_, "SortDataSize", TUnit::BYTES);
   run_sizes_ = ADD_SUMMARY_STATS_COUNTER(profile_, "NumRowsPerRun", TUnit::UNIT);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4d49099a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
index 1150898..420e42c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test
@@ -449,5 +449,5 @@ set mem_limit=400m;
 create table kudu_test primary key(a, b) partition by hash(a, b) partitions 8 stored as kudu as
 select l_orderkey a, concat(l_comment, l_comment, l_comment) b from tpch.lineitem
 ---- RUNTIME_PROFILE
-row_regex: .*SpilledRuns: 0 \(0\)
+row_regex: .*SortType: Partial
 ====


[4/6] incubator-impala git commit: IMPALA-5307: Part 3: remove TODO from RCFile

Posted by jr...@apache.org.
IMPALA-5307: Part 3: remove TODO from RCFile

Our RCFile implementation already copies out data (it sets
set_contains_tuple_data to false). Remove a TODO that suggests undoing
this. The current implementation is suboptimal, but improving RCFile
performance is not a priority.

Change-Id: I594bb246cab64e15de750114890881a2ad9f504d
Reviewed-on: http://gerrit.cloudera.org:8080/8151
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/a93b7c52
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/a93b7c52
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/a93b7c52

Branch: refs/heads/master
Commit: a93b7c52bf17838bcbfeb5002b982f213df4be31
Parents: c1781b7
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Sep 26 23:41:48 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 28 21:54:39 2017 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-rcfile-scanner.cc | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a93b7c52/be/src/exec/hdfs-rcfile-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-rcfile-scanner.cc b/be/src/exec/hdfs-rcfile-scanner.cc
index 6851bd6..2ea8229 100644
--- a/be/src/exec/hdfs-rcfile-scanner.cc
+++ b/be/src/exec/hdfs-rcfile-scanner.cc
@@ -433,7 +433,6 @@ Status HdfsRCFileScanner::ReadColumnBuffers() {
       uint8_t* uncompressed_data;
       RETURN_IF_FALSE(stream_->ReadBytes(
           column.buffer_len, &uncompressed_data, &parse_status_));
-      // TODO: this is bad.  Remove this copy.
       memcpy(row_group_buffer_ + column.start_offset,
           uncompressed_data, column.buffer_len);
     }


[5/6] incubator-impala git commit: IMPALA-5986: Correct set-option logic to recognize digits in names.

Posted by jr...@apache.org.
IMPALA-5986: Correct set-option logic to recognize digits in names.

This issue arose during work on IMPALA-5376 and prevented tests from passing consistently.

Change-Id: Ia3ba641553ff827dbd4673b9fe7ed7d9d5e83052
Reviewed-on: http://gerrit.cloudera.org:8080/8166
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1969c56c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1969c56c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1969c56c

Branch: refs/heads/master
Commit: 1969c56c2fba248794d4d52eae4410998afa0709
Parents: a93b7c5
Author: Tim Wood <tw...@cloudera.com>
Authored: Thu Sep 28 10:44:23 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Sep 29 01:50:31 2017 +0000

----------------------------------------------------------------------
 tests/common/impala_test_suite.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1969c56c/tests/common/impala_test_suite.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index b0857e9..0732695 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -99,8 +99,8 @@ TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs"
 IMPALA_HOME = os.getenv("IMPALA_HOME")
 EE_TEST_LOGS_DIR = os.getenv("IMPALA_EE_TEST_LOGS_DIR")
 # Match any SET statement. Assume that query options' names
-# only contain alphabets and underscores.
-SET_PATTERN = re.compile(r'\s*set\s*([a-zA-Z_]+)=*', re.I)
+# only contain alphabets, underscores and digits after position 1.
+SET_PATTERN = re.compile(r'\s*set\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=*', re.I)
 
 # Base class for Impala tests. All impala test cases should inherit from this class
 class ImpalaTestSuite(BaseTestSuite):
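
For context, a small standalone Python check (an assumption of this edit, not
part of the patch; the option name is invented) showing why the old pattern
truncated option names that contain digits, while the new pattern captures
them fully:

  import re

  OLD_SET_PATTERN = re.compile(r'\s*set\s*([a-zA-Z_]+)=*', re.I)
  NEW_SET_PATTERN = re.compile(r'\s*set\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=*', re.I)

  stmt = "set fake_option_v2=1"                # hypothetical option with a digit
  print(OLD_SET_PATTERN.match(stmt).group(1))  # fake_option_v  (digit cut off)
  print(NEW_SET_PATTERN.match(stmt).group(1))  # fake_option_v2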