You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/10/16 03:25:45 UTC

[impala] branch master updated (3ae5f98 -> 5377477)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 3ae5f98  IMPALA-9034: fix distcc+ccache
     new 2d74dfd  IMPALA-8920: [DOCS] Documented the query option for disabling HBase row estimation
     new 0cd690d  IMPALA-9025: Handle AnalysisException caused by expr_rewrites properly
     new 5377477  IMPALA-9028: impala-shell should not try to reconnect if quitting

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/impala.ditamap                                |  2 +-
 docs/impala_keydefs.ditamap                        |  1 -
 ... => impala_disable_hbase_num_rows_estimate.xml} | 43 +++++++++++---------
 docs/topics/impala_disable_outermost_topn.xml      | 47 ----------------------
 .../main/java/org/apache/impala/analysis/Expr.java | 13 ++++--
 shell/impala_shell.py                              |  9 ++++-
 .../functional-query/queries/QueryTest/alias.test  |  9 +++++
 tests/shell/test_shell_interactive.py              | 15 +++++++
 8 files changed, 68 insertions(+), 71 deletions(-)
 copy docs/topics/{impala_disable_codegen.xml => impala_disable_hbase_num_rows_estimate.xml} (51%)
 delete mode 100644 docs/topics/impala_disable_outermost_topn.xml


[impala] 01/03: IMPALA-8920: [DOCS] Documented the query option for disabling HBase row estimation

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2d74dfd5e2bf4791c116e14911631715c0db3a86
Author: Alex Rodoni <ar...@cloudera.com>
AuthorDate: Tue Oct 8 17:03:56 2019 -0700

    IMPALA-8920: [DOCS] Documented the query option for disabling HBase row estimation
    
    Change-Id: Id131b66a3457ef6cbc326a0f3ed99de2a3950c3e
    Reviewed-on: http://gerrit.cloudera.org:8080/14394
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
---
 docs/impala.ditamap                                |  2 +-
 docs/impala_keydefs.ditamap                        |  1 -
 .../impala_disable_hbase_num_rows_estimate.xml     | 89 ++++++++++++++++++++++
 docs/topics/impala_disable_outermost_topn.xml      | 47 ------------
 4 files changed, 90 insertions(+), 49 deletions(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index be4b76b..8e11b89 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -186,7 +186,7 @@ under the License.
           <topicref rev="3.3.0" href="topics/impala_default_transactional_type.xml"/>
           <topicref href="topics/impala_disable_codegen.xml"/>
           <topicref rev="2.10.0 IMPALA-5483" href="topics/impala_disable_codegen_rows_threshold.xml"/>
-          <topicref audience="hidden" href="topics/impala_disable_outermost_topn.xml"/>
+          <topicref href="topics/impala_disable_hbase_num_rows_estimate.xml"/>
           <topicref rev="2.5.0" href="topics/impala_disable_row_runtime_filtering.xml"/>
           <topicref rev="2.5.0" href="topics/impala_disable_streaming_preaggregations.xml"/>
           <topicref href="topics/impala_disable_unsafe_spills.xml"/>
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index c71bdf2..efb6f5f 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10790,7 +10790,6 @@ under the License.
   <keydef rev="2.10.0 IMPALA-3200" href="topics/impala_default_spillable_buffer_size.xml" keys="default_spillable_buffer_size"/>
   <keydef href="topics/impala_disable_codegen.xml" keys="disable_codegen"/>
   <keydef href="topics/impala_disable_codegen_rows_threshold.xml" keys="disable_codegen_rows_threshold"/>
-  <keydef href="topics/impala_disable_outermost_topn.xml" keys="disable_outermost_topn"/>
   <keydef href="topics/impala_disable_row_runtime_filtering.xml" keys="disable_row_runtime_filtering"/>
   <keydef href="topics/impala_disable_streaming_preaggregations.xml" keys="disable_streaming_preaggregations"/>
   <keydef href="topics/impala_disable_unsafe_spills.xml" keys="disable_unsafe_spills"/>
diff --git a/docs/topics/impala_disable_hbase_num_rows_estimate.xml b/docs/topics/impala_disable_hbase_num_rows_estimate.xml
new file mode 100644
index 0000000..998d551
--- /dev/null
+++ b/docs/topics/impala_disable_hbase_num_rows_estimate.xml
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="disable_hbase_num_rows_estimate">
+
+  <title>DISABLE_HBASE_NUM_ROWS_ESTIMATE Query Option</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>DISABLE_HBASE_NUM_ROWS_ESTIMATE</navtitle>
+
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Performance"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Use the <codeph>DISABLE_HBASE_NUM_ROWS_ESTIMATE</codeph> query option to disable key
+      sampling of HBase tables in row count and row size estimation.
+    </p>
+
+    <p>
+      While generating a plan for an HBase query, the planner samples the underlying HBase
+      tables to estimate their row count and row size, and the sampling can negatively impact
+      the planning time. When the HBase table stats does not change much in short time, disable
+      the sampling by setting the <codeph>DISABLE_HBASE_NUM_ROWS_ESTIMATE</codeph> query option
+      to <codeph>TRUE</codeph>. And Impala planner will fall back to using Hive Metastore (HMS)
+      table stats instead.
+    </p>
+
+    <p>
+      When <codeph>DISABLE_HBASE_NUM_ROWS_ESTIMATE</codeph> query option is set to
+      <codeph>TRUE</codeph>, you need to update the HMS table stats by running <codeph>COMPUTE
+      STATS</codeph>. Alternatively, you can manually set table statistics by running
+      <codeph>ALTER TABLE</codeph>. See <xref
+        href="impala_perf_stats.xml#perf_stats"/>
+      for details.
+    </p>
+
+    <p>
+      The following values are supported:
+      <ul>
+        <li>
+          <codeph>TRUE</codeph> or <codeph>1</codeph>: Disables the normal key sampling of HBase
+          tables and uses HMS table stats for estimation.
+        </li>
+
+        <li>
+          <codeph>FALSE</codeph> or <codeph>0</codeph>: Enables the normal sampling of HBase
+          tables.
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      <b>Type:</b> <codeph>BOOLEAN</codeph>
+    </p>
+
+    <p>
+      <b>Default:</b> <codeph>FALSE</codeph>
+    </p>
+
+  </conbody>
+
+</concept>
diff --git a/docs/topics/impala_disable_outermost_topn.xml b/docs/topics/impala_disable_outermost_topn.xml
deleted file mode 100644
index 2bc1f12..0000000
--- a/docs/topics/impala_disable_outermost_topn.xml
+++ /dev/null
@@ -1,47 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="disable_outermost_topn" rev="2.5.0">
-
-  <title>DISABLE_OUTERMOST_TOPN Query Option</title>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-      <data name="Category" value="Impala Query Options"/>
-      <data name="Category" value="Developers"/>
-      <data name="Category" value="Data Analysts"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p rev="2.5.0">
-      <indexterm audience="hidden">DISABLE_OUTERMOST_TOPN query option</indexterm>
-    </p>
-
-    <p>
-      <b>Type:</b>
-    </p>
-
-    <p>
-      <b>Default:</b>
-    </p>
-  </conbody>
-</concept>


[impala] 02/03: IMPALA-9025: Handle AnalysisException caused by expr_rewrites properly

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0cd690dcbc5b90021a14804d7dc1aaeecca77439
Author: Yongzhi Chen <yc...@cloudera.com>
AuthorDate: Tue Oct 8 10:14:31 2019 -0400

    IMPALA-9025: Handle AnalysisException caused by expr_rewrites
    properly
    
    When the optimizer rewrites conjunct exprs with constant values,
    a new expr may cause AnalysisException. In this case,
    the conjuncts should use the original expr, not the intermediate
    expr produced by propagateConstants. Fixed optimizeConjuncts
    to handle this scenario properly.
    
    Tests:
    Add unit test for alias.
    Ran exhaustive tests.
    
    Change-Id: Ic57bf3f4cdabfe9c5bb304d735bfbf1c0ca7a274
    Reviewed-on: http://gerrit.cloudera.org:8080/14403
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 fe/src/main/java/org/apache/impala/analysis/Expr.java       | 13 ++++++++++---
 .../workloads/functional-query/queries/QueryTest/alias.test |  9 +++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 84ee9b9..628e0c5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1182,15 +1182,20 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
    */
   public static boolean optimizeConjuncts(List<Expr> conjuncts, Analyzer analyzer) {
     Preconditions.checkNotNull(conjuncts);
+    List<Expr> tmpConjuncts = new ArrayList<>();
     try {
       BitSet candidates = new BitSet(conjuncts.size());
       candidates.set(0, Math.min(conjuncts.size(), CONST_PROPAGATION_EXPR_LIMIT));
       int transfers = 0;
-
+      tmpConjuncts.addAll(conjuncts);
       // Constant propagation may make other slots constant, so repeat the process
       // until there are no more changes.
       while (!candidates.isEmpty()) {
-        BitSet changed = propagateConstants(conjuncts, candidates, analyzer);
+        // Use tmpConjuncts instead of conjuncts because propagateConstants can
+        // change the content of the first input param. We do not want to
+        // change the expr in conjucts before make sure the constant propagation
+        // does not cause analysis failures.
+        BitSet changed = propagateConstants(tmpConjuncts, candidates, analyzer);
         candidates.clear();
         int pruned = 0;
         for (int i = changed.nextSetBit(0); i >= 0; i = changed.nextSetBit(i+1)) {
@@ -1200,12 +1205,13 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
           int index = i - pruned;
           Preconditions.checkState(index >= 0);
           ExprRewriter rewriter = analyzer.getExprRewriter();
-          Expr rewritten = rewriter.rewrite(conjuncts.get(index), analyzer);
+          Expr rewritten = rewriter.rewrite(tmpConjuncts.get(index), analyzer);
           // Re-analyze to add implicit casts and update cost
           rewritten.reset();
           rewritten.analyze(analyzer);
           if (!rewritten.isConstant()) {
             conjuncts.set(index, rewritten);
+            tmpConjuncts.set(index, rewritten);
             if (++transfers < CONST_PROPAGATION_EXPR_LIMIT) candidates.set(index, true);
             continue;
           }
@@ -1221,6 +1227,7 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
           if (IS_TRUE_LITERAL.apply(rewritten)) {
             pruned++;
             conjuncts.remove(index);
+            tmpConjuncts.remove(index);
           }
         }
       }
diff --git a/testdata/workloads/functional-query/queries/QueryTest/alias.test b/testdata/workloads/functional-query/queries/QueryTest/alias.test
index e14488f..8560048 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/alias.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/alias.test
@@ -85,3 +85,12 @@ select sum(id) over(order by id) a from alltypestiny order by a
 ---- TYPES
 bigint
 ====
+---- QUERY
+# literal alias
+# IMPALA-9025: When constant propagation optimization fails, the query fails
+# with "ERROR: IllegalStateException: null"
+select * from (select t.* from alltypestiny t
+inner join (select 10 bigint_col) d where
+t.int_col < d.bigint_col ) q
+where int_col <> bigint_col and int_col = 10
+====


[impala] 03/03: IMPALA-9028: impala-shell should not try to reconnect if quitting

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 53774770bcfdc21ca86717653da31d0e0e93419e
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Tue Oct 8 14:22:20 2019 -0700

    IMPALA-9028: impala-shell should not try to reconnect if quitting
    
    When the impala-shell is disconnected, it will try to reconnect
    for any command that a user runs (as part of ImpalaShell's precmd()).
    This doesn't make sense when the user is trying to quit the shell
    (i.e. by typing 'quit' or 'exit' or hitting Ctrl-D).
    
    This skips the attempt to reconnect when quitting the shell.
    
    Testing:
     - Added test in test_shell_interactive.py
     - Verified by hand
    
    Change-Id: I6a76bc515db609498fa8772e9f0b0c547b82c09e
    Reviewed-on: http://gerrit.cloudera.org:8080/14391
    Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 shell/impala_shell.py                 |  9 ++++++++-
 tests/shell/test_shell_interactive.py | 15 +++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 35af848..554ae26 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -147,6 +147,8 @@ class ImpalaShell(object, cmd.Cmd):
   HISTORY_FILE_QUERY_DELIM = '_IMP_DELIM_'
   # Strings that are interpreted as True for some shell options.
   TRUE_STRINGS = ("true", "TRUE", "True", "1")
+  # List of quit commands
+  QUIT_COMMANDS = ("quit", "exit")
 
   VALID_SHELL_OPTIONS = {
     'LIVE_PROGRESS' : (lambda x: x in ImpalaShell.TRUE_STRINGS, "print_progress"),
@@ -580,6 +582,10 @@ class ImpalaShell(object, cmd.Cmd):
         print_to_stderr("Failed to reconnect and close (try %i/%i): %s" % (
             cancel_try + 1, ImpalaShell.CANCELLATION_TRIES, err_msg))
 
+  def _is_quit_command(self, command):
+    # Do a case insensitive check
+    return command.lower() in ImpalaShell.QUIT_COMMANDS
+
   def set_prompt(self, db):
     self.prompt = ImpalaShell.PROMPT_FORMAT.format(
         host=self.impalad[0], port=self.impalad[1], db=db)
@@ -597,7 +603,8 @@ class ImpalaShell(object, cmd.Cmd):
       # If cmdqueue is populated, then commands are executed from the cmdqueue, and user
       # input is ignored. Send an empty string as the user input just to be safe.
       return str()
-    if not self.imp_client.is_connected():
+    # There is no need to reconnect if we are quitting.
+    if not self.imp_client.is_connected() and not self._is_quit_command(args):
       print_to_stderr("Connection lost, reconnecting...")
       self._connect()
       self._validate_database(immediately=True)
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index 92e5ab2..65b85a7 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -238,6 +238,21 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
                                           wait_until_connected=False)
     assert ImpalaShellClass.DISCONNECTED_PROMPT in result.stdout, result.stderr
 
+  def test_quit_no_reconnect(self, vector):
+    """Test that a disconnected shell does not try to reconnect if quitting"""
+    result = run_impala_shell_interactive(vector, 'quit;', shell_args=['-ifoo'],
+                                          wait_until_connected=False)
+    assert "reconnect" not in result.stderr
+
+    result = run_impala_shell_interactive(vector, 'exit;', shell_args=['-ifoo'],
+                                          wait_until_connected=False)
+    assert "reconnect" not in result.stderr
+
+    # Null case: This is not quitting, so it will result in an attempt to reconnect.
+    result = run_impala_shell_interactive(vector, 'show tables;', shell_args=['-ifoo'],
+                                          wait_until_connected=False)
+    assert "reconnect" in result.stderr
+
   def test_bash_cmd_timing(self, vector):
     """Test existence of time output in bash commands run from shell"""
     args = ["! ls;"]