You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/16 01:21:00 UTC

[impala] 10/11: IMPALA-5604: document DISABLE_CODEGEN_ROWS_THRESHOLD

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fb8332ccd0fe6b44f0876df4fb78f0bea0322357
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Jun 14 16:13:58 2018 -0700

    IMPALA-5604: document DISABLE_CODEGEN_ROWS_THRESHOLD
    
    Also fix a couple of nits in EXEC_SINGLE_NODE_ROWS_THRESHOLD.
    
    Change-Id: I709cd55e3869888feb645f85e61a99901d41d479
    Reviewed-on: http://gerrit.cloudera.org:8080/10727
    Reviewed-by: Alex Rodoni <ar...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docs/impala.ditamap                                |  1 +
 docs/impala_keydefs.ditamap                        |  1 +
 .../impala_disable_codegen_rows_threshold.xml      | 97 ++++++++++++++++++++++
 .../impala_exec_single_node_rows_threshold.xml     | 11 +--
 4 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index b91ad74..a730559 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -185,6 +185,7 @@ under the License.
           <topicref rev="2.10.0 IMPALA-3200" href="topics/impala_default_spillable_buffer_size.xml"/>
           <topicref audience="hidden" href="topics/impala_disable_cached_reads.xml"/>
           <topicref href="topics/impala_disable_codegen.xml"/>
+          <topicref rev="2.10.0 IMPALA-5483" href="topics/impala_disable_codegen_rows_threshold.xml"/>
           <topicref audience="hidden" href="topics/impala_disable_outermost_topn.xml"/>
           <topicref rev="2.5.0" href="topics/impala_disable_row_runtime_filtering.xml"/>
           <topicref rev="2.5.0" href="topics/impala_disable_streaming_preaggregations.xml"/>
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 200bf79..8340382 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10784,6 +10784,7 @@ under the License.
   <keydef rev="2.10.0 IMPALA-3200" href="topics/impala_default_spillable_buffer_size.xml" keys="default_spillable_buffer_size"/>
   <keydef href="topics/impala_disable_cached_reads.xml" keys="disable_cached_reads"/>
   <keydef href="topics/impala_disable_codegen.xml" keys="disable_codegen"/>
+  <keydef href="topics/impala_disable_codegen_rows_threshold.xml" keys="disable_codegen_rows_threshold"/>
   <keydef href="topics/impala_disable_outermost_topn.xml" keys="disable_outermost_topn"/>
   <keydef href="topics/impala_disable_row_runtime_filtering.xml" keys="disable_row_runtime_filtering"/>
   <keydef href="topics/impala_disable_streaming_preaggregations.xml" keys="disable_streaming_preaggregations"/>
diff --git a/docs/topics/impala_disable_codegen_rows_threshold.xml b/docs/topics/impala_disable_codegen_rows_threshold.xml
new file mode 100644
index 0000000..b16a691
--- /dev/null
+++ b/docs/topics/impala_disable_codegen_rows_threshold.xml
@@ -0,0 +1,97 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="2.10.0" id="disable_codegen_rows_threshold">
+
+  <title>DISABLE_CODEGEN_ROWS_THRESHOLD Query Option (<keyword keyref="impala210_full"/> or higher only)</title>
+  <titlealts audience="PDF"><navtitle>DISABLE_CODEGEN_ROWS_THRESHOLD</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Scalability"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p rev="2.10.0">
+      <indexterm audience="hidden">DISABLE_CODEGEN_ROWS_THRESHOLD query option</indexterm>
+      This setting controls the cutoff point (in terms of number of rows processed per Impala daemon) below which
+      Impala disables native code generation for the whole query.
+
+      Native code generation is very beneficial for queries that process many rows because
+      it reduces the time taken to process each row. However, generating the native code
+      adds latency to query startup. Therefore, automatically disabling codegen for
+      queries that process relatively small amounts of data can improve query response time.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>SET DISABLE_CODEGEN_ROWS_THRESHOLD=<varname>number_of_rows</varname></codeblock>
+
+    <p>
+      <b>Type:</b> numeric
+    </p>
+
+    <p>
+      <b>Default:</b> 50000
+    </p>
+
+    <p>
+      <b>Usage notes:</b> Typically, you increase the default value to make this optimization apply to more queries.
+      If incorrect or corrupted table and column statistics cause Impala to apply this optimization incorrectly to
+      queries that actually involve substantial work, those queries might run more slowly because codegen
+      is disabled. In that case, recompute statistics with the <codeph>COMPUTE STATS</codeph> or
+      <codeph>COMPUTE INCREMENTAL STATS</codeph> statement. If there is a problem collecting accurate statistics,
+      you can turn this feature off by setting the value to 0.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/internals_blurb"/>
+
+    <p>
+      This setting applies to queries where the number of rows processed can be accurately
+      determined, either through table and column statistics, or by the presence of a
+      <codeph>LIMIT</codeph> clause. If Impala cannot accurately estimate the number of rows,
+      then this setting does not apply.
+    </p>
+
+    <p rev="2.3.0">
+      If a query uses the complex data types <codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>,
+      or <codeph>MAP</codeph>, then codegen is never automatically disabled regardless of the
+      <codeph>DISABLE_CODEGEN_ROWS_THRESHOLD</codeph> setting.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/added_in_2100"/>
+
+<!-- Don't have any other places that tie into this particular optimization technique yet.
+Potentially: conceptual topics about code generation, distributed queries
+
+<p conref="../shared/impala_common.xml#common/related_info"/>
+<p>
+</p>
+-->
+
+  </conbody>
+
+</concept>
diff --git a/docs/topics/impala_exec_single_node_rows_threshold.xml b/docs/topics/impala_exec_single_node_rows_threshold.xml
index 4822712..62f7988 100644
--- a/docs/topics/impala_exec_single_node_rows_threshold.xml
+++ b/docs/topics/impala_exec_single_node_rows_threshold.xml
@@ -41,8 +41,8 @@ under the License.
       as a <q>small</q> query, turning off optimizations such as parallel execution and native code generation. The
       overhead for these optimizations is applicable for queries involving substantial amounts of data, but it
       makes sense to skip them for queries involving tiny amounts of data. Reducing the overhead for small queries
-      allows Impala to complete them more quickly, keeping YARN resources, admission control slots, and so on
-      available for data-intensive queries.
+      allows Impala to complete them more quickly, keeping admission control slots, CPU, memory, and so on
+      available for resource-intensive queries.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -69,9 +69,10 @@ under the License.
     <p conref="../shared/impala_common.xml#common/internals_blurb"/>
 
     <p>
-      This setting applies to query fragments where the amount of data to scan can be accurately determined, either
-      through table and column statistics, or by the presence of a <codeph>LIMIT</codeph> clause. If Impala cannot
-      accurately estimate the size of the input data, this setting does not apply.
+      This setting applies to queries where the number of rows processed can be accurately
+      determined, either through table and column statistics, or by the presence of a
+      <codeph>LIMIT</codeph> clause. If Impala cannot accurately estimate the number of rows,
+      then this setting does not apply.
     </p>
 
     <p rev="2.3.0">