You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/10/16 03:25:46 UTC

[impala] 01/03: IMPALA-8920: [DOCS] Documented the query option for disabling HBase row estimation

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2d74dfd5e2bf4791c116e14911631715c0db3a86
Author: Alex Rodoni <ar...@cloudera.com>
AuthorDate: Tue Oct 8 17:03:56 2019 -0700

    IMPALA-8920: [DOCS] Documented the query option for disabling HBase row estimation
    
    Change-Id: Id131b66a3457ef6cbc326a0f3ed99de2a3950c3e
    Reviewed-on: http://gerrit.cloudera.org:8080/14394
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
---
 docs/impala.ditamap                                |  2 +-
 docs/impala_keydefs.ditamap                        |  1 -
 .../impala_disable_hbase_num_rows_estimate.xml     | 89 ++++++++++++++++++++++
 docs/topics/impala_disable_outermost_topn.xml      | 47 ------------
 4 files changed, 90 insertions(+), 49 deletions(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index be4b76b..8e11b89 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -186,7 +186,7 @@ under the License.
           <topicref rev="3.3.0" href="topics/impala_default_transactional_type.xml"/>
           <topicref href="topics/impala_disable_codegen.xml"/>
           <topicref rev="2.10.0 IMPALA-5483" href="topics/impala_disable_codegen_rows_threshold.xml"/>
-          <topicref audience="hidden" href="topics/impala_disable_outermost_topn.xml"/>
+          <topicref href="topics/impala_disable_hbase_num_rows_estimate.xml"/>
           <topicref rev="2.5.0" href="topics/impala_disable_row_runtime_filtering.xml"/>
           <topicref rev="2.5.0" href="topics/impala_disable_streaming_preaggregations.xml"/>
           <topicref href="topics/impala_disable_unsafe_spills.xml"/>
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index c71bdf2..efb6f5f 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10790,7 +10790,6 @@ under the License.
   <keydef rev="2.10.0 IMPALA-3200" href="topics/impala_default_spillable_buffer_size.xml" keys="default_spillable_buffer_size"/>
   <keydef href="topics/impala_disable_codegen.xml" keys="disable_codegen"/>
   <keydef href="topics/impala_disable_codegen_rows_threshold.xml" keys="disable_codegen_rows_threshold"/>
-  <keydef href="topics/impala_disable_outermost_topn.xml" keys="disable_outermost_topn"/>
   <keydef href="topics/impala_disable_row_runtime_filtering.xml" keys="disable_row_runtime_filtering"/>
   <keydef href="topics/impala_disable_streaming_preaggregations.xml" keys="disable_streaming_preaggregations"/>
   <keydef href="topics/impala_disable_unsafe_spills.xml" keys="disable_unsafe_spills"/>
diff --git a/docs/topics/impala_disable_hbase_num_rows_estimate.xml b/docs/topics/impala_disable_hbase_num_rows_estimate.xml
new file mode 100644
index 0000000..998d551
--- /dev/null
+++ b/docs/topics/impala_disable_hbase_num_rows_estimate.xml
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="disable_hbase_num_rows_estimate">
+
+  <title>DISABLE_HBASE_NUM_ROWS_ESTIMATE Query Option</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>DISABLE_HBASE_NUM_ROWS_ESTIMATE</navtitle>
+
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Performance"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Use the <codeph>DISABLE_HBASE_NUM_ROWS_ESTIMATE</codeph> query option to disable key
+      sampling of HBase tables in row count and row size estimation.
+    </p>
+
+    <p>
+      While generating a plan for an HBase query, the planner samples the underlying HBase
+      tables to estimate their row count and row size, and the sampling can negatively impact
+      the planning time. When the HBase table stats do not change much in a short time, disable
+      the sampling by setting the <codeph>DISABLE_HBASE_NUM_ROWS_ESTIMATE</codeph> query option
+      to <codeph>TRUE</codeph>. The Impala planner then falls back to using Hive Metastore (HMS)
+      table stats instead.
+    </p>
+
+    <p>
+      When the <codeph>DISABLE_HBASE_NUM_ROWS_ESTIMATE</codeph> query option is set to
+      <codeph>TRUE</codeph>, you need to update the HMS table stats by running <codeph>COMPUTE
+      STATS</codeph>. Alternatively, you can manually set table statistics by running
+      <codeph>ALTER TABLE</codeph>. See <xref
+        href="impala_perf_stats.xml#perf_stats"/>
+      for details.
+    </p>
+
+    <p>
+      The following values are supported:
+      <ul>
+        <li>
+          <codeph>TRUE</codeph> or <codeph>1</codeph>: Disables the normal key sampling of HBase
+          tables and uses HMS table stats for estimation.
+        </li>
+
+        <li>
+          <codeph>FALSE</codeph> or <codeph>0</codeph>: Enables the normal sampling of HBase
+          tables.
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      <b>Type:</b> <codeph>BOOLEAN</codeph>
+    </p>
+
+    <p>
+      <b>Default:</b> <codeph>FALSE</codeph>
+    </p>
+
+  </conbody>
+
+</concept>
diff --git a/docs/topics/impala_disable_outermost_topn.xml b/docs/topics/impala_disable_outermost_topn.xml
deleted file mode 100644
index 2bc1f12..0000000
--- a/docs/topics/impala_disable_outermost_topn.xml
+++ /dev/null
@@ -1,47 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="disable_outermost_topn" rev="2.5.0">
-
-  <title>DISABLE_OUTERMOST_TOPN Query Option</title>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-      <data name="Category" value="Impala Query Options"/>
-      <data name="Category" value="Developers"/>
-      <data name="Category" value="Data Analysts"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p rev="2.5.0">
-      <indexterm audience="hidden">DISABLE_OUTERMOST_TOPN query option</indexterm>
-    </p>
-
-    <p>
-      <b>Type:</b>
-    </p>
-
-    <p>
-      <b>Default:</b>
-    </p>
-  </conbody>
-</concept>