You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2018/11/07 21:28:10 UTC

[1/2] impala git commit: Bump CDH_BUILD_VERSION to include Hive fix for RELY constraint

Repository: impala
Updated Branches:
  refs/heads/master c7ef48e1d -> 690d905a5


Bump CDH_BUILD_VERSION to include Hive fix for RELY constraint

This change bumps the CDH build version (CDH_BUILD_NUMBER in
bin/impala-config.sh) to a build that includes the fix for a Hive bug in
which "RELY" constraints from SQL statements are not propagated correctly.
This is needed for the upcoming patch for IMPALA-3531.

This new CDH build pulls in a newer version of the Kudu binaries (v1.9).
The Kudu version variables in impala-config.sh are updated accordingly. The
change also moves to a newer toolchain build that includes these v1.9 bits.

Testing: DEBUG/Core tests passed.

Change-Id: Ie626882044fea98b81b07caa950b480e16df122d
Reviewed-on: http://gerrit.cloudera.org:8080/11891
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Thomas Marshall <th...@cmu.edu>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/04695f4e
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/04695f4e
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/04695f4e

Branch: refs/heads/master
Commit: 04695f4ed8e66aec117ab64cf6e56f62ef93d865
Parents: c7ef48e
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Mon Nov 5 11:31:23 2018 -0800
Committer: Bharath Vissapragada <bh...@cloudera.com>
Committed: Wed Nov 7 20:26:19 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/04695f4e/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index c0030ca..32df25d 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -68,7 +68,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=185-ec90313c05
+export IMPALA_TOOLCHAIN_BUILD_ID=210-5033cfcc59
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -158,7 +158,7 @@ fi
 : ${CDH_DOWNLOAD_HOST:=native-toolchain.s3.amazonaws.com}
 export CDH_DOWNLOAD_HOST
 export CDH_MAJOR_VERSION=6
-export CDH_BUILD_NUMBER=663310
+export CDH_BUILD_NUMBER=680275
 export IMPALA_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
 export IMPALA_HBASE_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_HIVE_VERSION=2.1.1-cdh6.x-SNAPSHOT
@@ -167,7 +167,7 @@ export IMPALA_PARQUET_VERSION=1.9.0-cdh6.x-SNAPSHOT
 export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
 export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
-export KUDU_JAVA_VERSION=1.8.0-cdh6.x-SNAPSHOT
+export KUDU_JAVA_VERSION=1.9.0-cdh6.x-SNAPSHOT
 
 # When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
 # which will be substituted for the CDH platform label in bootstrap_toolchain.py
@@ -596,10 +596,10 @@ fi
 export KUDU_IS_SUPPORTED
 
 if $USE_CDH_KUDU; then
-  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"1.8.0-cdh6.x-SNAPSHOT"}
+  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"1.9.0-cdh6.x-SNAPSHOT"}
   export IMPALA_KUDU_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
 else
-  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"5211897"}
+  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"4ec2598"}
   export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION
 fi
 


[2/2] impala git commit: IMPALA-7789: [DOCS] Admission status in Impala Shell

Posted by ar...@apache.org.
IMPALA-7789: [DOCS] Admission status in Impala Shell

Change-Id: I17d788eb716c6a2f7a144ee2d81bbe823f74d16a
Reviewed-on: http://gerrit.cloudera.org:8080/11895
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/690d905a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/690d905a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/690d905a

Branch: refs/heads/master
Commit: 690d905a5c14a87b5b32a656d3f463117d27c50d
Parents: 04695f4
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Tue Nov 6 17:24:31 2018 -0800
Committer: Alex Rodoni <ar...@cloudera.com>
Committed: Wed Nov 7 20:57:02 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_admission.xml     |  72 +++++++++----------
 docs/topics/impala_live_progress.xml |  26 +++----
 docs/topics/impala_live_summary.xml  | 115 +++++++++++++-----------------
 3 files changed, 100 insertions(+), 113 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/690d905a/docs/topics/impala_admission.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_admission.xml b/docs/topics/impala_admission.xml
index 8b114eb..1dc1512 100644
--- a/docs/topics/impala_admission.xml
+++ b/docs/topics/impala_admission.xml
@@ -783,42 +783,15 @@ impala.admission-control.pool-queue-timeout-ms.<varname>queue_name</varname></ph
         </metadata>
       </prolog>
       <conbody>
-        <p>
-          To see how admission control works for particular queries, examine
-          the profile output for the query. This information is available
-          through the <codeph>PROFILE</codeph> statement in
-            <cmdname>impala-shell</cmdname> immediately after running a query in
-          the shell, on the <uicontrol>queries</uicontrol> page of the Impala
-          debug web UI, or in the Impala log file (basic information at log
-          level 1, more detailed information at log level 2). The profile output
-          contains details about the admission decision, such as whether the
-          query was queued or not and which resource pool it was assigned to. It
-          also includes the estimated and actual memory usage for the query, so
-          you can fine-tune the configuration for the memory limits of the
-          resource pools.
-        </p>
-        <p>
-          Remember that the limits imposed by admission control are
-            <q>soft</q> limits. The decentralized nature of this mechanism means
-          that each Impala node makes its own decisions about whether to allow
-          queries to run immediately or to queue them. These decisions rely on
-          information passed back and forth between nodes by the statestore
-          service. If a sudden surge in requests causes more queries than
-          anticipated to run concurrently, then throughput could decrease due to
-          queries spilling to disk or contending for resources; or queries could
-          be cancelled if they exceed the <codeph>MEM_LIMIT</codeph> setting
-          while running.
-        </p>
-        <!--
-      <p>
-        If you have trouble getting a query to run because its estimated memory usage is too high, you can override
-        the estimate by setting the <codeph>MEM_LIMIT</codeph> query option in <cmdname>impala-shell</cmdname>,
-        then issuing the query through the shell in the same session. The <codeph>MEM_LIMIT</codeph> value is
-        treated as the estimated amount of memory, overriding the estimate that Impala would generate based on
-        table and column statistics. This value is used only for making admission control decisions, and is not
-        pre-allocated by the query.
-      </p>
--->
+        <p> The limits imposed by admission control are de-centrally managed
+            <q>soft</q> limits. Each Impala coordinator node makes its own
+          decisions about whether to allow queries to run immediately or to
+          queue them. These decisions rely on information passed back and forth
+          between nodes by the StateStore service. If a sudden surge in requests
+          causes more queries than anticipated to run concurrently, then the
+          throughput could decrease due to queries spilling to disk or
+          contending for resources. Or queries could be cancelled if they exceed
+          the <codeph>MEM_LIMIT</codeph> setting while running. </p>
         <p>
           In <cmdname>impala-shell</cmdname>, you can also specify which
           resource pool to direct queries to by setting the
@@ -830,6 +803,33 @@ impala.admission-control.pool-queue-timeout-ms.<varname>queue_name</varname></ph
           with Sentry security. See <xref
             href="impala_authorization.xml#authorization"/> for details.
         </p>
+        <p> To see how admission control works for particular queries, examine
+          the profile output or the summary output for the query. <ul>
+            <li>Profile<p>The information is available through the
+                  <codeph>PROFILE</codeph> statement in
+                  <cmdname>impala-shell</cmdname> immediately after running a
+                query in the shell, on the <uicontrol>queries</uicontrol> page
+                of the Impala debug web UI, or in the Impala log file (basic
+                information at log level 1, more detailed information at log
+                level 2). </p><p>The profile output contains details about the
+                admission decision, such as whether the query was queued or not
+                and which resource pool it was assigned to. It also includes the
+                estimated and actual memory usage for the query, so you can
+                fine-tune the configuration for the memory limits of the
+                resource pools. </p></li>
+            <li>Summary<p>Starting in <keyword keyref="impala31"/>, the
+                information is available in <cmdname>impala-shell</cmdname> when
+                the <codeph>LIVE_PROGRESS</codeph> or
+                  <codeph>LIVE_SUMMARY</codeph> query option is set to
+                  <codeph>TRUE</codeph>.</p><p>You can also start an
+                  <codeph>impala-shell</codeph> session with the
+                  <codeph>--live_progress</codeph> or
+                  <codeph>--live_summary</codeph> flags to monitor all queries
+                in that <codeph>impala-shell</codeph> session.</p><p>The summary
+                output includes the queuing status consisting of whether the
+                query was queued and what was the latest queuing
+              reason.</p></li>
+          </ul></p>
         <p>
           For details about all the Fair Scheduler configuration settings, see
             <xref keyref="FairScheduler">Fair Scheduler Configuration</xref>, in

http://git-wip-us.apache.org/repos/asf/impala/blob/690d905a/docs/topics/impala_live_progress.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_live_progress.xml b/docs/topics/impala_live_progress.xml
index 0c91824..63297aa 100644
--- a/docs/topics/impala_live_progress.xml
+++ b/docs/topics/impala_live_progress.xml
@@ -37,19 +37,19 @@ under the License.
 
   <conbody>
 
-    <p rev="2.3.0">
-      <indexterm audience="hidden">LIVE_PROGRESS query option</indexterm>
-      For queries submitted through the <cmdname>impala-shell</cmdname> command,
-      displays an interactive progress bar showing roughly what percentage of
-      processing has been completed. When the query finishes, the progress bar is erased
-      from the <cmdname>impala-shell</cmdname> console output.
-    </p>
-
-    <p>
-    </p>
-
-    <p conref="../shared/impala_common.xml#common/type_boolean"/>
-    <p conref="../shared/impala_common.xml#common/default_false_0"/>
+    <p rev="2.3.0"> When the <codeph>LIVE_PROGRESS</codeph> query option is set
+      to <codeph>TRUE</codeph>, Impala displays an interactive progress bar
+      showing roughly what percentage of processing has been completed for
+      queries submitted through the <cmdname>impala-shell</cmdname> command.
+      When the query finishes, the progress bar is erased from the
+        <cmdname>impala-shell</cmdname> console output. </p>
+    <p>Starting in <keyword keyref="impala31"/>, the summary output also
+      includes the queuing status consisting of whether the query was queued and
+      what was the latest queuing reason.</p>
+    <p><b>Type:</b>
+      <codeph>Boolean</codeph></p>
+    <p><b>Default:</b>
+      <codeph>FALSE (0)</codeph></p>
 
     <p conref="../shared/impala_common.xml#common/command_line_blurb"/>
     <p>

http://git-wip-us.apache.org/repos/asf/impala/blob/690d905a/docs/topics/impala_live_summary.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_live_summary.xml b/docs/topics/impala_live_summary.xml
index 94733d2..10ecae3 100644
--- a/docs/topics/impala_live_summary.xml
+++ b/docs/topics/impala_live_summary.xml
@@ -36,71 +36,59 @@ under the License.
   </prolog>
 
   <conbody>
-
-    <p rev="2.3.0">
-      <indexterm audience="hidden">LIVE_SUMMARY query option</indexterm>
-      For queries submitted through the <cmdname>impala-shell</cmdname> command,
-      displays the same output as the <codeph>SUMMARY</codeph> command,
-      with the measurements updated in real time as the query progresses.
-      When the query finishes, the final <codeph>SUMMARY</codeph> output remains
-      visible in the <cmdname>impala-shell</cmdname> console output.
-    </p>
-
-    <p>
-    </p>
-
-    <p conref="../shared/impala_common.xml#common/type_boolean"/>
-    <p conref="../shared/impala_common.xml#common/default_false_0"/>
-
+    <p rev="2.3.0"> When the <codeph>LIVE_SUMMARY</codeph> query option is set
+      to <codeph>TRUE</codeph>, Impala displays the same output as the
+        <codeph>SUMMARY</codeph> command for queries submitted through the
+        <cmdname>impala-shell</cmdname> command, with the measurements updated
+      in real time as the query progresses. When the query finishes, the final
+        <codeph>SUMMARY</codeph> output remains visible in the
+        <cmdname>impala-shell</cmdname> console output. </p>
+    <p>Starting in <keyword keyref="impala31"/>, the summary output also
+      includes the queuing status consisting of whether the query was queued and
+      what was the latest queuing reason.</p>
+    <p>the queuing status, whether the query was queued and what was the latest
+      queuing reason.</p>
+    <p><b>Type:</b>
+      <codeph>Boolean</codeph></p>
+    <p><b>Default:</b>
+      <codeph>FALSE (0)</codeph></p>
     <p conref="../shared/impala_common.xml#common/command_line_blurb"/>
-    <p>
-      You can enable this query option within <cmdname>impala-shell</cmdname>
+    <p> You can enable this query option within <cmdname>impala-shell</cmdname>
       by starting the shell with the <codeph>--live_summary</codeph>
-      command-line option.
-      You can still turn this setting off and on again within the shell through the
-      <codeph>SET</codeph> command.
-    </p>
-
+      command-line option. You can still turn this setting off and on again
+      within the shell through the <codeph>SET</codeph> command. </p>
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
-    <p>
-      The live summary output can be useful for evaluating long-running queries,
-      to evaluate which phase of execution takes up the most time, or if some hosts
-      take much longer than others for certain operations, dragging overall performance down.
-      By making the information available in real time, this feature lets you decide what
-      action to take even before you cancel a query that is taking much longer than normal.
-    </p>
-    <p>
-      For example, you might see the HDFS scan phase taking a long time, and therefore revisit
-      performance-related aspects of your schema design such as constructing a partitioned table,
-      switching to the Parquet file format, running the <codeph>COMPUTE STATS</codeph> statement
-      for the table, and so on.
-      Or you might see a wide variation between the average and maximum times for all hosts to
-      perform some phase of the query, and therefore investigate if one particular host
-      needed more memory or was experiencing a network problem.
-    </p>
+    <p> The live summary output can be useful for evaluating long-running
+      queries, to evaluate which phase of execution takes up the most time, or
+      if some hosts take much longer than others for certain operations,
+      dragging overall performance down. By making the information available in
+      real time, this feature lets you decide what action to take even before
+      you cancel a query that is taking much longer than normal. </p>
+    <p> For example, you might see the HDFS scan phase taking a long time, and
+      therefore revisit performance-related aspects of your schema design such
+      as constructing a partitioned table, switching to the Parquet file format,
+      running the <codeph>COMPUTE STATS</codeph> statement for the table, and so
+      on. Or you might see a wide variation between the average and maximum
+      times for all hosts to perform some phase of the query, and therefore
+      investigate if one particular host needed more memory or was experiencing
+      a network problem. </p>
     <p conref="../shared/impala_common.xml#common/live_reporting_details"/>
-    <p>
-      For a simple and concise way of tracking the progress of an interactive query, see
-      <xref href="impala_live_progress.xml#live_progress"/>.
-    </p>
-
+    <p> For a simple and concise way of tracking the progress of an interactive
+      query, see <xref href="impala_live_progress.xml#live_progress"/>. </p>
     <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
-    <p conref="../shared/impala_common.xml#common/impala_shell_progress_reports_compute_stats_caveat"/>
-    <p conref="../shared/impala_common.xml#common/impala_shell_progress_reports_shell_only_caveat"/>
-
+    <p
+      conref="../shared/impala_common.xml#common/impala_shell_progress_reports_compute_stats_caveat"/>
+    <p
+      conref="../shared/impala_common.xml#common/impala_shell_progress_reports_shell_only_caveat"/>
     <p conref="../shared/impala_common.xml#common/added_in_230"/>
-
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
-
-    <p>
-      The following example shows a series of <codeph>LIVE_SUMMARY</codeph> reports that
-      are displayed during the course of a query, showing how the numbers increase to
-      show the progress of different phases of the distributed query. When you do the same
-      in <cmdname>impala-shell</cmdname>, only a single report is displayed at any one time,
-      with each update overwriting the previous numbers.
-    </p>
-
-<codeblock><![CDATA[[localhost:21000] > set live_summary=true;
+    <p> The following example shows a series of <codeph>LIVE_SUMMARY</codeph>
+      reports that are displayed during the course of a query, showing how the
+      numbers increase to show the progress of different phases of the
+      distributed query. When you do the same in
+      <cmdname>impala-shell</cmdname>, only a single report is displayed at any
+      one time, with each update overwriting the previous numbers. </p>
+    <codeblock><![CDATA[[localhost:21000] > set live_summary=true;
 LIVE_SUMMARY set to true
 [localhost:21000] > select count(*) from customer t1 cross join customer t2;
 +---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
@@ -140,9 +128,8 @@ LIVE_SUMMARY set to true
 +---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
 ]]>
 </codeblock>
-
-<!-- Keeping this sample output that illustrates a couple of glitches in the LIVE_SUMMARY display, hidden, to help filing JIRAs. -->
-<codeblock audience="hidden"><![CDATA[[
+    <!-- Keeping this sample output that illustrates a couple of glitches in the LIVE_SUMMARY display, hidden, to help filing JIRAs. -->
+    <codeblock audience="hidden"><![CDATA[[
 +---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
 | Operator            | #Hosts | Avg Time | Max Time | #Rows   | Est. #Rows | Peak Mem | Est. Peak Mem | Detail                |
 +---------------------+--------+----------+----------+---------+------------+----------+---------------+-----------------------+
@@ -222,8 +209,8 @@ Query: select count(*) from customer t1 cross join customer t2
 | Operator            | #Hosts | Avg Time | Max Time | #Rows   | Est. #Rows | Peak Mem | Est. Peak Mem | Detail                |
 ]]>
 </codeblock>
-
-    <p conref="../shared/impala_common.xml#common/live_progress_live_summary_asciinema"/>
-
+    <p
+      conref="../shared/impala_common.xml#common/live_progress_live_summary_asciinema"
+    />
   </conbody>
 </concept>