You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2018/04/17 20:25:50 UTC

[1/4] impala git commit: IMPALA-6732: [DOCS] 2.12 Release Notes

Repository: impala
Updated Branches:
  refs/heads/master 5960d1b36 -> eaf66172d


IMPALA-6732: [DOCS] 2.12 Release Notes

Change-Id: Ia0eee2a5bd7d31afaff048f0ac6e46123eb56e47
Reviewed-on: http://gerrit.cloudera.org:8080/10071
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a6767de0
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a6767de0
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a6767de0

Branch: refs/heads/master
Commit: a6767de0aa7b385cf2846797e109257d7b23075d
Parents: 5960d1b
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Sat Apr 14 09:19:16 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Mon Apr 16 20:27:51 2018 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap                 |  2 ++
 docs/impala_release_notes.ditamap           |  2 +-
 docs/topics/impala_fixed_issues.xml         | 20 ++++++++++++++++++--
 docs/topics/impala_incompatible_changes.xml | 16 ++++++++++++++++
 docs/topics/impala_new_features.xml         | 17 +++++++++++++++++
 5 files changed, 54 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/a6767de0/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index fb60e14..52b86cf 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10601,6 +10601,8 @@ under the License.
   <keydef keys="impala13_full"><topicmeta><keywords><keyword>Impala 1.3</keyword></keywords></topicmeta></keydef>
 
 <!-- Pointers to changelog pages -->
+  <keydef keys="changelog_300" href="https://impala.apache.org/docs/changelog-3.0.html" scope="external" format="html"/>
+  <keydef keys="changelog_212" href="https://impala.apache.org/docs/changelog-2.12.html" scope="external" format="html"/>
   <keydef keys="changelog_211" href="https://impala.apache.org/docs/changelog-2.11.html" scope="external" format="html"/>
   <keydef keys="changelog_210" href="https://impala.apache.org/docs/changelog-2.10.html" scope="external" format="html"/>
   <keydef keys="changelog_29" href="https://impala.apache.org/docs/changelog-2.9.html" scope="external" format="html"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/a6767de0/docs/impala_release_notes.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_release_notes.ditamap b/docs/impala_release_notes.ditamap
index 58ad5ab..554b2ee 100644
--- a/docs/impala_release_notes.ditamap
+++ b/docs/impala_release_notes.ditamap
@@ -20,7 +20,7 @@ under the License.
 <!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
 <map audience="standalone">
   <title>Apache Impala Release Notes</title>
-  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>
+<!--  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>-->
   <topicref href="topics/impala_new_features.xml"/>
   <topicref href="topics/impala_incompatible_changes.xml"/>
   <topicref href="topics/impala_known_issues.xml"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/a6767de0/docs/topics/impala_fixed_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_fixed_issues.xml b/docs/topics/impala_fixed_issues.xml
index 6f2b789..9d2ca97 100644
--- a/docs/topics/impala_fixed_issues.xml
+++ b/docs/topics/impala_fixed_issues.xml
@@ -46,11 +46,27 @@ under the License.
     <p outputclass="toc inpage"/>
   </conbody>
 
+<!-- All 2.12.x subsections go under here -->
+
+  <concept rev="2.12.0" id="fixed_issues_2_12_0">
+
+    <title>Issues Fixed in <keyword keyref="impala212"/></title>
+
+    <conbody>
+
+      <p>
+        For the full list of issues closed in this release, including bug fixes,
+        see the <xref keyref="changelog_212">changelog for <keyword keyref="impala212"/></xref>.
+      </p>
+
+    </conbody>
+  </concept>
+
 <!-- All 2.11.x subsections go under here -->
 
   <concept rev="2.11.0" id="fixed_issues_2_11_0">
 
-    <title>Issues Fixed in <keyword keyref="impala2110"/></title>
+    <title>Issues Fixed in <keyword keyref="impala211"/></title>
 
     <conbody>
 
@@ -66,7 +82,7 @@ under the License.
 
   <concept rev="2.10.0" id="fixed_issues_2100">
 
-    <title>Issues Fixed in <keyword keyref="impala2100"/></title>
+    <title>Issues Fixed in <keyword keyref="impala210"/></title>
 
     <conbody>
 

http://git-wip-us.apache.org/repos/asf/impala/blob/a6767de0/docs/topics/impala_incompatible_changes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_incompatible_changes.xml b/docs/topics/impala_incompatible_changes.xml
index 9d8d711..f26c2f2 100644
--- a/docs/topics/impala_incompatible_changes.xml
+++ b/docs/topics/impala_incompatible_changes.xml
@@ -53,6 +53,22 @@ under the License.
     <p outputclass="toc inpage"/>
   </conbody>
 
+  <concept rev="2.12.0" id="incompatible_changes_212x">
+
+    <title>Incompatible Changes Introduced in Impala 2.12.x</title>
+
+    <conbody>
+
+      <p>
+        For the full list of issues closed in this release, including any that introduce
+        behavior changes or incompatibilities, see the
+        <xref keyref="changelog_212">changelog for <keyword keyref="impala212"/></xref>.
+      </p>
+
+    </conbody>
+
+  </concept>
+
   <concept rev="2.11.0" id="incompatible_changes_211x">
 
     <title>Incompatible Changes Introduced in Impala 2.11.x</title>

http://git-wip-us.apache.org/repos/asf/impala/blob/a6767de0/docs/topics/impala_new_features.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_new_features.xml b/docs/topics/impala_new_features.xml
index 0deb311..deb15e0 100644
--- a/docs/topics/impala_new_features.xml
+++ b/docs/topics/impala_new_features.xml
@@ -46,6 +46,23 @@ under the License.
 
   </conbody>
 
+<!-- All 2.12.x new features go under here -->
+
+  <concept rev="2.12.0" id="new_features_2120">
+
+    <title>New Features in <keyword keyref="impala212_full"/></title>
+
+    <conbody>
+
+      <p>
+        For the full list of issues closed in this release, including the issues
+        marked as <q>new features</q> or <q>improvements</q>, see the
+        <xref keyref="changelog_212">changelog for <keyword keyref="impala212"/></xref>.
+      </p>
+
+    </conbody>
+  </concept>
+
 <!-- All 2.11.x new features go under here -->
 
   <concept rev="2.11.0" id="new_features_2110">


[3/4] impala git commit: IMPALA-6723: [DOCS] Hints for CTAS

Posted by jo...@apache.org.
IMPALA-6723: [DOCS] Hints for CTAS

Change-Id: I91d9f4f039a603382ff4415d1dd22a351279cbfa

IMPALA-6723 Hints for CTAS

Change-Id: I201a4e1ddaf62164e1f6b636c4e1e60af60e1af7

IMPALA-6723: [DOCS] Hints for CTAS

Optimizer hints were moved out of the SELECT section.
Hints for CTAS were added to the same section as INSERT.

Change-Id: I91d9f4f039a603382ff4415d1dd22a351279cbfa
Reviewed-on: http://gerrit.cloudera.org:8080/9993
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ea698cd4
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ea698cd4
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ea698cd4

Branch: refs/heads/master
Commit: ea698cd497f63908b231e4b108c20d259e5bd8fb
Parents: 0e98b9a
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Apr 6 09:56:48 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Apr 17 00:34:52 2018 +0000

----------------------------------------------------------------------
 docs/impala.ditamap           |   2 +-
 docs/shared/impala_common.xml |   1 +
 docs/topics/impala_hints.xml  | 226 +++++++++++++++++++++++++++++--------
 3 files changed, 181 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/ea698cd4/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 08b69ca..4e9c71a 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -166,7 +166,6 @@ under the License.
         <topicref rev="IMPALA-5309" href="topics/impala_tablesample.xml"/>
         <topicref href="topics/impala_with.xml"/>
         <topicref href="topics/impala_distinct.xml"/>
-        <topicref href="topics/impala_hints.xml"/>
       </topicref>
       <topicref href="topics/impala_set.xml">
         <topicref href="topics/impala_query_options.xml">
@@ -230,6 +229,7 @@ under the License.
       <topicref href="topics/impala_update.xml"/>
       <topicref href="topics/impala_upsert.xml"/>
       <topicref href="topics/impala_use.xml"/>
+      <topicref href="topics/impala_hints.xml"/>
     </topicref>
     <topicref href="topics/impala_functions.xml">
       <topicref href="topics/impala_math_functions.xml"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/ea698cd4/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 12d7d4e..df58a53 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -3063,6 +3063,7 @@ select max(height), avg(height) from census_data where age &gt; 20;
         Another way to define different names for the same tables or columns is to create views. See
         <xref href="../topics/impala_views.xml#views"/> for details.
       </p>
+      <!--Alex R: The insert hints below are being refactored into impala_hints.xml for more general use. Keep this for now for impala_parquet.xml.-->
 
       <p id="insert_hints">
         When inserting into partitioned tables, especially using the Parquet file format, you can include a hint in

http://git-wip-us.apache.org/repos/asf/impala/blob/ea698cd4/docs/topics/impala_hints.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hints.xml b/docs/topics/impala_hints.xml
index 6cafcfb..b936b6a 100644
--- a/docs/topics/impala_hints.xml
+++ b/docs/topics/impala_hints.xml
@@ -20,8 +20,8 @@ under the License.
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
 <concept id="hints">
 
-  <title>Query Hints in Impala SELECT Statements</title>
-  <titlealts audience="PDF"><navtitle>Hints</navtitle></titlealts>
+  <title>Optimizer Hints</title>
+  <titlealts audience="PDF"><navtitle>Optimizer Hints</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -37,15 +37,13 @@ under the License.
   <conbody>
 
     <p>
-      <indexterm audience="hidden">hints</indexterm>
-      The Impala SQL dialect supports query hints, for fine-tuning the inner workings of queries. Specify hints as
-      a temporary workaround for expensive queries, where missing statistics or other factors cause inefficient
-      performance.
-    </p>
+      <indexterm audience="hidden">hints</indexterm> The Impala SQL dialect supports
+      query hints, for fine-tuning the inner workings of queries. Specify hints
+      as a temporary workaround for expensive queries, where missing statistics
+      or other factors cause inefficient performance. </p>
 
-    <p>
-      Hints are most often used for the most resource-intensive kinds of Impala queries:
-    </p>
+    <p> Hints are most often used for resource-intensive Impala queries,
+      such as: </p>
 
     <ul>
       <li>
@@ -61,41 +59,18 @@ under the License.
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
-    <p>
-      You can also represent the hints as keywords surrounded by <codeph>[]</codeph>
-      square brackets; include the brackets in the text of the SQL statement.
-      <note conref="../shared/impala_common.xml#common/square_bracket_hint_caveat"/>
-    </p>
-
-<codeblock>SELECT STRAIGHT_JOIN <varname>select_list</varname> FROM
-<varname>join_left_hand_table</varname>
-  JOIN [{ /* +BROADCAST */ | /* +SHUFFLE */ }]
-<varname>join_right_hand_table</varname>
-<varname>remainder_of_query</varname>;
-
-INSERT <varname>insert_clauses</varname>
-  [{ /* +SHUFFLE */ | /* +NOSHUFFLE */ }]
-  [<ph rev="IMPALA-2522 2.8.0">/* +CLUSTERED */</ph>]
-  SELECT <varname>remainder_of_query</varname>;
-
-<ph rev="2.12.0 IMPALA-4168">
-UPSERT [{ /* +SHUFFLE */ | /* +NOSHUFFLE */ }]
-  [<ph rev="IMPALA-2522 2.8.0">/* +CLUSTERED */</ph>]
-  <varname>upsert_clauses</varname>
-  SELECT <varname>remainder_of_query</varname>;</ph>
-</codeblock>
-
-    <p rev="2.0.0">
-      In <keyword keyref="impala20_full"/> and higher, you can also specify the hints inside comments that use
-      either the <codeph>/* */</codeph> or <codeph>--</codeph> notation. Specify a <codeph>+</codeph> symbol
-      immediately before the hint name. Recently added hints are only available using the <codeph>/* */</codeph>
-      and <codeph>--</codeph> notation.
-      For clarity, the <codeph>/* */</codeph> and <codeph>--</codeph> styles
-      are used in the syntax and examples throughout this section.
-      With the <codeph>/* */</codeph> or <codeph>--</codeph> notation for
-      hints, specify a <codeph>+</codeph> symbol immediately before the first hint name.
-      Multiple hints can be specified separated by commas, for example
-      <codeph>/* +clustered,shuffle */</codeph>
+    <p rev="2.0.0"> In <keyword keyref="impala20_full"/> and higher, you can
+      specify the hints inside comments that use either the <codeph>/*
+        */</codeph> or <codeph>--</codeph> notation. Specify a
+        <codeph>+</codeph> symbol immediately before the hint name. Recently
+      added hints are only available using the <codeph>/* */</codeph> and
+        <codeph>--</codeph> notation. For clarity, the <codeph>/* */</codeph>
+      and <codeph>--</codeph> styles are used in the syntax and examples
+      throughout this section. With the <codeph>/* */</codeph> or
+        <codeph>--</codeph> notation for hints, specify a <codeph>+</codeph>
+      symbol immediately before the first hint name. Multiple hints can be
+      specified separated by commas, for example <codeph>/* +clustered,shuffle
+        */</codeph>.
     </p>
 
 <codeblock rev="2.0.0">SELECT STRAIGHT_JOIN <varname>select_list</varname> FROM
@@ -167,6 +142,43 @@ UPSERT -- +CLUSTERED
 UPSERT /* +CLUSTERED */
   <varname>upsert_clauses</varname>
   SELECT <varname>remainder_of_query</varname>;</ph>
+
+CREATE /* +SHUFFLE|NOSHUFFLE */
+  <varname>table_clauses</varname>
+  AS SELECT <varname>remainder_of_query</varname>;
+
+CREATE -- +SHUFFLE|NOSHUFFLE
+  <varname>table_clauses</varname>
+  AS SELECT <varname>remainder_of_query</varname>;
+
+CREATE /* +CLUSTERED|NOCLUSTERED */
+  <varname>table_clauses</varname>
+  AS SELECT <varname>remainder_of_query</varname>;
+
+CREATE -- +CLUSTERED|NOCLUSTERED
+  <varname>table_clauses</varname>
+  AS SELECT <varname>remainder_of_query</varname>;
+</codeblock>
+    <p>The square bracket style hints are supported for backward compatibility,
+      but the syntax is deprecated and will be removed in a future release. For
+      that reason, any newly added hints are not available with the square
+      bracket syntax.</p>
+    <codeblock>SELECT STRAIGHT_JOIN <varname>select_list</varname> FROM
+<varname>join_left_hand_table</varname>
+  JOIN [{ /* +BROADCAST */ | /* +SHUFFLE */ }]
+<varname>join_right_hand_table</varname>
+<varname>remainder_of_query</varname>;
+
+INSERT <varname>insert_clauses</varname>
+  [{ /* +SHUFFLE */ | /* +NOSHUFFLE */ }]
+  [<ph rev="IMPALA-2522 2.8.0">/* +CLUSTERED */</ph>]
+  SELECT <varname>remainder_of_query</varname>;
+
+<ph rev="2.12.0 IMPALA-4168">
+UPSERT [{ /* +SHUFFLE */ | /* +NOSHUFFLE */ }]
+  [<ph rev="IMPALA-2522 2.8.0">/* +CLUSTERED */</ph>]
+  <varname>upsert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
 </codeblock>
 
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
@@ -226,10 +238,130 @@ UPSERT /* +CLUSTERED */
     </ul>
 
     <p>
-      <b>Hints for INSERT ... SELECT queries:</b>
+      <b>Hints for INSERT ... SELECT and CREATE TABLE AS SELECT (CTAS):</b>
+    </p>
+    <p id="insert_hints">
+      When inserting into partitioned tables, such as using the Parquet file
+      format, you can include a hint in the <codeph>INSERT</codeph> or <codeph>CREATE TABLE AS SELECT (CTAS)</codeph>
+      statements to fine-tune the overall performance of the operation and its
+      resource usage.</p>
+    <p>
+      You would only use hints if an <codeph>INSERT</codeph> or
+        <codeph>CTAS</codeph> into a partitioned table was failing due to
+      capacity limits, or if such an operation was succeeding but with
+      less-than-optimal performance.
     </p>
 
-    <p conref="../shared/impala_common.xml#common/insert_hints"/>
+    <ul>
+      <li>
+        <codeph>/* +SHUFFLE */</codeph> and <codeph>/* +NOSHUFFLE */</codeph> Hints
+        <ul>
+          <li>
+            <codeph>/* +SHUFFLE */</codeph> adds an exchange node, before
+            writing the data, which re-partitions the result of the
+              <codeph>SELECT</codeph> based on the partitioning columns of the
+            target table. With this hint, only one node writes to a partition at
+            a time, minimizing the global number of simultaneous writes and the
+            number of memory buffers holding data for individual partitions.
+            This also reduces fragmentation, resulting in fewer files. Thus it
+            reduces overall resource usage of the <codeph>INSERT</codeph> or
+              <codeph>CTAS</codeph> operation and allows some operations to
+            succeed that otherwise would fail. It does involve some data
+            transfer between the nodes so that the data files for a particular
+            partition are all written on the same node.
+
+            <p>
+              Use <codeph>/* +SHUFFLE */</codeph> in cases where an <codeph>INSERT</codeph>
+              or <codeph>CTAS</codeph> statement fails or runs inefficiently due
+              to all nodes attempting to write data for all partitions.
+            </p>
+
+            <p> If the table is unpartitioned or every partitioning expression
+              is constant, then <codeph>/* +SHUFFLE */</codeph> will cause every
+              write to happen on the coordinator node.
+            </p>
+          </li>
+
+          <li>
+            <codeph>/* +NOSHUFFLE */</codeph> does not add an exchange node before
+            inserting to partitioned tables and disables re-partitioning. So the
+            selected execution plan might be faster overall, but might also
+            produce a larger number of small data files or exceed capacity
+            limits, causing the <codeph>INSERT</codeph> or <codeph>CTAS</codeph>
+            operation to fail.
+
+            <p> Impala automatically uses the <codeph>/*
+                +SHUFFLE */</codeph> method if any partition key column in the
+              source table, mentioned in the <codeph>SELECT</codeph> clause,
+              does not have column statistics. In this case, use the <codeph>/*
+                +NOSHUFFLE */</codeph> hint if you want to override this default
+              behavior.
+            </p>
+          </li>
+
+          <li>
+            If column statistics are available for all partition key columns
+            in the source table mentioned in the <codeph>INSERT ...
+              SELECT</codeph> or <codeph>CTAS</codeph> query, Impala chooses
+            whether to use the <codeph>/* +SHUFFLE */</codeph> or <codeph>/*
+              +NOSHUFFLE */</codeph> technique based on the estimated number of
+            distinct values in those columns and the number of nodes involved in
+            the operation. In this case, you might need the <codeph>/* +SHUFFLE
+              */</codeph> or the <codeph>/* +NOSHUFFLE */</codeph> hint to
+            override the execution plan selected by Impala.
+          </li>
+        </ul>
+      </li>
+
+      <li>
+        <codeph>/* +CLUSTERED */</codeph> and <codeph>/* +NOCLUSTERED
+          */</codeph> Hints
+        <ul>
+          <li>
+            <codeph>/* +CLUSTERED */</codeph> sorts data by the partition
+            columns before inserting to ensure that only one partition is
+            written at a time per node. Use this hint to reduce the number of
+            files kept open and the number of buffers kept in memory
+            simultaneously. This technique is primarily useful for inserts into
+            Parquet tables, where the large block size requires substantial
+            memory to buffer data for multiple output files at once. This hint
+            is available in <keyword keyref="impala28_full"/> or higher.
+
+            <p>
+              Starting in <keyword keyref="impala30_full"/>, <codeph>/*
+                +CLUSTERED */</codeph> is the default behavior for HDFS tables.
+            </p>
+          </li>
+
+          <li>
+            <codeph>/* +NOCLUSTERED */</codeph> does not sort by primary key
+            before insert. This hint is available in <keyword
+              keyref="impala28_full"/> or higher.
+
+            <p>
+              Use this hint when inserting to Kudu tables.
+            </p>
+
+            <p>
+              In the versions lower than <keyword keyref="impala30_full"/>,
+                <codeph>/* +NOCLUSTERED */</codeph> is the default in HDFS
+              tables.
+            </p>
+          </li>
+        </ul>
+      </li>
+    </ul>
+
+    <p>
+      Starting from <keyword keyref="impala29_full"/>, <codeph>INSERT</codeph>
+      or <codeph>UPSERT</codeph> operations into Kudu tables automatically have
+      an exchange and sort node added to the plan that partitions and sorts the
+      rows according to the partitioning/primary key scheme of the target table
+      (unless the number of rows to be inserted is small enough to trigger
+      single node execution). Use the <codeph>/* +NOCLUSTERED */</codeph> and
+        <codeph>/* +NOSHUFFLE */</codeph> hints together to disable partitioning
+      and sorting before the rows are sent to Kudu.
+    </p>
 
     <p rev="IMPALA-2924">
       <b>Hints for scheduling of HDFS blocks:</b>


[4/4] impala git commit: IMPALA-6863: Make pip_download.py honor redirects.

Posted by jo...@apache.org.
IMPALA-6863: Make pip_download.py honor redirects.

As part of our continuing woes with PyPi infrastructure, we've now seen
redirects. Following redirects seems like the right thing to do, so I've
changed the downloader code to follow them.

I checked that this is available in Python 2.6.

The build failure signature looks like:

   Downloading AllPairs-2.0.1.tar.gz from https://pypi.python.org/packages/04/44/cb85d029b33ddfa7b5a27ae523434c2230f11a89d1f0c73203e9007a6eb0/AllPairs-2.0.1.tar.gz
   ('http error', 302, 'Found', <httplib.HTTPMessage instance at 0x7fbf7819b050>)
   Download failed after several attempts.
   Warning: Unable to download Python requirements.
   Warning: bootstrap_virtualenv or other Python-based tooling may fail.

Change-Id: Ic7551cec43a2d378df7e3cc7d521ace338b56ba2
Reviewed-on: http://gerrit.cloudera.org:8080/10083
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Philip Zeyliger <ph...@cloudera.com>
Reviewed-by: Lars Volker <lv...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/eaf66172
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/eaf66172
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/eaf66172

Branch: refs/heads/master
Commit: eaf66172df113dbf10cdb0a08a2bc51e4077ca38
Parents: ea698cd
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Mon Apr 16 16:19:57 2018 -0700
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Tue Apr 17 19:49:55 2018 +0000

----------------------------------------------------------------------
 infra/python/deps/pip_download.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/eaf66172/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index 6fbb683..0cce9e9 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -28,7 +28,7 @@ import re
 import sys
 from random import randint
 from time import sleep
-from urllib import urlopen, URLopener
+from urllib import urlopen, FancyURLopener
 
 NUM_DOWNLOAD_ATTEMPTS = 8
 
@@ -114,7 +114,7 @@ def download_package(pkg_name, pkg_version):
       expected_digest):
     print 'File with matching digest already exists, skipping {0}'.format(file_name)
     return True
-  downloader = URLopener()
+  downloader = FancyURLopener()
   pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
   print 'Downloading {0} from {1}'.format(file_name, pkg_url)
   downloader.retrieve(pkg_url, file_name)


[2/4] impala git commit: IMPALA-6464: [DOCS] COMPUTE STATS supports a list of columns

Posted by jo...@apache.org.
IMPALA-6464: [DOCS] COMPUTE STATS supports a list of columns

Change-Id: I609c38eac29e36eca008bfb66f5e78f5491e719a
Reviewed-on: http://gerrit.cloudera.org:8080/10070
Reviewed-by: Vuk Ercegovac <ve...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0e98b9ab
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0e98b9ab
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0e98b9ab

Branch: refs/heads/master
Commit: 0e98b9abd05ccfb3f01657434f913ad7d061f087
Parents: a6767de
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Apr 13 18:14:57 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Mon Apr 16 20:28:34 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_compute_stats.xml | 116 ++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0e98b9ab/docs/topics/impala_compute_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compute_stats.xml b/docs/topics/impala_compute_stats.xml
index 98694f8..b62972c 100644
--- a/docs/topics/impala_compute_stats.xml
+++ b/docs/topics/impala_compute_stats.xml
@@ -49,7 +49,11 @@ under the License.
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
-<codeblock rev="2.1.0">COMPUTE STATS [<varname>db_name</varname>.]<varname>table_name</varname>
+<codeblock rev="impala-3562">COMPUTE STATS
+  [<varname>db_name</varname>.]<varname>table_name</varname> [ ( <varname>column_list</varname> ) ]
+
+<varname>column_list</varname> ::= <varname>column_name</varname> [ , <varname>column_name</varname>, ... ]
+
 COMPUTE INCREMENTAL STATS [<varname>db_name</varname>.]<varname>table_name</varname> [PARTITION (<varname>partition_spec</varname>)]
 
 <varname>partition_spec</varname> ::= <varname>simple_partition_spec</varname> | <ph rev="IMPALA-1654"><varname>complex_partition_spec</varname></ph>
@@ -64,12 +68,40 @@ COMPUTE INCREMENTAL STATS [<varname>db_name</varname>.]<varname>table_name</varn
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
 
     <p>
-      Originally, Impala relied on users to run the Hive <codeph>ANALYZE TABLE</codeph> statement, but that method
-      of gathering statistics proved unreliable and difficult to use. The Impala <codeph>COMPUTE STATS</codeph>
-      statement is built from the ground up to improve the reliability and user-friendliness of this operation.
-      <codeph>COMPUTE STATS</codeph> does not require any setup steps or special configuration. You only run a
-      single Impala <codeph>COMPUTE STATS</codeph> statement to gather both table and column statistics, rather
-      than separate Hive <codeph>ANALYZE TABLE</codeph> statements for each kind of statistics.
+      Originally, Impala relied on users to run the Hive <codeph>ANALYZE
+        TABLE</codeph> statement, but that method of gathering statistics proved
+      unreliable and difficult to use. The Impala <codeph>COMPUTE STATS</codeph>
+      statement was built to improve the reliability and user-friendliness of
+      this operation. <codeph>COMPUTE STATS</codeph> does not require any setup
+      steps or special configuration. You only run a single Impala
+        <codeph>COMPUTE STATS</codeph> statement to gather both table and column
+      statistics, rather than separate Hive <codeph>ANALYZE TABLE</codeph>
+      statements for each kind of statistics.
+    </p>
+
+    <p rev="impala-3562">
+      For a non-incremental <codeph>COMPUTE STATS</codeph>
+      statement, the columns for which statistics are computed can be specified
+      with an optional comma-separated list of columns.
+    </p>
+
+    <p rev="impala-3562">
+      If no column list is given, the <codeph>COMPUTE STATS</codeph> statement
+      computes column-level statistics for all columns of the table. This adds
+      potentially unneeded work for columns whose stats are not needed by
+      queries. It can be especially costly for very wide tables and unneeded
+      large string fields.
+    </p>
+    <p rev="impala-3562">
+      <codeph>COMPUTE STATS</codeph> returns an error when a specified column
+      cannot be analyzed, such as when the column does not exist, the column is
+      of an unsupported type for COMPUTE STATS, e.g. columns of complex types,
+      or the column is a partitioning column.
+
+    </p>
+    <p rev="impala-3562">
+      If an empty column list is given, no column is analyzed by <codeph>COMPUTE
+        STATS</codeph>.
     </p>
 
     <p rev="2.1.0">
@@ -92,39 +124,45 @@ COMPUTE INCREMENTAL STATS [<varname>db_name</varname>.]<varname>table_name</varn
       <codeph>COMPUTE STATS</codeph> statement. Such tables display <codeph>false</codeph> under the
       <codeph>Incremental stats</codeph> column of the <codeph>SHOW TABLE STATS</codeph> output.
     </p>
-
     <note>
-      Because many of the most performance-critical and resource-intensive operations rely on table and column
-      statistics to construct accurate and efficient plans, <codeph>COMPUTE STATS</codeph> is an important step at
-      the end of your ETL process. Run <codeph>COMPUTE STATS</codeph> on all tables as your first step during
-      performance tuning for slow queries, or troubleshooting for out-of-memory conditions:
-      <ul>
-        <li>
-          Accurate statistics help Impala construct an efficient query plan for join queries, improving performance
-          and reducing memory usage.
-        </li>
-
-        <li>
-          Accurate statistics help Impala distribute the work effectively for insert operations into Parquet
-          tables, improving performance and reducing memory usage.
-        </li>
-
-        <li rev="1.3.0">
-          Accurate statistics help Impala estimate the memory required for each query, which is important when you
-          use resource management features, such as admission control and the YARN resource management framework.
-          The statistics help Impala to achieve high concurrency, full utilization of available memory, and avoid
-          contention with workloads from other Hadoop components.
-        </li>
-        <li rev="IMPALA-4572">
-          In <keyword keyref="impala28_full"/> and higher, when you run the
-          <codeph>COMPUTE STATS</codeph> or <codeph>COMPUTE INCREMENTAL STATS</codeph>
-          statement against a Parquet table, Impala automatically applies the query
-          option setting <codeph>MT_DOP=4</codeph> to increase the amount of intra-node
-          parallelism during this CPU-intensive operation. See <xref keyref="mt_dop"/>
-          for details about what this query option does and how to use it with
-          CPU-intensive <codeph>SELECT</codeph> statements.
-        </li>
-      </ul>
+      <p>
+        Because many of the most performance-critical and resource-intensive
+        operations rely on table and column statistics to construct accurate and
+        efficient plans, <codeph>COMPUTE STATS</codeph> is an important step at
+        the end of your ETL process. Run <codeph>COMPUTE STATS</codeph> on all
+        tables as your first step during performance tuning for slow queries, or
+        troubleshooting for out-of-memory conditions:
+        <ul>
+          <li>
+            Accurate statistics help Impala construct an efficient query plan
+            for join queries, improving performance and reducing memory usage.
+          </li>
+          <li>
+            Accurate statistics help Impala distribute the work effectively
+            for insert operations into Parquet tables, improving performance and
+            reducing memory usage.
+          </li>
+          <li rev="1.3.0">
+            Accurate statistics help Impala estimate the memory
+            required for each query, which is important when you use resource
+            management features, such as admission control and the YARN resource
+            management framework. The statistics help Impala to achieve high
+            concurrency, full utilization of available memory, and avoid
+            contention with workloads from other Hadoop components.
+          </li>
+          <li rev="IMPALA-4572">
+            In <keyword keyref="impala28_full"/> and
+            higher, when you run the <codeph>COMPUTE STATS</codeph> or
+              <codeph>COMPUTE INCREMENTAL STATS</codeph> statement against a
+            Parquet table, Impala automatically applies the query option setting
+              <codeph>MT_DOP=4</codeph> to increase the amount of intra-node
+            parallelism during this CPU-intensive operation. See <xref
+              keyref="mt_dop"/> for details about what this query option does
+            and how to use it with CPU-intensive <codeph>SELECT</codeph>
+            statements.
+          </li>
+        </ul>
+      </p>
     </note>
 
     <p rev="IMPALA-1654">