You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/19 18:12:12 UTC

[1/8] impala git commit: IMPALA-6868: [DOCS] Removed kerberos_reinit_interval flag

Repository: impala
Updated Branches:
  refs/heads/master 39f986ecf -> 5bbcfaf22


IMPALA-6868: [DOCS] Removed kerberos_reinit_interval flag

Change-Id: If04c6cf73d92562b6a236ce244b8d499a0d91949
Cherry-picks: not for 2.x.
Reviewed-on: http://gerrit.cloudera.org:8080/10103
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/353f7d41
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/353f7d41
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/353f7d41

Branch: refs/heads/master
Commit: 353f7d41f9a5d262006fc945b1d5c8f66025c316
Parents: 39f986e
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Wed Apr 18 10:14:32 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 01:26:44 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_kerberos.xml    |  8 --------
 docs/topics/impala_scalability.xml | 26 +++++++++++---------------
 2 files changed, 11 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/353f7d41/docs/topics/impala_kerberos.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_kerberos.xml b/docs/topics/impala_kerberos.xml
index 5d97aeb..5032fba 100644
--- a/docs/topics/impala_kerberos.xml
+++ b/docs/topics/impala_kerberos.xml
@@ -351,12 +351,4 @@ $ chown impala:impala impala-http.keytab</codeblock>
     </conbody>
   </concept>
 
-  <concept rev="IMPALA-2294" id="kerberos_overhead_memory_usage">
-  <title>Kerberos-Related Memory Overhead for Large Clusters</title>
-  <conbody>
-    <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_intro"/>
-    <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_start" conrefend="vm_overcommit_memory_end"/>
-  </conbody>
-  </concept>
-
 </concept>

http://git-wip-us.apache.org/repos/asf/impala/blob/353f7d41/docs/topics/impala_scalability.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_scalability.xml b/docs/topics/impala_scalability.xml
index 22e8e72..79cc0c4 100644
--- a/docs/topics/impala_scalability.xml
+++ b/docs/topics/impala_scalability.xml
@@ -962,25 +962,21 @@ While these authentication requests are being processed, any submitted Impala qu
 During this period, the KDC and DNS may be slow to respond to requests from components other than Impala,
 so other secure services might be affected temporarily.
 </p>
-
-<p>
-  To reduce the frequency  of the <codeph>kinit</codeph> renewal that initiates
-  a new set of authentication requests, increase the <codeph>kerberos_reinit_interval</codeph>
-  configuration setting for the <cmdname>impalad</cmdname> daemons. Currently, the default is 60 minutes.
-  Consider using a higher value such as 360 (6 hours).
-</p>
+  <p>
+    In <keyword keyref="impala212_full"/> or earlier, to reduce the
+    frequency of the <codeph>kinit</codeph> renewal that initiates a new set
+    of authentication requests, increase the <codeph>kerberos_reinit_interval</codeph>
+    configuration setting for the <codeph>impalad</codeph> daemons. Currently,
+    the default is 60 minutes. Consider using a higher value such as 360 (6 hours).
+      </p>
+  <p>
+    The <codeph>kerberos_reinit_interval</codeph> configuration setting is removed
+    in <keyword keyref="impala30_full"/>, and the above step is no longer needed.
+  </p>
 
 </conbody>
 </concept>
 
-  <concept rev="IMPALA-2294" id="kerberos_overhead_memory_usage">
-  <title>Kerberos-Related Memory Overhead for Large Clusters</title>
-  <conbody>
-    <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_intro"/>
-    <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_start" conrefend="../shared/impala_common.xml#common/vm_overcommit_memory_end"/>
-  </conbody>
-  </concept>
-
   <concept id="scalability_hotspots" rev="2.5.0 IMPALA-2696">
     <title>Avoiding CPU Hotspots for HDFS Cached Data</title>
     <conbody>


[4/8] impala git commit: IMPALA-6651: [DOCS] Fine grained privileges

Posted by ta...@apache.org.
IMPALA-6651: [DOCS] Fine grained privileges

Change-Id: I7b018bf847537ed461df6d9caee99f90b139f8ab
Cherry-picks: not for 2.x.
Reviewed-on: http://gerrit.cloudera.org:8080/10079
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/22714a7a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/22714a7a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/22714a7a

Branch: refs/heads/master
Commit: 22714a7ab127b3871aa1cc5c97ff415a48ce2f85
Parents: ddc795d
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Sat Apr 14 08:32:06 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 04:30:01 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_grant.xml  | 163 +++++++++++++++++++++++++++++++------
 docs/topics/impala_revoke.xml |  43 ++++++----
 2 files changed, 165 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/22714a7a/docs/topics/impala_grant.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_grant.xml b/docs/topics/impala_grant.xml
index 03ad518..956a458 100644
--- a/docs/topics/impala_grant.xml
+++ b/docs/topics/impala_grant.xml
@@ -40,10 +40,9 @@ under the License.
   <conbody>
 
     <p rev="2.0.0">
-      <indexterm audience="hidden">GRANT statement</indexterm>
-<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
-      The <codeph>GRANT</codeph> statement grants roles or privileges on specified objects to groups. Only Sentry
-      administrative users can grant roles to a group.
+      <indexterm audience="hidden">GRANT statement</indexterm> The
+        <codeph>GRANT</codeph> statement grants a privilege on a specified object
+      to a role or grants a role to a group.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -54,8 +53,8 @@ GRANT <varname>privilege</varname> ON <varname>object_type</varname> <varname>ob
    TO [ROLE] <varname>roleName</varname>
    [WITH GRANT OPTION]
 
-<ph rev="2.3.0">privilege ::= SELECT | SELECT(<varname>column_name</varname>) | INSERT | ALL</ph>
-object_type ::= TABLE | DATABASE | SERVER | URI
+<ph id="privileges" rev="3.0">privilege ::= ALL | ALTER | CREATE | DROP | INSERT | REFRESH | SELECT | SELECT(<varname>column_name</varname>)</ph>
+<ph id="priv_objs" rev="3.0">object_type ::= TABLE | DATABASE | SERVER | URI</ph>
 </codeblock>
 
     <p>
@@ -67,36 +66,148 @@ object_type ::= TABLE | DATABASE | SERVER | URI
     <p conref="../shared/impala_common.xml#common/privileges_blurb"/>
 
     <p>
-<!-- To do: The wording here can be fluid, and it's reused in several statements. Turn into a conref. -->
-      Only administrative users (initially, a predefined set of users specified in the Sentry service configuration
-      file) can use this statement.
+      Only administrative users (initially, a predefined set of users
+      specified in the Sentry service configuration file) can use this
+      statement.
     </p>
+    <p>Only Sentry administrative users can grant roles to a group. </p>
+
+    <p> The <codeph>WITH GRANT OPTION</codeph> clause allows members of the
+      specified role to issue <codeph>GRANT</codeph> and <codeph>REVOKE</codeph>
+      statements for those same privileges. Hence, if a role has the
+        <codeph>ALL</codeph> privilege on a database and the <codeph>WITH GRANT
+        OPTION</codeph> set, users granted that role can execute
+        <codeph>GRANT</codeph>/<codeph>REVOKE</codeph> statements only for that
+      database or child tables of the database. This means a user could revoke
+      the privileges of the user that provided them the <codeph>GRANT
+        OPTION</codeph>. </p>
+
+    <p> Impala does not currently support revoking only the <codeph>WITH GRANT
+        OPTION</codeph> from a privilege previously granted to a role. To remove
+      the <codeph>WITH GRANT OPTION</codeph>, revoke the privilege and grant it
+      again without the <codeph>WITH GRANT OPTION</codeph> flag. </p>
 
+    <p rev="2.3.0 collevelauth">
+      The ability to grant or revoke <codeph>SELECT</codeph> privilege on specific columns is available
+      in <keyword keyref="impala23_full"/> and higher. See <xref keyref="sg_hive_sql"/> for details.
+    </p>
     <p>
-      The <codeph>WITH GRANT OPTION</codeph> clause allows members of the specified role to issue
-      <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements for those same privileges
-<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
-      Hence, if a role has the <codeph>ALL</codeph> privilege on a database and the <codeph>WITH GRANT
-      OPTION</codeph> set, users granted that role can execute <codeph>GRANT</codeph>/<codeph>REVOKE</codeph>
-      statements only for that database or child tables of the database. This means a user could revoke the
-      privileges of the user that provided them the <codeph>GRANT OPTION</codeph>.
+      <b>Usage notes:</b>
     </p>
 
     <p>
-<!-- Copied from Sentry docs. Turn into conref. Except I changed Hive to Impala. -->
-      Impala does not currently support revoking only the <codeph>WITH GRANT OPTION</codeph> from a privilege
-      previously granted to a role. To remove the <codeph>WITH GRANT OPTION</codeph>, revoke the privilege and
-      grant it again without the <codeph>WITH GRANT OPTION</codeph> flag.
+      You can only grant the <codeph>ALL</codeph> privilege to the
+        <codeph>URI</codeph> object. Finer-grained privileges mentioned below on
+      a <codeph>URI</codeph> are not supported.
     </p>
 
-    <p rev="2.3.0 collevelauth">
-      The ability to grant or revoke <codeph>SELECT</codeph> privilege on specific columns is available
-      in <keyword keyref="impala23_full"/> and higher. See <xref keyref="sg_hive_sql"/> for details.
+    <p>
+      Starting in <keyword keyref="impala30_full"/>, finer grained privileges
+      are enforced as below.<simpletable frame="all" relcolwidth="1* 1* 1*"
+        id="simpletable_kmb_ppn_ndb">
+        <sthead>
+          <stentry>Privilege</stentry>
+          <stentry>Scope</stentry>
+          <stentry>SQL Allowed to Execute</stentry>
+        </sthead>
+        <strow>
+          <stentry><codeph>REFRESH</codeph></stentry>
+          <stentry><codeph>SERVER</codeph></stentry>
+          <stentry><codeph>INVALIDATE METADATA</codeph> on all tables in all
+                databases<p><codeph>REFRESH</codeph> on all tables and functions
+              in all databases</p></stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>REFRESH</codeph></stentry>
+          <stentry><codeph>DATABASE</codeph></stentry>
+          <stentry><codeph>INVALIDATE METADATA</codeph> on all tables in the
+            named database<p><codeph>REFRESH</codeph> on all tables and
+              functions in the named database</p></stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>REFRESH</codeph></stentry>
+          <stentry><codeph>TABLE</codeph></stentry>
+          <stentry><codeph>INVALIDATE METADATA</codeph> on the named
+                table<p><codeph>REFRESH</codeph> on the named
+            table</p></stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>CREATE</codeph></stentry>
+          <stentry><codeph>SERVER</codeph></stentry>
+          <stentry><codeph>CREATE DATABASE</codeph> on all
+                databases<p><codeph>CREATE TABLE</codeph> on all
+            tables</p></stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>CREATE</codeph></stentry>
+          <stentry><codeph>DATABASE</codeph></stentry>
+          <stentry><codeph>CREATE TABLE</codeph> on all tables in the named
+            database</stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>DROP</codeph></stentry>
+          <stentry><codeph>SERVER</codeph></stentry>
+          <stentry><codeph>DROP DATABASE</codeph> on all databases<p><codeph>DROP
+                TABLE</codeph> on all tables</p></stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>DROP</codeph></stentry>
+          <stentry><codeph>DATABASE</codeph></stentry>
+          <stentry><codeph>DROP DATABASE</codeph> on the named
+                database<p><codeph>DROP TABLE</codeph> on all tables in the
+              named database</p></stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>DROP</codeph></stentry>
+          <stentry><codeph>TABLE</codeph></stentry>
+          <stentry><codeph>DROP TABLE</codeph> on the named table</stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>ALTER</codeph></stentry>
+          <stentry><codeph>SERVER</codeph></stentry>
+          <stentry><codeph>ALTER TABLE</codeph> on all tables</stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>ALTER</codeph></stentry>
+          <stentry><codeph>DATABASE</codeph></stentry>
+          <stentry><codeph>ALTER TABLE</codeph> on the tables in the named
+            database</stentry>
+        </strow>
+        <strow>
+          <stentry><codeph>ALTER</codeph></stentry>
+          <stentry><codeph>TABLE</codeph></stentry>
+          <stentry><codeph>ALTER TABLE</codeph> on the named table</stentry>
+        </strow>
+      </simpletable>
     </p>
 
-<!-- Turn compatibility info into a conref or series of conrefs. (In both GRANT and REVOKE.) -->
-
-<!-- If they diverge during development, consider the version here in GRANT the authoritative one. -->
+    <p>
+      <note>
+        <p>
+          <ul>
+            <li>
+              <codeph>ALTER TABLE RENAME</codeph> requires the
+                <codeph>ALTER</codeph> privilege at the <codeph>TABLE</codeph>
+              level and the <codeph>CREATE</codeph> privilege at the
+                <codeph>DATABASE</codeph> level.
+            </li>
+
+            <li>
+              <codeph>CREATE TABLE AS SELECT</codeph> requires the
+                <codeph>CREATE</codeph> privilege on the database that should
+              contain the new table and the <codeph>SELECT</codeph> privilege on
+              the tables referenced in the query portion of the statement.
+            </li>
+
+            <li>
+              <codeph>COMPUTE STATS</codeph> requires the
+                <codeph>ALTER</codeph> and <codeph>SELECT</codeph> privileges on
+              the target table.
+            </li>
+          </ul>
+        </p>
+      </note>
+    </p>
 
     <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
 

http://git-wip-us.apache.org/repos/asf/impala/blob/22714a7a/docs/topics/impala_revoke.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_revoke.xml b/docs/topics/impala_revoke.xml
index 78eda00..4c997f8 100644
--- a/docs/topics/impala_revoke.xml
+++ b/docs/topics/impala_revoke.xml
@@ -40,12 +40,8 @@ under the License.
   <conbody>
 
     <p rev="2.0.0">
-      <indexterm audience="hidden">REVOKE statement</indexterm>
-<!-- Copied from Sentry docs. Turn into conref. I did some rewording for clarity. -->
-      The <codeph>REVOKE</codeph> statement revokes roles or privileges on a specified object from groups. Only
-      Sentry administrative users can revoke the role from a group. The revocation has a cascading effect. For
-      example, revoking the <codeph>ALL</codeph> privilege on a database also revokes the same privilege for all
-      the tables in that database.
+      The <codeph>REVOKE</codeph> statement revokes roles or
+      privileges on a specified object from groups.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -55,11 +51,29 @@ under the License.
 REVOKE <varname>privilege</varname> ON <varname>object_type</varname> <varname>object_name</varname>
   FROM [ROLE] <varname>role_name</varname>
 
-<ph rev="2.3.0">privilege ::= SELECT | SELECT(<varname>column_name</varname>) | INSERT | ALL</ph>
-object_type ::= TABLE | DATABASE | SERVER | URI
+<ph rev="3.0">
+  privilege ::= ALL | ALTER | CREATE | DROP | INSERT | REFRESH | SELECT | SELECT(<varname>column_name</varname>)
+</ph>
+<ph rev="3.0">
+  object_type ::= TABLE | DATABASE | SERVER | URI
+</ph>
 </codeblock>
 
     <p>
+      See <keyword keyref="grant"/> for the required privileges and the scope
+      for SQL operations.
+    </p>
+
+    <p>
+      The <codeph>ALL</codeph> privilege is a distinct privilege and not a
+      union of all other privileges. Revoking <codeph>SELECT</codeph>,
+        <codeph>INSERT</codeph>, etc. from a role that only has the
+        <codeph>ALL</codeph> privilege has no effect. To reduce the privileges
+      of that role you must <codeph>REVOKE ALL</codeph> and
+        <codeph>GRANT</codeph> the desired privileges.
+    </p>
+
+    <p>
       Typically, the object name is an identifier. For URIs, it is a string literal.
     </p>
 
@@ -75,16 +89,15 @@ object_type ::= TABLE | DATABASE | SERVER | URI
       Only administrative users (those with <codeph>ALL</codeph> privileges on the server, defined in the Sentry
       policy file) can use this statement.
     </p>
-
-<!-- Turn compatibility info into a conref or series of conrefs. (In both GRANT and REVOKE.) -->
+    <p>Only Sentry administrative users can revoke the role from a group.</p>
 
     <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
 
     <p>
       <ul>
         <li>
-          The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements are available in <keyword keyref="impala20_full"/> and
-          higher.
+          The <codeph>REVOKE</codeph> statements are available in <keyword
+            keyref="impala20_full"/> and higher.
         </li>
 
         <li>
@@ -94,9 +107,9 @@ object_type ::= TABLE | DATABASE | SERVER | URI
         </li>
 
         <li>
-          The Impala <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements do not require the
-          <codeph>ROLE</codeph> keyword to be repeated before each role name, unlike the equivalent Hive
-          statements.
+          The Impala <codeph>REVOKE</codeph> statements do not require the
+            <codeph>ROLE</codeph> keyword to be repeated before each role name,
+          unlike the equivalent Hive statements.
         </li>
 
         <li conref="../shared/impala_common.xml#common/grant_revoke_single"/>


[6/8] impala git commit: IMPALA-6878: SentryServicePinger should not print stacktrace at every retry

Posted by ta...@apache.org.
IMPALA-6878: SentryServicePinger should not print stacktrace at every retry

With this patch, SentryServicePinger only prints the stacktrace at the
very end when Sentry Service is unable to start.

Testing:
- Started Sentry in a normal way to make sure no stack trace was printed
- Injected failure in Sentry to see the stack trace at the end

Change-Id: I26f9a141c89692443cb3cdcb6bf62581a93b5ba0
Reviewed-on: http://gerrit.cloudera.org:8080/10108
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5d328f4a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5d328f4a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5d328f4a

Branch: refs/heads/master
Commit: 5d328f4a05198eac756594b140fa1927a67bf479
Parents: 1176a52
Author: Fredy wijaya <fw...@cloudera.com>
Authored: Wed Apr 18 14:02:59 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 04:56:29 2018 +0000

----------------------------------------------------------------------
 .../impala/testutil/SentryServicePinger.java    | 22 ++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5d328f4a/fe/src/test/java/org/apache/impala/testutil/SentryServicePinger.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/testutil/SentryServicePinger.java b/fe/src/test/java/org/apache/impala/testutil/SentryServicePinger.java
index 96a849b..705f58a 100644
--- a/fe/src/test/java/org/apache/impala/testutil/SentryServicePinger.java
+++ b/fe/src/test/java/org/apache/impala/testutil/SentryServicePinger.java
@@ -21,12 +21,12 @@ import org.apache.commons.cli.BasicParser;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.impala.authorization.User;
 import org.apache.impala.authorization.SentryConfig;
+import org.apache.impala.authorization.User;
 import org.apache.impala.util.SentryPolicyService;
+import org.apache.log4j.Level;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Simple class that issues a read-only RPC to the Sentry Service to check if it
@@ -41,6 +41,11 @@ public class SentryServicePinger {
   // Suppress warnings from OptionBuilder.
   @SuppressWarnings("static-access")
   public static void main(String[] args) throws Exception {
+    // Programmatically disable Sentry Thrift logging since Sentry error logging can be
+    // pretty noisy and verbose.
+    org.apache.log4j.Logger logger4j = org.apache.log4j.Logger.getLogger("sentry");
+    logger4j.setLevel(Level.OFF);
+
     // Parse command line options to get config file path.
     Options options = new Options();
     options.addOption(OptionBuilder.withLongOpt("config_file")
@@ -69,6 +74,7 @@ public class SentryServicePinger {
     int sleepSecs = Integer.parseInt(cmdArgs.getOptionValue("sleep_secs"));
 
     sentryConfig.loadConfig();
+    Exception exception = null;
     while (numPings > 0) {
       SentryPolicyService policyService = new SentryPolicyService(sentryConfig);
       try {
@@ -76,12 +82,16 @@ public class SentryServicePinger {
         LOG.info("Sentry Service ping succeeded.");
         System.exit(0);
       } catch (Exception e) {
-        LOG.error(String.format("Error issuing RPC to Sentry Service (attempt %d/%d): ",
-            maxPings - numPings + 1, maxPings), e);
+        exception = e;
+        LOG.error(String.format("Error issuing RPC to Sentry Service (attempt %d/%d)",
+            maxPings - numPings + 1, maxPings));
         Thread.sleep(sleepSecs * 1000);
       }
       --numPings;
     }
+    if (exception != null) {
+      LOG.error("Error starting Sentry Service: ", exception);
+    }
     System.exit(1);
   }
 }


[2/8] impala git commit: IMPALA-6748: [DOCS] Separators when casting STRING to TIMESTAMP

Posted by ta...@apache.org.
IMPALA-6748: [DOCS] Separators when casting STRING to TIMESTAMP

Change-Id: Ib82884d5f56c520712c4391b53b799d518d6a54f
Reviewed-on: http://gerrit.cloudera.org:8080/10052
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/bfb21316
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/bfb21316
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/bfb21316

Branch: refs/heads/master
Commit: bfb21316c79bc07d745603f58a138be57de15d8a
Parents: 353f7d4
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Apr 12 15:52:29 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 01:44:38 2018 +0000

----------------------------------------------------------------------
 docs/shared/impala_common.xml    |  44 +++++-
 docs/topics/impala_timestamp.xml | 245 ++++++++++++++++++++--------------
 2 files changed, 182 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/bfb21316/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index f8ded41..75fceb0 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -1400,12 +1400,44 @@ drop database temp;
       </p>
 
       <p id="timestamp_conversions">
-        Impala automatically converts <codeph>STRING</codeph> literals of the correct format into
-        <codeph>TIMESTAMP</codeph> values. Timestamp values are accepted in the format
-        <codeph>"yyyy-MM-dd HH:mm:ss.SSSSSS"</codeph>, and can consist of just the date, or just the time, with or
-        without the fractional second portion. For example, you can specify <codeph>TIMESTAMP</codeph> values such as
-        <codeph>'1966-07-30'</codeph>, <codeph>'08:30:00'</codeph>, or <codeph>'1985-09-25 17:45:30.005'</codeph>.
-        <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+        Impala automatically converts <codeph>STRING</codeph> literals of the
+        correct format into <codeph>TIMESTAMP</codeph> values. Timestamp values
+        are accepted in the format <codeph>"yyyy-MM-dd HH:mm:ss.SSSSSS"</codeph>,
+        and can consist of just the date, or just the time, with or without the
+        fractional second portion. For example, you can specify <codeph>TIMESTAMP</codeph>
+        values such as <codeph>'1966-07-30'</codeph>, <codeph>'08:30:00'</codeph>,
+        or <codeph>'1985-09-25 17:45:30.005'</codeph>.
+      </p>
+      <p>Leading zeroes are not required in the numbers representing the date
+        component, such as month and date, or the time component, such as
+        month, date, hour, minute, second. For example, Impala accepts both
+        <codeph>"2018-1-1 01:02:03"</codeph> and
+          <codeph>"2018-01-01 1:2:3"</codeph> as valid.</p>
+
+      <p id="cast_string_to_timestamp">
+        When you convert or cast a <codeph>STRING</codeph> literal to <codeph>TIMESTAMP</codeph>,
+        you can use the following separators between the date part and the time part:
+        <ul>
+          <li>
+            <p>
+              One or more space characters
+            </p>
+
+            <p>
+              Example: <codeph>CAST ('2001-01-09   01:05:01' AS TIMESTAMP)</codeph>
+            </p>
+          </li>
+
+          <li>
+            <p>
+              The character “T”
+            </p>
+
+            <p>
+              Example: <codeph>CAST ('2001-01-09T01:05:01' AS TIMESTAMP)</codeph>
+            </p>
+          </li>
+        </ul>
       </p>
 
       <p>

http://git-wip-us.apache.org/repos/asf/impala/blob/bfb21316/docs/topics/impala_timestamp.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_timestamp.xml b/docs/topics/impala_timestamp.xml
index 320f99d..d032e33 100644
--- a/docs/topics/impala_timestamp.xml
+++ b/docs/topics/impala_timestamp.xml
@@ -21,7 +21,13 @@ under the License.
 <concept id="timestamp">
 
   <title>TIMESTAMP Data Type</title>
-  <titlealts audience="PDF"><navtitle>TIMESTAMP</navtitle></titlealts>
+
+  <titlealts audience="PDF">
+
+    <navtitle>TIMESTAMP</navtitle>
+
+  </titlealts>
+
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -36,8 +42,8 @@ under the License.
   <conbody>
 
     <p>
-      A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements, representing a
-      point in time.
+      A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>
+      statements, representing a point in time.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -49,9 +55,9 @@ under the License.
 <codeblock><varname>column_name</varname> TIMESTAMP</codeblock>
 
     <p>
-      <b>Range:</b> Allowed date values range from 1400-01-01 to 9999-12-31; this range is different from the Hive
-      <codeph>TIMESTAMP</codeph> type. Internally, the resolution of the time portion of a
-      <codeph>TIMESTAMP</codeph> value is in nanoseconds.
+      <b>Range:</b> Allowed date values range from 1400-01-01 to 9999-12-31; this range is
+      different from the Hive <codeph>TIMESTAMP</codeph> type. Internally, the resolution of the
+      time portion of a <codeph>TIMESTAMP</codeph> value is in nanoseconds.
     </p>
 
     <p>
@@ -59,16 +65,18 @@ under the License.
     </p>
 
     <p>
-      You can perform date arithmetic by adding or subtracting a specified number of time units, using the
-      <codeph>INTERVAL</codeph> keyword and the <codeph>+</codeph> and <codeph>-</codeph> operators or
-      <codeph>date_add()</codeph> and <codeph>date_sub()</codeph> functions. You can specify units as
-      <codeph>YEAR[S]</codeph>, <codeph>MONTH[S]</codeph>, <codeph>WEEK[S]</codeph>, <codeph>DAY[S]</codeph>,
+      You can perform date arithmetic by adding or subtracting a specified number of time units,
+      using the <codeph>INTERVAL</codeph> keyword and the <codeph>+</codeph> and
+      <codeph>-</codeph> operators or <codeph>date_add()</codeph> and
+      <codeph>date_sub()</codeph> functions. You can specify units as <codeph>YEAR[S]</codeph>,
+      <codeph>MONTH[S]</codeph>, <codeph>WEEK[S]</codeph>, <codeph>DAY[S]</codeph>,
       <codeph>HOUR[S]</codeph>, <codeph>MINUTE[S]</codeph>, <codeph>SECOND[S]</codeph>,
-      <codeph>MILLISECOND[S]</codeph>, <codeph>MICROSECOND[S]</codeph>, and <codeph>NANOSECOND[S]</codeph>. You can
-      only specify one time unit in each interval expression, for example <codeph>INTERVAL 3 DAYS</codeph> or
-      <codeph>INTERVAL 25 HOURS</codeph>, but you can produce any granularity by adding together successive
-      <codeph>INTERVAL</codeph> values, such as <codeph><varname>timestamp_value</varname> + INTERVAL 3 WEEKS -
-      INTERVAL 1 DAY + INTERVAL 10 MICROSECONDS</codeph>.
+      <codeph>MILLISECOND[S]</codeph>, <codeph>MICROSECOND[S]</codeph>, and
+      <codeph>NANOSECOND[S]</codeph>. You can only specify one time unit in each interval
+      expression, for example <codeph>INTERVAL 3 DAYS</codeph> or <codeph>INTERVAL 25
+      HOURS</codeph>, but you can produce any granularity by adding together successive
+      <codeph>INTERVAL</codeph> values, such as <codeph><varname>timestamp_value</varname> +
+      INTERVAL 3 WEEKS - INTERVAL 1 DAY + INTERVAL 10 MICROSECONDS</codeph>.
     </p>
 
     <p>
@@ -86,34 +94,39 @@ insert into auction_details
     </p>
 
     <p>
-      By default, Impala does not store timestamps using the local timezone, to avoid undesired results from
-      unexpected time zone issues. Timestamps are stored and interpreted relative to UTC, both when written to or
-      read from data files, or when converted to or from Unix time values through functions such as
-      <codeph>from_unixtime()</codeph> or <codeph>unix_timestamp()</codeph>. To convert such a
-      <codeph>TIMESTAMP</codeph> value to one that represents the date and time in a specific time zone, convert
-      the original value with the <codeph>from_utc_timestamp()</codeph> function.
+      By default, Impala does not store timestamps using the local timezone, to avoid undesired
+      results from unexpected time zone issues. Timestamps are stored and interpreted relative
+      to UTC, both when written to or read from data files, or when converted to or from Unix
+      time values through functions such as <codeph>from_unixtime()</codeph> or
+      <codeph>unix_timestamp()</codeph>. To convert such a <codeph>TIMESTAMP</codeph> value to
+      one that represents the date and time in a specific time zone, convert the original value
+      with the <codeph>from_utc_timestamp()</codeph> function.
     </p>
 
     <p>
-      Because Impala does not assume that <codeph>TIMESTAMP</codeph> values are in any particular time zone, you
-      must be conscious of the time zone aspects of data that you query, insert, or convert.
+      Because Impala does not assume that <codeph>TIMESTAMP</codeph> values are in any
+      particular time zone, you must be conscious of the time zone aspects of data that you
+      query, insert, or convert.
     </p>
 
     <p>
-      For consistency with Unix system calls, the <codeph>TIMESTAMP</codeph> returned by the <codeph>now()</codeph>
-      function represents the local time in the system time zone, rather than in UTC. To store values relative to
-      the current time in a portable way, convert any <codeph>now()</codeph> return values using the
-      <codeph>to_utc_timestamp()</codeph> function first. For example, the following example shows that the current
-      time in California (where this Impala cluster is located) is shortly after 2 PM. If that value was written to a data
-      file, and shipped off to a distant server to be analyzed alongside other data from far-flung locations, the
-      dates and times would not match up precisely because of time zone differences. Therefore, the
-      <codeph>to_utc_timestamp()</codeph> function converts it using a common reference point, the UTC time zone
-      (descended from the old Greenwich Mean Time standard). The <codeph>'PDT'</codeph> argument indicates that the
-      original value is from the Pacific time zone with Daylight Saving Time in effect. When servers in all
-      geographic locations run the same transformation on any local date and time values (with the appropriate time
-      zone argument), the stored data uses a consistent representation. Impala queries can use functions such as
-      <codeph>EXTRACT()</codeph>, <codeph>MIN()</codeph>, <codeph>AVG()</codeph>, and so on to do time-series
-      analysis on those timestamps.
+      For consistency with Unix system calls, the <codeph>TIMESTAMP</codeph> returned by the
+      <codeph>now()</codeph> function represents the local time in the system time zone, rather
+      than in UTC. To store values relative to the current time in a portable way, convert any
+      <codeph>now()</codeph> return values using the <codeph>to_utc_timestamp()</codeph>
+      function first. For example, the following example shows that the current time in
+      California (where this Impala cluster is located) is shortly after 2 PM. If that value was
+      written to a data file, and shipped off to a distant server to be analyzed alongside other
+      data from far-flung locations, the dates and times would not match up precisely because of
+      time zone differences. Therefore, the <codeph>to_utc_timestamp()</codeph> function
+      converts it using a common reference point, the UTC time zone (descended from the old
+      Greenwich Mean Time standard). The <codeph>'PDT'</codeph> argument indicates that the
+      original value is from the Pacific time zone with Daylight Saving Time in effect. When
+      servers in all geographic locations run the same transformation on any local date and time
+      values (with the appropriate time zone argument), the stored data uses a consistent
+      representation. Impala queries can use functions such as <codeph>EXTRACT()</codeph>,
+      <codeph>MIN()</codeph>, <codeph>AVG()</codeph>, and so on to do time-series analysis on
+      those timestamps.
     </p>
 
 <codeblock>[localhost:21000] > select now();
@@ -131,12 +144,14 @@ insert into auction_details
 </codeblock>
 
     <p>
-      The converse function, <codeph>from_utc_timestamp()</codeph>, lets you take stored <codeph>TIMESTAMP</codeph>
-      data or calculated results and convert back to local date and time for processing on the application side.
-      The following example shows how you might represent some future date (such as the ending date and time of an
-      auction) in UTC, and then convert back to local time when convenient for reporting or other processing. The
-      final query in the example tests whether this arbitrary UTC date and time has passed yet, by converting it
-      back to the local time zone and comparing it against the current date and time.
+      The converse function, <codeph>from_utc_timestamp()</codeph>, lets you take stored
+      <codeph>TIMESTAMP</codeph> data or calculated results and convert back to local date and
+      time for processing on the application side. The following example shows how you might
+      represent some future date (such as the ending date and time of an auction) in UTC, and
+      then convert back to local time when convenient for reporting or other processing. The
+      final query in the example tests whether this arbitrary UTC date and time has passed yet,
+      by converting it back to the local time zone and comparing it against the current date and
+      time.
     </p>
 
 <codeblock>[localhost:21000] > select to_utc_timestamp(now() + interval 2 weeks, 'PDT');
@@ -160,35 +175,42 @@ insert into auction_details
 </codeblock>
 
     <p rev="2.2.0">
-      If you have data files written by Hive, those <codeph>TIMESTAMP</codeph> values represent the local timezone
-      of the host where the data was written, potentially leading to inconsistent results when processed by Impala.
-      To avoid compatibility problems or having to code workarounds, you can specify one or both of these
-      <cmdname>impalad</cmdname> startup flags: <codeph>--use_local_tz_for_unix_timestamp_conversions=true</codeph>
+      If you have data files written by Hive, those <codeph>TIMESTAMP</codeph> values represent
+      the local timezone of the host where the data was written, potentially leading to
+      inconsistent results when processed by Impala. To avoid compatibility problems or having
+      to code workarounds, you can specify one or both of these <cmdname>impalad</cmdname>
+      startup flags: <codeph>--use_local_tz_for_unix_timestamp_conversions=true</codeph>
       <codeph>-convert_legacy_hive_parquet_utc_timestamps=true</codeph>. Although
-      <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> is turned off by default to avoid performance overhead, where practical
-      turn it on when processing <codeph>TIMESTAMP</codeph> columns in Parquet files written by Hive, to avoid unexpected behavior.
+      <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> is turned off by default to
+      avoid performance overhead, where practical turn it on when processing
+      <codeph>TIMESTAMP</codeph> columns in Parquet files written by Hive, to avoid unexpected
+      behavior.
     </p>
 
     <p rev="2.2.0">
-      The <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting affects conversions from
-      <codeph>TIMESTAMP</codeph> to <codeph>BIGINT</codeph>, or from <codeph>BIGINT</codeph>
-      to <codeph>TIMESTAMP</codeph>. By default, Impala treats all <codeph>TIMESTAMP</codeph> values as UTC,
-      to simplify analysis of time-series data from different geographic regions. When you enable the
+      The <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting affects
+      conversions from <codeph>TIMESTAMP</codeph> to <codeph>BIGINT</codeph>, or from
+      <codeph>BIGINT</codeph> to <codeph>TIMESTAMP</codeph>. By default, Impala treats all
+      <codeph>TIMESTAMP</codeph> values as UTC, to simplify analysis of time-series data from
+      different geographic regions. When you enable the
       <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting, these operations
-      treat the input values as if they are in the local tie zone of the host doing the processing.
-      See <xref href="impala_datetime_functions.xml#datetime_functions"/> for the list of functions
-      affected by the <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting.
+      treat the input values as if they are in the local time zone of the host doing the
+      processing. See <xref
+        href="impala_datetime_functions.xml#datetime_functions"/>
+      for the list of functions affected by the
+      <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting.
     </p>
 
     <p>
-      The following sequence of examples shows how the interpretation of <codeph>TIMESTAMP</codeph> values in
-      Parquet tables is affected by the setting of the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph>
-      setting.
+      The following sequence of examples shows how the interpretation of
+      <codeph>TIMESTAMP</codeph> values in Parquet tables is affected by the setting of the
+      <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting.
     </p>
 
     <p>
       Regardless of the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting,
-      <codeph>TIMESTAMP</codeph> columns in text tables can be written and read interchangeably by Impala and Hive:
+      <codeph>TIMESTAMP</codeph> columns in text tables can be written and read interchangeably
+      by Impala and Hive:
     </p>
 
 <codeblock>Impala DDL and queries for text table:
@@ -220,11 +242,12 @@ Time taken: 1.245 seconds, Fetched: 2 row(s)
 </codeblock>
 
     <p>
-      When the table uses Parquet format, Impala expects any time zone adjustment to be applied prior to writing,
-      while <codeph>TIMESTAMP</codeph> values written by Hive are adjusted to be in the UTC time zone. When Hive
-      queries Parquet data files that it wrote, it adjusts the <codeph>TIMESTAMP</codeph> values back to the local
-      time zone, while Impala does no conversion. Hive does no time zone conversion when it queries Impala-written
-      Parquet files.
+      When the table uses Parquet format, Impala expects any time zone adjustment to be applied
+      prior to writing, while <codeph>TIMESTAMP</codeph> values written by Hive are adjusted to
+      be in the UTC time zone. When Hive queries Parquet data files that it wrote, it adjusts
+      the <codeph>TIMESTAMP</codeph> values back to the local time zone, while Impala does no
+      conversion. Hive does no time zone conversion when it queries Impala-written Parquet
+      files.
     </p>
 
 <codeblock>Impala DDL and queries for Parquet table:
@@ -264,10 +287,11 @@ Time taken: 0.197 seconds, Fetched: 2 row(s)
 </codeblock>
 
     <p>
-      The discrepancy arises when Impala queries the Hive-created Parquet table. The underlying values in the
-      <codeph>TIMESTAMP</codeph> column are different from the ones written by Impala, even though they were copied
-      from one table to another by an <codeph>INSERT ... SELECT</codeph> statement in Hive. Hive did an implicit
-      conversion from the local time zone to UTC as it wrote the values to Parquet.
+      The discrepancy arises when Impala queries the Hive-created Parquet table. The underlying
+      values in the <codeph>TIMESTAMP</codeph> column are different from the ones written by
+      Impala, even though they were copied from one table to another by an <codeph>INSERT ...
+      SELECT</codeph> statement in Hive. Hive did an implicit conversion from the local time
+      zone to UTC as it wrote the values to Parquet.
     </p>
 
 <codeblock>Impala query for TIMESTAMP values from Impala-written and Hive-written data:
@@ -310,11 +334,12 @@ Fetched 2 row(s) in 0.20s
 </codeblock>
 
     <p>
-      When the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting is enabled, Impala recognizes
-      the Parquet data files written by Hive, and applies the same UTC-to-local-timezone conversion logic during
-      the query as Hive uses, making the contents of the Impala-written <codeph>P1</codeph> table and the
-      Hive-written <codeph>H1</codeph> table appear identical, whether represented as <codeph>TIMESTAMP</codeph>
-      values or the underlying <codeph>BIGINT</codeph> integers:
+      When the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting is enabled,
+      Impala recognizes the Parquet data files written by Hive, and applies the same
+      UTC-to-local-timezone conversion logic during the query as Hive uses, making the contents
+      of the Impala-written <codeph>P1</codeph> table and the Hive-written <codeph>H1</codeph>
+      table appear identical, whether represented as <codeph>TIMESTAMP</codeph> values or the
+      underlying <codeph>BIGINT</codeph> integers:
     </p>
 
 <codeblock>[localhost:21000] > select x from p1;
@@ -355,14 +380,23 @@ Fetched 2 row(s) in 0.22s
       <b>Conversions:</b>
     </p>
 
-    <p conref="../shared/impala_common.xml#common/timestamp_conversions"/>
+    <p conref="../shared/impala_common.xml#common/timestamp_conversions"
+      conrefend="../shared/impala_common.xml#common/cast_string_to_timestamp"/>
 
     <p>
-      In Impala 1.3 and higher, the <codeph>FROM_UNIXTIME()</codeph> and <codeph>UNIX_TIMESTAMP()</codeph>
-      functions allow a wider range of format strings, with more flexibility in element order, repetition of letter
-      placeholders, and separator characters. In <keyword keyref="impala23_full"/> and higher, the <codeph>UNIX_TIMESTAMP()</codeph>
-      function also allows a numeric timezone offset to be specified as part of the input string.
-      See <xref href="impala_datetime_functions.xml#datetime_functions"/> for details.
+      <ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
+    </p>
+
+    <p>
+      In Impala 1.3 and higher, the <codeph>FROM_UNIXTIME()</codeph> and
+      <codeph>UNIX_TIMESTAMP()</codeph> functions allow a wider range of format strings, with
+      more flexibility in element order, repetition of letter placeholders, and separator
+      characters. In <keyword
+        keyref="impala23_full"/> and higher, the
+      <codeph>UNIX_TIMESTAMP()</codeph> function also allows a numeric timezone offset to be
+      specified as part of the input string. See
+      <xref
+        href="impala_datetime_functions.xml#datetime_functions"/> for details.
     </p>
 
     <p conref="../shared/impala_common.xml#common/y2k38"/>
@@ -372,11 +406,13 @@ Fetched 2 row(s) in 0.22s
     </p>
 
     <p>
-      Although you cannot use a <codeph>TIMESTAMP</codeph> column as a partition key, you can extract the
-      individual years, months, days, hours, and so on and partition based on those columns. Because the partition
-      key column values are represented in HDFS directory names, rather than as fields in the data files
-      themselves, you can also keep the original <codeph>TIMESTAMP</codeph> values if desired, without duplicating
-      data or wasting storage space. See <xref href="impala_partitioning.xml#partition_key_columns"/> for more
+      Although you cannot use a <codeph>TIMESTAMP</codeph> column as a partition key, you can
+      extract the individual years, months, days, hours, and so on and partition based on those
+      columns. Because the partition key column values are represented in HDFS directory names,
+      rather than as fields in the data files themselves, you can also keep the original
+      <codeph>TIMESTAMP</codeph> values if desired, without duplicating data or wasting storage
+      space. See <xref
+        href="impala_partitioning.xml#partition_key_columns"/> for more
       details on partitioning with date and time values.
     </p>
 
@@ -409,21 +445,23 @@ ERROR: AnalysisException: Type 'TIMESTAMP' is not supported as partition-column
     <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
 
     <p>
-      If you cast a <codeph>STRING</codeph> with an unrecognized format to a <codeph>TIMESTAMP</codeph>, the result
-      is <codeph>NULL</codeph> rather than an error. Make sure to test your data pipeline to be sure any textual
-      date and time values are in a format that Impala <codeph>TIMESTAMP</codeph> can recognize.
+      If you cast a <codeph>STRING</codeph> with an unrecognized format to a
+      <codeph>TIMESTAMP</codeph>, the result is <codeph>NULL</codeph> rather than an error. Make
+      sure to test your data pipeline to be sure any textual date and time values are in a
+      format that Impala <codeph>TIMESTAMP</codeph> can recognize.
     </p>
 
     <p conref="../shared/impala_common.xml#common/avro_no_timestamp"/>
 
     <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
+
     <p conref="../shared/impala_common.xml#common/kudu_timestamp_details"/>
 
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
 
     <p>
-      The following examples demonstrate using <codeph>TIMESTAMP</codeph> values
-      with built-in functions:
+      The following examples demonstrate using <codeph>TIMESTAMP</codeph> values with built-in
+      functions:
     </p>
 
 <codeblock>select cast('1966-07-30' as timestamp);
@@ -441,8 +479,8 @@ select now();                               -- Returns current date and time in
 </codeblock>
 
     <p>
-      The following examples demonstrate using <codeph>TIMESTAMP</codeph> values
-      with HDFS-backed tables:
+      The following examples demonstrate using <codeph>TIMESTAMP</codeph> values with
+      HDFS-backed tables:
     </p>
 
 <codeblock>create table dates_and_times (t timestamp);
@@ -451,8 +489,8 @@ insert into dates_and_times values
 </codeblock>
 
     <p rev="IMPALA-5137">
-      The following examples demonstrate using <codeph>TIMESTAMP</codeph> values
-      with Kudu tables:
+      The following examples demonstrate using <codeph>TIMESTAMP</codeph> values with Kudu
+      tables:
     </p>
 
 <codeblock rev="IMPALA-5137">create table timestamp_t (x int primary key, s string, t timestamp, b bigint)
@@ -495,16 +533,21 @@ select s, t, b from timestamp_t order by t;
       </li>
 
       <li>
-        To convert to or from different date formats, or perform date arithmetic, use the date and time functions
-        described in <xref href="impala_datetime_functions.xml#datetime_functions"/>. In particular, the
-        <codeph>from_unixtime()</codeph> function requires a case-sensitive format string such as
-        <codeph>"yyyy-MM-dd HH:mm:ss.SSSS"</codeph>, matching one of the allowed variations of a
-        <codeph>TIMESTAMP</codeph> value (date plus time, only date, only time, optional fractional seconds).
+        To convert to or from different date formats, or perform date arithmetic, use the date
+        and time functions described in
+        <xref
+          href="impala_datetime_functions.xml#datetime_functions"/>. In
+        particular, the <codeph>from_unixtime()</codeph> function requires a case-sensitive
+        format string such as <codeph>"yyyy-MM-dd HH:mm:ss.SSSS"</codeph>, matching one of the
+        allowed variations of a <codeph>TIMESTAMP</codeph> value (date plus time, only date,
+        only time, optional fractional seconds).
       </li>
 
       <li>
-        See <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"/> for details about differences in
-        <codeph>TIMESTAMP</codeph> handling between Impala and Hive.
+        See <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"
+        /> for
+        details about differences in <codeph>TIMESTAMP</codeph> handling between Impala and
+        Hive.
       </li>
     </ul>
 


[7/8] impala git commit: IMPALA-6887: Typo in authz-policy.ini.template

Posted by ta...@apache.org.
IMPALA-6887: Typo in authz-policy.ini.template

Before: alter_functionl_text_lzo
After: alter_functional_text_lzo

This patch also adds missing test cases for ALTER privilege on
functional_text_lzo database.

Testing:
- Ran all front-end tests

Change-Id: I6aea8d71dda39838e9e70160018ce2c5fc73df21
Reviewed-on: http://gerrit.cloudera.org:8080/10113
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/08a1a7a9
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/08a1a7a9
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/08a1a7a9

Branch: refs/heads/master
Commit: 08a1a7a967f19e41e8523a9020320bbad5cdac77
Parents: 5d328f4
Author: Fredy wijaya <fw...@cloudera.com>
Authored: Wed Apr 18 18:19:23 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 07:23:29 2018 +0000

----------------------------------------------------------------------
 .../impala/analysis/AuthorizationTest.java      | 20 ++++++++++++++++++++
 fe/src/test/resources/authz-policy.ini.template |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/08a1a7a9/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
index f0b7332..b04e138 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
@@ -1479,6 +1479,26 @@ public class AuthorizationTest extends FrontendTestBase {
     AuthzOk("ALTER TABLE functional.alltypeserror SET CACHED IN 'testPool'");
     AuthzOk("ALTER TABLE functional.alltypeserror RECOVER PARTITIONS");
 
+    // User has ALTER privilege on functional_text_lzo database.
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror ADD COLUMNS (c1 int)");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror REPLACE COLUMNS (c1 int)");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror CHANGE id c1 int");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror DROP id");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror RENAME TO " +
+        "functional_seq_snap.t1");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror SET FILEFORMAT PARQUET");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror SET LOCATION " +
+        "'/test-warehouse/new_table'");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror SET TBLPROPERTIES " +
+        "('a'='b', 'c'='d')");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror SET LOCATION " +
+        "'hdfs://localhost:20500/test-warehouse/new_table'");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror " +
+        "PARTITION(year=2009, month=1) SET LOCATION " +
+        "'hdfs://localhost:20500/test-warehouse/new_table'");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror SET CACHED IN 'testPool'");
+    AuthzOk("ALTER TABLE functional_text_lzo.alltypeserror RECOVER PARTITIONS");
+
     // Alter table and set location to a path the user does not have access to.
     // User needs ALTER on table and ALL on URI.
     AuthzError("ALTER TABLE functional_seq_snap.alltypes SET LOCATION " +

http://git-wip-us.apache.org/repos/asf/impala/blob/08a1a7a9/fe/src/test/resources/authz-policy.ini.template
----------------------------------------------------------------------
diff --git a/fe/src/test/resources/authz-policy.ini.template b/fe/src/test/resources/authz-policy.ini.template
index 26ed70b..e26ee55 100644
--- a/fe/src/test/resources/authz-policy.ini.template
+++ b/fe/src/test/resources/authz-policy.ini.template
@@ -68,7 +68,7 @@ alter_functional_alltypes_view =\
     server=server1->db=functional->table=alltypes_view->action=alter
 insert_functional_text_lzo = server=server1->db=functional_text_lzo->action=insert
 create_functional_text_lzo = server=server1->db=functional_text_lzo->action=create
-alter_functionl_text_lzo = server=server1->db=functional_text_lzo->action=alter
+alter_functional_text_lzo = server=server1->db=functional_text_lzo->action=alter
 drop_functional_text_lzo = server=server1->db=functional_text_lzo->action=drop
 select_column_level_functional =\
     server=server1->db=functional->table=alltypessmall->column=id->action=select,\


[5/8] impala git commit: IMPALA-6886: [DOCS] Removed impala_cluster_sizing.xml

Posted by ta...@apache.org.
IMPALA-6886: [DOCS] Removed impala_cluster_sizing.xml

Change-Id: I03d605d33ed6ced809074b1fc96def30ad0887fd
Reviewed-on: http://gerrit.cloudera.org:8080/10109
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1176a524
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1176a524
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1176a524

Branch: refs/heads/master
Commit: 1176a5244a8dd1e48f7c39a51d23cb91f5ccc5e5
Parents: 22714a7
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Wed Apr 18 16:49:51 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 04:40:06 2018 +0000

----------------------------------------------------------------------
 docs/impala.ditamap                   |   2 +-
 docs/topics/impala_cluster_sizing.xml | 371 -----------------------------
 2 files changed, 1 insertion(+), 372 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1176a524/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index f40ae8f..abf7bdb 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -47,7 +47,7 @@ under the License.
   </topicref>
   <topicref href="topics/impala_planning.xml">
     <topicref href="topics/impala_prereqs.xml#prereqs"/>
-    <topicref href="topics/impala_cluster_sizing.xml"/>
+    <!-- Removed per Alan Choi's request on 4/18/2018 <topicref href="topics/impala_cluster_sizing.xml"/> -->
     <topicref href="topics/impala_schema_design.xml"/>
   </topicref>
   <topicref audience="standalone" href="topics/impala_install.xml#install">

http://git-wip-us.apache.org/repos/asf/impala/blob/1176a524/docs/topics/impala_cluster_sizing.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_cluster_sizing.xml b/docs/topics/impala_cluster_sizing.xml
deleted file mode 100644
index 7b395c5..0000000
--- a/docs/topics/impala_cluster_sizing.xml
+++ /dev/null
@@ -1,371 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept id="cluster_sizing">
-
-  <title>Cluster Sizing Guidelines for Impala</title>
-  <titlealts audience="PDF"><navtitle>Cluster Sizing</navtitle></titlealts>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-      <data name="Category" value="Clusters"/>
-      <data name="Category" value="Planning"/>
-      <data name="Category" value="Sizing"/>
-      <data name="Category" value="Deploying"/>
-      <!-- Hoist by my own petard. Memory is an important theme of this topic but that's in a <section> title. -->
-      <data name="Category" value="Sectionated Pages"/>
-      <data name="Category" value="Memory"/>
-      <data name="Category" value="Scalability"/>
-      <data name="Category" value="Proof of Concept"/>
-      <data name="Category" value="Requirements"/>
-      <data name="Category" value="Guidelines"/>
-      <data name="Category" value="Best Practices"/>
-      <data name="Category" value="Administrators"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p>
-      <indexterm audience="hidden">cluster sizing</indexterm>
-      This document provides a very rough guideline to estimate the size of a cluster needed for a specific
-      customer application. You can use this information when planning how much and what type of hardware to
-      acquire for a new cluster, or when adding Impala workloads to an existing cluster.
-    </p>
-
-    <note>
-      Before making purchase or deployment decisions, consult organizations with relevant experience
-      to verify the conclusions about hardware requirements based on your data volume and workload.
-    </note>
-
-<!--    <p outputclass="toc inpage"/> -->
-
-    <p>
-      Always use hosts with identical specifications and capacities for all the nodes in the cluster. Currently,
-      Impala divides the work evenly between cluster nodes, regardless of their exact hardware configuration.
-      Because work can be distributed in different ways for different queries, if some hosts are overloaded
-      compared to others in terms of CPU, memory, I/O, or network, you might experience inconsistent performance
-      and overall slowness
-    </p>
-
-    <p>
-      For analytic workloads with star/snowflake schemas, and using consistent hardware for all nodes (64 GB RAM,
-      12 2 TB hard drives, 2x E5-2630L 12 cores total, 10 GB network), the following table estimates the number of
-      DataNodes needed in the cluster based on data size and the number of concurrent queries, for workloads
-      similar to TPC-DS benchmark queries:
-    </p>
-
-    <table>
-      <title>Cluster size estimation based on the number of concurrent queries and data size with a 20 second average query response time</title>
-      <tgroup cols="6">
-        <colspec colnum="1" colname="col1"/>
-        <colspec colnum="2" colname="col2"/>
-        <colspec colnum="3" colname="col3"/>
-        <colspec colnum="4" colname="col4"/>
-        <colspec colnum="5" colname="col5"/>
-        <colspec colnum="6" colname="col6"/>
-        <thead>
-          <row>
-            <entry>
-              Data Size
-            </entry>
-            <entry>
-              1 query
-            </entry>
-            <entry>
-              10 queries
-            </entry>
-            <entry>
-              100 queries
-            </entry>
-            <entry>
-              1000 queries
-            </entry>
-            <entry>
-              2000 queries
-            </entry>
-          </row>
-        </thead>
-        <tbody>
-          <row>
-            <entry>
-              <b>250 GB</b>
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              5
-            </entry>
-            <entry>
-              35
-            </entry>
-            <entry>
-              70
-            </entry>
-          </row>
-          <row>
-            <entry>
-              <b>500 GB</b>
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              10
-            </entry>
-            <entry>
-              70
-            </entry>
-            <entry>
-              135
-            </entry>
-          </row>
-          <row>
-            <entry>
-              <b>1 TB</b>
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              15
-            </entry>
-            <entry>
-              135
-            </entry>
-            <entry>
-              270
-            </entry>
-          </row>
-          <row>
-            <entry>
-              <b>15 TB</b>
-            </entry>
-            <entry>
-              2
-            </entry>
-            <entry>
-              20
-            </entry>
-            <entry>
-              200
-            </entry>
-            <entry>
-              N/A
-            </entry>
-            <entry>
-              N/A
-            </entry>
-          </row>
-          <row>
-            <entry>
-              <b>30 TB</b>
-            </entry>
-            <entry>
-              4
-            </entry>
-            <entry>
-              40
-            </entry>
-            <entry>
-              400
-            </entry>
-            <entry>
-              N/A
-            </entry>
-            <entry>
-              N/A
-            </entry>
-          </row>
-          <row>
-            <entry>
-              <b>60 TB</b>
-            </entry>
-            <entry>
-              8
-            </entry>
-            <entry>
-              80
-            </entry>
-            <entry>
-              800
-            </entry>
-            <entry>
-              N/A
-            </entry>
-            <entry>
-              N/A
-            </entry>
-          </row>
-        </tbody>
-      </tgroup>
-    </table>
-
-    <section id="sizing_factors">
-
-      <title>Factors Affecting Scalability</title>
-
-      <p>
-        A typical analytic workload (TPC-DS style queries) using recommended hardware is usually CPU-bound. Each
-        node can process roughly 1.6 GB/sec. Both CPU-bound and disk-bound workloads can scale almost linearly with
-        cluster size. However, for some workloads, the scalability might be bounded by the network, or even by
-        memory.
-      </p>
-
-      <p>
-        If the workload is already network bound (on a 10 GB network), increasing the cluster size won’t reduce
-        the network load; in fact, a larger cluster could increase network traffic because some queries involve
-        <q>broadcast</q> operations to all DataNodes. Therefore, boosting the cluster size does not improve query
-        throughput in a network-constrained environment.
-      </p>
-
-      <p>
-        Let’s look at a memory-bound workload. A workload is memory-bound if Impala cannot run any additional
-        concurrent queries because all memory allocated has already been consumed, but neither CPU, disk, nor
-        network is saturated yet. This can happen because currently Impala uses only a single core per node to
-        process join and aggregation queries. For a node with 128 GB of RAM, if a join node takes 50 GB, the system
-        cannot run more than 2 such queries at the same time.
-      </p>
-
-      <p>
-        Therefore, at most 2 cores are used. Throughput can still scale almost linearly even for a memory-bound
-        workload. It’s just that the CPU will not be saturated. Per-node throughput will be lower than 1.6
-        GB/sec. Consider increasing the memory per node.
-      </p>
-
-      <p>
-        As long as the workload is not network- or memory-bound, we can use the 1.6 GB/second per node as the
-        throughput estimate.
-      </p>
-    </section>
-
-    <section id="sizing_details">
-
-      <title>A More Precise Approach</title>
-
-      <p>
-        A more precise sizing estimate would require not only queries per minute (QPM), but also an average data
-        size scanned per query (D). With the proper partitioning strategy, D is usually a fraction of the total
-        data size. The following equation can be used as a rough guide to estimate the number of nodes (N) needed:
-      </p>
-
-<codeblock>Eq 1: N &gt; QPM * D / 100 GB
-</codeblock>
-
-      <p>
-        Here is an example. Suppose, on average, a query scans 50 GB of data and the average response time is
-        required to be 15 seconds or less when there are 100 concurrent queries. The QPM is 100/15*60 = 400. We can
-        estimate the number of node using our equation above.
-      </p>
-
-<codeblock>N &gt; QPM * D / 100GB
-N &gt; 400 * 50GB / 100GB
-N &gt; 200
-</codeblock>
-
-      <p>
-        Because this figure is a rough estimate, the corresponding number of nodes could be between 100 and 500.
-      </p>
-
-      <p>
-        Depending on the complexity of the query, the processing rate of query might change. If the query has more
-        joins, aggregation functions, or CPU-intensive functions such as string processing or complex UDFs, the
-        process rate will be lower than 1.6 GB/second per node. On the other hand, if the query only does scan and
-        filtering on numbers, the processing rate can be higher.
-      </p>
-    </section>
-
-    <section id="sizing_mem_estimate">
-
-      <title>Estimating Memory Requirements</title>
-      <!--
-  <prolog>
-    <metadata>
-      <data name="Category" value="Memory"/>
-    </metadata>
-  </prolog>
-      -->
-
-      <p>
-        Impala can handle joins between multiple large tables. Make sure that statistics are collected for all the
-        joined tables, using the <codeph><xref href="impala_compute_stats.xml#compute_stats">COMPUTE
-        STATS</xref></codeph> statement. However, joining big tables does consume more memory. Follow the steps
-        below to calculate the minimum memory requirement.
-      </p>
-
-      <p>
-        Suppose you are running the following join:
-      </p>
-
-<codeblock>select a.*, b.col_1, b.col_2, … b.col_n
-from a, b
-where a.key = b.key
-and b.col_1 in (1,2,4...)
-and b.col_4 in (....);
-</codeblock>
-
-      <p>
-        And suppose table <codeph>B</codeph> is smaller than table <codeph>A</codeph> (but still a large table).
-      </p>
-
-      <p>
-        The memory requirement for the query is the right-hand table (<codeph>B</codeph>), after decompression,
-        filtering (<codeph>b.col_n in ...</codeph>) and after projection (only using certain columns) must be less
-        than the total memory of the entire cluster.
-      </p>
-
-<codeblock>Cluster Total Memory Requirement  = Size of the smaller table *
-  selectivity factor from the predicate *
-  projection factor * compression ratio
-</codeblock>
-
-      <p>
-        In this case, assume that table <codeph>B</codeph> is 100 TB in Parquet format with 200 columns. The
-        predicate on <codeph>B</codeph> (<codeph>b.col_1 in ...and b.col_4 in ...</codeph>) will select only 10% of
-        the rows from <codeph>B</codeph> and for projection, we are only projecting 5 columns out of 200 columns.
-        Usually, Snappy compression gives us 3 times compression, so we estimate a 3x compression factor.
-      </p>
-
-<codeblock>Cluster Total Memory Requirement  = Size of the smaller table *
-  selectivity factor from the predicate *
-  projection factor * compression ratio
-  = 100TB * 10% * 5/200 * 3
-  = 0.75TB
-  = 750GB
-</codeblock>
-
-      <p>
-        So, if you have a 10-node cluster, each node has 128 GB of RAM and you give 80% to Impala, then you have 1
-        TB of usable memory for Impala, which is more than 750GB. Therefore, your cluster can handle join queries
-        of this magnitude.
-      </p>
-    </section>
-  </conbody>
-</concept>


[3/8] impala git commit: IMPALA-5310: [DOCS] Document TABLESAMPLE clause for COMPUTE STATS

Posted by ta...@apache.org.
IMPALA-5310: [DOCS] Document TABLESAMPLE clause for COMPUTE STATS

Change-Id: I214b63db391bd35562f5ea9091508005f83b2fcc
Reviewed-on: http://gerrit.cloudera.org:8080/8975
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ddc795d8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ddc795d8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ddc795d8

Branch: refs/heads/master
Commit: ddc795d86a909770490d28ed84156c42757c2821
Parents: bfb2131
Author: John Russell <jr...@cloudera.com>
Authored: Mon Jan 8 14:41:16 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 03:06:04 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_compute_stats.xml | 30 ++++++++++++++++++++++--------
 docs/topics/impala_tablesample.xml   |  6 ++++++
 2 files changed, 28 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/ddc795d8/docs/topics/impala_compute_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compute_stats.xml b/docs/topics/impala_compute_stats.xml
index b62972c..95343f4 100644
--- a/docs/topics/impala_compute_stats.xml
+++ b/docs/topics/impala_compute_stats.xml
@@ -39,18 +39,20 @@ under the License.
   <conbody>
 
     <p>
-      <indexterm audience="hidden">COMPUTE STATS statement</indexterm>
-      Gathers information about volume and distribution of data in a table and all associated columns and
-      partitions. The information is stored in the metastore database, and used by Impala to help optimize queries.
-      For example, if Impala can determine that a table is large or small, or has many or few distinct values it
-      can organize parallelize the work appropriately for a join query or insert operation. For details about the
-      kinds of information gathered by this statement, see <xref href="impala_perf_stats.xml#perf_stats"/>.
+      <indexterm audience="hidden">COMPUTE STATS statement</indexterm> The
+      COMPUTE STATS statement gathers information about volume and distribution
+      of data in a table and all associated columns and partitions. The
+      information is stored in the metastore database, and used by Impala to
+      help optimize queries. For example, if Impala can determine that a table
+      is large or small, or has many or few distinct values it can organize and
+      parallelize the work appropriately for a join query or insert operation.
+      For details about the kinds of information gathered by this statement, see
+        <xref href="impala_perf_stats.xml#perf_stats"/>.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
-<codeblock rev="impala-3562">COMPUTE STATS
-  [<varname>db_name</varname>.]<varname>table_name</varname> [ ( <varname>column_list</varname> ) ]
+<codeblock rev="2.1.0"><ph rev="2.12.0 IMPALA-5310">COMPUTE STATS [<varname>db_name</varname>.]<varname>table_name</varname>  [ ( <varname>column_list</varname> ) ] [TABLESAMPLE SYSTEM(<varname>percentage</varname>) [REPEATABLE(<varname>seed</varname>)]]</ph>
 
 <varname>column_list</varname> ::= <varname>column_name</varname> [ , <varname>column_name</varname>, ... ]
 
@@ -104,6 +106,18 @@ COMPUTE INCREMENTAL STATS [<varname>db_name</varname>.]<varname>table_name</varn
         STATS</codeph>.
     </p>
 
+    <p rev="2.12.0 IMPALA-5310">
+      In <keyword keyref="impala212_full"/> and
+      higher, an optional <codeph>TABLESAMPLE</codeph> clause immediately after
+      a table reference specifies that the <codeph>COMPUTE STATS</codeph>
+      operation only processes a specified percentage of the table data. For
+      tables that are so large that a full <codeph>COMPUTE STATS</codeph>
+      operation is impractical, you can use <codeph>COMPUTE STATS</codeph> with
+      a <codeph>TABLESAMPLE</codeph> clause to extrapolate statistics from a
+      sample of the table data. See <keyword keyref="perf_stats"/> for details
+      about the experimental stats extrapolation and sampling features.
+    </p>
+
     <p rev="2.1.0">
       The <codeph>COMPUTE INCREMENTAL STATS</codeph> variation is a shortcut for partitioned tables that works on a
       subset of partitions rather than the entire table. The incremental nature makes it suitable for large tables

http://git-wip-us.apache.org/repos/asf/impala/blob/ddc795d8/docs/topics/impala_tablesample.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_tablesample.xml b/docs/topics/impala_tablesample.xml
index f60c5be..e5123cb 100644
--- a/docs/topics/impala_tablesample.xml
+++ b/docs/topics/impala_tablesample.xml
@@ -81,6 +81,12 @@ under the License.
 
     <p conref="../shared/impala_common.xml#common/added_in_290"/>
 
+    <p rev="2.12.0 IMPALA-5310">
+      See <keyword keyref="compute_stats"/> for the
+        <codeph>TABLESAMPLE</codeph> clause used in the <codeph>COMPUTE
+        STATS</codeph> statement.
+    </p>
+
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
 
     <p>


[8/8] impala git commit: IMPALA-6886: [DOCS] Removed the missed keyref for impala_cluster_sizing

Posted by ta...@apache.org.
IMPALA-6886: [DOCS] Removed the missed keyref for impala_cluster_sizing

Change-Id: Ieab3eac8027c57a9a1a30a77ac0e07686a92f2b0
Reviewed-on: http://gerrit.cloudera.org:8080/10118
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5bbcfaf2
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5bbcfaf2
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5bbcfaf2

Branch: refs/heads/master
Commit: 5bbcfaf22e6603c6dd799116c804dd8be499370d
Parents: 08a1a7a
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Apr 19 10:37:26 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 19 18:00:53 2018 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap         | 2 +-
 docs/topics/impala_new_features.xml | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5bbcfaf2/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 88bdc71..8faea8e 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10618,7 +10618,7 @@ under the License.
   <keydef href="topics/impala_hadoop.xml" keys="intro_hadoop"/>
   <keydef href="topics/impala_planning.xml" keys="planning"/>
   <keydef href="topics/impala_prereqs.xml" keys="prereqs"/>
-  <keydef href="topics/impala_cluster_sizing.xml" keys="cluster_sizing"/>
+  <!-- Removed <keydef href="topics/impala_cluster_sizing.xml" keys="cluster_sizing"/> -->
   <keydef href="topics/impala_schema_design.xml" keys="schema_design"/>
   <keydef href="topics/impala_install.xml" keys="install"/>
   <keydef href="topics/impala_config.xml" keys="config"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/5bbcfaf2/docs/topics/impala_new_features.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_new_features.xml b/docs/topics/impala_new_features.xml
index deb15e0..c5109e3 100644
--- a/docs/topics/impala_new_features.xml
+++ b/docs/topics/impala_new_features.xml
@@ -2648,9 +2648,7 @@ under the License.
         <li>
           <p>
             The documentation provides additional guidance for planning tasks. <ph audience="PDF">See
-            <xref href="impala_planning.xml#planning"/>.</ph> <ph audience="PDF">In particular, see
-            <xref href="impala_cluster_sizing.xml#cluster_sizing"/> before you purchase or repurpose hardware for a
-            cluster to run Impala.</ph>
+            <xref href="impala_planning.xml#planning"/>.</ph>
           </p>
         </li>