You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2018/11/27 11:02:51 UTC

[01/11] impala git commit: Revert "IMPALA-7148: Make test_profile_fragment_instances() more robust"

Repository: impala
Updated Branches:
  refs/heads/branch-3.1.0 031d5690f -> 7d3e9beee


Revert "IMPALA-7148: Make test_profile_fragment_instances() more robust"

This reverts commit 1d412a09ac807c2f7ab39ac4fdc3e21c29024190.


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b0672c81
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b0672c81
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b0672c81

Branch: refs/heads/branch-3.1.0
Commit: b0672c81b1e6b7a7eb67f9d2e6180cdf2691a25e
Parents: 031d569
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Tue Nov 27 11:56:29 2018 +0100
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:56:29 2018 +0100

----------------------------------------------------------------------
 tests/query_test/test_observability.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b0672c81/tests/query_test/test_observability.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index 2950ee8..c351e02 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -157,11 +157,9 @@ class TestObservability(ImpalaTestSuite):
         with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem)
         select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
         join (select * from l LIMIT 2000000) b on a.l_orderkey = -b.l_orderkey;""")
-    # There are 3 scan nodes and each appears in the profile n+1 times (for n fragment
-    # instances + the averaged fragment). n depends on how data is loaded and scheduler's
-    # decision.
-    n = results.runtime_profile.count("HDFS_SCAN_NODE")
-    assert n > 0 and n % 3 == 0
+    # There are 3 scan nodes and each appears in the profile 4 times (for 3 fragment
+    # instances + the averaged fragment).
+    assert results.runtime_profile.count("HDFS_SCAN_NODE") == 12
     # There are 3 exchange nodes and each appears in the profile 2 times (for 1 fragment
     # instance + the averaged fragment).
     assert results.runtime_profile.count("EXCHANGE_NODE") == 6


[09/11] impala git commit: IMPALA-7233: [DOCS] Support for IANA timezone database

Posted by bo...@apache.org.
IMPALA-7233: [DOCS] Support for IANA timezone database

- Updated the timezone section
- Added the sections on customizing timezone db and aliases

Change-Id: Id400cda5a1be321063d17e0ee6337e92a5da732a
Reviewed-on: http://gerrit.cloudera.org:8080/11946
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
(cherry picked from commit e421223c5d4ef4a7536a223779834064d74df75d)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3d1afb4c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3d1afb4c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3d1afb4c

Branch: refs/heads/branch-3.1.0
Commit: 3d1afb4c4c691abb01c3876ddb3023520a22427a
Parents: df6e92f
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Nov 16 13:21:22 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 12:00:10 2018 +0100

----------------------------------------------------------------------
 docs/impala.ditamap                     |   4 +-
 docs/topics/impala_custom_timezones.xml | 181 +++++++++++
 docs/topics/impala_timestamp.xml        | 452 +++++++++------------------
 3 files changed, 340 insertions(+), 297 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3d1afb4c/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index e4c35a7..9b58786 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -110,7 +110,9 @@ under the License.
       <topicref href="topics/impala_smallint.xml"/>
       <topicref href="topics/impala_string.xml"/>
       <topicref href="topics/impala_struct.xml"/>
-      <topicref href="topics/impala_timestamp.xml"/>
+      <topicref href="topics/impala_timestamp.xml">
+        <topicref href="topics/impala_custom_timezones.xml"/>
+      </topicref>
       <topicref href="topics/impala_tinyint.xml"/>
       <topicref href="topics/impala_varchar.xml"/>
       <topicref href="topics/impala_complex_types.xml"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/3d1afb4c/docs/topics/impala_custom_timezones.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_custom_timezones.xml b/docs/topics/impala_custom_timezones.xml
new file mode 100644
index 0000000..be651e9
--- /dev/null
+++ b/docs/topics/impala_custom_timezones.xml
@@ -0,0 +1,181 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="custom_timezone">
+
+  <title>Customizing Time Zones</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>Customizing Time Zones</navtitle>
+
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Dates and Times"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Starting in <keyword keyref="impala31">Impala 3.1</keyword>, you can customize the time
+      zone definitions used in Impala.
+      <ul>
+        <li>
+          <p>
+            By default, Impala uses the OS’s time zone database located in
+            <codeph>/usr/share/zoneinfo</codeph>. This directory contains the IANA timezone
+            database in a compiled binary format. The contents of the <codeph>zoneinfo</codeph>
+            directory are controlled by the OS’s package manager.
+          </p>
+        </li>
+
+        <li>
+          <p >
+            New startup flags have been introduced:
+          </p>
+          <ul>
+            <li >
+              <codeph>--hdfs_zone_info_zip</codeph>: This flag allows Impala administrators to
+              specify a custom timezone database. The flag should be set to a shared (not
+              necessarily HDFS) path that points to a zip archive of a custom IANA timezone
+              database. The timezone database is expected to be in a compiled binary format. If
+              the startup flag is set, Impala will use the specified timezone database instead
+              of the default <codeph>/usr/share/zoneinfo</codeph> database. The timezone db
+              upgrade process is described in detail below.
+            </li>
+
+            <li >
+              <p >
+                <codeph>--hdfs_zone_alias_conf</codeph>: This flag allows Impala administrators
+                to specify definitions for custom timezone aliases. The flag should be set to a
+                shared (not necessarily HDFS) path that specifies a config file containing
+                custom timezone alias definitions. This config file can be used as a workaround
+                for users who want to keep using their legacy timezone names. Configuring custom
+                aliases is described in detail below.
+              </p>
+            </li>
+          </ul>
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      <b>Upgrading custom IANA time zone database:</b>
+      <ol>
+        <li >
+          Download the latest IANA time zone database distribution:
+<codeblock>git clone https://github.com/eggert/tz</codeblock>
+          <p >
+            Alternatively, download a specific tzdb version from:
+<codeblock> https://www.iana.org/time-zones/repository</codeblock>
+          </p>
+        </li>
+
+        <li >
+          Build timezone tools:
+<codeblock>cd tz
+make TOPDIR=tzdata install</codeblock>
+        </li>
+
+        <li >
+          Generate the compiled binary time zone database:
+<codeblock>./zic -d ./tzdata/etc/zoneinfo africa antarctica asia australasia backward backzone etcetera europe factory northamerica pacificnew southamerica systemv</codeblock>
+        </li>
+
+        <li >
+          Create zip archive:
+<codeblock>pushd ./tzdata/etc
+zip -r zoneinfo.zip zoneinfo
+popd</codeblock>
+        </li>
+
+        <li >
+          Copy the time zone database to HDFS:
+<codeblock>hdfs dfs -mkdir -p /tzdb/latest
+hdfs dfs -copyFromLocal ./tzdata/etc/zoneinfo.zip /tzdb/latest</codeblock>
+        </li>
+
+        <li >
+          Set the <codeph>--hdfs_zone_info_zip</codeph> startup flag to
+          <codeph>/tzdb/latest/zoneinfo.zip</codeph> as an <codeph>impalad</codeph> safety
+          valve.
+        </li>
+
+        <li >
+          Perform a full restart of the Impala service.
+        </li>
+      </ol>
+    </p>
+
+    <p>
+      <b>Configuring custom time zone aliases:</b>
+    </p>
+
+    <p>
+      <ol>
+        <li >
+          Create a <codeph>tzalias.conf</codeph> config file that contains time zone alias
+          definitions formatted as <codeph><i>ALIAS</i></codeph><codeph> =
+          </codeph><codeph><i>DEFINITION</i></codeph>. For example:
+<codeblock>#
+# Define aliases for existing timezone names:
+#
+Universal Coordinated Time = UTC
+Mideast/Riyadh89 = Asia/Riyadh
+PDT = America/Los_Angeles
+#
+# Define aliases as UTC offsets in seconds:
+#
+GMT-01:00 = 3600
+GMT+01:00 = -3600</codeblock>
+        </li>
+
+        <li >
+          Copy the config file to HDFS:
+<codeblock>hdfs dfs -mkdir -p /tzdb
+hdfs dfs -copyFromLocal tzalias.conf /tzdb</codeblock>
+        </li>
+
+        <li >
+          Set the <codeph>--hdfs_zone_alias_conf</codeph> startup flag to
+          <codeph>/tzdb/tzalias.conf</codeph> as an <codeph>impalad</codeph> safety valve.
+        </li>
+
+        <li >
+          Perform a full restart of the Impala service.
+        </li>
+      </ol>
+    </p>
+
+    <p>
+      <b>Added in:</b> <keyword keyref="impala31"/>
+    </p>
+
+  </conbody>
+
+</concept>

http://git-wip-us.apache.org/repos/asf/impala/blob/3d1afb4c/docs/topics/impala_timestamp.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_timestamp.xml b/docs/topics/impala_timestamp.xml
index d032e33..15dca34 100644
--- a/docs/topics/impala_timestamp.xml
+++ b/docs/topics/impala_timestamp.xml
@@ -42,339 +42,196 @@ under the License.
   <conbody>
 
     <p>
-      A data type used in <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph>
-      statements, representing a point in time.
+      The <codeph>TIMESTAMP</codeph> data type holds a value that represents a point in time.
     </p>
 
-    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
-
     <p>
-      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+      Internally, the resolution of the time portion of a <codeph>TIMESTAMP</codeph> value is in
+      nanoseconds.
     </p>
 
-<codeblock><varname>column_name</varname> TIMESTAMP</codeblock>
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
     <p>
-      <b>Range:</b> Allowed date values range from 1400-01-01 to 9999-12-31; this range is
-      different from the Hive <codeph>TIMESTAMP</codeph> type. Internally, the resolution of the
-      time portion of a <codeph>TIMESTAMP</codeph> value is in nanoseconds.
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
     </p>
 
-    <p>
-      <b>INTERVAL expressions:</b>
-    </p>
+<codeblock><varname>column_name</varname> TIMESTAMP
 
-    <p>
-      You can perform date arithmetic by adding or subtracting a specified number of time units,
-      using the <codeph>INTERVAL</codeph> keyword and the <codeph>+</codeph> and
-      <codeph>-</codeph> operators or <codeph>date_add()</codeph> and
-      <codeph>date_sub()</codeph> functions. You can specify units as <codeph>YEAR[S]</codeph>,
-      <codeph>MONTH[S]</codeph>, <codeph>WEEK[S]</codeph>, <codeph>DAY[S]</codeph>,
-      <codeph>HOUR[S]</codeph>, <codeph>MINUTE[S]</codeph>, <codeph>SECOND[S]</codeph>,
-      <codeph>MILLISECOND[S]</codeph>, <codeph>MICROSECOND[S]</codeph>, and
-      <codeph>NANOSECOND[S]</codeph>. You can only specify one time unit in each interval
-      expression, for example <codeph>INTERVAL 3 DAYS</codeph> or <codeph>INTERVAL 25
-      HOURS</codeph>, but you can produce any granularity by adding together successive
-      <codeph>INTERVAL</codeph> values, such as <codeph><varname>timestamp_value</varname> +
-      INTERVAL 3 WEEKS - INTERVAL 1 DAY + INTERVAL 10 MICROSECONDS</codeph>.
-    </p>
+<varname>timestamp</varname> [+ | -] INTERVAL <varname>interval</varname>
+DATE_ADD (<varname>timestamp</varname>, INTERVAL <varname>interval</varname> <varname>time_unit</varname>)</codeblock>
 
     <p>
-      For example:
+      <b>Range:</b> 1400-01-01 to 9999-12-31
     </p>
 
-<codeblock>select now() + interval 1 day;
-select date_sub(now(), interval 5 minutes);
-insert into auction_details
-  select auction_id, auction_start_time, auction_start_time + interval 2 days + interval 12 hours
-  from new_auctions;</codeblock>
-
     <p>
-      <b>Time zones:</b>
+      Out of range <codeph>TIMESTAMP</codeph> values are converted to NULL.
     </p>
 
     <p>
-      By default, Impala does not store timestamps using the local timezone, to avoid undesired
-      results from unexpected time zone issues. Timestamps are stored and interpreted relative
-      to UTC, both when written to or read from data files, or when converted to or from Unix
-      time values through functions such as <codeph>from_unixtime()</codeph> or
-      <codeph>unix_timestamp()</codeph>. To convert such a <codeph>TIMESTAMP</codeph> value to
-      one that represents the date and time in a specific time zone, convert the original value
-      with the <codeph>from_utc_timestamp()</codeph> function.
+      The range of Impala <codeph>TIMESTAMP</codeph> is different from the Hive
+      <codeph>TIMESTAMP</codeph> type. Refer to
+      <xref
+        href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-timestamp"
+        format="html" scope="external">Hive
+      documentation</xref> for details.
     </p>
 
     <p>
-      Because Impala does not assume that <codeph>TIMESTAMP</codeph> values are in any
-      particular time zone, you must be conscious of the time zone aspects of data that you
-      query, insert, or convert.
+      <b>INTERVAL expressions:</b>
     </p>
 
     <p>
-      For consistency with Unix system calls, the <codeph>TIMESTAMP</codeph> returned by the
-      <codeph>now()</codeph> function represents the local time in the system time zone, rather
-      than in UTC. To store values relative to the current time in a portable way, convert any
-      <codeph>now()</codeph> return values using the <codeph>to_utc_timestamp()</codeph>
-      function first. For example, the following example shows that the current time in
-      California (where this Impala cluster is located) is shortly after 2 PM. If that value was
-      written to a data file, and shipped off to a distant server to be analyzed alongside other
-      data from far-flung locations, the dates and times would not match up precisely because of
-      time zone differences. Therefore, the <codeph>to_utc_timestamp()</codeph> function
-      converts it using a common reference point, the UTC time zone (descended from the old
-      Greenwich Mean Time standard). The <codeph>'PDT'</codeph> argument indicates that the
-      original value is from the Pacific time zone with Daylight Saving Time in effect. When
-      servers in all geographic locations run the same transformation on any local date and time
-      values (with the appropriate time zone argument), the stored data uses a consistent
-      representation. Impala queries can use functions such as <codeph>EXTRACT()</codeph>,
-      <codeph>MIN()</codeph>, <codeph>AVG()</codeph>, and so on to do time-series analysis on
-      those timestamps.
+      You can perform date arithmetic by adding or subtracting a specified number of time units,
+      using the <codeph>INTERVAL</codeph> keyword and the <codeph>+</codeph> operator, the
+      <codeph>-</codeph> operator, <codeph>date_add()</codeph> or <codeph>date_sub()</codeph>.
     </p>
 
-<codeblock>[localhost:21000] > select now();
-+-------------------------------+
-| now()                         |
-+-------------------------------+
-| 2015-04-09 14:07:46.580465000 |
-+-------------------------------+
-[localhost:21000] > select to_utc_timestamp(now(), 'PDT');
-+--------------------------------+
-| to_utc_timestamp(now(), 'pdt') |
-+--------------------------------+
-| 2015-04-09 21:08:07.664547000  |
-+--------------------------------+
-</codeblock>
-
     <p>
-      The converse function, <codeph>from_utc_timestamp()</codeph>, lets you take stored
-      <codeph>TIMESTAMP</codeph> data or calculated results and convert back to local date and
-      time for processing on the application side. The following example shows how you might
-      represent some future date (such as the ending date and time of an auction) in UTC, and
-      then convert back to local time when convenient for reporting or other processing. The
-      final query in the example tests whether this arbitrary UTC date and time has passed yet,
-      by converting it back to the local time zone and comparing it against the current date and
-      time.
+      The following units are supported for <codeph><i>time_unit</i></codeph> in the
+      <codeph>INTERVAL</codeph> clause:
+      <ul>
+        <li>
+          <codeph>YEAR[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>MONTH[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>WEEK[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>DAY[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>HOUR[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>MINUTE[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>SECOND[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>MILLISECOND[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>MICROSECOND[S]</codeph>
+        </li>
+
+        <li>
+          <codeph>NANOSECOND[S]</codeph>
+        </li>
+      </ul>
     </p>
 
-<codeblock>[localhost:21000] > select to_utc_timestamp(now() + interval 2 weeks, 'PDT');
-+---------------------------------------------------+
-| to_utc_timestamp(now() + interval 2 weeks, 'pdt') |
-+---------------------------------------------------+
-| 2015-04-23 21:08:34.152923000                     |
-+---------------------------------------------------+
-[localhost:21000] > select from_utc_timestamp('2015-04-23 21:08:34.152923000','PDT');
-+------------------------------------------------------------+
-| from_utc_timestamp('2015-04-23 21:08:34.152923000', 'pdt') |
-+------------------------------------------------------------+
-| 2015-04-23 14:08:34.152923000                              |
-+------------------------------------------------------------+
-[localhost:21000] > select from_utc_timestamp('2015-04-23 21:08:34.152923000','PDT') &lt; now();
-+--------------------------------------------------------------------+
-| from_utc_timestamp('2015-04-23 21:08:34.152923000', 'pdt') &lt; now() |
-+--------------------------------------------------------------------+
-| false                                                              |
-+--------------------------------------------------------------------+
-</codeblock>
-
-    <p rev="2.2.0">
-      If you have data files written by Hive, those <codeph>TIMESTAMP</codeph> values represent
-      the local timezone of the host where the data was written, potentially leading to
-      inconsistent results when processed by Impala. To avoid compatibility problems or having
-      to code workarounds, you can specify one or both of these <cmdname>impalad</cmdname>
-      startup flags: <codeph>--use_local_tz_for_unix_timestamp_conversions=true</codeph>
-      <codeph>-convert_legacy_hive_parquet_utc_timestamps=true</codeph>. Although
-      <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> is turned off by default to
-      avoid performance overhead, where practical turn it on when processing
-      <codeph>TIMESTAMP</codeph> columns in Parquet files written by Hive, to avoid unexpected
-      behavior.
+    <p>
+      You can only specify one time unit in each interval expression, for example
+      <codeph>INTERVAL 3 DAYS</codeph> or <codeph>INTERVAL 25 HOURS</codeph>, but you can
+      produce any granularity by adding together successive <codeph>INTERVAL</codeph> values,
+      such as <codeph><varname>timestamp_value</varname> + INTERVAL 3 WEEKS - INTERVAL 1 DAY +
+      INTERVAL 10 MICROSECONDS</codeph>.
     </p>
 
-    <p rev="2.2.0">
-      The <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting affects
-      conversions from <codeph>TIMESTAMP</codeph> to <codeph>BIGINT</codeph>, or from
-      <codeph>BIGINT</codeph> to <codeph>TIMESTAMP</codeph>. By default, Impala treats all
-      <codeph>TIMESTAMP</codeph> values as UTC, to simplify analysis of time-series data from
-      different geographic regions. When you enable the
-      <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting, these operations
-      treat the input values as if they are in the local tie zone of the host doing the
-      processing. See <xref
-        href="impala_datetime_functions.xml#datetime_functions"/>
-      for the list of functions affected by the
-      <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting.
-    </p>
+    <p conref="../shared/impala_common.xml#common/internals_16_bytes"/>
 
     <p>
-      The following sequence of examples shows how the interpretation of
-      <codeph>TIMESTAMP</codeph> values in Parquet tables is affected by the setting of the
-      <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting.
+      <b>Time zones:</b>
     </p>
 
     <p>
-      Regardless of the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting,
-      <codeph>TIMESTAMP</codeph> columns in text tables can be written and read interchangeably
-      by Impala and Hive:
+      By default, Impala stores and interprets <codeph>TIMESTAMP</codeph> values in UTC time
+      zone when writing to data files, reading from data files, or converting to and from system
+      time values through functions.
     </p>
 
-<codeblock>Impala DDL and queries for text table:
-
-[localhost:21000] > create table t1 (x timestamp);
-[localhost:21000] > insert into t1 values (now()), (now() + interval 1 day);
-[localhost:21000] > select x from t1;
-+-------------------------------+
-| x                             |
-+-------------------------------+
-| 2015-04-07 15:43:02.892403000 |
-| 2015-04-08 15:43:02.892403000 |
-+-------------------------------+
-[localhost:21000] > select to_utc_timestamp(x, 'PDT') from t1;
-+-------------------------------+
-| to_utc_timestamp(x, 'pdt')    |
-+-------------------------------+
-| 2015-04-07 22:43:02.892403000 |
-| 2015-04-08 22:43:02.892403000 |
-+-------------------------------+
-
-Hive query for text table:
-
-hive> select * from t1;
-OK
-2015-04-07 15:43:02.892403
-2015-04-08 15:43:02.892403
-Time taken: 1.245 seconds, Fetched: 2 row(s)
-</codeblock>
-
     <p>
-      When the table uses Parquet format, Impala expects any time zone adjustment to be applied
-      prior to writing, while <codeph>TIMESTAMP</codeph> values written by Hive are adjusted to
-      be in the UTC time zone. When Hive queries Parquet data files that it wrote, it adjusts
-      the <codeph>TIMESTAMP</codeph> values back to the local time zone, while Impala does no
-      conversion. Hive does no time zone conversion when it queries Impala-written Parquet
-      files.
+      When you set the <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> startup
+      flag to <codeph>TRUE</codeph>, Impala treats the <codeph>TIMESTAMP</codeph> values
+      as specified in the local time zone. The local time zone is determined in the following
+      order, with the <codeph>TIMEZONE</codeph> query option taking the highest precedence:
+      <ol>
+        <li>
+          The <codeph>TIMEZONE</codeph> query option
+        </li>
+
+        <li>
+          <codeph>$TZ</codeph> environment variable
+        </li>
+
+        <li>
+          System time zone where the impalad coordinator runs
+        </li>
+      </ol>
     </p>
 
-<codeblock>Impala DDL and queries for Parquet table:
-
-[localhost:21000] > create table p1 stored as parquet as select x from t1;
-+-------------------+
-| summary           |
-+-------------------+
-| Inserted 2 row(s) |
-+-------------------+
-[localhost:21000] > select x from p1;
-+-------------------------------+
-| x                             |
-+-------------------------------+
-| 2015-04-07 15:43:02.892403000 |
-| 2015-04-08 15:43:02.892403000 |
-+-------------------------------+
-
-Hive DDL and queries for Parquet table:
-
-hive> create table h1 (x timestamp) stored as parquet;
-OK
-hive> insert into h1 select * from p1;
-...
-OK
-Time taken: 35.573 seconds
-hive> select x from p1;
-OK
-2015-04-07 15:43:02.892403
-2015-04-08 15:43:02.892403
-Time taken: 0.324 seconds, Fetched: 2 row(s)
-hive> select x from h1;
-OK
-2015-04-07 15:43:02.892403
-2015-04-08 15:43:02.892403
-Time taken: 0.197 seconds, Fetched: 2 row(s)
-</codeblock>
+    <p> The <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph>
+      setting can be used to fix discrepancies in <codeph>INTERVAL</codeph>
+      operations. For example, a <codeph>TIMESTAMP + INTERVAL
+          <varname>n-hours</varname></codeph> can be affected by Daylight Saving
+      Time, which Impala does not consider by default as these operations are
+      applied as if the timestamp was in UTC. You can use the
+        <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting
+      to fix the issue. </p>
+    <p>See <xref href="impala_custom_timezones.xml#custom_timezone"/> for
+      information on configuring a custom time zone database and aliases.</p>
 
     <p>
-      The discrepancy arises when Impala queries the Hive-created Parquet table. The underlying
-      values in the <codeph>TIMESTAMP</codeph> column are different from the ones written by
-      Impala, even though they were copied from one table to another by an <codeph>INSERT ...
-      SELECT</codeph> statement in Hive. Hive did an implicit conversion from the local time
-      zone to UTC as it wrote the values to Parquet.
+      See <xref href="impala_datetime_functions.xml#datetime_functions">Impala Date and Time
+      Functions</xref> for the list of functions affected by the
+      <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting.
     </p>
 
-<codeblock>Impala query for TIMESTAMP values from Impala-written and Hive-written data:
-
-[localhost:21000] > select * from p1;
-+-------------------------------+
-| x                             |
-+-------------------------------+
-| 2015-04-07 15:43:02.892403000 |
-| 2015-04-08 15:43:02.892403000 |
-+-------------------------------+
-Fetched 2 row(s) in 0.29s
-[localhost:21000] > select * from h1;
-+-------------------------------+
-| x                             |
-+-------------------------------+
-| 2015-04-07 22:43:02.892403000 |
-| 2015-04-08 22:43:02.892403000 |
-+-------------------------------+
-Fetched 2 row(s) in 0.41s
-
-Underlying integer values for Impala-written and Hive-written data:
-
-[localhost:21000] > select cast(x as bigint) from p1;
-+-------------------+
-| cast(x as bigint) |
-+-------------------+
-| 1428421382        |
-| 1428507782        |
-+-------------------+
-Fetched 2 row(s) in 0.38s
-[localhost:21000] > select cast(x as bigint) from h1;
-+-------------------+
-| cast(x as bigint) |
-+-------------------+
-| 1428446582        |
-| 1428532982        |
-+-------------------+
-Fetched 2 row(s) in 0.20s
-</codeblock>
-
     <p>
-      When the <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting is enabled,
-      Impala recognizes the Parquet data files written by Hive, and applies the same
-      UTC-to-local-timezone conversion logic during the query as Hive uses, making the contents
-      of the Impala-written <codeph>P1</codeph> table and the Hive-written <codeph>H1</codeph>
-      table appear identical, whether represented as <codeph>TIMESTAMP</codeph> values or the
-      underlying <codeph>BIGINT</codeph> integers:
+      <b>Time zone handling between Impala and Hive:</b>
     </p>
-
-<codeblock>[localhost:21000] > select x from p1;
-+-------------------------------+
-| x                             |
-+-------------------------------+
-| 2015-04-07 15:43:02.892403000 |
-| 2015-04-08 15:43:02.892403000 |
-+-------------------------------+
-Fetched 2 row(s) in 0.37s
-[localhost:21000] > select x from h1;
-+-------------------------------+
-| x                             |
-+-------------------------------+
-| 2015-04-07 15:43:02.892403000 |
-| 2015-04-08 15:43:02.892403000 |
-+-------------------------------+
-Fetched 2 row(s) in 0.19s
-[localhost:21000] > select cast(x as bigint) from p1;
-+-------------------+
-| cast(x as bigint) |
-+-------------------+
-| 1428446582        |
-| 1428532982        |
-+-------------------+
-Fetched 2 row(s) in 0.29s
-[localhost:21000] > select cast(x as bigint) from h1;
-+-------------------+
-| cast(x as bigint) |
-+-------------------+
-| 1428446582        |
-| 1428532982        |
-+-------------------+
-Fetched 2 row(s) in 0.22s
-</codeblock>
+    <p>Interoperability between Hive and Impala is different depending on the
+      file format.</p>
+    <ul>
+      <li><i>Text</i><p> For text tables, <codeph>TIMESTAMP</codeph> values can
+          be written and read interchangeably by Impala and Hive as Hive reads
+          and writes <codeph>TIMESTAMP</codeph> values without converting with
+          respect to time zones. </p></li>
+      <li><i>Parquet</i><p> When Hive writes to Parquet data files, the
+            <codeph>TIMESTAMP</codeph> values are normalized to UTC from the
+          local time zone of the host where the data was written. On the other
+          hand, Impala does not make any time zone adjustment when it writes or
+          reads <codeph>TIMESTAMP</codeph> values to Parquet files. This
+          difference in time zone handling can cause potentially inconsistent
+          results when Impala processes <codeph>TIMESTAMP</codeph> values in the
+          Parquet files written by Hive. </p><p> To avoid incompatibility
+          problems or having to code workarounds, you can specify one or both of
+          these impalad startup flags: <ul>
+            <li>
+              <codeph>--use_local_tz_for_unix_timestamp_conversions=true</codeph>
+            </li>
+            <li>
+              <codeph>--convert_legacy_hive_parquet_utc_timestamps=true</codeph>
+            </li>
+          </ul>
+        </p><p> When the
+            <codeph>--convert_legacy_hive_parquet_utc_timestamps</codeph> setting
+          is enabled, Impala recognizes the Parquet data files written by Hive,
+          and applies the same UTC-to-local-timezone conversion logic during the
+          query as Hive does. </p><p>In <keyword keyref="impala30"/> and lower,
+          this option had severe impact on multi-threaded performance. The new
+          time zone implementation in <keyword keyref="impala31"/> eliminated
+          most of the performance overhead and made Impala scale well to
+          multiple threads. Although
+            <codeph>--convert_legacy_hive_parquet_utc_timestamps</codeph> is
+          turned off by default for this performance reason, where practical
+          turn it on when processing <codeph>TIMESTAMP</codeph> columns in
+          Parquet files written by Hive, to avoid unexpected behavior. </p></li>
+    </ul>
 
     <p>
       <b>Conversions:</b>
@@ -422,26 +279,31 @@ ERROR: AnalysisException: Type 'TIMESTAMP' is not supported as partition-column
 
     <p conref="../shared/impala_common.xml#common/null_bad_timestamp_cast"/>
 
-    <p conref="../shared/impala_common.xml#common/partitioning_worrisome"/>
-
     <p conref="../shared/impala_common.xml#common/hbase_ok"/>
 
+    <p>
+      <b>Parquet consideration:</b> INT96-encoded Parquet timestamps are supported in Impala.
+      INT64 timestamps will be supported in a future release.
+    </p>
+
+    <p/>
+
     <p conref="../shared/impala_common.xml#common/parquet_ok"/>
 
     <p conref="../shared/impala_common.xml#common/text_bulky"/>
 
 <!--    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> -->
 
-    <p conref="../shared/impala_common.xml#common/internals_16_bytes"/>
-
-    <p conref="../shared/impala_common.xml#common/added_forever"/>
-
     <p conref="../shared/impala_common.xml#common/column_stats_constant"/>
 
     <p conref="../shared/impala_common.xml#common/sqoop_blurb"/>
 
     <p conref="../shared/impala_common.xml#common/sqoop_timestamp_caveat"/>
 
+    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/kudu_timestamp_details"/>
+
     <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
 
     <p>
@@ -453,10 +315,6 @@ ERROR: AnalysisException: Type 'TIMESTAMP' is not supported as partition-column
 
     <p conref="../shared/impala_common.xml#common/avro_no_timestamp"/>
 
-    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
-
-    <p conref="../shared/impala_common.xml#common/kudu_timestamp_details"/>
-
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
 
     <p>
@@ -524,6 +382,8 @@ select s, t, b from timestamp_t order by t;
 +-------------------------------+-------------------------------+------------+
 </codeblock>
 
+    <p conref="../shared/impala_common.xml#common/added_forever"/>
+
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <ul>


[02/11] impala git commit: IMPALA-7806: [DOCS] Updated Known Issues in 3.1

Posted by bo...@apache.org.
IMPALA-7806: [DOCS] Updated Known Issues in 3.1

- Reviewed and updated the currently known issues for the 3.1 release

Change-Id: Iae68b308f5c0d8bfe124054480d9b2333f70b249
Reviewed-on: http://gerrit.cloudera.org:8080/11921
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
(cherry picked from commit 74354a7189491625f3afa7e628eb4b04af7ecaed)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/174ac2f8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/174ac2f8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/174ac2f8

Branch: refs/heads/branch-3.1.0
Commit: 174ac2f8a2b1a97b68df40b3a613676ce00bcc1d
Parents: b0672c8
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Mon Nov 12 16:44:07 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:57:18 2018 +0100

----------------------------------------------------------------------
 docs/topics/impala_known_issues.xml | 73 --------------------------------
 1 file changed, 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/174ac2f8/docs/topics/impala_known_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_known_issues.xml b/docs/topics/impala_known_issues.xml
index b7c439d..b498197 100644
--- a/docs/topics/impala_known_issues.xml
+++ b/docs/topics/impala_known_issues.xml
@@ -173,8 +173,6 @@ under the License.
 
     <concept id="IMPALA-1792" rev="IMPALA-1792">
 
-<!-- Not part of Alex's spreadsheet -->
-
       <title>ImpalaODBC: Can not get the value in the SQLGetData(m-x th column) after the SQLBindCol(m th column)</title>
 
       <conbody>
@@ -243,28 +241,6 @@ under the License.
       </p>
       </conbody>
     </concept>
-    <concept id="IMPALLA-7298">
-      <title>Kerberos authentication fails with the reverse DNS lookup
-        disabled</title>
-      <conbody>
-        <p> Kerberos authentication does not function correctly if <codeph>rdns
-            = false</codeph> is configured in <codeph>krb5.conf</codeph>. If the
-          flag <codeph>rdns = false</codeph>, when Impala tries to match
-          principals, it will fail because Kerberos receives a SPN (Service
-          Principal Name) with an IP address in it, but Impala expects a
-          principal with a FQDN in it.</p>
-        <p>
-          <b>Bug:</b>
-          <xref keyref="IMPALA-7298">IMPALA-7298</xref></p>
-        <p><b>Affected Versions:</b> Impala 2.12.0 and 3.0</p>
-        <p>
-          <b>Workaround:</b> Set the following flags in
-            <codeph>krb5.conf</codeph>: <ul>
-            <li><codeph>dns_canonicalize_hostname = true</codeph></li>
-            <li><codeph>rdns = true</codeph></li>
-          </ul></p>
-      </conbody>
-    </concept>
 </concept>
 
   <concept id="known_issues_resources">
@@ -279,27 +255,6 @@ under the License.
       </p>
 
     </conbody>
-    <!--AR: The workaround topic does not exist. Not sure if this was ever fully documented upstream.-->
-
-    <concept id="IMPALA-6028" audience="hidden">
-      <title>Handling large rows during upgrade to <keyword
-          keyref="impala210_full"/> or higher</title>
-      <conbody>
-        <p> After an upgrade to <keyword keyref="impala210_full"/> or higher,
-          users who process very large column values (long strings), or have
-          increased the <codeph>--read_size</codeph> configuration setting from
-          its default of 8 MB, might encounter capacity errors for some queries
-          that previously worked. </p>
-        <p>
-          <b>Resolution:</b> After the upgrade, follow the instructions in <xref
-            keyref="convert_read_size"/> to check if your queries are affected
-          by these changes and to modify your configuration settings if so. </p>
-        <p>
-          <b>Apache Issue:</b>
-          <xref keyref="IMPALA-6028">IMPALA-6028</xref>
-        </p>
-      </conbody>
-    </concept>
 
     <concept id="IMPALA-5605">
 
@@ -518,8 +473,6 @@ explain SELECT 1 FROM alltypestiny a1
 
     </conbody>
 
-<!-- Opened based on internal JIRA. Not part of Alex's spreadsheet AFAIK. -->
-
     <concept id="describe_formatted_avro">
 
       <title>DESCRIBE FORMATTED gives error on Avro table</title>
@@ -564,8 +517,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMP-175">
 
-<!-- Not part of Alex's spreadsheet. Perhaps it really is a permanent limitation and nobody is tracking it? -->
-
       <title>Deviation from Hive behavior: Out of range values float/double values are returned as maximum allowed value of type (Hive returns NULL)</title>
 
       <conbody>
@@ -585,8 +536,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="flume_writeformat_text">
 
-<!-- Not part of Alex's spreadsheet. From a non-public JIRA. -->
-
       <title>Configuration needed for Flume to be compatible with Impala</title>
 
       <conbody>
@@ -610,8 +559,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-635" rev="IMPALA-635">
 
-<!-- Not part of Alex's spreadsheet -->
-
       <title>Avro Scanner fails to parse some schemas</title>
 
       <conbody>
@@ -641,8 +588,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-1024" rev="IMPALA-1024">
 
-<!-- Not part of Alex's spreadsheet -->
-
       <title>Impala BE cannot parse Avro schema that contains a trailing semi-colon</title>
 
       <conbody>
@@ -666,8 +611,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-1652" rev="IMPALA-1652">
 
-<!-- To do: Isn't this more a correctness issue? -->
-
       <title>Incorrect results with basic predicate on CHAR typed column</title>
 
       <conbody>
@@ -725,10 +668,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
         </p>
 
         <p>
-          <b>Resolution:</b>
-        </p>
-
-        <p>
           <b>Workaround:</b> Avoid queries with extremely large expression trees. Setting the
           query option <codeph>disable_codegen=true</codeph> may reduce the impact, at a cost of
           longer query runtime.
@@ -740,8 +679,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-77" rev="IMPALA-77">
 
-<!-- Not part of Alex's spreadsheet. Perhaps it really is a permanent limitation and nobody is tracking it? -->
-
       <title>Impala does not support running on clusters with federated namespaces</title>
 
       <conbody>
@@ -785,8 +722,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-2005" rev="IMPALA-2005">
 
-<!-- Not part of Alex's spreadsheet -->
-
       <title>A failed CTAS does not drop the table if the insert fails</title>
 
       <conbody>
@@ -812,8 +747,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-1821" rev="IMPALA-1821">
 
-<!-- Not part of Alex's spreadsheet -->
-
       <title>Casting scenarios with invalid/inconsistent results</title>
 
       <conbody>
@@ -837,8 +770,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-941" rev="IMPALA-941">
 
-<!-- Not part of Alex's spreadsheet. Maybe this is interop? -->
-
       <title>Impala Parser issue when using fully qualified table names that start with a number</title>
 
       <conbody>
@@ -864,8 +795,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMPALA-532" rev="IMPALA-532">
 
-<!-- Not part of Alex's spreadsheet. Perhaps it really is a permanent limitation and nobody is tracking it? -->
-
       <title>Impala should tolerate bad locale settings</title>
 
       <conbody>
@@ -897,8 +826,6 @@ ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 
     <concept id="IMP-1203">
 
-<!-- Not part of Alex's spreadsheet. Perhaps it really is a permanent limitation and nobody is tracking it? -->
-
       <title>Log Level 3 Not Recommended for Impala</title>
 
       <conbody>


[10/11] impala git commit: [DOCS] A number of typos were fixed in impala_dedicated_coordinator

Posted by bo...@apache.org.
[DOCS] A number of typos were fixed in impala_dedicated_coordinator

Change-Id: I5758a5beabdf46feaf52fa0b3ed14bdce4408754
Reviewed-on: http://gerrit.cloudera.org:8080/11986
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
(cherry picked from commit fcfabe0f5c38a4e37d52d14a1010b02ae2973afb)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/9749096a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/9749096a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/9749096a

Branch: refs/heads/branch-3.1.0
Commit: 9749096a16e6cb7fb564ae6367f58c9cb0ccef67
Parents: 3d1afb4
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Mon Nov 26 11:49:28 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 12:01:02 2018 +0100

----------------------------------------------------------------------
 docs/topics/impala_dedicated_coordinator.xml | 54 ++++++++++++-----------
 1 file changed, 28 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/9749096a/docs/topics/impala_dedicated_coordinator.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_dedicated_coordinator.xml b/docs/topics/impala_dedicated_coordinator.xml
index 1b43772..73aa2cf 100644
--- a/docs/topics/impala_dedicated_coordinator.xml
+++ b/docs/topics/impala_dedicated_coordinator.xml
@@ -165,13 +165,13 @@ under the License.
 
       <li >
         <p>
-          Provides a better concurrency by avoiding coordinator bottleneck.
+          Provides better concurrency by avoiding coordinator bottleneck.
         </p>
       </li>
 
       <li>
         <p>
-          Eliminates the query over admission by using one dedicated coordinator.
+          Eliminates query over-admission.
         </p>
       </li>
 
@@ -185,7 +185,7 @@ under the License.
       <li >
         <p>
           Improves reliability and performance for highly concurrent workloads by reducing
-          workload stress on coordinators. Dedicated coordinators require 50% or less
+          workload stress on coordinators. Dedicated coordinators require 50% or fewer
           connections and threads.
         </p>
       </li>
@@ -228,7 +228,7 @@ under the License.
       <p>
         To maintain a healthy state and optimal performance, it is recommended that you keep the
         peak utilization of all resources used by Impala, including CPU, the number of threads,
-        the number of connections, RPCs, under 80%.
+        the number of connections, and RPCs, under 80%.
       </p>
 
       <p >
@@ -337,9 +337,9 @@ under the License.
               On a large cluster with 50+ nodes, the number of network connections from a
               coordinator to executors can grow quickly as query complexity increases. The
               growth is much greater on coordinators than executors. Add a few more coordinators
-              if workload are complex, i.e. (an average number of fragments * number of Impalad)
-              > 500, but with the low memory/CPU usage to share the load. Watch IMPALA-4603 and
-              IMPALA-7213 to track the progress on fixing this issue.
+              if workloads are complex, i.e. (average number of fragments * number of
+              impalad daemons) > 500, but memory/CPU usage is low, to share the load. Watch
+              IMPALA-4603 and IMPALA-7213 to track the progress on fixing this issue.
             </li>
 
             <li >
@@ -352,7 +352,7 @@ under the License.
             <li>
               The front-end connection requirement is not a factor in determining the number of
               dedicated coordinators. Consider setting up a connection pool at the client side
-              instead of adding coordinators. For a short term solution, you could increase the
+              instead of adding coordinators. For a short-term solution, you could increase the
               value of <codeph>fe_service_threads</codeph> on coordinators to allow more client
               connections.
             </li>
@@ -591,33 +591,35 @@ under the License.
 
         <li >
           <p>
-            <b>(Dedicated) Executors: </b>They should be collocated with DataNodes as usual.
-            The number of hosts with this setting typically increases as the cluster grows
-            larger and handles more table partitions, data files, and concurrent queries.
+            <b>(Dedicated) Executors: </b>They should be collocated with DataNodes as usual. The
+            number of hosts with this setting typically increases as the cluster grows larger
+            and handles more table partitions, data files, and concurrent queries.
           </p>
         </li>
       </ul>
 
-      <p> To configuring dedicated coordinators/executors, you specify one of
-        the following startup flags for the <cmdname>impalad</cmdname> daemon on
-        each host: <ul>
+      <p>
+        To configure dedicated coordinators/executors, you specify one of the following
+        startup flags for the <cmdname>impalad</cmdname> daemon on each host:
+        <ul>
           <li>
             <p>
-              <codeph>is_executor=false</codeph> for each host that does not act
-              as an executor for Impala queries. These hosts act exclusively as
-              query coordinators. This setting typically applies to a relatively
-              small number of hosts, because the most common topology is to have
-              nearly all DataNodes doing work for query execution. </p>
+              <codeph>is_executor=false</codeph> for each host that does not act as an executor
+              for Impala queries. These hosts act exclusively as query coordinators. This
+              setting typically applies to a relatively small number of hosts, because the most
+              common topology is to have nearly all DataNodes doing work for query execution.
+            </p>
           </li>
+
           <li>
             <p>
-              <codeph>is_coordinator=false</codeph> for each host that does not
-              act as a coordinator for Impala queries. These hosts act
-              exclusively as executors. The number of hosts with this setting
-              typically increases as the cluster grows larger and handles more
-              table partitions, data files, and concurrent queries. As the
-              overhead for query coordination increases, it becomes more
-              important to centralize that work on dedicated hosts. </p>
+              <codeph>is_coordinator=false</codeph> for each host that does not act as a
+              coordinator for Impala queries. These hosts act exclusively as executors. The
+              number of hosts with this setting typically increases as the cluster grows larger
+              and handles more table partitions, data files, and concurrent queries. As the
+              overhead for query coordination increases, it becomes more important to centralize
+              that work on dedicated hosts.
+            </p>
           </li>
         </ul>
       </p>


[11/11] impala git commit: [DOCS] Copy edits in impala_custom_timezones

Posted by bo...@apache.org.
[DOCS] Copy edits in impala_custom_timezones

Change-Id: Ia8fa298ef49c71313bb9be3a8224462e01b3ca6a
Reviewed-on: http://gerrit.cloudera.org:8080/11991
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
(cherry picked from commit 5e740f45c6c93e0c0ab49afaf1fc5b74072753af)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7d3e9bee
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7d3e9bee
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7d3e9bee

Branch: refs/heads/branch-3.1.0
Commit: 7d3e9beee8d91bf775103ee9b362dcd08114abd4
Parents: 9749096
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Mon Nov 26 17:05:09 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 12:01:39 2018 +0100

----------------------------------------------------------------------
 docs/topics/impala_custom_timezones.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7d3e9bee/docs/topics/impala_custom_timezones.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_custom_timezones.xml b/docs/topics/impala_custom_timezones.xml
index be651e9..9856bb4 100644
--- a/docs/topics/impala_custom_timezones.xml
+++ b/docs/topics/impala_custom_timezones.xml
@@ -55,8 +55,8 @@ under the License.
         </li>
 
         <li>
-          <p >
-            New startup flags have been introduced:
+          <p>
+            Use the following startup flags to customize the time zone definitions.
           </p>
           <ul>
             <li >


[08/11] impala git commit: IMPALA-7815: [DOCS] Release notes for 3.1

Posted by bo...@apache.org.
IMPALA-7815: [DOCS] Release notes for 3.1

Change-Id: Ib4d124557f29e539f97c5c52606c16f4147ab169
Reviewed-on: http://gerrit.cloudera.org:8080/11922
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
(cherry picked from commit 622e19c5f8bb3523f4cbbd0053fd6314ea66a929)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/df6e92f7
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/df6e92f7
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/df6e92f7

Branch: refs/heads/branch-3.1.0
Commit: df6e92f7ae23db5dc68a5f03ed3741bcfbd2a960
Parents: 3ec779a
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Mon Nov 12 17:13:05 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 12:00:00 2018 +0100

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap                 |  3 ++-
 docs/topics/impala_fixed_issues.xml         |  9 +++++++++
 docs/topics/impala_incompatible_changes.xml |  9 +++++++++
 docs/topics/impala_new_features.xml         | 10 ++++++++++
 docs/topics/impala_upgrading.xml            | 11 +++++++++++
 5 files changed, 41 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/df6e92f7/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index ff83cd2..a25f4d1 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10541,7 +10541,7 @@ under the License.
 
 <!-- 3-part forms of version numbers, for use in release notes. -->
 <!-- Using spaced-out form to avoid conflict with variable for 2.1.10 -->
-  <keydef keys="impala3_1_0"><topicmeta><keywords><keyword>Impala 3.1.0</keyword></keywords></topicmeta></keydef>
+  <keydef keys="impala3_01_0"><topicmeta><keywords><keyword>Impala 3.1.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala3_00_0"><topicmeta><keywords><keyword>Impala 3.0.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala2_12_0"><topicmeta><keywords><keyword>Impala 2.12.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala2_11_0"><topicmeta><keywords><keyword>Impala 2.11.0</keyword></keywords></topicmeta></keydef>
@@ -10603,6 +10603,7 @@ under the License.
   <keydef keys="impala13_full"><topicmeta><keywords><keyword>Impala 1.3</keyword></keywords></topicmeta></keydef>
 
 <!-- Pointers to changelog pages -->
+  <keydef keys="changelog_31" href="https://impala.apache.org/docs/changelog-3.1.html" scope="external" format="html"/>
   <keydef keys="changelog_300" href="https://impala.apache.org/docs/changelog-3.0.html" scope="external" format="html"/>
   <keydef keys="changelog_212" href="https://impala.apache.org/docs/changelog-2.12.html" scope="external" format="html"/>
   <keydef keys="changelog_211" href="https://impala.apache.org/docs/changelog-2.11.html" scope="external" format="html"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/df6e92f7/docs/topics/impala_fixed_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_fixed_issues.xml b/docs/topics/impala_fixed_issues.xml
index 0102afe..0f72296 100644
--- a/docs/topics/impala_fixed_issues.xml
+++ b/docs/topics/impala_fixed_issues.xml
@@ -45,6 +45,15 @@ under the License.
 
     <p outputclass="toc inpage"/>
   </conbody>
+  <!-- All 3.1.x subsections go under here -->
+  <concept rev="3.1.0" id="fixed_issues_3_1_0">
+    <title>Issues Fixed in <keyword keyref="impala31"/></title>
+    <conbody>
+      <p> For the full list of issues closed in this release, including bug
+        fixes, see the <xref keyref="changelog_31">changelog for <keyword
+            keyref="impala31"/></xref>. </p>
+    </conbody>
+  </concept>
   <!-- All 3.0.x subsections go under here -->
   <concept rev="3.0.0" id="fixed_issues_3_0_0">
     <title>Issues Fixed in <keyword keyref="impala30"/></title>

http://git-wip-us.apache.org/repos/asf/impala/blob/df6e92f7/docs/topics/impala_incompatible_changes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_incompatible_changes.xml b/docs/topics/impala_incompatible_changes.xml
index e5d4a86..ab88251 100644
--- a/docs/topics/impala_incompatible_changes.xml
+++ b/docs/topics/impala_incompatible_changes.xml
@@ -52,6 +52,15 @@ under the License.
 
     <p outputclass="toc inpage"/>
   </conbody>
+  <concept rev="3.1.0" id="incompatible_changes_310x">
+    <title>Incompatible Changes Introduced in Impala 3.1.x</title>
+    <conbody>
+      <p> For the full list of issues closed in this release, including any that
+        introduce behavior changes or incompatibilities, see the <xref
+          keyref="changelog_31">changelog for <keyword keyref="impala31"
+          /></xref>. </p>
+    </conbody>
+  </concept>
   <concept rev="3.0.0" id="incompatible_changes_300x">
     <title>Incompatible Changes Introduced in Impala 3.0.x</title>
     <conbody>

http://git-wip-us.apache.org/repos/asf/impala/blob/df6e92f7/docs/topics/impala_new_features.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_new_features.xml b/docs/topics/impala_new_features.xml
index 8afb1a4..9236915 100644
--- a/docs/topics/impala_new_features.xml
+++ b/docs/topics/impala_new_features.xml
@@ -45,6 +45,16 @@ under the License.
     <p outputclass="toc inpage"/>
 
   </conbody>
+  <!-- All 3.1.x new features go under here -->
+  <concept rev="3.1.0" id="new_features_31">
+    <title>New Features in <keyword keyref="impala31"/></title>
+    <conbody>
+      <p> For the full list of issues closed in this release, including the
+        issues marked as <q>new features</q> or <q>improvements</q>, see the
+          <xref keyref="changelog_31">changelog for <keyword keyref="impala31"
+          /></xref>. </p>
+    </conbody>
+  </concept>
 
   <!-- All 3.0.x new features go under here -->
   <concept rev="3.0.0" id="new_features_300">

http://git-wip-us.apache.org/repos/asf/impala/blob/df6e92f7/docs/topics/impala_upgrading.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_upgrading.xml b/docs/topics/impala_upgrading.xml
index 679b77d..502c56d 100644
--- a/docs/topics/impala_upgrading.xml
+++ b/docs/topics/impala_upgrading.xml
@@ -147,6 +147,17 @@ $ ps ax | grep [i]mpalad
   <concept id="concept_a2p_szq_jdb">
 
     <title>Impala Upgrade Considerations</title>
+    <concept id="impala_privileges_30">
+      <title>Grant REFRESH Privilege to Impala Roles with SELECT or INSERT
+        Privilege when Upgrading to <keyword keyref="impala30"/></title>
+      <conbody>
+        <p> To use the fine-grained privileges feature in <keyword
+            keyref="impala30"/>, if a role has the <codeph>SELECT</codeph> or
+            <codeph>INSERT</codeph> privilege on an object in Impala before
+          upgrading to <keyword keyref="impala30"/>, grant that role the
+            <codeph>REFRESH</codeph> privilege after the upgrade. </p>
+      </conbody>
+    </concept>
 
     <concept id="IMPALA-3916">
 


[05/11] impala git commit: IMPALA-7836: [DOCS] Format changes in impala_topn_bytes_limit.xml

Posted by bo...@apache.org.
IMPALA-7836: [DOCS] Format changes in impala_topn_bytes_limit.xml

Change-Id: I731b26fe2c225e706454f16cd3b6de697ec70fe2
Reviewed-on: http://gerrit.cloudera.org:8080/11935
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
(cherry picked from commit 3dea93ef0f364325dff2893642d5516a4ecd16bd)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f5348d4c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f5348d4c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f5348d4c

Branch: refs/heads/branch-3.1.0
Commit: f5348d4cd88fff23b118dfef68db978e54327c15
Parents: bd573d1
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Nov 15 13:18:33 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:59:20 2018 +0100

----------------------------------------------------------------------
 docs/topics/impala_topn_bytes_limit.xml | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f5348d4c/docs/topics/impala_topn_bytes_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_topn_bytes_limit.xml b/docs/topics/impala_topn_bytes_limit.xml
index c6329f9..fa212b2 100644
--- a/docs/topics/impala_topn_bytes_limit.xml
+++ b/docs/topics/impala_topn_bytes_limit.xml
@@ -42,19 +42,18 @@ under the License.
 
     <p>
       The <codeph>TOPN_BYTES_LIMIT</codeph> query option places a limit on the amount of
-      estimated memory that Impala can process for <term>top-N</term> queries.
+      estimated memory that Impala can process for <i>top-N</i> queries.
     </p>
 
     <p>
-      <term>top-N</term> queries are the queries that include both <codeph>ORDER BY</codeph> and
-      <codeph>LIMIT</codeph> clauses. <term>top-N</term> queries don't spill to disk so they
-      have to keep all rows they process in memory, and those queries can cause out-of-memory
-      issues when running with a large limit and an offset. If the Impala planner estimates that
-      a <term>top-N</term> operator will process more bytes than the
-      <codeph>TOPN_BYTES_LIMIT</codeph> value, it will replace the <term>top-N</term> operator
-      with the <term>sort</term> operator. Switching to the <term>sort</term> operator allows
-      Impala to spill to disk, thus requiring less memory than <term>top-N</term>, but
-      potentially with performance penalties.
+      <i>Top-N</i> queries are the queries that include both <codeph>ORDER BY</codeph> and
+      <codeph>LIMIT</codeph> clauses. <i>Top-N</i> queries don't spill to disk so they have to
+      keep all rows they process in memory, and those queries can cause out-of-memory issues
+      when running with a large limit and an offset. If the Impala planner estimates that a
+      <i>top-N</i> operator will process more bytes than the <codeph>TOPN_BYTES_LIMIT</codeph>
+      value, it will replace the <i>top-N</i> operator with the <i>sort</i> operator. Switching
+      to the <i>sort</i> operator allows Impala to spill to disk, thus requiring less memory
+      than <i>top-N</i>, but potentially with performance penalties.
     </p>
 
     <p>


[06/11] impala git commit: [DOCS] Added a note in impala_scan_bytes_limit.xml

Posted by bo...@apache.org.
[DOCS] Added a note in impala_scan_bytes_limit.xml

- Added a note that queries can scan over the limit of this option
at times.

Change-Id: Id20f952622ce553d9c1f47e469d97a3b4c19683f
Reviewed-on: http://gerrit.cloudera.org:8080/11936
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
(cherry picked from commit a47b3b82c4288d324c3917c2b1a1f8a409958191)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0d5b5d4e
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0d5b5d4e
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0d5b5d4e

Branch: refs/heads/branch-3.1.0
Commit: 0d5b5d4ece423d910fae8ba6eb50f62929764857
Parents: f5348d4
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Nov 15 13:08:41 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:59:28 2018 +0100

----------------------------------------------------------------------
 docs/topics/impala_scan_bytes_limit.xml | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0d5b5d4e/docs/topics/impala_scan_bytes_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_scan_bytes_limit.xml b/docs/topics/impala_scan_bytes_limit.xml
index 5fc4a8a..e67c971 100644
--- a/docs/topics/impala_scan_bytes_limit.xml
+++ b/docs/topics/impala_scan_bytes_limit.xml
@@ -20,8 +20,7 @@ under the License.
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
 <concept id="scan_bytes_limit">
 
-  <title>SCAN_BYTES_LIMIT Query Option (<keyword keyref="impala31"/> or higher
-    only)</title>
+  <title>SCAN_BYTES_LIMIT Query Option (<keyword keyref="impala31"/> or higher only)</title>
 
   <titlealts audience="PDF">
 
@@ -44,8 +43,8 @@ under the License.
   <conbody>
 
     <p>
-      The <codeph>SCAN_BYTES_LIMIT</codeph> query option sets a time limit on the bytes scanned
-      by HDFS and HBase SCAN operations. If a query is still executing when the query’s
+      The <codeph>SCAN_BYTES_LIMIT</codeph> query option sets a limit on the bytes scanned by
+      HDFS and HBase SCAN operations. If a query is still executing when the query’s
       coordinator detects that it has exceeded the limit, the query is terminated with an error.
       The option is intended to prevent runaway queries that scan more data than is intended.
     </p>
@@ -60,7 +59,7 @@ under the License.
       Admission Control and Query Queuing</xref> for information about default query options).
       If a user accidentally omits a partition filter in a <codeph>WHERE</codeph> clause and
       runs a large query that scans a lot of data, the query will be automatically terminated
-      after the time limit expires to free up resources.
+      after it scans more data than the <codeph>SCAN_BYTES_LIMIT</codeph>.
     </p>
 
     <p>
@@ -89,6 +88,10 @@ under the License.
     </p>
 
     <p>
+      Because the checks are done periodically, the query may scan over the limit at times.
+    </p>
+
+    <p>
       <b>Syntax:</b> <codeph>SET SCAN_BYTES_LIMIT=bytes;</codeph>
     </p>
 


[03/11] impala git commit: IMPALA-7836: [DOCS] Document TOPN_BYTES_LIMIT query option

Posted by bo...@apache.org.
IMPALA-7836: [DOCS] Document TOPN_BYTES_LIMIT query option

Change-Id: Ib7109c2949ee5137d8b4a748227948b79bd93f52
Reviewed-on: http://gerrit.cloudera.org:8080/11914
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
(cherry picked from commit 731254b52934c17d953da541df8bc4493beb037a)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/8872e8bf
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/8872e8bf
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/8872e8bf

Branch: refs/heads/branch-3.1.0
Commit: 8872e8bf80d01b8e2ea88432f27eefc7a1a2169d
Parents: 174ac2f
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Nov 8 14:55:32 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:57:28 2018 +0100

----------------------------------------------------------------------
 docs/impala.ditamap                     |  1 +
 docs/topics/impala_topn_bytes_limit.xml | 84 ++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/8872e8bf/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 051b838..8eecf06 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -239,6 +239,7 @@ under the License.
           <topicref href="topics/impala_thread_reservation_aggregate_limit.xml"/>
           <topicref href="topics/impala_thread_reservation_limit.xml"/>
           <topicref href="topics/impala_timezone.xml"/>
+          <topicref href="topics/impala_topn_bytes_limit.xml"/>
         </topicref>
       </topicref>
       <topicref href="topics/impala_show.xml"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/8872e8bf/docs/topics/impala_topn_bytes_limit.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_topn_bytes_limit.xml b/docs/topics/impala_topn_bytes_limit.xml
new file mode 100644
index 0000000..c6329f9
--- /dev/null
+++ b/docs/topics/impala_topn_bytes_limit.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="3.1.0" id="topn_bytes_limit">
+
+  <title>TOPN_BYTES_LIMIT Query Option (<keyword keyref="impala31_full"/> or higher only)</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>TOPN_BYTES_LIMIT</navtitle>
+
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      The <codeph>TOPN_BYTES_LIMIT</codeph> query option places a limit on the amount of
+      estimated memory that Impala can process for <term>top-N</term> queries.
+    </p>
+
+    <p>
+      <term>top-N</term> queries are the queries that include both <codeph>ORDER BY</codeph> and
+      <codeph>LIMIT</codeph> clauses. <term>top-N</term> queries don't spill to disk so they
+      have to keep all rows they process in memory, and those queries can cause out-of-memory
+      issues when running with a large limit and an offset. If the Impala planner estimates that
+      a <term>top-N</term> operator will process more bytes than the
+      <codeph>TOPN_BYTES_LIMIT</codeph> value, it will replace the <term>top-N</term> operator
+      with the <term>sort</term> operator. Switching to the <term>sort</term> operator allows
+      Impala to spill to disk, thus requiring less memory than <term>top-N</term>, but
+      potentially with performance penalties.
+    </p>
+
+    <p>
+      The option has no effect when set to 0 or -1.
+    </p>
+
+    <p>
+      <b>Syntax:</b>
+    </p>
+
+<codeblock>SET TOPN_BYTES_LIMIT=<varname>limit</varname></codeblock>
+
+    <p>
+      <b>Type:</b> Number
+    </p>
+
+    <p>
+      <b>Default:</b> 536870912 (512 MB)
+    </p>
+
+    <p>
+      <b>Added in:</b> <keyword keyref="impala31"/>
+    </p>
+
+  </conbody>
+
+</concept>


[04/11] impala git commit: IMPALA-7634: [DOCS] Document the new SHUTDOWN statement

Posted by bo...@apache.org.
IMPALA-7634: [DOCS] Document the new SHUTDOWN statement

Change-Id: I0cfe4bae1b7966980cdeececa6b959bbecb4a24a
Reviewed-on: http://gerrit.cloudera.org:8080/11872
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
(cherry picked from commit 71f8d0eba33c5d5d0a0cd2d2b617ac871dbcbb8a)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/bd573d12
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/bd573d12
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/bd573d12

Branch: refs/heads/branch-3.1.0
Commit: bd573d1267e4e5f5d904656acadeab9e2e6c2097
Parents: 8872e8b
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Nov 2 17:17:40 2018 -0700
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:57:35 2018 +0100

----------------------------------------------------------------------
 docs/impala.ditamap             |   1 +
 docs/topics/impala_shutdown.xml | 222 +++++++++++++++++++++++++++++++++++
 2 files changed, 223 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/bd573d12/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 8eecf06..e4c35a7 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -243,6 +243,7 @@ under the License.
         </topicref>
       </topicref>
       <topicref href="topics/impala_show.xml"/>
+      <topicref href="topics/impala_shutdown.xml"/>
       <topicref href="topics/impala_truncate_table.xml"/>
       <topicref href="topics/impala_update.xml"/>
       <topicref href="topics/impala_upsert.xml"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/bd573d12/docs/topics/impala_shutdown.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_shutdown.xml b/docs/topics/impala_shutdown.xml
new file mode 100644
index 0000000..1677fac
--- /dev/null
+++ b/docs/topics/impala_shutdown.xml
@@ -0,0 +1,222 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="shutdown">
+
+  <title>SHUTDOWN Statement</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>SHUTDOWN</navtitle>
+
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Developers"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      The <codeph>SHUTDOWN</codeph> statement performs a graceful shutdown of an Impala daemon. The
+      Impala daemon will notify other Impala daemons that it is shutting down, wait for a grace
+      period, then shut itself down once no more queries or fragments are executing on that
+      daemon. The <codeph>--shutdown_grace_period_s</codeph> flag determines the duration of the
+      grace period in seconds.
+    </p>
+
+    <p>
+      <b>Syntax:</b>
+    </p>
+
+<codeblock>:SHUTDOWN()
+:SHUTDOWN(<varname>host_name</varname>[:<varname>port_number</varname>])
+:SHUTDOWN(<varname>deadline</varname>)
+:SHUTDOWN(<varname>host_name</varname>[:<varname>port_number</varname>], <varname>deadline</varname>)</codeblock>
+
+    <p>
+      <b>Usage notes:</b>
+    </p>
+
+    <p>
+      All arguments are optional for <codeph>SHUTDOWN</codeph>.
+    </p>
+
+    <simpletable frame="all" id="simpletable_sly_wrf_rfb">
+
+      <sthead>
+
+        <stentry>Argument</stentry>
+
+        <stentry>Type</stentry>
+
+        <stentry>Default</stentry>
+
+        <stentry>Description</stentry>
+
+      </sthead>
+
+      <strow>
+
+        <stentry><codeph><varname>host_name</varname></codeph>
+
+        </stentry>
+
+        <stentry><codeph>STRING</codeph>
+
+        </stentry>
+
+        <stentry>The current <codeph>impalad</codeph> host</stentry>
+
+        <stentry>
+
+          <p>
+            Address of the backend to be shut down.
+          </p>
+
+        </stentry>
+
+      </strow>
+
+      <strow>
+
+        <stentry><codeph><varname>port_number</varname></codeph>
+
+        </stentry>
+
+        <stentry><codeph>INT</codeph>
+
+        </stentry>
+
+        <stentry><codeph>0</codeph>, which is treated as the same port as the current
+            <codeph>impalad</codeph>
+
+        </stentry>
+
+        <stentry>n/a</stentry>
+
+      </strow>
+
+      <strow>
+
+        <stentry><codeph><varname>deadline</varname></codeph>
+
+        </stentry>
+
+        <stentry><codeph>INT</codeph>
+
+        </stentry>
+
+        <stentry>The value of the <codeph>--shutdown_deadline_s</codeph>
+            flag, which defaults to 1 hour.
+
+        </stentry>
+
+        <stentry>
+
+          <p>
+            <codeph><varname>deadline</varname></codeph> must be a non-negative number,
+            specified in seconds.
+          </p>
+
+          <p>
+            The value, 0, for <varname>deadline</varname> specifies an immediate shutdown.
+          </p>
+
+        </stentry>
+
+      </strow>
+
+    </simpletable>
+
+    <p>
+      Take the following points into consideration when running the <codeph>SHUTDOWN</codeph>
+      statement:
+    </p>
+
+    <ul>
+      <li>
+        A client can shut down the
+        <xref
+          href="impala_components.xml#intro_impalad">coordinator</xref>
+        <codeph>impalad</codeph> that it is connected to via <codeph>:SHUTDOWN()</codeph>.
+      </li>
+
+      <li>
+        A client can remotely shut down another non-coordinator <codeph>impalad</codeph> via
+        <codeph>:SHUTDOWN('<varname>hostname</varname>')</codeph>.
+      </li>
+
+      <li>
+        The shutdown time limit can be overridden to force a quicker or slower shutdown by
+        specifying a deadline. The default deadline is determined by the
+        <codeph>--shutdown_deadline_s</codeph> flag, which defaults to 1 hour.
+      </li>
+
+      <li>
+        <xref href="impala_components.xml#intro_impalad">Executors</xref> can be shut down
+        without disrupting running queries. Short-running queries will finish, and long-running
+        queries will continue until a threshold time limit is reached.
+      </li>
+
+      <li>
+        If queries are submitted to a coordinator after shutdown of that coordinator has
+        started, they will fail.
+      </li>
+
+      <li>
+        Long running queries or other issues, such as stuck fragments, will slow down but not
+        prevent eventual shutdown.
+      </li>
+    </ul>
+
+    <p>
+      <b>Security considerations:</b>
+    </p>
+
+    <p>
+      The <codeph>ALL</codeph> privilege is required on the server.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/>
+
+    <p>
+      <b>Examples:</b>
+    </p>
+
+<codeblock>:SHUTDOWN(); -- Shut down the current impalad with the default deadline.
+:SHUTDOWN('hostname'); -- Shut down impalad running on hostname with the default deadline.
+:SHUTDOWN("hostname:1234"); -- Shut down impalad running on host at port 1234 with the default deadline.
+:SHUTDOWN(10); -- Shut down the current impalad after 10 seconds.
+:SHUTDOWN('hostname', 10); -- Shut down impalad running on hostname when all queries running on hostname finish, or after 10 seconds.
+:SHUTDOWN('hostname:11', 10 * 60); -- Shut down impalad running on hostname at port 11 when all queries running on hostname finish, or after 600 seconds.
+:SHUTDOWN(0); -- Perform an immediate shutdown of the current impalad.</codeblock>
+
+    <p>
+      <b>Added in:</b> <keyword keyref="impala31"/>
+    </p>
+
+  </conbody>
+
+</concept>


[07/11] impala git commit: IMPALA-7861: [DOCS] TLS enabled by default regardless of URI scheme

Posted by bo...@apache.org.
IMPALA-7861: [DOCS] TLS enabled by default regardless of URI scheme

Change-Id: I88f615cf23f406035e544e68adacdd0393f69ab3
Reviewed-on: http://gerrit.cloudera.org:8080/11948
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
(cherry picked from commit 09c25144fddcfc9b72c9c4914e41675f865857ac)


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3ec779a8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3ec779a8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3ec779a8

Branch: refs/heads/branch-3.1.0
Commit: 3ec779a80dd18b0be3d5a4bde888720c1f0bedf0
Parents: 0d5b5d4
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Nov 16 15:20:49 2018 -0800
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 27 11:59:52 2018 +0100

----------------------------------------------------------------------
 docs/topics/impala_adls.xml | 66 ++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3ec779a8/docs/topics/impala_adls.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_adls.xml b/docs/topics/impala_adls.xml
index f5103f4..d50b011 100644
--- a/docs/topics/impala_adls.xml
+++ b/docs/topics/impala_adls.xml
@@ -35,12 +35,14 @@ under the License.
 
   <conbody>
 
-    <p> You can use Impala to query data residing on the Azure Data Lake Store
+    <p>
+      You can use Impala to query data residing on the Azure Data Lake Store
       (ADLS) filesystem. This capability allows convenient access to a storage
       system that is remotely managed, accessible from anywhere, and integrated
       with various cloud-based services. Impala can query files in any supported
       file format from ADLS. The ADLS storage location can be for an entire
-      table, or individual partitions in a partitioned table. </p>
+      table or individual partitions in a partitioned table.
+    </p>
 
     <p>
       The default Impala tables use data files stored on HDFS, which are ideal for bulk loads and queries using
@@ -89,14 +91,12 @@ under the License.
       <ul>
         <li><p> The <xref href="impala_create_table.xml#create_table"/> or <xref
               href="impala_alter_table.xml#alter_table"/> statements can specify
-            that a table resides on the ADLS filesystem by specifying an ADLS
-            prefix for the <codeph>LOCATION</codeph> property.<ul>
-              <li><codeph>adl://</codeph> for ADLS Gen1</li>
-              <li><codeph>abfs://</codeph> for ADLS Gen2</li>
-              <li><codeph>abfss://</codeph> for ADLS Gen2 with a secure socket
-                layer connection</li>
-            </ul>
-            <codeph>ALTER TABLE</codeph> can also set the
+            that a table resides on the ADLS filesystem by using one of the
+            following ADLS prefixes in the <codeph>LOCATION</codeph> property.<ul>
+              <li>For ADLS Gen1: <codeph>adl://</codeph></li>
+              <li>For ADLS Gen2: <codeph>abfs://</codeph> or
+                  <codeph>abfss://</codeph></li>
+            </ul></p><p><codeph>ALTER TABLE</codeph> can also set the
               <codeph>LOCATION</codeph> property for an individual partition, so
             that some data in a table resides on ADLS and other data in the same
             table resides on HDFS. </p> See <xref href="impala_adls.xml#ddl"/>
@@ -269,24 +269,44 @@ under the License.
 
     <conbody>
 
-      <p> Impala reads data for a table or partition from ADLS based on the
+      <p>
+        Impala reads data for a table or partition from ADLS based on the
           <codeph>LOCATION</codeph> attribute for the table or partition.
         Specify the ADLS details in the <codeph>LOCATION</codeph> clause of a
           <codeph>CREATE TABLE</codeph> or <codeph>ALTER TABLE</codeph>
-        statement. The syntax for the <codeph>LOCATION</codeph> clause is:<ul>
-          <li>For ADLS Gen1,
-                <codeph>adl://<varname>account</varname>.azuredatalakestore.net/<varname>path/file</varname></codeph>
+        statement. The syntax for the <codeph>LOCATION</codeph> clause is:
+        <ul>
+          <li>
+            For ADLS Gen1:
+<codeblock>adl://<varname>account</varname>.azuredatalakestore.net/<varname>path/file</varname></codeblock></li>
+          <li>
+            For ADLS Gen2:
+<codeblock>abfs://<varname>container</varname>@<varname>account</varname>.dfs.core.windows.net/<varname>path</varname>/<varname>file</varname></codeblock>
+            <p>
+              or
+            </p>
+<codeblock>abfss://<varname>container</varname>@<varname>account</varname>.dfs.core.windows.net/<varname>path</varname>/<varname>file</varname></codeblock>
           </li>
-          <li>For ADLS Gen2,
-                <codeph>abfs://<varname>container</varname>@<varname>account</varname>.dfs.core.windows.net/<varname>path</varname>/<varname>file</varname></codeph></li>
-          <li>For ADLS Gen2 with a secure socket layer connection,
-                <codeph>abfss://<varname>container</varname>@<varname>account</varname>.dfs.core.windows.net/<varname>path</varname>/<varname>file</varname></codeph></li>
-        </ul></p>
-      <p><codeph><varname>container</varname></codeph> denotes the parent
+        </ul>
+      </p>
+      <p>
+        <codeph><varname>container</varname></codeph> denotes the parent
         location that holds the files and folders, which is the Containers in
-        the Azure Storage Blobs service.</p>
-      <p><codeph><varname>account</varname></codeph> is the name given for your
-        storage account.</p>
+        the Azure Storage Blobs service.
+      </p>
+      <p>
+        <codeph><varname>account</varname></codeph> is the name given for your
+        storage account.
+      </p>
+      <note>
+        <p> By default, TLS is enabled both with <codeph>abfs://</codeph> and
+            <codeph>abfss://</codeph>. </p>
+        <p>
+          When you set the <codeph>fs.azure.always.use.https=false</codeph>
+          property, TLS is disabled with <codeph>abfs://</codeph>, and TLS is
+          enabled with <codeph>abfss://</codeph>.
+        </p>
+      </note>
 
       <p>
         For a partitioned table, either specify a separate <codeph>LOCATION</codeph> clause for each new partition,