Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/12 05:28:14 UTC

[01/28] impala git commit: IMPALA-6688: [DOCS] compact_catalog_topic setting is enabled by default

Repository: impala
Updated Branches:
  refs/heads/2.x 851f622bf -> bbe534298


IMPALA-6688: [DOCS] compact_catalog_topic setting is enabled by default

- Marked IMPALA-5500 as fixed in Impala 2.10 in impala_known_issues.xml.
- In the same section, removed the mention of the default setting being false.
- Added a section in impala_upgrading.xml for default setting changes.
- Added new keydefs for 2.12 and 3.0 version strings.

Change-Id: Id6e9eef7e20fcc520865f3d6d065ed508b2917bb
Reviewed-on: http://gerrit.cloudera.org:8080/9922
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/80e385af
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/80e385af
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/80e385af

Branch: refs/heads/2.x
Commit: 80e385af9e46e2edc2bc3661c7246a920624f379
Parents: 09b58f6
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Tue Apr 3 15:39:34 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap         |  6 ++++++
 docs/topics/impala_known_issues.xml | 22 +++++++++++-----------
 docs/topics/impala_upgrading.xml    | 25 +++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/80e385af/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 1475092..4eb4d9b 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10521,6 +10521,8 @@ under the License.
   <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
 
 <!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. -->
+  <keydef keys="impala30"><topicmeta><keywords><keyword>Impala 3.0</keyword></keywords></topicmeta></keydef>
+  <keydef keys="impala212"><topicmeta><keywords><keyword>Impala 2.12</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala211"><topicmeta><keywords><keyword>Impala 2.11</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala210"><topicmeta><keywords><keyword>Impala 2.10</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala29"><topicmeta><keywords><keyword>Impala 2.9</keyword></keywords></topicmeta></keydef>
@@ -10538,6 +10540,8 @@ under the License.
 
 <!-- 3-part forms of version numbers, for use in release notes. -->
 <!-- Using spaced-out form to avoid conflict with variable for 2.1.10 -->
+  <keydef keys="impala3_00_0"><topicmeta><keywords><keyword>Impala 3.0.0</keyword></keywords></topicmeta></keydef>
+  <keydef keys="impala2_12_0"><topicmeta><keywords><keyword>Impala 2.12.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala2_11_0"><topicmeta><keywords><keyword>Impala 2.11.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala2100"><topicmeta><keywords><keyword>Impala 2.10.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala290"><topicmeta><keywords><keyword>Impala 2.9.0</keyword></keywords></topicmeta></keydef>
@@ -10578,6 +10582,8 @@ under the License.
   <keydef keys="impala130"><topicmeta><keywords><keyword>Impala 1.3.0</keyword></keywords></topicmeta></keydef>
 
 <!-- Long form of mapping from Impala release to vendor-specific releases, for use in running text. -->
+  <keydef keys="impala30_full"><topicmeta><keywords><keyword>Impala 3.0</keyword></keywords></topicmeta></keydef>
+  <keydef keys="impala212_full"><topicmeta><keywords><keyword>Impala 2.12</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala211_full"><topicmeta><keywords><keyword>Impala 2.11</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala210_full"><topicmeta><keywords><keyword>Impala 2.10</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala29_full"><topicmeta><keywords><keyword>Impala 2.9</keyword></keywords></topicmeta></keydef>

http://git-wip-us.apache.org/repos/asf/impala/blob/80e385af/docs/topics/impala_known_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_known_issues.xml b/docs/topics/impala_known_issues.xml
index 39fed3b..a8a8451 100644
--- a/docs/topics/impala_known_issues.xml
+++ b/docs/topics/impala_known_issues.xml
@@ -736,12 +736,10 @@ impala hard nproc 262144
         <p><b>Bug:</b> <xref keyref="IMPALA-5500">IMPALA-5500</xref></p>
         <p><b>Severity:</b> Medium</p>
         <p>
-          <b>Workaround:</b> Consider leaving the <codeph>compact_catalog_topic</codeph>
-          configuration setting at its default value of <codeph>false</codeph> until
-          this issue is resolved.
-        </p>
-        <p><b>Resolution:</b> A fix is in the pipeline. Check the status of
-        <xref keyref="IMPALA-5500">IMPALA-5500</xref> for the release where the fix is available.</p>
+          <b>Workaround:</b> Consider setting the
+            <codeph>compact_catalog_topic</codeph> configuration setting to
+            <codeph>false</codeph> until this issue is resolved. </p>
+        <p><b>Resolution:</b> Fixed in <keyword keyref="impala210"/>.</p>
       </conbody>
     </concept>
 
@@ -752,7 +750,7 @@ impala hard nproc 262144
         <p><b>Bug:</b> <xref keyref="IMPALA-2294">IMPALA-2294</xref></p>
         <p><b>Severity:</b> High</p>
         <p><b>Workaround:</b></p>
-        <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_start" conrefend="vm_overcommit_memory_end"/>
+        <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_start" conrefend="../shared/impala_common.xml#common/vm_overcommit_memory_end"/>
       </conbody>
     </concept>
 
@@ -1672,13 +1670,15 @@ select
         </p>
 
         <note type="warning">
-          Only use this for external tables, or Impala will remove the data files. In case of an internal table, set it to external first:
+          <p>Only use this for external tables, or Impala will remove the data
+            files. In case of an internal table, set it to external first:
 <codeblock>
 ALTER TABLE table_name SET TBLPROPERTIES('EXTERNAL'='TRUE');
 </codeblock>
-          (The part in parentheses is case sensitive.) Make sure to pick the right choice between internal and external when recreating the
-          table. See <xref href="impala_tables.xml#tables"/> for the differences between internal and external tables.
-        </note>
+          (The part in parentheses is case sensitive.) Make sure to pick the
+          right choice between internal and external when recreating the table.
+          See <xref href="impala_tables.xml#tables"/> for the differences
+          between internal and external tables. </p></note>
 
         <p>
           <b>Severity:</b> High

http://git-wip-us.apache.org/repos/asf/impala/blob/80e385af/docs/topics/impala_upgrading.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_upgrading.xml b/docs/topics/impala_upgrading.xml
index 37ef2b2..75ca7dd 100644
--- a/docs/topics/impala_upgrading.xml
+++ b/docs/topics/impala_upgrading.xml
@@ -136,4 +136,29 @@ $ ps ax | grep [i]mpalad
       </note>
     </conbody>
   </concept>
+  <concept id="concept_a2p_szq_jdb">
+    <title>Impala Upgrade Considerations</title>
+    <concept id="concept_mkn_ygr_jdb">
+      <title>Default Setting Changes</title>
+      <conbody>
+        <simpletable frame="all" id="simpletable_x55_ghr_jdb">
+          <sthead>
+            <stentry>Release Changed</stentry>
+            <stentry>Setting</stentry>
+            <stentry>Default Value</stentry>
+          </sthead>
+          <strow>
+            <stentry><keyword keyref="impala212_full"/></stentry>
+            <stentry><codeph>compact_catalog_topic</codeph></stentry>
+            <stentry><codeph>true</codeph></stentry>
+          </strow>
+          <strow>
+            <stentry><keyword keyref="impala212_full"/></stentry>
+            <stentry><codeph>max_cached_file_handle</codeph></stentry>
+            <stentry><codeph>20000</codeph></stentry>
+          </strow>
+        </simpletable>
+      </conbody>
+    </concept>
+  </concept>
 </concept>


[15/28] impala git commit: IMPALA-6338: Disable flaky bloom filter test

Posted by ta...@apache.org.
IMPALA-6338: Disable flaky bloom filter test

The underlying issue in IMPALA-6338 causes successful queries that are
cancelled internally (because all results have been returned) to, in
rare cases, have info missing from the profile. This has caused flaky
tests but has low impact on users, and unfortunately, with the current
query lifecycle logic in the coordinator, there is no simple solution.

There is ongoing work to improve the query lifecycle logic in the
coordinator holistically; see IMPALA-5384. This work will eventually
address the underlying cause of IMPALA-6338. Until then, we disable
the tests that have been flaky.

Change-Id: Ie30b88fb8fb7780fc3a7153c05fdc3606145ce35
Reviewed-on: http://gerrit.cloudera.org:8080/9822
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/caaa137d
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/caaa137d
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/caaa137d

Branch: refs/heads/2.x
Commit: caaa137d71707cc1f38d0962ec65a02f885a6de5
Parents: e910915
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Tue Mar 27 13:09:41 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 .../functional-query/queries/QueryTest/bloom_filters.test       | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/caaa137d/testdata/workloads/functional-query/queries/QueryTest/bloom_filters.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/bloom_filters.test b/testdata/workloads/functional-query/queries/QueryTest/bloom_filters.test
index 593e66b..4908e20 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/bloom_filters.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/bloom_filters.test
@@ -71,8 +71,9 @@ select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
-row_regex: .*1 of 1 Runtime Filter Published.*
-row_regex: .*Filter 0 \(1.00 MB\).*
+# TODO: reenable once IMPALA-6338 is fixed
+#row_regex: .*1 of 1 Runtime Filter Published.*
+#row_regex: .*Filter 0 \(1.00 MB\).*
 ====
 
 


[16/28] impala git commit: IMPALA-6667: [DOCS] max_cached_file_handles is enabled by default

Posted by ta...@apache.org.
IMPALA-6667: [DOCS] max_cached_file_handles is enabled by default

Change-Id: I61b1dc10c61415c673b49d339ba0d27f6a32a51e
Reviewed-on: http://gerrit.cloudera.org:8080/9915
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/4c6e1dba
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/4c6e1dba
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/4c6e1dba

Branch: refs/heads/2.x
Commit: 4c6e1dba7bd2473e10a67980b8c96c9151bc1023
Parents: adecae3
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Tue Apr 3 14:49:28 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_scalability.xml | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/4c6e1dba/docs/topics/impala_scalability.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_scalability.xml b/docs/topics/impala_scalability.xml
index beb450f..22e8e72 100644
--- a/docs/topics/impala_scalability.xml
+++ b/docs/topics/impala_scalability.xml
@@ -977,7 +977,7 @@ so other secure services might be affected temporarily.
   <title>Kerberos-Related Memory Overhead for Large Clusters</title>
   <conbody>
     <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_intro"/>
-    <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_start" conrefend="vm_overcommit_memory_end"/>
+    <p conref="../shared/impala_common.xml#common/vm_overcommit_memory_start" conrefend="../shared/impala_common.xml#common/vm_overcommit_memory_end"/>
   </conbody>
   </concept>
 
@@ -1032,26 +1032,26 @@ so other secure services might be affected temporarily.
         (that is, increase latency) to Impala queries, and reduce overall throughput for non-Impala
         workloads that also require accessing HDFS files.
       </p>
-      <p>
-        In <keyword keyref="impala210_full"/> and higher, you can reduce NameNode overhead by enabling
-        a caching feature for HDFS file handles. Data files that are accessed by different queries,
-        or even multiple times within the same query, can be accessed without a new <q>open</q>
-        call and without fetching the file details again from the NameNode.
-      </p>
+      <p> In <keyword keyref="impala210_full"/> and higher, you can reduce
+        NameNode overhead by enabling a caching feature for HDFS file handles.
+        Data files that are accessed by different queries, or even multiple
+        times within the same query, can be accessed without a new <q>open</q>
+        call and without fetching the file details again from the NameNode. </p>
       <p>
         Because this feature only involves HDFS data files, it does not apply to non-HDFS tables,
         such as Kudu or HBase tables, or tables that store their data on cloud services such as
         S3 or ADLS. Any read operations that perform remote reads also skip the cached file handles.
       </p>
-      <p>
-        This feature is turned off by default. To enable it, set the configuration option
-        <codeph>max_cached_file_handles</codeph> to a non-zero value for each <cmdname>impalad</cmdname>
-        daemon. Consider an initial starting value of 20 thousand, and adjust upward if NameNode
-        overhead is still significant, or downward if it is more important to reduce the extra memory usage
-        on each host. Each cache entry consumes 6 KB, meaning that caching 20,000 file handles requires
-        up to 120 MB on each DataNode. The exact memory usage varies depending on how many file handles
-        have actually been cached; memory is freed as file handles are evicted from the cache.
-      </p>
+      <p> The feature is enabled by default with 20,000 file handles to be
+        cached. To change the value, set the configuration option
+          <codeph>max_cached_file_handles</codeph> to a non-zero value for each
+          <cmdname>impalad</cmdname> daemon. From the initial default value of
+        20000, adjust upward if NameNode request load is still significant, or
+        downward if it is more important to reduce the extra memory usage on
+        each host. Each cache entry consumes 6 KB, meaning that caching 20,000
+        file handles requires up to 120 MB on each Impala executor. The exact
+        memory usage varies depending on how many file handles have actually
+        been cached; memory is freed as file handles are evicted from the cache. </p>
       <p>
         If a manual HDFS operation moves a file to the HDFS Trashcan while the file handle is cached,
         Impala still accesses the contents of that file. This is a change from prior behavior. Previously,


[06/28] impala git commit: IMPALA-6780: Fix always-true asserts in test_recover_partitions.py

Posted by ta...@apache.org.
IMPALA-6780: Fix always-true asserts in test_recover_partitions.py

Fixes the following syntax issue leading to an always-true assert:
assert (cond, msg) <-- always true, because a non-empty tuple is truthy

Instead, this should be used:
assert cond, msg <-- cond and msg can optionally have parentheses
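
For illustration, a minimal Python snippet (hypothetical, not part of the
patch) showing the difference:

    # The parenthesized form asserts on the whole (cond, msg) tuple.
    # A non-empty tuple is always truthy, so this never fails:
    assert (1 == 2, "this message is never shown")

    # The two-argument form evaluates the condition by itself and raises
    # AssertionError with the message when the condition is false:
    assert 1 == 2, "now this fails as expected"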

Testing:
- Locally ran test_recover_partitions.py
- Searched for other tests with similar mistakes (none identified)

Change-Id: If38efa62c2496b69ae891bf916482d697bfc719f
Reviewed-on: http://gerrit.cloudera.org:8080/9886
Tested-by: Impala Public Jenkins
Reviewed-by: Alex Behm <al...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a0450d24
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a0450d24
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a0450d24

Branch: refs/heads/2.x
Commit: a0450d2476208bf07a2c9401ae32039aa1bbb1df
Parents: 6a7f07d
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Apr 2 10:56:00 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 tests/metadata/test_recover_partitions.py | 46 +++++++++++++-------------
 1 file changed, 23 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/a0450d24/tests/metadata/test_recover_partitions.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_recover_partitions.py b/tests/metadata/test_recover_partitions.py
index 0f87a9f..2d17d3d 100644
--- a/tests/metadata/test_recover_partitions.py
+++ b/tests/metadata/test_recover_partitions.py
@@ -88,13 +88,13 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
-    assert (self.has_value(PART_NAME, result.data),
-        "ALTER TABLE %s RECOVER PARTITIONS failed." % FQ_TBL_NAME)
+    assert self.has_value(PART_NAME, result.data),\
+        "ALTER TABLE %s RECOVER PARTITIONS failed." % FQ_TBL_NAME
     result = self.execute_query_expect_success(self.client,
         "select c from %s" % FQ_TBL_NAME)
-    assert (self.has_value(INSERTED_VALUE, result.data),
-        "Failed to load tables after ALTER TABLE %s RECOVER PARTITIONS."
-        % FQ_TBL_NAME)
+    assert self.has_value(INSERTED_VALUE, result.data),\
+        "Failed to load tables after ALTER TABLE %s RECOVER PARTITIONS."\
+        % FQ_TBL_NAME
 
     # Test that invalid partition values are ignored during partition recovery.
     result = self.execute_query_expect_success(self.client,
@@ -105,9 +105,9 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
-    assert (len(result.data) == old_length,
-        "ALTER TABLE %s RECOVER PARTITIONS failed to handle invalid partition values."
-        % FQ_TBL_NAME)
+    assert len(result.data) == old_length,\
+        "ALTER TABLE %s RECOVER PARTITIONS failed to handle invalid partition values."\
+        % FQ_TBL_NAME
 
     # Create a directory whose subdirectory names contain __HIVE_DEFAULT_PARTITION__
     # and check that is recovered as a NULL partition.
@@ -121,9 +121,9 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
-    assert (self.has_value("NULL", result.data),
-        "ALTER TABLE %s RECOVER PARTITIONS failed to handle null partition values."
-        % FQ_TBL_NAME)
+    assert self.has_value("NULL", result.data),\
+        "ALTER TABLE %s RECOVER PARTITIONS failed to handle null partition values."\
+        % FQ_TBL_NAME
     result = self.execute_query_expect_success(self.client,
         "select c from %s" % FQ_TBL_NAME)
     assert self.has_value(NULL_INSERTED_VALUE, result.data)
@@ -252,9 +252,9 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
-    assert ((old_length + 1) == len(result.data),
-        "ALTER TABLE %s RECOVER PARTITIONS failed to handle duplicate partition key values."
-        % FQ_TBL_NAME)
+    assert old_length + 1 == len(result.data),\
+        "ALTER TABLE %s RECOVER PARTITIONS failed to handle "\
+        "duplicate partition key values." % FQ_TBL_NAME
 
   @SkipIfLocal.hdfs_client
   def test_post_invalidate(self, vector, unique_database):
@@ -285,9 +285,9 @@ class TestRecoverPartitions(ImpalaTestSuite):
     self.client.execute("INVALIDATE METADATA %s" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
         "select c from %s" % FQ_TBL_NAME)
-    assert (self.has_value(INSERTED_VALUE, result.data),
-        "INVALIDATE can't work on partitions recovered by ALTER TABLE %s RECOVER PARTITIONS."
-        % FQ_TBL_NAME)
+    assert self.has_value(INSERTED_VALUE, result.data),\
+        "INVALIDATE can't work on partitions recovered by "\
+        "ALTER TABLE %s RECOVER PARTITIONS." % FQ_TBL_NAME
     self.execute_query_expect_success(self.client,
         "INSERT INTO TABLE %s PARTITION(i=002, p='p2') VALUES(4)" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
@@ -329,9 +329,9 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME)
     result = self.execute_query_expect_success(self.client,
         "SHOW PARTITIONS %s" % FQ_TBL_NAME)
-    assert (len(result.data) == (old_length + 1),
-        "ALTER TABLE %s RECOVER PARTITIONS failed to handle some data types."
-        % FQ_TBL_NAME)
+    assert len(result.data) == (old_length + 1),\
+        "ALTER TABLE %s RECOVER PARTITIONS failed to handle some data types."\
+        % FQ_TBL_NAME
 
     # Test malformed partition values.
     self.check_invalid_partition_values(FQ_TBL_NAME, TBL_LOCATION,
@@ -386,9 +386,9 @@ class TestRecoverPartitions(ImpalaTestSuite):
           "ALTER TABLE %s RECOVER PARTITIONS" % fq_tbl_name)
       result = self.execute_query_expect_success(self.client,
           "SHOW PARTITIONS %s" % fq_tbl_name)
-      assert (len(result.data) == old_length,
-        "ALTER TABLE %s RECOVER PARTITIONS failed to handle invalid partition key values."
-        % fq_tbl_name)
+      assert len(result.data) == old_length,\
+        "ALTER TABLE %s RECOVER PARTITIONS failed to handle "\
+        "invalid partition key values." % fq_tbl_name
 
   def has_value(self, value, lines):
     """Check if lines contain value."""


[13/28] impala git commit: IMPALA-6807: [DOCS] Update the known issue for HDFS-12528

Posted by ta...@apache.org.
IMPALA-6807: [DOCS] Update the known issue for HDFS-12528

Added a recommendation for the new setting introduced in the HDFS fix
version, 2.10 and higher.

Change-Id: If51cb111a9ddc67be4a1cf42502a8a021486b7e4
Reviewed-on: http://gerrit.cloudera.org:8080/9929
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/aab49461
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/aab49461
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/aab49461

Branch: refs/heads/2.x
Commit: aab49461f5b7f5cab01768abdf75c710c740afed
Parents: dc1922f
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Wed Apr 4 16:22:42 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_known_issues.xml | 61 +++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/aab49461/docs/topics/impala_known_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_known_issues.xml b/docs/topics/impala_known_issues.xml
index a8a8451..a09188e 100644
--- a/docs/topics/impala_known_issues.xml
+++ b/docs/topics/impala_known_issues.xml
@@ -409,25 +409,54 @@ https://issues.apache.org/jira/browse/IMPALA-2144 - Don't have
       <title>Interaction of File Handle Cache with HDFS Appends and Short-Circuit Reads</title>
       <conbody>
         <p>
-          If a data file used by Impala is being continuously appended or overwritten in place by an
-          HDFS mechanism, such as <cmdname>hdfs dfs -appendToFile</cmdname>, interaction with the
-          file handle caching feature in <keyword keyref="impala210_full"/> and higher could cause
-          short-circuit reads to sometimes be disabled on some DataNodes. When a mismatch is detected
-          between the cached file handle and a data block that was rewritten because of an append,
-          short-circuit reads are turned off on the affected host for a 10-minute period.
+          If a data file used by Impala is being continuously appended or
+          overwritten in place by an HDFS mechanism, such as <cmdname>hdfs dfs
+            -appendToFile</cmdname>, interaction with the file handle caching
+          feature in <keyword keyref="impala210_full"/> and higher could cause
+          short-circuit reads to sometimes be disabled on some DataNodes. When a
+          mismatch is detected between the cached file handle and a data block
+          that was rewritten because of an append, short-circuit reads are
+          turned off on the affected host for a 10-minute period.
         </p>
         <p>
-          The possibility of encountering such an issue is the reason why the file handle caching
-          feature is currently turned off by default. See <xref keyref="scalability_file_handle_cache"/>
-          for information about this feature and how to enable it.
+          The possibility of encountering such an issue is the reason why the
+          file handle caching feature is currently turned off by default. See
+            <xref keyref="scalability_file_handle_cache"/> for information about
+          this feature and how to enable it.
         </p>
-        <p><b>Bug:</b> <xref href="https://issues.apache.org/jira/browse/HDFS-12528" scope="external" format="html">HDFS-12528</xref></p>
-        <p><b>Severity:</b> High</p>
-        <!-- <p><b>Resolution:</b> </p> -->
-        <p><b>Workaround:</b> Verify whether your ETL process is susceptible to this issue before enabling the file handle caching feature.
-          You can set the <cmdname>impalad</cmdname> configuration option <codeph>unused_file_handle_timeout_sec</codeph> to a time period
-          that is shorter than the HDFS setting <codeph>dfs.client.read.shortcircuit.streams.cache.expiry.ms</codeph>. (Keep in mind that
-          the HDFS setting is in milliseconds while the Impala setting is in seconds.)
+        <p>
+          <b>Bug:</b>
+          <xref href="https://issues.apache.org/jira/browse/HDFS-12528"
+            scope="external" format="html">HDFS-12528</xref>
+        </p>
+
+        <p>
+          <b>Severity:</b> High
+        </p>
+
+        <p><b>Workaround:</b> Verify whether your ETL process is susceptible to
+          this issue before enabling the file handle caching feature. You can
+          set the <cmdname>impalad</cmdname> configuration option
+            <codeph>unused_file_handle_timeout_sec</codeph> to a time period
+          that is shorter than the HDFS setting
+            <codeph>dfs.client.read.shortcircuit.streams.cache.expiry.ms</codeph>.
+          (Keep in mind that the HDFS setting is in milliseconds while the
+          Impala setting is in seconds.)
+        </p>
+
+        <p>
+          <b>Resolution:</b> Fixed in HDFS 2.10 and higher. Use the new HDFS
+          parameter <codeph>dfs.domain.socket.disable.interval.seconds</codeph>
+          to specify the amount of time that short circuit reads are disabled on
+          encountering an error. The default value is 10 minutes
+            (<codeph>600</codeph> seconds). It is recommended that you set
+            <codeph>dfs.domain.socket.disable.interval.seconds</codeph> to a
+          small value, such as <codeph>1</codeph> second, when using the file
+          handle cache. Setting <codeph>
+            dfs.domain.socket.disable.interval.seconds</codeph> to
+            <codeph>0</codeph> is not recommended as a non-zero interval
+          protects the system if there is a persistent problem with short
+          circuit reads.
         </p>
       </conbody>
     </concept>


[07/28] impala git commit: IMPALA-6571: NullPointerException in SHOW CREATE TABLE for HBase tables

Posted by ta...@apache.org.
IMPALA-6571: NullPointerException in SHOW CREATE TABLE for HBase tables

This patch fixes the NullPointerException in SHOW CREATE TABLE for HBase
tables.

Testing:
- Moved the content of hbase-show-create-table.test back to
  show-create-table.test
- Ran show-create-table end-to-end tests

Change-Id: Ibe018313168fac5dcbd80be9a8f28b71a2c0389b
Reviewed-on: http://gerrit.cloudera.org:8080/9884
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/09b58f6c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/09b58f6c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/09b58f6c

Branch: refs/heads/2.x
Commit: 09b58f6c8b24ae6b4fddab72466d5fc61c7c7ca8
Parents: 0a8cac2
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Mon Apr 2 09:19:36 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/ToSqlUtils.java  | 11 +++++----
 .../QueryTest/hbase-show-create-table.test      | 24 --------------------
 .../queries/QueryTest/show-create-table.test    | 24 ++++++++++++++++++++
 tests/metadata/test_show_create_table.py        |  3 ++-
 4 files changed, 33 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/09b58f6c/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
index 1940cde..9a164f0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
@@ -45,6 +45,7 @@ import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.HBaseTable;
 import org.apache.impala.catalog.HdfsCompression;
 import org.apache.impala.catalog.HdfsFileFormat;
+import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.KuduColumn;
 import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.RowFormat;
@@ -231,10 +232,8 @@ public class ToSqlUtils {
       }
     }
     RowFormat rowFormat = RowFormat.fromStorageDescriptor(msTable.getSd());
-    HdfsFileFormat format = HdfsFileFormat.fromHdfsInputFormatClass(
-        msTable.getSd().getInputFormat());
-    HdfsCompression compression = HdfsCompression.fromHdfsInputFormatClass(
-        msTable.getSd().getInputFormat());
+    HdfsFileFormat format = null;
+    HdfsCompression compression = null;
     String location = isHbaseTable ? null : msTable.getSd().getLocation();
     Map<String, String> serdeParameters = msTable.getSd().getSerdeInfo().getParameters();
 
@@ -270,6 +269,10 @@ public class ToSqlUtils {
         // We shouldn't output the columns for external tables
         colsSql = null;
       }
+    } else if (table instanceof HdfsTable) {
+      String inputFormat = msTable.getSd().getInputFormat();
+      format = HdfsFileFormat.fromHdfsInputFormatClass(inputFormat);
+      compression = HdfsCompression.fromHdfsInputFormatClass(inputFormat);
     }
     HdfsUri tableLocation = location == null ? null : new HdfsUri(location);
     return getCreateTableSql(table.getDb().getName(), table.getName(), comment, colsSql,

http://git-wip-us.apache.org/repos/asf/impala/blob/09b58f6c/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test
deleted file mode 100644
index 8062216..0000000
--- a/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test
+++ /dev/null
@@ -1,24 +0,0 @@
-====
----- QUERY
-SHOW CREATE TABLE alltypes
----- RESULTS
-CREATE EXTERNAL TABLE alltypes (
-  id INT COMMENT 'Add a comment',
-  bigint_col BIGINT,
-  bool_col BOOLEAN,
-  date_string_col STRING,
-  double_col DOUBLE,
-  float_col FLOAT,
-  int_col INT,
-  month INT,
-  smallint_col SMALLINT,
-  string_col STRING,
-  timestamp_col TIMESTAMP,
-  tinyint_col TINYINT,
-  year INT
-)
-STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
-WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,d:bool_col,d:tinyint_col,d:smallint_col,d:int_col,d:bigint_col,d:float_col,d:double_col,d:date_string_col,d:string_col,d:timestamp_col,d:year,d:month',
-                      'serialization.format'='1')
-TBLPROPERTIES ('hbase.table.name'='functional_hbase.alltypes',
-               'storage_handler'='org.apache.hadoop.hive.hbase.HBaseStorageHandler')

http://git-wip-us.apache.org/repos/asf/impala/blob/09b58f6c/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
index 81b619b..db38984 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
@@ -372,3 +372,27 @@ SORT BY (id)
 STORED AS TEXTFILE
 LOCATION '$$location_uri$$'
 ====
+---- QUERY
+SHOW CREATE TABLE functional_hbase.alltypes
+---- RESULTS
+CREATE EXTERNAL TABLE functional_hbase.alltypes (
+  id INT COMMENT 'Add a comment',
+  bigint_col BIGINT,
+  bool_col BOOLEAN,
+  date_string_col STRING,
+  double_col DOUBLE,
+  float_col FLOAT,
+  int_col INT,
+  month INT,
+  smallint_col SMALLINT,
+  string_col STRING,
+  timestamp_col TIMESTAMP,
+  tinyint_col TINYINT,
+  year INT
+)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,d:bool_col,d:tinyint_col,d:smallint_col,d:int_col,d:bigint_col,d:float_col,d:double_col,d:date_string_col,d:string_col,d:timestamp_col,d:year,d:month',
+                      'serialization.format'='1')
+TBLPROPERTIES ('hbase.table.name'='functional_hbase.alltypes',
+               'storage_handler'='org.apache.hadoop.hive.hbase.HBaseStorageHandler')
+====
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/impala/blob/09b58f6c/tests/metadata/test_show_create_table.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_show_create_table.py b/tests/metadata/test_show_create_table.py
index d0ce87f..00b63ef 100644
--- a/tests/metadata/test_show_create_table.py
+++ b/tests/metadata/test_show_create_table.py
@@ -34,7 +34,8 @@ class TestShowCreateTable(ImpalaTestSuite):
   # Properties to filter before comparing results
   FILTER_TBL_PROPERTIES = ["transient_lastDdlTime", "numFiles", "numPartitions",
                            "numRows", "rawDataSize", "totalSize", "COLUMN_STATS_ACCURATE",
-                           "STATS_GENERATED_VIA_STATS_TASK"]
+                           "STATS_GENERATED_VIA_STATS_TASK", "last_modified_by",
+                           "last_modified_time"]
 
   @classmethod
   def get_workload(self):


[10/28] impala git commit: KUDU-2401: External TLS certificate with Intermediate CA in server cert file fails

Posted by ta...@apache.org.
KUDU-2401: External TLS certificate with Intermediate CA in server cert file fails

Take 2 certificate files: cert.pem and truststore.pem

cert.pem has 2 certificates in it:
A cert for that node (with CN="hostname", and signed by CN=CertToolkitIntCA)
And the intermediate CA cert (with CN=CertToolkitIntCA, and signed by CN=CertToolkitRootCA)

truststore.pem has 1 certificate in it:
A cert which is the root CA (with CN=CertToolkitRootCA, self-signed)

This previously would not work with KRPC because in
TlsContext::VerifyCertChainUnlocked(), we would only call X509_verify_cert()
on the top certificate in the server certificate chain.

With this change, we pass the chain to X509_STORE_CTX_init() as well to make
sure that the entire chain gets checked against the CA.
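
For intuition, here is a small Python sketch of the same scenario
(hypothetical, not part of the patch; it assumes cert.pem, key.pem, and
truststore.pem laid out as described above). The client trusts only the
root CA, so the handshake succeeds only if the verifier walks the
presented leaf-plus-intermediate chain up to that root:

    import socket, ssl, threading

    # Server presents the full chain: cert.pem holds the node cert
    # followed by the intermediate CA cert.
    srv_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    srv_ctx.load_cert_chain(certfile="cert.pem", keyfile="key.pem")

    srv = socket.create_server(("127.0.0.1", 0))
    port = srv.getsockname()[1]

    def serve_once():
        conn, _ = srv.accept()
        with srv_ctx.wrap_socket(conn, server_side=True) as tls:
            tls.sendall(b"ok")

    threading.Thread(target=serve_once, daemon=True).start()

    # Client trusts only the root CA from truststore.pem.
    cli_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    cli_ctx.load_verify_locations(cafile="truststore.pem")
    cli_ctx.check_hostname = False  # test certs may not name 127.0.0.1

    with socket.create_connection(("127.0.0.1", port)) as sock:
        # The handshake verifies leaf -> intermediate -> root; it fails
        # if the verifier only checks the leaf against the truststore.
        with cli_ctx.wrap_socket(sock) as tls:
            print(tls.recv(2))  # b"ok" only if chain verification passed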

A test is added that uses the specific certificate format mentioned above
and added to rpc-test.

TODO: Add a test case that has multiple intermediate CAs. Right now we're testing
with only one intermediate CA.

Change-Id: If4af35e97ec6f91c1d9ed902128bd7f4e260f0f4
Reviewed-on: http://gerrit.cloudera.org:8080/9940
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e380c8ff
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e380c8ff
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e380c8ff

Branch: refs/heads/2.x
Commit: e380c8ffa80f9063371e130081e4d3c13c99c610
Parents: e0845ce
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Thu Apr 5 11:30:13 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/kudu/rpc/rpc-test.cc             |  35 ++++-
 be/src/kudu/security/test/test_certs.cc | 219 +++++++++++++++++++++++++++
 be/src/kudu/security/test/test_certs.h  |   9 +-
 be/src/kudu/security/tls_context.cc     |   4 +-
 4 files changed, 263 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e380c8ff/be/src/kudu/rpc/rpc-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/rpc-test.cc b/be/src/kudu/rpc/rpc-test.cc
index a1ab9cc..51b698a 100644
--- a/be/src/kudu/rpc/rpc-test.cc
+++ b/be/src/kudu/rpc/rpc-test.cc
@@ -165,7 +165,8 @@ TEST_P(TestRpc, TestCall) {
   }
 }
 
-TEST_P(TestRpc, TestCallWithChainCerts) {
+// Test for KUDU-2091 and KUDU-2220.
+TEST_P(TestRpc, TestCallWithChainCertAndChainCA) {
   bool enable_ssl = GetParam();
   // We're only interested in running this test with TLS enabled.
   if (!enable_ssl) return;
@@ -195,6 +196,38 @@ TEST_P(TestRpc, TestCallWithChainCerts) {
   ASSERT_OK(DoTestSyncCall(p, GenericCalculatorService::kAddMethodName));
 }
 
+// Test for KUDU-2041.
+TEST_P(TestRpc, TestCallWithChainCertAndRootCA) {
+  bool enable_ssl = GetParam();
+  // We're only interested in running this test with TLS enabled.
+  if (!enable_ssl) return;
+
+  string rpc_certificate_file;
+  string rpc_private_key_file;
+  string rpc_ca_certificate_file;
+  ASSERT_OK(security::CreateTestSSLCertWithChainSignedByRoot(GetTestDataDirectory(),
+                                                             &rpc_certificate_file,
+                                                             &rpc_private_key_file,
+                                                             &rpc_ca_certificate_file));
+  // Set up server.
+  Sockaddr server_addr;
+  ASSERT_OK(StartTestServer(&server_addr, enable_ssl));
+
+  // Set up client.
+  SCOPED_TRACE(strings::Substitute("Connecting to $0", server_addr.ToString()));
+  shared_ptr<Messenger> client_messenger;
+  ASSERT_OK(CreateMessenger("Client", &client_messenger, 1, enable_ssl,
+      rpc_certificate_file, rpc_private_key_file, rpc_ca_certificate_file));
+
+  Proxy p(client_messenger, server_addr, server_addr.host(),
+          GenericCalculatorService::static_service_name());
+  ASSERT_STR_CONTAINS(p.ToString(), strings::Substitute("kudu.rpc.GenericCalculatorService@"
+                                                            "{remote=$0, user_credentials=",
+                                                        server_addr.ToString()));
+
+  ASSERT_OK(DoTestSyncCall(p, GenericCalculatorService::kAddMethodName));
+}
+
 // Test making successful RPC calls while using a TLS certificate with a password protected
 // private key.
 TEST_P(TestRpc, TestCallWithPasswordProtectedKey) {

http://git-wip-us.apache.org/repos/asf/impala/blob/e380c8ff/be/src/kudu/security/test/test_certs.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/security/test/test_certs.cc b/be/src/kudu/security/test/test_certs.cc
index bc82140..88cf5cf 100644
--- a/be/src/kudu/security/test/test_certs.cc
+++ b/be/src/kudu/security/test/test_certs.cc
@@ -503,6 +503,12 @@ TOQYXv+dMtOkYg==
 // OpenSSL_command_line_Root_and_Intermediate_CA_including_OCSP_CRL%20and_revocation.html
 //
 // The parts relating to the OSCP and CRL were omitted.
+//
+// | serverCert TRUSTS intermediateCA TRUSTS rootCA |
+//
+// The 'cert_file' here contains the serverCert and intermediateCA.
+// The 'ca_cert_file' contains the rootCA and the same intermediateCA.
+// This was added to test KUDU-2091 and KUDU-2220.
 Status CreateTestSSLCertSignedByChain(const string& dir,
                                       string* cert_file,
                                       string* key_file,
@@ -746,5 +752,218 @@ Wd40Cr+wAdHKN6t/oransoxu0EZ3HcSOI1umFg==
   return Status::OK();
 }
 
+//
+// These certificates were generated by following the steps outlined in this tutorial
+// for creating the Root CA, Intermediate CA and end-user cert:
+// https://raymii.org/s/tutorials/ \
+// OpenSSL_command_line_Root_and_Intermediate_CA_including_OCSP_CRL%20and_revocation.html
+//
+// The parts relating to the OSCP and CRL were omitted.
+//
+// | serverCert TRUSTS intermediateCA TRUSTS rootCA |
+//
+// The 'cert_file' here contains the serverCert and intermediateCA.
+// The 'ca_cert_file' contains only the rootCA.
+// This was added to test KUDU-2041.
+Status CreateTestSSLCertWithChainSignedByRoot(const string& dir,
+                                              string* cert_file,
+                                              string* key_file,
+                                              string* ca_cert_file) {
+  const char* kCert = R"(
+-----BEGIN CERTIFICATE-----
+MIIFizCCA3OgAwIBAgICEAAwDQYJKoZIhvcNAQEFBQAwUTEXMBUGA1UEAwwOSW50
+ZXJtZWRpYXRlQ0ExCzAJBgNVBAgMAkNBMQswCQYDVQQGEwJVUzENMAsGA1UECgwE
+QWNtZTENMAsGA1UECwwES3VkdTAeFw0xNzA4MTEyMTM4MDZaFw00NDEyMjYyMTM4
+MDZaMEwxEjAQBgNVBAMMCWxvY2FsaG9zdDELMAkGA1UECAwCQ0ExCzAJBgNVBAYT
+AlVTMQ0wCwYDVQQKDARBY21lMQ0wCwYDVQQLDARLdWR1MIICIjANBgkqhkiG9w0B
+AQEFAAOCAg8AMIICCgKCAgEAqevNYH73n4kARZtMsHRucdKmqVd/xxztMlK5VOor
+ERUBhKVVOw3kpDrN9z80ldIkpOrtrfE7Ame/nA9v4k6P3minPEm1qCA/kvaAodtT
+4HjAkrPc+fto6VO6+aUV6l+ckAV/79lOuc7AutNlvvPtBQQcgOKvlNUSRKwM7ndy
+dO4ZAa+uP9Wtsd0gl8b5F3P8vwevD3a0+iDvwSd3pi2s/BeVgRwvOxJzud8ipZ/A
+ZmZN8Df9nHw5lsqLdNnqHXjTVCNXLnYXQC4gKU56fzyZL595liuefyQxiGY+dCCn
+CpqlSsHboJVC/F3OaQi3xVRTB5l2Nwb149EIadwCF0OulZCuYljJ5y9H2bECXEjP
+e5aOdz9d8W3/T7p9vBKWctToeCpqKXUd+8RPudh0D0sUHuwQ4u4S1K6X+eK+gGhT
+HOnPwt+P8ytG0M463z5Gh9feW9ZDIYoiFckheFBAHxsgDWhjYpFmYireLLXMbyaM
+s5v/AxPNRAsx3vAAd0M0vGOpdgEJ9V1MsKmxkPO/tDC3zmnv6uJhtJfrOAKxwiGC
+fDe4IoSC6H5fTxeAgw6BG5onS1UPLADL8NA/M1y8qiSCZS/5S0cHoJp5AxDfZSSR
+O49ispjqcONRwckcRJ5Pbl0IA+wGyg2DuI9LaqS5kKWp5AE8VCLPz7yepDkRnnjO
+3m8CAwEAAaNyMHAwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUZBZLZZaUfyIK/8B7
+GIIWDqeEvDgwHwYDVR0jBBgwFoAU8KctfaqAq0887CHqDsIC0Rkg7oQwCwYDVR0P
+BAQDAgWgMBMGA1UdJQQMMAoGCCsGAQUFBwMBMA0GCSqGSIb3DQEBBQUAA4ICAQA3
+XJXk9CbzdZUQugPI43LY88g+WjbTJfc/KtPSkHN3GjBBh8C0He7A2tp6Xj/LELmx
+crq62FzcFBnq8/iSdFITaYWRo0V/mXlpv2cpPebtwqbARCXUHGvF4/dGk/kw7uK/
+ohZJbeNySuQmQ5SQyfTdVA30Z0OSZ4jp24jC8uME7L8XOcFDgCRw01QNOISpi/5J
+BqeuFihmu/odYMHiEJdCXqe+4qIFfTh0mbgQ57l/geZm0K8uCEiOdTzSMoO8YdO2
+tm6EGNnc4yrVywjIHIvSy6YtNzd4ZM1a1CkEfPvGwe/wI1DI/zl3aJ721kcMPken
+rgEA4xXTPh6gZNMELIGZfu/mOTCFObe8rrh4QSaW4L+xa/VrLEnQRxuXAYGnmDWF
+e79aA+uXdS4+3OysNgEf4qDBt/ZquS/31DBdfJ59VfXWxp2yxMcGhcfiOdnx2Jy5
+KO8wdpXJA/7uwTJzsjLrIgfZnserOiBwE4luaHhDmKDGNVQvhkMq5tdtMdzuwn3/
+n6P1UwbFPiRGIzEAo0SSC1PRT8phv+5y0B1+gcj/peFymZVE+gRcrv9irVQqUpAY
+Lo9xrClAJ2xx4Ouz1GprKPoHdVyqtgcLXN4Oyi8Tehu96Zf6GytSEfTXsbQp+GgR
+TGRhKnDySjPhLp/uObfVwioyuAyA5mVCwjsZ/cvUUA==
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIIHmDCCA4CgAwIBAgICEAAwDQYJKoZIhvcNAQEFBQAwVjELMAkGA1UEBhMCVVMx
+CzAJBgNVBAgMAkNBMQswCQYDVQQHDAJTRjENMAsGA1UECgwEQWNtZTENMAsGA1UE
+CwwES3VkdTEPMA0GA1UEAwwGUk9PVENBMB4XDTE3MDgxMTIxMzUzNVoXDTQ0MTIy
+NzIxMzUzNVowUTEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0ExCzAJBgNVBAgMAkNB
+MQswCQYDVQQGEwJVUzENMAsGA1UECgwEQWNtZTENMAsGA1UECwwES3VkdTCCAiIw
+DQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAM1X35LT/eBWBt0Uqqh3DSUyY3K8
+HLIlX3ZXg2Nx6y8yqhw5UGVFZl0uYBDo2DSlTl4sey+AxLIbpQI9ArRA+xqmFynV
+jheB9otudnA8hVwi/e9o+m+VSjG+HPRjSS5hwdPgpJG8DCPSmGyUUFtf3v0NxkUq
+Is+fB5qhQ36aQkI+MwQsSlHR+YrrKKVnE3f911wr9OScQP5KHjrZLQex8OmpWD9G
+v4P9jfVSUwmNEXXjmXDhNG/1R4ofX6HogZR6lBmRNGbcjjWRZQmPrOe9YcdkMLD0
+CdaUyKikqqW6Ilxs7scfuCGqwBWqh66tY18MBMHnt0bL26atTPduKYqulJ1pijio
+DUrzqtAzm7PirqPZ4aOJ9PNjdQs9zH3Zad3pcjfjpdKj4a/asX0st631J5jE6MLB
+LcbAerb/Csr/+tD0TOxwWlA+p/6wPb8ECflQLkvDDEY5BrRGdqYDpEOdm1F9DWQh
+y0RB8rWJMkxC/tTqYHfeaphzCxndLRsZQKVcPiqWCT7b431umIjPaDhsykNlcU3N
+f0V7V/fLY6wwuACngS0BLQuMrXy5FyhmWnUBeWwHfAeTxCkHlF+cVT6wHmeOuGbC
+c1piq7O7puKdC3UjO7Nn+WoOb2B6Qm/dajHpj5myxYJa5tGQGeUnWPwjjMQR557k
+HzugGAzkuG1ASQrhAgMBAAGjdTBzMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYE
+FPCnLX2qgKtPPOwh6g7CAtEZIO6EMB8GA1UdIwQYMBaAFE/9XKaDey5kC8f3bCeU
+HW46abboMAsGA1UdDwQEAwIBpjATBgNVHSUEDDAKBggrBgEFBQcDATANBgkqhkiG
+9w0BAQUFAAOCBAEAIaD2yzjTFdn61A4Qi+ek3fBJaDNQZytd0rHb49v3T+mdj/MI
+yShI1qezDFkg2FP1LfNgjuQl/T+g0BloXatAhZ/dj20Y8oN6bmilV+r2YLJbvbTn
+3hI+MxNf3Ue3FmIrwKK3QdkWcDBURpyYaDO71oxPl9QNfdhWCGHB/oWKU2y4Qt/O
+aPy+CmBAZEclX+hsdUBDJG5vuujpv4myCFwpLgFKNQX3XqCPLc4SRjfyla2YmeZv
+j7KKYh8XOWBbBF0BnWD94WzUDIBmFlUfS32aJTvd7tVaWXwH8rGwDfLN8i05UD9G
+zc3uuFH+UdzWVymk/4svKIPlB2nw9vPV8hvRRah0yFN3EQqAF0vQtwVJF/VwtZdg
+ahH0DykYTf7cKtFXE40xB7YgwDLXd3UiXfo3USW28uKqsrO52xYuUTBn+xkilds1
+tNKwtpXFWP2PUk92ficxoqi1cJnHxIIt5HKskFPgfIpzkpR8IM/vsom1a5fn4TT1
+aJbO5FsZTXQMxFLYWiSOMhTZMp3iNduxMYPosngjjKPEIkTQHKkedpF+CAGIMOKE
+BVa0vHyF34laKMMDT8d9yxwBJLqjlBohNsLLZa/Y90ThaMw+QYn/GZATB+7ng+ip
+VdGAQrghsGSxP+47HZ6WgBrlRdUWN1d1tlN2NBMHLucpbra5THGzl5MlaSVBYZb6
+yXI+2lwcTnnEkKv2zoA4ZHWdtLn/b1y4NKNg205TA+sOZcl6B1BgMe/rFuXdZe9Q
+/b6Tjz65qL4y1ByBVBJNhQQairw6cypHzwzC3w6ub1ZXtFqnTlU8fFcHGeOyydYS
+NfoepF0w2v0ounqD+6rN1CH/ERVb4FCEN19HQ3z+rAj19z2h6m/l5QEKI7bz8ghD
+8yxyqJz+L9XpfOo1yZfHQJckilY6BBIGWyeetJBmvkwv2WPt+3pX1u7h5LkvNRj2
+3fItf486zqtzUi+i/E//rS4gD/rRr4a85U8GSfp3LSAbtmfC0LNYUYA9Dcc0LSpl
+9alNuEpBHSHXlCVh4bcOb0L9n5XNdMcUYBo14hQdP0K1G7TounuAXFKYIQeyNyoi
+OAZ+eb7Y2xNnkY/ps/kyhsZgOJyiDZhdcruK3FIUGYlg5aVjQTB8H0c3/5SZnSky
+6779yMKztFXj9ctYU0YyJXWdF0xP/vi1gjQx/hJnDfXFfIOmeJdQSC08BGyK/PeC
+8zAS380bgzOza/eBL6IK0RqytbWgdoLrUQQfa1+f7AQxDDdoOkUenM0HSWjKfCuG
+m1/N7KUDHtnjVIHWqRefTPg1/tQjVY8/zgxN8MyAy+D95y4rawjsJf1dL6c0+zGv
+Wd40Cr+wAdHKN6t/oransoxu0EZ3HcSOI1umFg==
+-----END CERTIFICATE-----
+)";
+  const char* kKey = R"(
+-----BEGIN RSA PRIVATE KEY-----
+MIIJKAIBAAKCAgEAqevNYH73n4kARZtMsHRucdKmqVd/xxztMlK5VOorERUBhKVV
+Ow3kpDrN9z80ldIkpOrtrfE7Ame/nA9v4k6P3minPEm1qCA/kvaAodtT4HjAkrPc
++fto6VO6+aUV6l+ckAV/79lOuc7AutNlvvPtBQQcgOKvlNUSRKwM7ndydO4ZAa+u
+P9Wtsd0gl8b5F3P8vwevD3a0+iDvwSd3pi2s/BeVgRwvOxJzud8ipZ/AZmZN8Df9
+nHw5lsqLdNnqHXjTVCNXLnYXQC4gKU56fzyZL595liuefyQxiGY+dCCnCpqlSsHb
+oJVC/F3OaQi3xVRTB5l2Nwb149EIadwCF0OulZCuYljJ5y9H2bECXEjPe5aOdz9d
+8W3/T7p9vBKWctToeCpqKXUd+8RPudh0D0sUHuwQ4u4S1K6X+eK+gGhTHOnPwt+P
+8ytG0M463z5Gh9feW9ZDIYoiFckheFBAHxsgDWhjYpFmYireLLXMbyaMs5v/AxPN
+RAsx3vAAd0M0vGOpdgEJ9V1MsKmxkPO/tDC3zmnv6uJhtJfrOAKxwiGCfDe4IoSC
+6H5fTxeAgw6BG5onS1UPLADL8NA/M1y8qiSCZS/5S0cHoJp5AxDfZSSRO49ispjq
+cONRwckcRJ5Pbl0IA+wGyg2DuI9LaqS5kKWp5AE8VCLPz7yepDkRnnjO3m8CAwEA
+AQKCAgAE3oL2Hu1Nnwlo9ThPXibEEDtCYwWAWS3a4U/6RPOS+70dZfd5R76jjiPU
+z/TbzjfKmgjRkTYVrY9qE28rVwD8aJdSPPJ9rN7lgTbSbIyMxCkQiyLr7u5ksUeM
+W9Sy8KZ14hJ2dw2weWJAeEpUHH1QRXvjnZtWcnyhhySfuMCI5UHGMJiXr7HYhPOo
+JcWBjItTlg7ILKim+kakjFL7aheo6awZFQutb6vtSZ2ejWNgC9Jz7cbQsyabUZaJ
+dK0mxw2XPaQD6tJjvm6hgGQ2PTBOkw1S5lEWZ50bwYJMpZrjzOarq751bZGL1cxS
+ajOJ7g6rCxS+Iu7s5lKNZgaRUBkymATYccoigfZDR//fHKAmdgjgPstqy1NJL+uX
+bIuNE0bR+mBM2JQzNjPIcE67PG+0aQdO4dj0TnTzkTP1JSsa6Tz4ckOUgt7IBK8j
+ilCQpHgOB900hXC6xVRnAaU/uuSYEtfi2eFBKHT02OqH51yNZ2jsgOQJCvxNrrdI
+OmA0AaZ2tVXTTXe6qebaNjjp2cziiO5ma+5mdI1vmLQAA9v0micO+eKp/pi5e0r6
+j+9uyR2Oo4qnHg04TIfDyglW3uKz1eI0RPfBN/dx3WLpydxKeywXPH6EzfBFk8pz
+ST2sy+1ZN4vn0bDSTjSLsLBW+xBKYINqKkBD2Kg6B7aeNINjEQKCAQEA4BQByyZV
+9va91T+rQiNPifuI4PKgmLTl0wxM1Jg/H4YCyLtuEkmuvwfeAjaUaUuk2MDs3xfQ
+4pu8vtAnDapq5vJ/lMg0m3+NIwoks+xee9F//M4du9j67USvX5Qw/6Cnx1zAvrWR
+TyZncKUocQowcXM9VU0xcv4qVCMaH5F5Q/1VvG7uAtGCnB8TOHzV7GUaj9xxSqDc
+f3+p9BzsecpPZsdpVi01dQbCKi9neZwFv1Vr3MvieNDOGqSGn8X5EjSHY5PzCaXL
+S+/HoFWOSzWcuNdzKJRjVkC8U+eHoEabaRnD47dfJntN3qOQ6Mwf6P2jMN9GqlQu
+DQlvpMxBwQT1/QKCAQEAwiC4hr6EZKaLmeZBLggsS+oifHReXQy6tf2n7kCAwAgL
++65K8UW+YUtQyHL6UFfD27vvW5yp6LZxMRUD3ghguHPMQcejgoQTfGmth1bCb9tx
+zqfxuWUoNauqZiP4/kgxoh815Kt+kC8BRCXNIWjH38zxY+ncv0b4oKP7lYna/7Xk
+URLmMFr92QVAydRxY9kQTHQTCd+ZQrFT97xEoosgzkKhmynSfWNx7ymYmCrHzscA
+TutpD26u4CA4Wh4ZdVPEF10lGR531SAFEqXCkaUvIfwPphPmOtum2LZdEYho9C76
+71kLzzoJOJUNo2L9ORd5I4tOrMT0tmN+MpS1cPXb2wKCAQAu3aBeZ9807vhXQKDG
+DXKWTmibe8OBDNzAnmL3V/xj0HiGmUT1SDnnNHMHjXjO6QZKW1dvdaC3tJDua8Sv
+RARl1zQ93v25xBy1xmpUw0wjo3acXlOztTcOJv5zBCCXZneQ5+JcQMdqgYLC+ZgS
+xGnLYKnkTGfaQDSEMm9FSPzO7o5fAeh/6Gfj1VAE0X9AmQjMK/P6Atj7Ra07JE2F
+T3355h0u6/exST+U6SNAORSupuQPYwkz8aAZzG1nv1VPrHLgrdH4I5f4gucCrsI7
+ErR7qHwqcZaxNIrvFY61Q+8/NSdWWkTpXIK13Qny1raZ2WqnTxuNhlu3WFDka+AY
+ybvVAoIBACOaxL1R7A5ZzXjolkPPE/DIfJK+9R+z2frPDyHPis2trCT5Dp254PUP
+Tz20eNyLfEys53WyAifAbnpGFHOArdymwGvAJekmODy1VTJhY0AIy5LPkrIiL4HI
+fnRFXMGmlBPcDZJnMctYE69gD4N1KFOPzyY4Gliqt6ce7GG86wHDZqDICpgL2EsZ
+f4yE/lcF1Mtw7pz8+asVwwTI7v2w7s9lwSYoQYbl2lu3EVm3XvY54YCYBKjj8AcD
+YdKFer3eIzT1zHwS7n+UY9nLtSfpV/+vr18Sf0OETdGpgOBaWIWQqE2F03iqeE58
+aAfze+YgvAMc5c0iQo/BJ8A3LiANt8kCggEBAK8cFEBm+Z1g5s1IaWxqTIylR5XF
+5RFty9HyUXtkAd2d1qVzBaBG3SXg4cRsW0EhcUV2XP3iTFIPXEEABRRu5U6DEal6
+wQclrhfP4hiRQHp2Ny6jDj70NCSeUmyEu2lmwEJJYsDSOCnVmtt+qlgmk4yI1Dua
+nXhLcPLqopuhEZs2V/l2Q6E5i4vrs71Y7of+vsAvvt42Vx5wsGdPQihc5E1MI7GB
+hxmQys1MwG3Jyd7Zk88MVNveASeEIc7UAmr/TGL+RIv4bxNi/1HrgekBf1jnFUU3
+4fsdqKy0W+rOgOlGN8TX7NYCz3B41UEiyf+/gZ/TcLKAyGnoAO727Ngayqo=
+-----END RSA PRIVATE KEY-----
+)";
+  const char* kRootCaCert = R"(
+-----BEGIN CERTIFICATE-----
+MIIJfzCCBWegAwIBAgIJAOquFl/JjmRLMA0GCSqGSIb3DQEBCwUAMFYxCzAJBgNV
+BAYTAlVTMQswCQYDVQQIDAJDQTELMAkGA1UEBwwCU0YxDTALBgNVBAoMBEFjbWUx
+DTALBgNVBAsMBEt1ZHUxDzANBgNVBAMMBlJPT1RDQTAeFw0xNzA4MTEyMTMyMTla
+Fw00NDEyMjcyMTMyMTlaMFYxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTELMAkG
+A1UEBwwCU0YxDTALBgNVBAoMBEFjbWUxDTALBgNVBAsMBEt1ZHUxDzANBgNVBAMM
+BlJPT1RDQTCCBCIwDQYJKoZIhvcNAQEBBQADggQPADCCBAoCggQBAOHxuOsGDBJt
+quCb+lrJ3uXvvBv6f1w1aP+WqDEqveQlOi68sV/DVUR3s+en/MHA5jYAVo2D+eR7
+v/zjrAzCeqCpipbDcxA2e00+kggGHc1BoLtXXTPPCcTQt/0jjX26GXlaJRrY5MAy
+ZJ35vkJ5wCTw7DttfyRzR/RplI6DfO3t2kkSFpSsjGFJZQRZn/L2OM8Ii/tEhede
+UP/Rv8KIKM2+P9RS0VIutiI+/mOpH0QZHHEgnHy7x/CcNCd+wDG516YoJXp9c/1n
+aRLK+jA0bNCf0ZktMpuifoFzpNJ3pvDkjgTLhbiMkS8VKc66Z/Mv0EVOrdiMla/X
+0OSWqEZctxIcVIGDbMqngy62dghMBmxpVkfNmu6RqyS3HmPFrhRXJIIogdBo8mdJ
+xFCCvOgA6suaZnQtQC0mlRi5XGnTocpeHYUZ1c1hO2ZdVrFTh3atJsD80kVYxYuK
+YMq3QaK2zZUK6TUIFue1UqLf2dpIFzskLY6bEVob7Rdl8AHdFBJ8cGOyYKpG+rwO
+n3XQudt8YwDUCvw+8MGRXQiwUnzT/3gSuLNjlQOdcqN78wT5mdp6QZwareptyRwT
+yk/wWnfZlcFO33aPnUhvzzI5TzTB6EqG+3oNYkuXXy/glvOFluyQcPfsYXVOnXOj
+xF0hjKcpx10KQSvXjT9SRYr8NcOC7Yjy3f+WF+nwV+EzevqC2iTr1u8ymqUvpgFJ
+snvO8G/tycfxrwjI/4IghBgwqhcqD4wp/NleXy3A7GE3kFusL10i1bjwxBlz5qER
+uKaxU164HXPl4gv+Qt3eGqJE8KHDwTp8x+619S0+Gd8fY6Yj6/v9WyDef0SKGscm
+t3iqYNA39yNHAj++cjcCrJwBfINVvnTsVFKsCwUpjVuNOGRfZv0uHLAv6LaePQk5
+FKHwlLlPRx7ZcwHpkzTvp/ixYPb/cNJOw8fVW5CoWXYEzDUJY0oU8BWlQNHQ/e4q
+V7Yxa/vourUUvOACDzyQ6hCO95dQdDMCDQqC2VVL45+TUJ3eU1gDHge4T2js/qL8
+iJ+auZapiZjUQzLFse4XkgDrkMrD4fkOQOw4x9AhJF/SrnI8UPNjNOmAyOlqGTdd
+IyLesKXgnOGASSmc7JRk+YBpu9PQXIgHHQZIao1zPGP5k/ylp8XPYitC9MKzRRam
+67aJmutJxEtw7VJhoyz5m5LhLysPLY+R01+QqZK9/7qwWaX6PvMmm42zq9YKncOM
+cC/4eAPnwbj6yhpFoaUD5qzloci3+tvYgb+wr3f1F9SPrpe5xJz3NTXdQj8FsGjl
+ShL+bybUQ7wzZQObUhWtXSayagQg1MAxUCn7Aglmo/A/1+teacyuCEIbrhmODYM3
+Okji9gmGv+cCAwEAAaNQME4wHQYDVR0OBBYEFE/9XKaDey5kC8f3bCeUHW46abbo
+MB8GA1UdIwQYMBaAFE/9XKaDey5kC8f3bCeUHW46abboMAwGA1UdEwQFMAMBAf8w
+DQYJKoZIhvcNAQELBQADggQBAMXuMpJzYV3QMH780os0dZyJ+zi4FETVjAWFAPME
+qzN27W0L9+HZcGpz5i5FLdmc0F3u1cyCrJ4nCCWMrEIVmrLInFRaH1u9HUO78jPw
+Uw/MZLF+sf7uE8IAdVzLQG0A3QjAVoOX3IEOxEaQYYlVQHonyq2pBt8mkGqiPI3R
+E9cTnP/R1Ncd1wZkFL/n5qSNGTr/eg1O/jFB5xH96xm18Z7HgJDku2JCKQK6kqTM
+B7LjAFwWzTg8cnewVFRzIvJe83w9lHs1SW3R9fz7fIEBbZQ3z+n1cSj5jDjaT1+U
+YWTj+gAklZT4M/vImXF0XqbZytUOqe16HfBInc0G/kakUIcs6le2hmfhccJdG25I
+e5TH6ZdMumt7//hVPBPN5fhYKc2uHpzbtmxUjuKG8Na8/w+y2O+sW5CfpNtrYLyB
+WATHGtBB3ckBAICLlhoQiY/ku9r6BfczI86MbSy5vG5CD2sYGhVEl3PQXAnvBKax
+svZS3z9f16SZm61FWwz+r0XCe7LBiKe9YpODyE8lFDymZyW0vKYzDLzCy/mXhU/j
+locrf5r4YK0cOxNQC/jK7MLDFxPQbYg2SuAPW4DF2QzgKn2RuatdOB12S05afawj
+bhrbikIfEtD3erUMMJwaV9dxhHL835rfexxbRzBCdbjWg7Qiw4r8+PJB/mSSvsVO
+RAI02p8IqW5O+iXkU4V2Mapzdpo6b8O6TplHRXbRxWuen87g87KHhgZaC6TmWgvT
+It3ARZx3tkBoJwf41ELmWcakqiT9aQslc5weafw3SZp6+w0QU0qqFwCFLJWHETa5
+/PVHDEkBoXDMnqMlu7E9PUks4Op9T2f7bNy94GZXRbSp2VKjV68sds739DhVIZ+M
+MIaEutz3UndEuGGlcVuqXlda+H5xp57RnMZSKbT240kGdci51WahhfkX7dLY6c/b
+jaNWyGSfM+wFlky97t7ANbPP85SDgrrSyb6rTIt1zU2c5+vvjNVvDhlS6n7ls/Pi
+lMWWs5Ka66E8oZFwYygfIiEv6FcNWrSZ/vCMuS02WJovsZd4YrYtNbpkx6shaA5t
+BOIpuelPbQNPlOaJ+YnRuuppomPnXx5X3RlHld7xsExHDNsi57H0PBDq/W5O1S4t
+rHm3SItJQ4ndFHBGjZ7kSOyHtCLWZ8cB75sPduVC2PnRL/kt3tmfFFVsUurLGz4n
+wgCg1OuflNcc9wIF8lZMjm0TZkQMGYBIfBA7x8/Vs2XSFuaT9vbWoC07LXftl13g
+HsMg1UUSqnMBUQStG42lbVFF1yIfPZThEPxD2RJTCw8FTLBmNrJyBsZ0BGagwe1C
+KH5H1VGmllMdZDHOamHHKA8mEDI4eAKY3HoOS4rfioT8Tks=
+-----END CERTIFICATE-----
+)";
+
+  *cert_file = JoinPathSegments(dir, "test.cert");
+  *key_file = JoinPathSegments(dir, "test.key");
+  *ca_cert_file = JoinPathSegments(dir, "testchainca.cert");
+
+  RETURN_NOT_OK(WriteStringToFile(Env::Default(), kCert, *cert_file));
+  RETURN_NOT_OK(WriteStringToFile(Env::Default(), kKey, *key_file));
+  RETURN_NOT_OK(WriteStringToFile(Env::Default(), kRootCaCert, *ca_cert_file));
+  return Status::OK();
+}
+
 } // namespace security
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/e380c8ff/be/src/kudu/security/test/test_certs.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/security/test/test_certs.h b/be/src/kudu/security/test/test_certs.h
index 89d1654..7767cb2 100644
--- a/be/src/kudu/security/test/test_certs.h
+++ b/be/src/kudu/security/test/test_certs.h
@@ -69,11 +69,18 @@ Status CreateTestSSLCertWithEncryptedKey(const std::string& dir,
                                          std::string* key_password);
 
 // Same as the CreateTestSSLCertWithPlainKey() except that the 'cert_file' is
-// signed by a CA chain.
+// signed by a CA chain ('ca_cert_file' is a chain of certificates).
 Status CreateTestSSLCertSignedByChain(const std::string& dir,
                                       std::string* cert_file,
                                       std::string* key_file,
                                       std::string* ca_cert_file);
 
+// Same as the CreateTestSSLCertWithPlainKey() except that the 'cert_file' is
+// a chain signed by a root CA ('ca_cert_file' is only the root CA).
+Status CreateTestSSLCertWithChainSignedByRoot(const std::string& dir,
+                                              std::string* cert_file,
+                                              std::string* key_file,
+                                              std::string* ca_cert_file);
+
 } // namespace security
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/e380c8ff/be/src/kudu/security/tls_context.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/security/tls_context.cc b/be/src/kudu/security/tls_context.cc
index f94e3d2..dac9a31 100644
--- a/be/src/kudu/security/tls_context.cc
+++ b/be/src/kudu/security/tls_context.cc
@@ -163,8 +163,8 @@ Status TlsContext::VerifyCertChainUnlocked(const Cert& cert) {
   X509_STORE* store = SSL_CTX_get_cert_store(ctx_.get());
   auto store_ctx = ssl_make_unique<X509_STORE_CTX>(X509_STORE_CTX_new());
 
-  OPENSSL_RET_NOT_OK(X509_STORE_CTX_init(store_ctx.get(), store, cert.GetTopOfChainX509(), nullptr),
-                     "could not init X509_STORE_CTX");
+  OPENSSL_RET_NOT_OK(X509_STORE_CTX_init(store_ctx.get(), store, cert.GetTopOfChainX509(),
+                     cert.GetRawData()), "could not init X509_STORE_CTX");
   int rc = X509_verify_cert(store_ctx.get());
   if (rc != 1) {
     int err = X509_STORE_CTX_get_error(store_ctx.get());
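
The one-line change above matters for the new chain tests: previously the fourth argument to X509_STORE_CTX_init was nullptr, so verification never saw intermediate certificates supplied alongside the leaf. A rough command-line analogy of the verification now being exercised, reusing the file names written by the helper above (hypothetical invocation; openssl's -untrusted supplies intermediates without trusting them):

  # Trust only the root CA; treat the rest of the chain as untrusted helpers.
  openssl verify -CAfile testchainca.cert -untrusted test.cert test.cert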


[20/28] impala git commit: IMPALA-4430: Update build scripts to die hard when IMPALA_HOME has spaces

Posted by ta...@apache.org.
IMPALA-4430: Update build scripts to die hard when IMPALA_HOME has spaces

Change-Id: I08b3d2b3f3e14c568d1672ee86ff2c52e8017b81
Reviewed-on: http://gerrit.cloudera.org:8080/9385
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/100fb2ce
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/100fb2ce
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/100fb2ce

Branch: refs/heads/2.x
Commit: 100fb2ce208e86863b413de2f4ece9c43cf90fd0
Parents: e1123eb
Author: njanarthanan <nj...@cloudera.com>
Authored: Wed Feb 21 13:39:30 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh | 17 ++++++++++++-----
 buildall.sh          | 10 ++++++++--
 2 files changed, 20 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/100fb2ce/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 621586e..b2d9d15 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -45,14 +45,21 @@ if [[ ! -e "$JAVA" ]]; then
   return 1
 fi
 
-if [ -z "$IMPALA_HOME" ]; then
-  if [[ ! -z "$ZSH_NAME" ]]; then
-    export IMPALA_HOME=$(dirname "$(cd $(dirname ${(%):-%x}) >/dev/null && pwd)")
-  else
-    export IMPALA_HOME=$(dirname "$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null && pwd)")
+if ! [[ "'$IMPALA_HOME'" =~ [[:blank:]] ]]; then
+  if [ -z "$IMPALA_HOME" ]; then
+    if [[ ! -z "$ZSH_NAME" ]]; then
+      export IMPALA_HOME=$(dirname "$(cd $(dirname ${(%):-%x}) >/dev/null && pwd)")
+    else
+      export IMPALA_HOME=$(dirname "$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null && pwd)")
+    fi
   fi
 fi
 
+if [[ "'$IMPALA_HOME'" =~ [[:blank:]] ]]; then
+  echo "IMPALA_HOME cannot have spaces in the path"
+  exit 1
+fi
+
 export IMPALA_TOOLCHAIN=${IMPALA_TOOLCHAIN-"$IMPALA_HOME/toolchain"}
 if [ -z "$IMPALA_TOOLCHAIN" ]; then
   echo "IMPALA_TOOLCHAIN must be specified. Please set it to a valid directory or"\

http://git-wip-us.apache.org/repos/asf/impala/blob/100fb2ce/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index d14c2ea..9d8b15e 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -18,13 +18,19 @@
 # under the License.
 
 set -euo pipefail
-trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
 
 # run buildall.sh -help to see options
-
 ROOT=`dirname "$0"`
 ROOT=`cd "$ROOT" >/dev/null; pwd`
 
+if [[ "'$ROOT'" =~ [[:blank:]] ]]
+then
+   echo "IMPALA_HOME cannot have spaces in the path"
+   exit 1
+fi
+
+trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
+
 # Grab this *before* we source impala-config.sh to see if the caller has
 # kerberized environment variables already or not.
 NEEDS_RE_SOURCE_NOTE=1
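
For context on why these guards are needed at all: unquoted expansions in bash word-split on blanks, so a space inside IMPALA_HOME silently breaks many call sites. A minimal demonstration of the failure mode and of the guard (hypothetical path; the check mirrors the one added above):

  IMPALA_HOME='/home/me/My Impala'
  ls -d $IMPALA_HOME/bin    # expands to two args: '/home/me/My' and 'Impala/bin'
  if [[ "$IMPALA_HOME" =~ [[:blank:]] ]]; then
    echo "IMPALA_HOME cannot have spaces in the path"
    exit 1
  fi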


[21/28] impala git commit: IMPALA-6070: Expose using Docker to run tests faster.

Posted by ta...@apache.org.
IMPALA-6070: Expose using Docker to run tests faster.

Allows running the tests that make up the "core" suite in about 2 hours.
By comparison, https://jenkins.impala.io/job/ubuntu-16.04-from-scratch/buildTimeTrend
tends to run in about 3.5 hours.

This commit:
* Adds "echo" statements in a few places, to facilitate timing.
* Adds --skip-parallel/--skip-serial flags to run-tests.py,
  and exposes them in run-all-tests.sh.
* Marks TestRuntimeFilters as a serial test. This test runs
  queries that need > 1GB of memory, and, combined with
  other tests running in parallel, can kill the parallel test
  suite.
* Adds "test-with-docker.py", which runs a full build, data load,
  and executes tests inside of Docker containers, generating
  a timeline at the end. In short, one container is used
  to do the build and data load, and then this container is
  re-used to run various tests in parallel. All logs are
  left on the host system.

Besides the obvious win of getting test results more quickly, this
commit serves as an example of how to get various bits of Impala
development working inside of Docker containers. For example, Kudu
relies on atomic rename of directories, which isn't available in most
Docker filesystems, and entrypoint.sh works around it.

In addition, the timeline generated by the build suggests where further
optimizations can be made. Most obviously, dataload eats up a precious
~30-50 minutes, on a largely idle machine.

This work is significantly CPU- and memory-hungry. It was developed on a
32-core, 120GB RAM Google Compute Engine machine. I've worked out
parallelism configurations such that it runs nicely on 60GB of RAM
(c4.8xlarge) and over 100GB (e.g., m4.10xlarge, which has 160GB). There is
some simple logic to guess reasonable defaults for the knobs, and the knobs
remain configurable. By and large, EC2 and GCE price machines linearly, so,
if CPU usage can be kept up, it's not wasteful to run on bigger machines.

Change-Id: I82052ef31979564968effef13a3c6af0d5c62767
Reviewed-on: http://gerrit.cloudera.org:8080/9085
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/93543cfb
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/93543cfb
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/93543cfb

Branch: refs/heads/2.x
Commit: 93543cfbca657a2064a86c030a59736f907d60d6
Parents: e380c8f
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Sat Oct 21 21:27:00 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 bin/bootstrap_system.sh                  |  11 +
 bin/rat_exclude_files.txt                |   1 +
 bin/run-all-tests.sh                     |   5 +-
 buildall.sh                              |   6 +-
 docker/README.md                         |   5 +
 docker/annotate.py                       |  34 ++
 docker/entrypoint.sh                     | 329 +++++++++++++++
 docker/monitor.py                        | 329 +++++++++++++++
 docker/test-with-docker.py               | 579 ++++++++++++++++++++++++++
 docker/timeline.html.template            | 142 +++++++
 testdata/bin/run-all.sh                  |   2 +
 tests/query_test/test_runtime_filters.py |   3 +
 tests/run-tests.py                       |  34 +-
 13 files changed, 1467 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/bin/bootstrap_system.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index 3a9c42b..3f88dd3 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -89,10 +89,14 @@ function apt-get {
   return 1
 }
 
+echo ">>> Installing packages"
+
 apt-get update
 apt-get --yes install apt-utils
 apt-get --yes install git
 
+echo ">>> Checking out Impala"
+
 # If there is no Impala git repo, get one now
 if ! [[ -d ~/Impala ]]
 then
@@ -103,6 +107,7 @@ SET_IMPALA_HOME="export IMPALA_HOME=$(pwd)"
 echo "$SET_IMPALA_HOME" >> ~/.bashrc
 eval "$SET_IMPALA_HOME"
 
+echo ">>> Installing build tools"
 apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
         krb5-admin-server krb5-kdc krb5-user libsasl2-dev libsasl2-modules \
         libsasl2-modules-gssapi-mit libssl-dev make maven ninja-build ntp \
@@ -122,6 +127,8 @@ SET_JAVA_HOME="export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64"
 echo "$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
 eval "$SET_JAVA_HOME"
 
+echo ">>> Configuring system"
+
 sudo service ntp stop
 sudo ntpdate us.pool.ntp.org
 # If on EC2, use Amazon's ntp servers
@@ -191,10 +198,14 @@ sudo chown $(whoami) /var/lib/hadoop-hdfs/
 echo "* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
 
 # LZO is not needed to compile or run Impala, but it is needed for the data load
+echo ">>> Checking out Impala-lzo"
 if ! [[ -d ~/Impala-lzo ]]
 then
   git clone https://github.com/cloudera/impala-lzo.git ~/Impala-lzo
 fi
+
+echo ">>> Checking out and building hadoop-lzo"
+
 if ! [[ -d ~/hadoop-lzo ]]
 then
   git clone https://github.com/cloudera/hadoop-lzo.git ~/hadoop-lzo
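
The ">>> " prefix on the echo lines added above is deliberate: the timeline tooling introduced later in this commit (monitor.Timeline) extracts log lines matching the regular expression ">>> " to build the HTML timeline. A hypothetical spot-check of which phases a given log will contribute:

  grep '>>> ' logs/docker/<run-name>/build/log-build.txt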

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 5bb13f0..a2b6267 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -79,6 +79,7 @@ tests/comparison/ORACLE.txt
 bin/distcc/README.md
 tests/comparison/POSTGRES.txt
 docs/README.md
+docker/README.md
 be/src/thirdparty/pcg-cpp-0.98/README.md
 
 # http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/bin/run-all-tests.sh
----------------------------------------------------------------------
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 4ca0f54..4743a4f 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -55,6 +55,8 @@ fi
 # Extra arguments passed to start-impala-cluster for tests. These do not apply to custom
 # cluster tests.
 : ${TEST_START_CLUSTER_ARGS:=}
+# Extra args to pass to run-tests.py
+: ${RUN_TESTS_ARGS:=}
 if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
   # TODO: Remove abort_on_config_error flag from here and create-load-data.sh once
   # checkConfiguration() accepts the local filesystem (see IMPALA-1850).
@@ -188,7 +190,8 @@ do
   if [[ "$EE_TEST" == true ]]; then
     # Run end-to-end tests.
     # KERBEROS TODO - this will need to deal with ${KERB_ARGS}
-    if ! "${IMPALA_HOME}/tests/run-tests.py" ${COMMON_PYTEST_ARGS} ${EE_TEST_FILES}; then
+    if ! "${IMPALA_HOME}/tests/run-tests.py" ${COMMON_PYTEST_ARGS} \
+        ${RUN_TESTS_ARGS} ${EE_TEST_FILES}; then
       #${KERB_ARGS};
       TEST_RET_CODE=1
     fi
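
The RUN_TESTS_ARGS hook is how entrypoint.sh (later in this commit) splits the end-to-end suite into serial and parallel halves. Hypothetical direct invocations, using the flags this commit adds to run-tests.py:

  # Serial EE tests only (stress tests are bucketed with the parallel run):
  RUN_TESTS_ARGS="--skip-parallel --skip-stress" bin/run-all-tests.sh
  # Parallel EE tests only:
  RUN_TESTS_ARGS="--skip-serial" bin/run-all-tests.sh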

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index 9d8b15e..56cdb9a 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -334,7 +334,7 @@ bootstrap_dependencies() {
     echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping download of Python dependencies."
     echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
   else
-    echo "Downloading Python dependencies"
+    echo ">>> Downloading Python dependencies"
     # Download all the Python dependencies we need before doing anything
     # of substance. Does not re-download anything that is already present.
     if ! "$IMPALA_HOME/infra/python/deps/download_requirements"; then
@@ -344,7 +344,7 @@ bootstrap_dependencies() {
       echo "Finished downloading Python dependencies"
     fi
 
-    echo "Downloading and extracting toolchain dependencies."
+    echo ">>> Downloading and extracting toolchain dependencies."
     "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     echo "Toolchain bootstrap complete."
   fi
@@ -357,6 +357,7 @@ build_fe() {
 
 # Build all components.
 build_all_components() {
+  echo ">>> Building all components"
   # Build the Impala frontend, backend and external data source API.
   MAKE_IMPALA_ARGS+=" -fe -cscope -tarballs"
   if [[ -e "$IMPALA_LZO" ]]
@@ -421,6 +422,7 @@ start_test_cluster_dependencies() {
 # This does all data loading, except for the metastore snapshot which must be loaded
 # earlier before the cluster is running.
 load_test_data() {
+  echo ">>> Loading test data"
   "$IMPALA_HOME/bin/create_testdata.sh"
   # We have 4 cases:
   # - test-warehouse and metastore snapshots exists.

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/docker/README.md
----------------------------------------------------------------------
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..f4e7709
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,5 @@
+# Docker-related scripts for Impala
+
+`test-with-docker.py` runs the Impala build and tests inside of Docker
+containers, parallelizing the test execution across test suites. See that file
+for more details.

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/docker/annotate.py
----------------------------------------------------------------------
diff --git a/docker/annotate.py b/docker/annotate.py
new file mode 100755
index 0000000..f83854f
--- /dev/null
+++ b/docker/annotate.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python -u
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Prepends input with timestamp. Unlike similar perl version,
+# produces microsecond timestamps. Python is unavailable in
+# the base Ubuntu image.
+#
+# Note that "python -u" disables buffering.
+
+import sys
+import datetime
+
+while True:
+  line = sys.stdin.readline()
+  if line == "":
+    sys.exit(0)
+  sys.stdout.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f "))
+  sys.stdout.write(line)
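
A quick illustration of what this filter does (hypothetical input; the timestamp is whatever datetime.now() returns):

  $ echo hello | docker/annotate.py
  2018-02-02 09:01:37.143591 hello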

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/docker/entrypoint.sh
----------------------------------------------------------------------
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100755
index 0000000..d371d25
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,329 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Entrypoint code for test-with-docker.py containers. test-with-docker.py
+# will create Docker containers with this script as the entrypoint,
+# with a variety of arguments. See test-with-docker.py for a more
+# general overview.
+#
+# This assumes that the following are already mounted inside
+# the container:
+#   /etc/localtime                      -> /mnt/localtime
+#     Helps timestamps be in the time zone of the host
+#   $IMPALA_HOME [a git repo of Impala] -> /repo
+#     Used to check out Impala afresh
+#   $IMPALA_HOME/logs/docker/<n1>/<n2> -> /logs
+#     Used to save logs out to host.
+#     <n1> represents the --name passed into
+#     test-with-docker for the test run. <n2>
+#     indicates which specific container is being run.
+#   ~/.ccache [configurable]            -> /ccache
+#     Used to speed up builds.
+#
+# Usage:
+#   entrypoint.sh build <uid>
+#   entrypoint.sh test_suite <suite>
+#      where <suite> is one of: BE_TEST JDBC_TEST CLUSTER_TEST
+#                               EE_TEST_SERIAL EE_TEST_PARALLEL
+
+# Bootstraps the container by creating a user and adding basic tools like Python and git.
+# Takes a uid as an argument for the user to be created.
+function build() {
+  # Handy for testing.
+  if [[ $TEST_TEST_WITH_DOCKER ]]; then
+    # We sleep busily so that CPU metrics will show usage, to
+    # better exercise the timeline code.
+    echo sleeping busily for 4 seconds
+    bash -c 'while [[ $SECONDS -lt 4 ]]; do :; done'
+    return
+  fi
+
+  # Configure timezone, so any timestamps that appear are coherent with the host.
+  configure_timezone
+
+  # Assert we're superuser.
+  [ "$(id -u)" = 0 ]
+  if id $1 2> /dev/null; then
+    echo "User with id $1 already exists. Please run this as a user id missing from " \
+      "the base Ubuntu container."
+    echo
+    echo "Container users:"
+    paste <(cut -d : -f3 /etc/passwd) <(cut -d : -f1 /etc/passwd) | sort -n
+    exit 1
+  fi
+  apt-get update
+  apt-get install -y sudo git lsb-release python
+
+  adduser --disabled-password --gecos "" --uid $1 impdev
+  echo "impdev ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
+  su impdev -c "$0 build_impdev"
+}
+
+# Sets up Impala environment
+function impala_environment() {
+  pushd /home/impdev/Impala
+  export IMPALA_HOME=/home/impdev/Impala
+  export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+  source bin/impala-config.sh
+  popd
+}
+
+# Starts SSH and PostgreSQL; configures container as necessary;
+# prepares Kudu for starting.
+function boot_container() {
+  pushd /home/impdev/Impala
+
+  # Required for metastore
+  sudo service postgresql start
+
+  # Required for starting HBase
+  sudo service ssh start
+
+  # Make log directories. This is typically done in buildall.sh.
+  mkdir -p logs/be_tests logs/fe_tests/coverage logs/ee_tests logs/custom_cluster_tests
+
+  # Update /etc/hosts to remove the entry for the unique docker hostname,
+  # and instead point it to 127.0.0.1. Otherwise, HttpFS returns Location:
+  # redirects to said hostname, but the relevant datanode isn't listening
+  # on the wildcard address.
+  sed -e /$(hostname)/d /etc/hosts -e /127.0.0.1/s,localhost,"localhost $(hostname)," \
+    > /tmp/hosts
+  # "sed -i" in place doesn't work on Docker, because /etc/hosts is a bind mount.
+  sudo cp /tmp/hosts /etc/hosts
+
+  echo Hostname: $(hostname)
+  echo Hosts file:
+  cat /etc/hosts
+
+  # Make a copy of Kudu's WALs to avoid issues with Docker filesystems (aufs and
+  # overlayfs) that don't support os.rename(2) on directories, which Kudu
+  # requires. We make a fresh copy of the data, in which case rename(2) works
+  # presumably because there's only one layer involved. See
+  # https://issues.apache.org/jira/browse/KUDU-1419.
+  cd /home/impdev/Impala/testdata
+  for x in cluster/cdh*/node-*/var/lib/kudu/*/wal; do
+    mv $x $x-orig
+    cp -r $x-orig $x
+    rm -r $x-orig
+  done
+
+  # Wait for postgresql to really start; if it doesn't, Hive Metastore will fail to start.
+  for i in {1..120}; do
+    echo connecting to postgresql attempt $i
+    if sudo -u postgres psql -c "select 1"; then
+      break
+    else
+      sleep 2
+    fi
+  done
+  sudo -u postgres psql -c "select 1"
+
+  popd
+}
+
+# Runs bootstrap_system.sh and then builds Impala.
+function build_impdev() {
+  # Assert we're impdev now.
+  [ "$(id -un)" = impdev ]
+
+  # Link in ccache from host.
+  ln -s /ccache /home/impdev/.ccache
+
+  # Instead of doing a full "git clone" of /repo, which is the host's checkout,
+  # we only fetch one branch, without tags. This keeps the checkout
+  # considerably lighter.
+  mkdir /home/impdev/Impala
+  pushd /home/impdev/Impala
+  git init
+  git fetch /repo --no-tags HEAD
+  git checkout -b test-with-docker FETCH_HEAD
+
+  # Link in logs. Logs are on the host since that's the most important thing to
+  # look at after the tests are run.
+  ln -sf /logs logs
+
+  bin/bootstrap_system.sh
+  impala_environment
+
+  # Builds Impala and loads test data.
+  # Note that IMPALA-6494 prevents us from using shared library builds,
+  # which are smaller and thereby speed things up.
+  ./buildall.sh -noclean -format -testdata -skiptests
+
+  # Shut down things cleanly.
+  testdata/bin/kill-all.sh
+
+  # Shutting down PostgreSQL nicely speeds up its start time for new containers.
+  sudo service postgresql stop
+
+  # Clean up things we don't need to reduce image size
+  find be -name '*.o' -execdir rm '{}' + # ~1.6GB
+
+  popd
+}
+
+# Runs a suite passed in as the first argument. Tightly
+# coupled with Impala's run-all-tests.sh and the suite names
+# from test-with-docker.py.
+#
+# Before running tests, starts up the minicluster.
+function test_suite() {
+  cd /home/impdev/Impala
+
+  # These test suites are for testing.
+  if [[ $1 == NOOP ]]; then
+    return 0
+  fi
+  if [[ $1 == NOOP_FAIL ]]; then
+    return 1
+  fi
+  if [[ $1 == NOOP_SLEEP_FOREVER ]]; then
+    # Handy to test timeouts.
+    while true; do sleep 60; done
+  fi
+
+  # Assert that we're running as impdev
+  [ "$(id -un)" = impdev ]
+
+  # Assert that /home/impdev/Impala/logs is a symlink to /logs.
+  [ "$(readlink /home/impdev/Impala/logs)" = /logs ]
+
+  boot_container
+  impala_environment
+
+  # By default, the JVM will use 1/4 of your OS memory for its heap size. For a
+  # long-running test, this will delay GC inside the impalads, leading to
+  # unnecessarily large process RSS footprints. We cap the heap size at
+  # a more reasonable size.  Note that "test_insert_large_string" fails
+  # at 2g and 3g, so the suite that includes it (EE_TEST_PARALLEL) gets
+  # additional memory.
+  #
+  # Similarly, bin/start-impala-cluster typically configures the memlimit
+  # to be 80% of the machine memory, divided by the number of daemons.
+  # If multiple containers are to be run simultaneously, this is scaled
+  # down in test-with-docker.py (and further configurable with --impalad-mem-limit-bytes)
+  # and passed in via $IMPALAD_MEM_LIMIT_BYTES to the container. There is a
+  # relationship between the number of parallel tests that can be run by py.test and this
+  # limit.
+  JVM_HEAP_GB=2
+  if [[ $1 = EE_TEST_PARALLEL ]]; then
+    JVM_HEAP_GB=4
+  fi
+  export TEST_START_CLUSTER_ARGS="--jvm_args=-Xmx${JVM_HEAP_GB}g \
+    --impalad_args=--mem_limit=$IMPALAD_MEM_LIMIT_BYTES"
+
+  # BE tests don't require the minicluster, so we can run them directly.
+  if [[ $1 = BE_TEST ]]; then
+    # IMPALA-6494: thrift-server-test fails on Ubuntu 16.04 for the moment; skip it.
+    export SKIP_BE_TEST_PATTERN='thrift-server-test*'
+    if ! bin/run-backend-tests.sh; then
+      echo "Tests $1 failed!"
+      return 1
+    else
+      echo "Tests $1 succeeded!"
+      return 0
+    fi
+  fi
+
+  # Start the minicluster
+  testdata/bin/run-all.sh
+
+  export MAX_PYTEST_FAILURES=0
+  # Choose which suite to run; this is how run-all.sh chooses between them.
+  export FE_TEST=false
+  export BE_TEST=false
+  export EE_TEST=false
+  export JDBC_TEST=false
+  export CLUSTER_TEST=false
+
+  eval "export ${1}=true"
+
+  if [[ ${1} = "EE_TEST_SERIAL" ]]; then
+    # We bucket the stress tests with the parallel tests.
+    export RUN_TESTS_ARGS="--skip-parallel --skip-stress"
+    export EE_TEST=true
+  elif [[ ${1} = "EE_TEST_PARALLEL" ]]; then
+    export RUN_TESTS_ARGS="--skip-serial"
+    export EE_TEST=true
+  fi
+
+  ret=0
+
+  # Run tests.
+  if ! time -p bin/run-all-tests.sh; then
+    ret=1
+    echo "Tests $1 failed!"
+  else
+    echo "Tests $1 succeeded!"
+  fi
+  # Oddly, I've observed bash fail to exit (and wind down the container),
+  # leading to test-with-docker.py hitting a timeout. Killing the minicluster
+  # daemons fixes this.
+  testdata/bin/kill-all.sh || true
+  return $ret
+}
+
+# Ubuntu's tzdata package is very finicky, and if you
+# mount /etc/localtime from the host to the container directly,
+# it fails to install. However, if you make it a symlink
+# and configure /etc/timezone to something that's not an
+# empty string, you'll get the right behavior.
+#
+# The post installation script is findable by looking for "tzdata.postinst"
+#
+# Use this command to reproduce the Ubuntu issue:
+#   docker run -v /etc/localtime:/mnt/localtime -ti ubuntu:16.04 bash -c '
+#     date
+#     ln -sf /mnt/localtime /etc/localtime
+#     date +%Z > /etc/timezone
+#     date
+#     apt-get update > /dev/null
+#     apt-get install tzdata
+#     date'
+function configure_timezone() {
+  if ! diff -q /etc/localtime /mnt/localtime 2> /dev/null; then
+    ln -sf /mnt/localtime /etc/localtime
+    date +%Z > /etc/timezone
+  fi
+}
+
+function main() {
+  set -e
+
+  # Run given command
+  CMD="$1"
+  shift
+
+  echo ">>> ${CMD} $@ (begin)"
+  set -x
+  if "${CMD}" "$@"; then
+    ret=0
+  else
+    ret=$?
+  fi
+  set +x
+  echo ">>> ${CMD} $@ ($ret) (end)"
+  exit $ret
+}
+
+# Run main() unless we're being sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  main "$@"
+fi
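
This entrypoint is not meant to be run directly on the host. A simplified sketch of how test-with-docker.py (later in this commit) creates the build container around it; the container name and log path here are hypothetical, and the authoritative argument list is _create_container in test-with-docker.py:

  docker create --privileged --name i-build --hostname i-build \
      -e LC_ALL=C \
      -v "$IMPALA_HOME:/repo:ro" \
      -v "$HOME/.ccache:/ccache" \
      -v /etc/localtime:/mnt/localtime \
      -v "$IMPALA_HOME/logs/docker/i-build/build:/logs" \
      -v "$IMPALA_HOME/docker:/mnt/base:ro" \
      ubuntu:16.04 /mnt/base/entrypoint.sh build "$(id -u)"
  docker start --attach i-build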

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/docker/monitor.py
----------------------------------------------------------------------
diff --git a/docker/monitor.py b/docker/monitor.py
new file mode 100644
index 0000000..64cde0c
--- /dev/null
+++ b/docker/monitor.py
@@ -0,0 +1,329 @@
+#!/usr/bin/python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Monitors Docker containers for CPU and memory usage, and
+# prepares an HTML timeline based on said monitoring.
+#
+# Usage example:
+#   mon = monitor.ContainerMonitor("monitoring.txt")
+#   mon.start()
+#   # container1 is an object with attributes id, name, and logfile.
+#   mon.add(container1)
+#   mon.add(container2)
+#   mon.stop()
+#   timeline = monitor.Timeline("monitoring.txt",
+#       [container1, container2],
+#       re.compile(">>> "))
+#   timeline.create("output.html")
+
+import datetime
+import json
+import logging
+import os
+import shutil
+import subprocess
+import threading
+import time
+
+
+# Unit for reporting user/system CPU seconds in cpuacct.stat.
+# See https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt and time(7).
+USER_HZ = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+
+
+def total_memory():
+  """Returns total RAM on system, in GB."""
+  return _memory()[0]
+
+
+def used_memory():
+  """Returns total used RAM on system, in GB."""
+  return _memory()[1]
+
+
+def _memory():
+  """Returns (total, used) memory on system, in GB.
+
+  Used is computed as total - available.
+
+  Calls "free" and parses output. Sample output for reference:
+
+                total        used        free      shared     buffers       cache   available
+  Mem:    126747197440 26363965440 56618553344    31678464  2091614208 41673064448 99384889344
+  Swap:             0           0           0
+  """
+
+  free_lines = subprocess.check_output(["free", "-b", "-w"]).split('\n')
+  free_grid = [x.split() for x in free_lines]
+  # Identify columns for "total" and "available"
+  total_idx = free_grid[0].index("total")
+  available_idx = free_grid[0].index("available")
+  total = int(free_grid[1][1 + total_idx])
+  available = int(free_grid[1][1 + available_idx])
+  used = total - available
+  total_gb = total / (1024.0 * 1024.0 * 1024.0)
+  used_gb = used / (1024.0 * 1024.0 * 1024.0)
+  return (total_gb, used_gb)
+
+
+def datetime_to_seconds_since_epoch(dt):
+  """Converts a Python datetime to seconds since the epoch."""
+  return time.mktime(dt.timetuple())
+
+
+def split_timestamp(line):
+  """Parses timestamp at beginning of a line.
+
+  Returns a tuple of seconds since the epoch and the rest
+  of the line. Returns None on parse failures.
+  """
+  LENGTH = 26
+  FORMAT = "%Y-%m-%d %H:%M:%S.%f"
+  t = line[:LENGTH]
+  return (datetime_to_seconds_since_epoch(datetime.datetime.strptime(t, FORMAT)),
+          line[LENGTH + 1:])
+
+
+class ContainerMonitor(object):
+  """Monitors Docker containers.
+
+  Monitoring data is written to a file. An example is:
+
+  2018-02-02 09:01:37.143591 d8f640989524be3939a70557a7bf7c015ba62ea5a105a64c94472d4ebca93c50 cpu user 2 system 5
+  2018-02-02 09:01:37.143591 d8f640989524be3939a70557a7bf7c015ba62ea5a105a64c94472d4ebca93c50 memory cache 11481088 rss 4009984 rss_huge 0 mapped_file 8605696 dirty 24576 writeback 0 pgpgin 4406 pgpgout 624 pgfault 3739 pgmajfault 99 inactive_anon 0 active_anon 3891200 inactive_file 7614464 active_file 3747840 unevictable 0 hierarchical_memory_limit 9223372036854771712 total_cache 11481088 total_rss 4009984 total_rss_huge 0 total_mapped_file 8605696 total_dirty 24576 total_writeback 0 total_pgpgin 4406 total_pgpgout 624 total_pgfault 3739 total_pgmajfault 99 total_inactive_anon 0 total_active_anon 3891200 total_inactive_file 7614464 total_active_file 3747840 total_unevictable 0
+
+  That is, the format is:
+
+  <timestamp> <container> cpu user <usercpu> system <systemcpu>
+  <timestamp> <container> memory <contents of memory.stat without newlines>
+
+  <usercpu> and <systemcpu> are in the units of USER_HZ.
+  See https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt for documentation
+  on memory.stat; it's in the "memory" cgroup, often mounted at
+  /sys/fs/cgroup/memory/<cgroup>/memory.stat.
+
+  This format is parsed back by the Timeline class below and should
+  not be considered an API.
+  """
+
+  def __init__(self, output_path, frequency_seconds=1):
+    """frequency_seconds is how often metrics are gathered"""
+    self.containers = []
+    self.output_path = output_path
+    self.keep_monitoring = None
+    self.monitor_thread = None
+    self.frequency_seconds = frequency_seconds
+
+  def start(self):
+    self.keep_monitoring = True
+    self.monitor_thread = threading.Thread(target=self._monitor)
+    self.monitor_thread.setDaemon(True)
+    self.monitor_thread.start()
+
+  def stop(self):
+    self.keep_monitoring = False
+    self.monitor_thread.join()
+
+  def add(self, container):
+    """Adds monitoring for container, which is an object with property 'id'."""
+    self.containers.append(container)
+
+  @staticmethod
+  def _metrics_from_stat_file(root, container, stat):
+    """Returns metrics stat file contents.
+
+    root: a cgroups root (a path as a string)
+    container: an object with string attribute id
+    stat: a string filename
+
+    Returns contents of <root>/<container.id>/<stat>
+    with newlines replaced with spaces.
+    Returns None on errors.
+    """
+    dirname = os.path.join(root, "docker", container.id)
+    if not os.path.isdir(dirname):
+      # Container may no longer exist.
+      return None
+    try:
+      statcontents = file(os.path.join(dirname, stat)).read()
+      return statcontents.replace("\n", " ").strip()
+    except IOError, e:
+      # Ignore errors; cgroup can disappear on us.
+      logging.warning("Ignoring exception reading cgroup. " +
+                      "This can happen if container just exited. " + str(e))
+      return None
+
+  def _monitor(self):
+    """Monitors CPU usage of containers.
+
+    Output is stored in self.output_path.
+    Also, keeps track of minimum and maximum memory usage (for the machine).
+    """
+    # Ubuntu systems typically mount cpuacct cgroup in /sys/fs/cgroup/cpu,cpuacct,
+    # but this can vary by OS distribution.
+    all_cgroups = subprocess.check_output(
+        "findmnt -n -o TARGET -t cgroup --source cgroup".split()
+    ).split("\n")
+    cpuacct_root = [c for c in all_cgroups if "cpuacct" in c][0]
+    memory_root = [c for c in all_cgroups if "memory" in c][0]
+    logging.info("Using cgroups: cpuacct %s, memory %s", cpuacct_root, memory_root)
+    self.min_memory_usage_gb = None
+    self.max_memory_usage_gb = None
+
+    with file(self.output_path, "w") as output:
+      while self.keep_monitoring:
+        # Use a single timestamp for a given round of monitoring.
+        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
+        for c in self.containers:
+          cpu = self._metrics_from_stat_file(cpuacct_root, c, "cpuacct.stat")
+          memory = self._metrics_from_stat_file(memory_root, c, "memory.stat")
+          if cpu:
+            output.write("%s %s cpu %s\n" % (now, c.id, cpu))
+          if memory:
+            output.write("%s %s memory %s\n" % (now, c.id, memory))
+        output.flush()
+
+        # Machine-wide memory usage
+        m = used_memory()
+        if self.min_memory_usage_gb is None:
+          self.min_memory_usage_gb, self.max_memory_usage_gb = m, m
+        else:
+          self.min_memory_usage_gb = min(self.min_memory_usage_gb, m)
+          self.max_memory_usage_gb = max(self.max_memory_usage_gb, m)
+        time.sleep(self.frequency_seconds)
+
+
+class Timeline(object):
+  """Given metric and log data for containers, creates a timeline report.
+
+  This is a standalone HTML file with a timeline for the log files and CPU charts for
+  the containers. The HTML uses https://developers.google.com/chart/ for rendering
+  the charts, which happens in the browser.
+  """
+
+  def __init__(self, monitor_file, containers, interesting_re):
+    self.monitor_file = monitor_file
+    self.containers = containers
+    self.interesting_re = interesting_re
+
+  def logfile_timeline(self, container):
+    """Returns a list of (name, timestamp, line) tuples for interesting lines in
+    the container's logfile. container is expected to have name and logfile attributes.
+    """
+    interesting_lines = [
+        line.strip()
+        for line in file(container.logfile)
+        if self.interesting_re.search(line)]
+    return [(container.name,) + split_timestamp(line) for line in interesting_lines]
+
+  @staticmethod
+  def parse_metrics(f):
+    """Parses timestamped metric lines.
+
+    Given metrics lines like:
+
+    2017-10-25 10:08:30.961510 87d5562a5fe0ea075ebb2efb0300d10d23bfa474645bb464d222976ed872df2a cpu user 33 system 15
+
+    Returns an iterable of (ts, container, user_cpu, system_cpu)
+    """
+    prev_by_container = {}
+    for line in f:
+      ts, rest = split_timestamp(line.rstrip())
+      try:
+        container, metric_type, rest2 = rest.split(" ", 2)
+        if metric_type != "cpu":
+          continue
+        _, user_cpu_s, _, system_cpu_s = rest2.split(" ", 3)
+      except:
+        logging.warning("Skipping metric line: %s", line)
+        continue
+
+      prev_ts, prev_user, prev_system = prev_by_container.get(
+          container, (None, None, None))
+      user_cpu = int(user_cpu_s)
+      system_cpu = int(system_cpu_s)
+      if prev_ts is not None:
+        # Timestamps are seconds since the epoch and are floats.
+        dt = ts - prev_ts
+        assert type(dt) == float
+        if dt != 0:
+          yield ts, container, (user_cpu - prev_user)/dt/USER_HZ,\
+              (system_cpu - prev_system)/dt/USER_HZ
+      prev_by_container[container] = ts, user_cpu, system_cpu
+
+  def create(self, output):
+    # Read logfiles
+    timelines = []
+    for c in self.containers:
+      if not os.path.exists(c.logfile):
+        logging.warning("Missing log file: %s", c.logfile)
+        continue
+      timelines.append(self.logfile_timeline(c))
+
+    # Convert timelines to JSON
+    min_ts = None
+    timeline_json = []
+    for timeline in timelines:
+      for current_line, next_line in zip(timeline, timeline[1:]):
+        name, ts_current, msg = current_line
+        _, ts_next, _ = next_line
+        timeline_json.append(
+            [name, msg, ts_current, ts_next]
+        )
+    if not timeline_json:
+      logging.warning("No timeline data; skipping timeline")
+      return
+
+    min_ts = min(x[2] for x in timeline_json)
+
+    for row in timeline_json:
+      row[2] = row[2] - min_ts
+      row[3] = row[3] - min_ts
+
+    # metrics_by_container: container -> [ ts, user, system ]
+    metrics_by_container = dict()
+    max_metric_ts = 0
+    container_by_id = dict()
+    for c in self.containers:
+      container_by_id[c.id] = c
+
+    for ts, container_id, user, system in self.parse_metrics(file(self.monitor_file)):
+      container = container_by_id.get(container_id)
+      if not container:
+        continue
+
+      if ts > max_metric_ts:
+        max_metric_ts = ts
+      if ts < min_ts:
+        # We ignore metrics that show up before the timeline's
+        # first messages. This largely avoids a bug in the
+        # Google Charts visualization code wherein one of the series seems
+        # to wrap around.
+        continue
+      metrics_by_container.setdefault(
+          container.name, []).append((ts - min_ts, user, system))
+
+    with file(output, "w") as o:
+      template_path = os.path.join(os.path.dirname(__file__), "timeline.html.template")
+      shutil.copyfileobj(file(template_path), o)
+      o.write("\n<script>\nvar data = \n")
+      json.dump(dict(timeline=timeline_json, metrics=metrics_by_container,
+                     max_ts=(max_metric_ts - min_ts)), o, indent=2)
+      o.write("</script>")
+      o.close()
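
The raw counters behind these metrics can also be inspected by hand. Under a typical Ubuntu cgroup layout (mount points vary, which is why _monitor discovers them with findmnt), something like the following, with a hypothetical container id:

  $ cat /sys/fs/cgroup/cpu,cpuacct/docker/<container-id>/cpuacct.stat
  user 33
  system 15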

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/docker/test-with-docker.py
----------------------------------------------------------------------
diff --git a/docker/test-with-docker.py b/docker/test-with-docker.py
new file mode 100755
index 0000000..59640b4
--- /dev/null
+++ b/docker/test-with-docker.py
@@ -0,0 +1,579 @@
+#!/usr/bin/python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+CLI_HELP = """\
+Runs tests inside of docker containers, parallelizing different types of
+tests. This script first creates a docker container, checks out this repo
+into it, bootstraps the container with Impala dependencies, and builds Impala
+and its test data.  Then, it saves the resulting container, and launches new
+containers to run tests in parallel.  An HTML, visual timeline is generated
+as part of the build, in logs/docker/*/timeline.html.
+"""
+
+# To execute run:
+#   docker/test-with-docker.py
+# After waiting for that to finish, inspect results in logs/docker.
+#
+# Visually, the timeline looks as follows, produced on a 32-core, 100GB RAM
+# machine:
+# .......                      1h05m Checkout, setup machine, build (8m with ccache),
+#                                    generate testdata (52m); missing ccache
+#                                    adds about 7 minutes (very sensitive to number
+#                                    of available cores)
+#        ...                     11m Commit the Docker container
+#           .                    10m FE tests
+#           .                    10m JDBC tests
+#           ....                 45m serial EE tests
+#           ......             1h02m cluster tests
+#           ...                  31m BE (C++) tests
+#           ....                 36m parallel EE tests
+# Total time: 2h25m.
+#
+# CPU usage is sustained high for the parallel EE tests and for
+# the C++ compile (when it's not ccache'd), but is otherwise low.
+# Because every parallel track consumes memory (a cluster),
+# increasing parallelism and memory must be balanced.
+#
+# Memory usage is thorny. The minicluster memory can
+# be tweaked somewhat by how much memory to give to the JVM
+# and what to set --mem_limit to. Furthermore, parallel
+# cluster tests use more memory when more parallelism happens.
+#
+# The code that runs inside of the containers is in entrypoint.sh,
+# whereas the code that invokes docker is here.
+#
+# We avoid using Dockerfile and "docker build": they make it hard or impossible
+# to cross-mount host directories into containers or use --privileged, and using
+# them would require generating them dynamically. They're more trouble than
+# they're worth for this use case.
+#
+# In practice, the containers are about 100GB (with 45GB
+# being test data and ~40GB being the tests).
+#
+# Requirements:
+#  * Docker
+#    This has been tested on Ubuntu 16.04 with Docker
+#    from the Ubuntu repos, i.e., Docker 1.13.1.
+#  * About 150 GB of disk space available to Docker.
+#  * 75GB of RAM.
+#
+# This script tries to clean up images and containers created by this process, though
+# this can be disabled for debugging.
+#
+# To clean up containers and images manually, you can use:
+#   for x in $(docker ps -aq --filter label=pwd=$IMPALA_HOME); do
+#       docker stop $x; docker rm $x; done
+#   for x in $(docker images -q --filter label=pwd=$IMPALA_HOME); do docker rmi $x; done
+#
+# Core dumps:
+# On an Ubuntu host, core dumps and Docker don't mix by default, because apport is not
+# running inside of the container. See https://github.com/moby/moby/issues/11740
+# To enable core dumps, run the following command on the host:
+#   $ echo 'core.%e.%p' | sudo tee /proc/sys/kernel/core_pattern
+#
+# TODOs:
+#  - Support for executing other flavors, like exhaustive, or file systems,
+#    like S3.
+#
+# Suggested speed improvement TODOs:
+#   - Speed up testdata generation
+#   - Skip generating test data for variants not being run
+#   - Make container image smaller; perhaps make BE test binaries
+#     smaller
+#   - Split up cluster tests into two groups
+#   - Analyze .xml junit files to find slow tests; eradicate
+#     or move to different suite.
+#   - Avoid building BE tests, and build them during execution,
+#     saving on container space as well as baseline build
+#     time.
+
+# We do not use Impala's python environment here, nor do we depend on
+# non-standard python libraries; this avoids needing extra build steps
+# before this script can be triggered.
+import argparse
+import datetime
+import logging
+import multiprocessing
+import multiprocessing.pool
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+
+if __name__ == '__main__' and __package__ is None:
+  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+  import monitor
+
+base = os.path.dirname(os.path.abspath(__file__))
+
+
+def main():
+
+  logging.basicConfig(level=logging.INFO,
+                      format='%(asctime)s %(threadName)s: %(message)s')
+
+  default_parallel_test_concurrency, default_suite_concurrency, default_memlimit_gb = \
+      _compute_defaults()
+  parser = argparse.ArgumentParser(
+      description=CLI_HELP, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+  group = parser.add_mutually_exclusive_group()
+  group.add_argument('--cleanup-containers', dest="cleanup_containers",
+                     action='store_true', default=True,
+                     help='Removes containers when finished.')
+  group.add_argument('--no-cleanup-containers',
+                     dest="cleanup_containers", action='store_false')
+  group = parser.add_mutually_exclusive_group()
+  parser.add_argument(
+      '--parallel-test-concurrency', type=int,
+      default=default_parallel_test_concurrency,
+      help='For the ee-test-parallel suite, how many tests to run concurrently.')
+  parser.add_argument(
+      '--suite-concurrency', type=int, default=default_suite_concurrency,
+      help='Number of concurrent suites to run in parallel.')
+  parser.add_argument(
+      '--impalad-mem-limit-bytes', type=int, default=default_memlimit_gb,
+      help='Memlimit to pass to impalad for miniclusters.')
+  group.add_argument(
+      '--cleanup-image', dest="cleanup_image",
+      action='store_true', default=True,
+      help="Whether to remove image when done.")
+  group.add_argument('--no-cleanup-image', dest="cleanup_image", action='store_false')
+  parser.add_argument(
+      '--build-image', metavar='IMAGE',
+      help='Skip building, and run tests on pre-existing image.')
+  parser.add_argument(
+      '--suite', metavar='VARIANT', action='append',
+      help="Run specific test suites; can be specified multiple times. \
+          If not specified, all tests are run. Choices: " + ",".join(ALL_SUITES))
+  parser.add_argument(
+      '--name', metavar='NAME',
+      help="Use a specific name for the test run. The name is used " +
+      "as a prefix for the container and image names, and " +
+      "as part of the log directory naming. Defaults to include a timestamp.",
+      default=datetime.datetime.now().strftime("i-%Y%m%d-%H%M%S"))
+  parser.add_argument('--timeout', metavar='MINUTES',
+                      help="Timeout for test suites, in minutes.",
+                      type=int,
+                      default=60*2)
+  parser.add_argument('--ccache-dir', metavar='DIR',
+                      help="CCache directory to use",
+                      default=os.path.expanduser("~/.ccache"))
+  parser.add_argument('--test', action="store_true")
+  args = parser.parse_args()
+
+  if not args.suite:
+    args.suite = ALL_SUITES
+  t = TestWithDocker(
+      build_image=args.build_image, suites=args.suite,
+      name=args.name, timeout=args.timeout, cleanup_containers=args.cleanup_containers,
+      cleanup_image=args.cleanup_image, ccache_dir=args.ccache_dir, test_mode=args.test,
+      parallel_test_concurrency=args.parallel_test_concurrency,
+      suite_concurrency=args.suite_concurrency,
+      impalad_mem_limit_bytes=args.impalad_mem_limit_bytes)
+
+  logging.getLogger('').addHandler(
+      logging.FileHandler(os.path.join(_make_dir_if_not_exist(t.log_dir), "log.txt")))
+
+  logging.info("Arguments: %s", args)
+
+  ret = t.run()
+  t.create_timeline()
+
+  if not ret:
+    sys.exit(1)
+
+
+def _compute_defaults():
+  """Compute default config options based on memory.
+
+  The goal is to work reasonably on machines with
+  about 60GB of memory, like Amazon's c4.8xlarge (36 CPUs, 60GB)
+  or c5.9xlarge (36 CPUs, 72GB) or m4.4xlarge (16 CPUs, 64 GB).
+
+  Based on some experiments, we set up defaults for different
+  machine sizes based on memory, with an eye towards
+  having reasonable runtimes as well.
+
+  Experiments on memory usage:
+
+  suite             Xmx  parallelism  memlimit  usage
+  ----------------  ---  -----------  --------  -----
+  ee-test-parallel  4GB            8       5GB   33GB
+  ee-test-parallel  4GB           16       7GB   37GB
+  ee-test-serial    4GB            -       5GB   18GB
+  cluster-test      4GB            -         -   13GB
+  be-test           4GB            -      10GB   19GB
+  fe-test           4GB            -      10GB    9GB
+  """
+  total_memory_gb = monitor.total_memory()
+  cpus = multiprocessing.cpu_count()
+  logging.info("CPUs: %s Memory (GB): %s", cpus, total_memory_gb)
+
+  parallel_test_concurrency = min(cpus, 8)
+  memlimit_gb = 7
+
+  if total_memory_gb >= 95:
+    suite_concurrency = 4
+    parallel_test_concurrency = min(cpus, 12)
+  elif total_memory_gb >= 65:
+    suite_concurrency = 3
+  elif total_memory_gb >= 35:
+    suite_concurrency = 2
+  else:
+    logging.warning("This tool should be run on a machine with more memory.")
+    suite_concurrency = 1
+
+  return parallel_test_concurrency, suite_concurrency, memlimit_gb * 1024 * 1024 * 1024
+
+
+# The names of all the test tracks supported.  NOOP isn't included here, but is
+# handy for testing.  These are organized slowest-to-fastest, so that, when
+# parallelism of suites is limited, the total time is not impacted.
+ALL_SUITES = [
+    "EE_TEST_SERIAL",
+    "EE_TEST_PARALLEL",
+    "CLUSTER_TEST",
+    "BE_TEST",
+    "FE_TEST",
+    "JDBC_TEST",
+]
+
+
+def _call(args, check=True):
+  """Wrapper for calling a subprocess.
+
+  args is the first argument of subprocess.Popen, typically
+  an array, e.g., ["echo", "hi"].
+
+  If check is set, raise an exception on failure.
+  """
+  logging.info("Calling: %s", args)
+  if check:
+    subprocess.check_call(args, stdin=None)
+  else:
+    return subprocess.call(args, stdin=None)
+
+
+def _check_output(*args, **kwargs):
+  """Wrapper for subprocess.check_output, with logging."""
+  logging.info("Running: %s, %s; cmdline: %s.", args, kwargs, " ".join(*args))
+  return subprocess.check_output(*args, **kwargs)
+
+
+def _make_dir_if_not_exist(*parts):
+  d = os.path.join(*parts)
+  if not os.path.exists(d):
+    os.makedirs(d)
+  return d
+
+
+class Container(object):
+  """Encapsulates a container, with some metadata."""
+
+  def __init__(self, id_, name, logfile, exitcode=None, running=None):
+    self.id = id_
+    self.name = name
+    self.logfile = logfile
+    self.exitcode = exitcode
+    self.running = running
+    self.start = None
+    self.end = None
+
+  def runtime_seconds(self):
+    if self.start and self.end:
+      return self.end - self.start
+
+  def __str__(self):
+    return "Container<" + \
+        ",".join(["%s=%s" % (k, v) for k, v in self.__dict__.items()]) \
+        + ">"
+
+
+class TestWithDocker(object):
+  """Tests Impala using Docker containers for parallelism."""
+
+  def __init__(self, build_image, suites, name, timeout, cleanup_containers,
+               cleanup_image, ccache_dir, test_mode,
+               suite_concurrency, parallel_test_concurrency,
+               impalad_mem_limit_bytes):
+    self.build_image = build_image
+    self.suites = [TestSuiteRunner(self, suite) for suite in suites]
+    self.name = name
+    self.containers = []
+    self.timeout_minutes = timeout
+    self.git_root = _check_output(["git", "rev-parse", "--show-toplevel"]).strip()
+    self.cleanup_containers = cleanup_containers
+    self.cleanup_image = cleanup_image
+    self.image = None
+    if build_image and cleanup_image:
+      # Refuse to clean up external image.
+      raise Exception("cleanup_image and build_image cannot be both specified")
+    self.ccache_dir = ccache_dir
+    self.log_dir = os.path.join(self.git_root, "logs", "docker", self.name)
+    self.monitoring_output_file = os.path.join(self.log_dir, "metrics.txt")
+    self.monitor = monitor.ContainerMonitor(self.monitoring_output_file)
+    self.test_mode = test_mode
+    self.suite_concurrency = suite_concurrency
+    self.parallel_test_concurrency = parallel_test_concurrency
+    self.impalad_mem_limit_bytes = impalad_mem_limit_bytes
+
+  def _create_container(self, image, name, logdir, logname, entrypoint, extras=None):
+    """Returns a new container.
+
+    logdir - subdirectory to create under self.log_dir,
+      which will get mounted to /logs
+    logname - name of file in logdir that will be created
+    extras - extra arguments to pass to docker
+    entrypoint - entrypoint arguments, as a list.
+    """
+    if extras is None:
+      extras = []
+    if self.test_mode:
+      extras = ["-e", "TEST_TEST_WITH_DOCKER=true"] + extras
+
+    container_id = _check_output([
+        "docker", "create",
+        # Required for some of the ntp handling in bootstrap and Kudu;
+        # requirement may be lifted in newer Docker versions.
+        "--privileged",
+        "--name", name,
+        "--hostname", name,
+        # Label with the git root directory for easier cleanup
+        "--label=pwd=" + self.git_root,
+        # Consistent locales
+        "-e", "LC_ALL=C",
+        "-e", "IMPALAD_MEM_LIMIT_BYTES=" +
+        str(self.impalad_mem_limit_bytes),
+        # Mount the git directory so that clones can be local
+        "-v", self.git_root + ":/repo:ro",
+        "-v", self.ccache_dir + ":/ccache",
+        # Share timezone between host and container
+        "-v", "/etc/localtime:/mnt/localtime",
+        "-v", _make_dir_if_not_exist(self.log_dir,
+                                     logdir) + ":/logs",
+        "-v", base + ":/mnt/base:ro"]
+        + extras
+        + [image]
+        + entrypoint).strip()
+    return Container(name=name, id_=container_id,
+                     logfile=os.path.join(self.log_dir, logdir, logname))
+
+  def _run_container(self, container):
+    """Runs container, and returns True if the container had a successful exit value.
+
+    This blocks while the container is running. The container output is
+    run through annotate.py to add timestamps and saved into the container's log file.
+    """
+    container.running = True
+
+    with file(container.logfile, "w") as log_output:
+      container.start = time.time()
+      # Sets up a "docker start ... | annotate.py > logfile" pipeline using
+      # subprocess.
+      annotate = subprocess.Popen(
+          [os.path.join(self.git_root, "docker", "annotate.py")],
+          stdin=subprocess.PIPE,
+          stdout=log_output,
+          stderr=log_output)
+
+      logging.info("Starting container %s; logging to %s", container.name,
+                   container.logfile)
+      docker = subprocess.Popen(["docker", "start", "--attach", container.id],
+                                stdin=None, stdout=annotate.stdin, stderr=annotate.stdin)
+
+      ret = docker.wait()
+      annotate.stdin.close()
+      annotate.wait()
+
+      logging.info("Container %s returned %s", container, ret)
+      container.exitcode = ret
+      container.running = False
+      container.end = time.time()
+      return ret == 0
+
+  @staticmethod
+  def _stop_container(container):
+    """Stops container. Ignores errors (e.g., if it's already exited)."""
+    _call(["docker", "stop", container.id], check=False)
+    if container.running:
+      container.end = time.time()
+      container.running = False
+
+  @staticmethod
+  def _rm_container(container):
+    """Removes container."""
+    _call(["docker", "rm", container.id], check=False)
+
+  def _create_build_image(self):
+    """Creates the "build image", with Impala compiled and data loaded."""
+    container = self._create_container(
+        image="ubuntu:16.04", name=self.name,
+        logdir="build",
+        logname="log-build.txt",
+        # entrypoint.sh will create a user with our uid; this
+        # allows the shared file systems to work seamlessly
+        entrypoint=["/mnt/base/entrypoint.sh", "build", str(os.getuid())])
+    self.containers.append(container)
+    self.monitor.add(container)
+    try:
+      logging.info("Docker container for build: %s", container)
+      _check_output(["docker", "start", container.id])
+      if not self._run_container(container):
+        raise Exception("Build container failed.")
+      logging.info("Committing docker container.")
+      self.image = _check_output(
+          ["docker", "commit",
+           "-c", "LABEL pwd=" + self.git_root,
+           container.id, "impala:built-" + self.name]).strip()
+      logging.info("Committed docker image: %s", self.image)
+    finally:
+      if self.cleanup_containers:
+        self._stop_container(container)
+        self._rm_container(container)
+
+  def _run_tests(self):
+    start_time = time.time()
+    timeout_seconds = self.timeout_minutes * 60
+    deadline = start_time + timeout_seconds
+    pool = multiprocessing.pool.ThreadPool(processes=self.suite_concurrency)
+    outstanding_suites = []
+    for suite in self.suites:
+      suite.task = pool.apply_async(suite.run)
+      outstanding_suites.append(suite)
+
+    ret = True
+    while time.time() < deadline and len(outstanding_suites) > 0:
+      for suite in list(outstanding_suites):
+        task = suite.task
+        if task.ready():
+          this_task_ret = task.get()
+          outstanding_suites.remove(suite)
+          if this_task_ret:
+            logging.info("Suite %s succeeded.", suite.name)
+          else:
+            logging.info("Suite %s failed.", suite.name)
+            ret = False
+      time.sleep(10)
+    if len(outstanding_suites) > 0:
+      for container in self.containers:
+        self._stop_container(container)
+      for suite in outstanding_suites:
+        suite.task.get()
+      raise Exception("Tasks not finished within timeout (%s minutes): %s" %
+                      (self.timeout_minutes, ",".join([
+                          suite.name for suite in outstanding_suites])))
+    return ret
+
+  def run(self):
+    # Create logs directories and ccache dir.
+    _make_dir_if_not_exist(self.ccache_dir)
+    _make_dir_if_not_exist(self.log_dir)
+
+    self.monitor.start()
+    try:
+      if not self.build_image:
+        self._create_build_image()
+      else:
+        self.image = self.build_image
+      ret = self._run_tests()
+      logging.info("Containers:")
+      for c in self.containers:
+        def to_success_string(exitcode):
+          if exitcode == 0:
+            return "SUCCESS"
+          return "FAILURE"
+        logging.info("%s %s %s %s", to_success_string(c.exitcode), c.name, c.logfile,
+                     c.runtime_seconds())
+      return ret
+    finally:
+      self.monitor.stop()
+      if self.cleanup_image and self.image:
+        _call(["docker", "rmi", self.image], check=False)
+      logging.info("Memory usage: %s GB min, %s GB max",
+                   self.monitor.min_memory_usage_gb,
+                   self.monitor.max_memory_usage_gb)
+
+  # Strings (really, regular expressions) pulled out into the visual timeline.
+  _INTERESTING_STRINGS = [
+      ">>> ",
+  ]
+  _INTERESTING_RE = re.compile("|".join("(%s)" % (s,) for s in _INTERESTING_STRINGS))
+
+  def create_timeline(self):
+    """Creates timeline into log directory."""
+    timeline = monitor.Timeline(
+        monitor_file=self.monitoring_output_file,
+        containers=self.containers,
+        interesting_re=self._INTERESTING_RE)
+    timeline.create(os.path.join(self.log_dir, "timeline.html"))
+
+
+class TestSuiteRunner(object):
+  """Runs a single test suite."""
+
+  def __init__(self, test_with_docker, suite):
+    self.test_with_docker = test_with_docker
+    self.suite = suite
+    self.task = None
+    self.name = self.suite.lower()
+
+  def run(self):
+    """Runs given test. Returns true on success, based on exit code."""
+    test_with_docker = self.test_with_docker
+    suite = self.suite
+    self.start = time.time()
+
+    # io-file-mgr-test expects a real-ish file system at /tmp;
+    # we mount a temporary directory into the container to appease it.
+    tmpdir = tempfile.mkdtemp(prefix=test_with_docker.name + "-" + self.name)
+    # Container names are sometimes used as hostnames, and DNS names shouldn't
+    # have underscores.
+    container_name = test_with_docker.name + "-" + self.name.replace("_", "-")
+
+    container = test_with_docker._create_container(
+        image=test_with_docker.image,
+        name=container_name,
+        extras=[
+            "-v", tmpdir + ":/tmp",
+            "-u", str(os.getuid()),
+            "-e", "NUM_CONCURRENT_TESTS=" +
+            str(test_with_docker.parallel_test_concurrency),
+        ],
+        logdir=self.name,
+        logname="log-test-" + self.suite + ".txt",
+        entrypoint=["/mnt/base/entrypoint.sh", "test_suite", suite])
+
+    test_with_docker.containers.append(container)
+    test_with_docker.monitor.add(container)
+    try:
+      return test_with_docker._run_container(container)
+    except:
+      return False
+    finally:
+      logging.info("Cleaning up containers for %s" % (suite,))
+      test_with_docker._stop_container(container)
+      if test_with_docker.cleanup_containers:
+        test_with_docker._rm_container(container)
+
+
+if __name__ == "__main__":
+  main()

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/docker/timeline.html.template
----------------------------------------------------------------------
diff --git a/docker/timeline.html.template b/docker/timeline.html.template
new file mode 100644
index 0000000..c8de821
--- /dev/null
+++ b/docker/timeline.html.template
@@ -0,0 +1,142 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<!--
+
+Template/header for a timeline visualization of a multi-container build.
+The timelines represent interesting log lines, with one row per container.
+The charts represent CPU usage within those containers.
+
+To use this, concatenate this with a '<script>' block defining
+a global variable named data.
+
+The expected format of data is exemplified by the following,
+and is tightly coupled with the implementation generating
+it in monitor.py. The intention of this unfriendly file format
+is to do as much munging as possible in Python.
+
+To make the visualization relative to the start time (i.e., to say that all
+builds start at 00:00), the timestamps are all seconds since the build began.
+To make the visualization work with them, the timestamps are then converted
+into local time so they display reasonably. This is a workaround for the fact
+that the visualization library for the timelines does not accept any data types
+that represent duration, but we still want timestamp-style formatting.
+
+var data = {
+  // max timestamp seen, in seconds since the epoch
+  "max_ts": 8153.0,
+  // map of container name to an array of metrics
+  "metrics": {
+    "i-20180312-140548-ee-test-serial": [
+      // a single metric point is an array of timestamp, user CPU, system CPU
+      // CPU is the percent of 1 CPU used since the previous timestamp.
+      [
+        4572.0,
+        0.11,
+        0.07
+      ]
+    ]
+  },
+  // Array of timelines
+  "timeline": [
+    // a timeline entry contains a name (for the entire row of the timeline),
+    // the message (for a segment of the timeline), and start and end timestamps
+    // for the segment.
+    [
+      "i-20180312-140548",
+      "+ echo '>>> build' '4266 (begin)'",
+      0.0,
+      0.0
+    ]
+  ]
+}
+-->
+
+<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
+
+<script type="text/javascript">
+google.charts.load("current", {packages:["timeline", "corechart"]});
+google.charts.setOnLoadCallback(drawChart);
+
+function ts_to_hms(secs) {
+  var s = secs % 60;
+  var m = Math.floor(secs / 60) % 60;
+  var h = Math.floor(secs / (60 * 60));
+  return [h, m, s];
+}
+
+/* Returns a Date object corresponding to secs seconds since the epoch, in
+ * localtime. Date(x) and Date(0, 0, 0, 0, 0, 0, 0, x) differ in that the
+ * former returns UTC whereas the latter returns the browser local time.
+ * For consistent handling within this visualization, we use localtime.
+ *
+ * Beware that local time can be discontinuous around time changes.
+ */
+function ts_to_date(secs) {
+  // secs may be a float, so we use millis as a common denominator unit
+  var millis = 1000 * secs;
+  return new Date(1970 /* yr; beginning of unix epoch */, 0 /* mo */, 0 /* d */,
+      0 /* hr */, 0 /* min */, 0 /* sec */, millis);
+}
+
+function drawChart() {
+  var container = document.getElementById('container');
+  var timelineContainer = document.createElement("div");
+  container.appendChild(timelineContainer);
+  var chart = new google.visualization.Timeline(timelineContainer);
+  var dataTable = new google.visualization.DataTable();
+  dataTable.addColumn({ type: 'string', id: 'Position' });
+  dataTable.addColumn({ type: 'string', id: 'Name' });
+  // timeofday isn't supported here
+  dataTable.addColumn({ type: 'datetime', id: 'Start' });
+  dataTable.addColumn({ type: 'datetime', id: 'End' });
+  // Timeline
+  for (i = 0; i < data.timeline.length; ++i) {
+    var row = data.timeline[i];
+    dataTable.addRow([ row[0], row[1], ts_to_date(row[2]), ts_to_date(row[3]) ]);
+  }
+  chart.draw(dataTable, { height: "400px" } );
+
+  for (const k of Object.keys(data.metrics)) {
+    var lineChart = document.createElement("div");
+    container.appendChild(lineChart);
+
+    var dataTable = new google.visualization.DataTable();
+    dataTable.addColumn({ type: 'timeofday', id: 'Time' });
+    dataTable.addColumn({ type: 'number', id: 'User' });
+    dataTable.addColumn({ type: 'number', id: 'System' });
+
+    for (const row of data.metrics[k]) {
+      dataTable.addRow([ ts_to_hms(row[0]), row[1], row[2] ]);
+    }
+    var options = {
+      title: 'CPU',
+      legend: { position: 'bottom' },
+      hAxis: {
+        minValue: [0, 0, 0],
+        maxValue: ts_to_hms(data.max_ts)
+      }
+    };
+
+    var chart = new google.visualization.LineChart(lineChart);
+    chart.draw(dataTable, options);
+  }
+}
+</script>
+<div id="container" style="height: 200px;"></div>

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/testdata/bin/run-all.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/run-all.sh b/testdata/bin/run-all.sh
index fb85811..f722b89 100755
--- a/testdata/bin/run-all.sh
+++ b/testdata/bin/run-all.sh
@@ -35,6 +35,8 @@ fi
 
 # Kill and clean data for a clean start.
 echo "Killing running services..."
+# Create log dir, in case there's nothing to kill.
+mkdir -p ${IMPALA_CLUSTER_LOGS_DIR}
 $IMPALA_HOME/testdata/bin/kill-all.sh &>${IMPALA_CLUSTER_LOGS_DIR}/kill-all.log
 
 echo "Starting cluster services..."

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/tests/query_test/test_runtime_filters.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_runtime_filters.py b/tests/query_test/test_runtime_filters.py
index c3763ff..14f5884 100644
--- a/tests/query_test/test_runtime_filters.py
+++ b/tests/query_test/test_runtime_filters.py
@@ -26,6 +26,9 @@ from tests.common.skip import SkipIfLocal
 
 WAIT_TIME_MS = specific_build_type_timeout(60000, slow_build_timeout=100000)
 
+# Some of the queries in runtime_filters consume a lot of memory, leading to
+# significant memory reservations in parallel tests.
+@pytest.mark.execute_serially
 @SkipIfLocal.multiple_impalad
 class TestRuntimeFilters(ImpalaTestSuite):
   @classmethod

http://git-wip-us.apache.org/repos/asf/impala/blob/93543cfb/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index 9552d0d..95e0d11 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -18,8 +18,10 @@
 # under the License.
 #
 # Runs the Impala query tests, first executing the tests that cannot be run in parallel
-# and then executing the remaining tests in parallel. All additional command line options
-# are passed to py.test.
+# (the serial tests), then executing the stress tests, and then
+# executing the remaining tests in parallel. To run only some of
+# these, use --skip-serial, --skip-stress, or --skip-parallel.
+# All additional command line options are passed to py.test.
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_service import ImpaladService
 import itertools
@@ -219,6 +221,15 @@ def print_metrics(substring):
 
 if __name__ == "__main__":
   exit_on_error = '-x' in sys.argv or '--exitfirst' in sys.argv
+  skip_serial = '--skip-serial' in sys.argv
+  if skip_serial:
+    sys.argv.remove("--skip-serial")
+  skip_stress = '--skip-stress' in sys.argv
+  if skip_stress:
+    sys.argv.remove("--skip-stress")
+  skip_parallel = '--skip-parallel' in sys.argv
+  if skip_parallel:
+    sys.argv.remove("--skip-parallel")
   test_executor = TestExecutor(exit_on_error=exit_on_error)
 
   # If the user is just asking for --help, just print the help test and then exit.
@@ -241,18 +252,21 @@ if __name__ == "__main__":
   else:
     print_metrics('connections')
     # First run query tests that need to be executed serially
-    base_args = ['-m', 'execute_serially']
-    test_executor.run_tests(base_args + build_test_args('serial'))
-    print_metrics('connections')
+    if not skip_serial:
+      base_args = ['-m', 'execute_serially']
+      test_executor.run_tests(base_args + build_test_args('serial'))
+      print_metrics('connections')
 
     # Run the stress tests
-    base_args = ['-m', 'stress', '-n', NUM_STRESS_CLIENTS]
-    test_executor.run_tests(base_args + build_test_args('stress'))
-    print_metrics('connections')
+    if not skip_stress:
+      base_args = ['-m', 'stress', '-n', NUM_STRESS_CLIENTS]
+      test_executor.run_tests(base_args + build_test_args('stress'))
+      print_metrics('connections')
 
     # Run the remaining query tests in parallel
-    base_args = ['-m', 'not execute_serially and not stress', '-n', NUM_CONCURRENT_TESTS]
-    test_executor.run_tests(base_args + build_test_args('parallel'))
+    if not skip_parallel:
+      base_args = ['-m', 'not execute_serially and not stress', '-n', NUM_CONCURRENT_TESTS]
+      test_executor.run_tests(base_args + build_test_args('parallel'))
 
     # The total number of tests executed at this point is expected to be >0
     # If it is < 0 then the script needs to exit with a non-zero


[12/28] impala git commit: IMPALA-6389: Make '\0' delimited text files work

Posted by ta...@apache.org.
IMPALA-6389: Make '\0' delimited text files work

Initially I didn't want to fully implement this, as the metadata
for these tables can't even be fully stored in Postgres; however,
after digging into some older documentation, it appears that the
ASCII NUL character has actually been used as a field separator
in various vendors' CSV implementations.

Therefore, this patch attempts to make things as non-broken as
possible and allows \0 as a field or tuple delimiter.  Collection
column delimiters are not allowed to be \0, as they genuinely may
not exist and we don't want to force special escaping on an
arbitrary character.  Note that the field delimiter must be distinct
from the tuple delimiter when they both exist; if it is not, the
effect is that there is no field delimiter (this is actually
possible with single-column tables).
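
(Editorial aside: outside Impala's SSE parser, NUL-delimited splitting is
unremarkable once buffer lengths are tracked explicitly. A minimal
standalone C++ sketch, not Impala code -- the split() helper here is
hypothetical:)

  #include <cassert>
  #include <string>
  #include <vector>

  // Split a buffer on a single-byte delimiter. '\0' works like any other
  // byte because std::string tracks an explicit length instead of relying
  // on NUL termination.
  std::vector<std::string> split(const std::string& buf, char delim) {
    std::vector<std::string> fields;
    size_t start = 0, pos;
    while ((pos = buf.find(delim, start)) != std::string::npos) {
      fields.push_back(buf.substr(start, pos - start));
      start = pos + 1;
    }
    fields.push_back(buf.substr(start));
    return fields;
  }

  int main() {
    const std::string row("a\0b\0c", 5);  // three fields, NUL field delimiter
    assert(split(row, '\0').size() == 3);
    return 0;
  }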

Testing: Created a NUL-delimited table as described in the JIRA,
using a MySQL-backed Hive metastore; ran select * from tab_separated
on the table and updated the unit test.  Additionally, built with ASAN
and ran the unit test.

Change-Id: I2190c57681f29f34ee1eb393e30dfdda5839098c
Reviewed-on: http://gerrit.cloudera.org:8080/9857
Tested-by: Impala Public Jenkins
Reviewed-by: Zach Amsden <za...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/dc1922f4
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/dc1922f4
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/dc1922f4

Branch: refs/heads/2.x
Commit: dc1922f4862fa9271cd07febed3f0df79450bf20
Parents: 3fd345d
Author: Zach Amsden <za...@cloudera.com>
Authored: Wed Mar 7 01:44:43 2018 +0000
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/exec/delimited-text-parser-test.cc  | 61 +++++++++++++++-----
 be/src/exec/delimited-text-parser.cc       | 74 +++++++++++++++++--------
 be/src/exec/delimited-text-parser.h        | 43 +++++++++-----
 be/src/exec/delimited-text-parser.inline.h | 70 ++++++++++++-----------
 be/src/exec/hdfs-sequence-scanner.cc       |  2 +-
 be/src/exec/hdfs-sequence-scanner.h        |  3 +-
 be/src/exec/hdfs-text-scanner.cc           |  2 +-
 be/src/exec/hdfs-text-scanner.h            |  3 +-
 8 files changed, 172 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/delimited-text-parser-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser-test.cc b/be/src/exec/delimited-text-parser-test.cc
index 3156b36..76d9444 100644
--- a/be/src/exec/delimited-text-parser-test.cc
+++ b/be/src/exec/delimited-text-parser-test.cc
@@ -24,7 +24,7 @@
 
 namespace impala {
 
-void Validate(DelimitedTextParser* parser, const string& data,
+void Validate(TupleDelimitedTextParser* parser, const string& data,
     int expected_offset, char tuple_delim, int expected_num_tuples,
     int expected_num_fields) {
   parser->ParserReset();
@@ -72,8 +72,8 @@ TEST(DelimitedTextParser, Basic) {
   bool is_materialized_col[NUM_COLS];
   for (int i = 0; i < NUM_COLS; ++i) is_materialized_col[i] = true;
 
-  DelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
-                                       TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
+  TupleDelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
+                                            TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
   // Note that only complete tuples "count"
   Validate(&no_escape_parser, "no_delims", -1, TUPLE_DELIM, 0, 0);
   Validate(&no_escape_parser, "abc||abc", 4, TUPLE_DELIM, 1, 1);
@@ -81,9 +81,9 @@ TEST(DelimitedTextParser, Basic) {
   Validate(&no_escape_parser, "a|bcd", 2, TUPLE_DELIM, 0, 0);
 
   // Test with escape char
-  DelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
-                                    TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
-                                    ESCAPE_CHAR);
+  TupleDelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
+                                         TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
+                                         ESCAPE_CHAR);
   Validate(&escape_parser, "a@|a|bcd", 5, TUPLE_DELIM, 0, 0);
   Validate(&escape_parser, "a@@|a|bcd", 4, TUPLE_DELIM, 1, 1);
   Validate(&escape_parser, "a@@@|a|bcd", 7, TUPLE_DELIM, 0, 0);
@@ -127,8 +127,8 @@ TEST(DelimitedTextParser, Fields) {
   bool is_materialized_col[NUM_COLS];
   for (int i = 0; i < NUM_COLS; ++i) is_materialized_col[i] = true;
 
-  DelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
-                                       TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
+  TupleDelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
+                                            TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
 
   Validate(&no_escape_parser, "a,b|c,d|e,f", 4, TUPLE_DELIM, 1, 3);
   Validate(&no_escape_parser, "b|c,d|e,f", 2, TUPLE_DELIM, 1, 3);
@@ -137,9 +137,9 @@ TEST(DelimitedTextParser, Fields) {
   const string str10("a,\0|c,d|e", 9);
   Validate(&no_escape_parser, str10, 4, TUPLE_DELIM, 1, 2);
 
-  DelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
-                                    TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
-                                    ESCAPE_CHAR);
+  TupleDelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
+                                         TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
+                                         ESCAPE_CHAR);
 
   Validate(&escape_parser, "a,b|c,d|e,f", 4, TUPLE_DELIM, 1, 3);
   Validate(&escape_parser, "a,@|c|e,f", 6, TUPLE_DELIM, 0, 1);
@@ -148,14 +148,21 @@ TEST(DelimitedTextParser, Fields) {
 
 TEST(DelimitedTextParser, SpecialDelimiters) {
   const char TUPLE_DELIM = '\n'; // implies '\r' and "\r\n" are also delimiters
+  const char NUL_DELIM = '\0';
   const int NUM_COLS = 1;
+  const int MAX_COLS = 2;
 
-  bool is_materialized_col[NUM_COLS];
-  for (int i = 0; i < NUM_COLS; ++i) is_materialized_col[i] = true;
+  bool is_materialized_col[MAX_COLS];
+  for (int i = 0; i < MAX_COLS; ++i) is_materialized_col[i] = true;
 
-  DelimitedTextParser tuple_delim_parser(NUM_COLS, 0, is_materialized_col,
+  TupleDelimitedTextParser tuple_delim_parser(NUM_COLS, 0, is_materialized_col,
       TUPLE_DELIM);
 
+  TupleDelimitedTextParser nul_tuple_parser(NUM_COLS, 0, is_materialized_col, NUL_DELIM);
+
+  TupleDelimitedTextParser nul_field_parser(MAX_COLS, 0, is_materialized_col,
+                                            TUPLE_DELIM, NUL_DELIM);
+
   // Non-SSE case
   Validate(&tuple_delim_parser, "A\r\nB", 3, TUPLE_DELIM, 0, 0);
   Validate(&tuple_delim_parser, "A\rB", 2, TUPLE_DELIM, 0, 0);
@@ -165,6 +172,16 @@ TEST(DelimitedTextParser, SpecialDelimiters) {
   Validate(&tuple_delim_parser, "A\rB\nC\r\nD", 2, TUPLE_DELIM, 2, 2);
   Validate(&tuple_delim_parser, "\r\r\n\n", 1, TUPLE_DELIM, 2, 2);
 
+  // NUL tuple delimiter; no field delimiter
+  const string nul1("\0\0\0", 3);
+  const string nul2("AAA\0BBB\0", 8);
+  const string nul3("\n\0\r\0\r\n\0", 7);
+  const string nul4("\n\0\r\0\r\n", 6);
+  Validate(&nul_tuple_parser, nul1, 1, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nul2, 4, NUL_DELIM, 1, 1);
+  Validate(&nul_tuple_parser, nul3, 2, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nul4, 2, NUL_DELIM, 1, 1);
+
   // SSE case
   string data = "\rAAAAAAAAAAAAAAA";
   DCHECK_EQ(data.size(), SSEUtil::CHARS_PER_128_BIT_REGISTER);
@@ -178,6 +195,22 @@ TEST(DelimitedTextParser, SpecialDelimiters) {
   data = "\r\nAAA\n\r\r\nAAAAAAA";
   DCHECK_EQ(data.size(), SSEUtil::CHARS_PER_128_BIT_REGISTER);
   Validate(&tuple_delim_parser, data, 2, TUPLE_DELIM, 3, 3);
+
+  // NUL SSE case
+  const string nulsse1("AAAAA\0AAAAAAAAAAA\0AAAAAAAAAAAA\0\0", 32);
+  const string nulsse2("AAAAA\0AAAAAAAAAAA\0AAAAAAAAAAAA\0A", 32);
+  const string nulsse3("AAA\0BBBbbbbbbbbbbbbbbbbbbb\0cccc,ddd\0", 36);
+  const string nulsse4("AAA\0BBBbbbbbbbbbbbbbbbbbbb\0cccc,dddd", 36);
+  Validate(&nul_tuple_parser, nulsse1, 6, NUL_DELIM, 3, 3);
+  Validate(&nul_tuple_parser, nulsse2, 6, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nulsse3, 4, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nulsse4, 4, NUL_DELIM, 1, 1);
+
+  // NUL Field delimiters
+  const string field1("\na\0b\0c\n", 7);
+  const string field2("aaaa\na\0b\0c\naaaaa\0b\na\0b\0c\n", 25);
+  Validate(&nul_field_parser, field1, 1, TUPLE_DELIM, 1, 2);
+  Validate(&nul_field_parser, field2, 5, TUPLE_DELIM, 3, 6);
 }
 
 // TODO: expand test for other delimited text parser functions/cases.

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/delimited-text-parser.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser.cc b/be/src/exec/delimited-text-parser.cc
index 18fcde1..7db65fd 100644
--- a/be/src/exec/delimited-text-parser.cc
+++ b/be/src/exec/delimited-text-parser.cc
@@ -24,7 +24,8 @@
 
 using namespace impala;
 
-DelimitedTextParser::DelimitedTextParser(
+template<bool DELIMITED_TUPLES>
+DelimitedTextParser<DELIMITED_TUPLES>::DelimitedTextParser(
     int num_cols, int num_partition_keys, const bool* is_materialized_col,
     char tuple_delim, char field_delim, char collection_item_delim, char escape_char)
     : is_materialized_col_(is_materialized_col),
@@ -72,7 +73,7 @@ DelimitedTextParser::DelimitedTextParser(
     memset(low_mask_, 0, sizeof(low_mask_));
   }
 
-  if (tuple_delim != '\0') {
+  if (DELIMITED_TUPLES) {
     search_chars[num_delims_++] = tuple_delim_;
     ++num_tuple_delims_;
     // Hive treats \r (^M) as an alternate tuple delimiter, but \r\n is a
@@ -82,29 +83,43 @@ DelimitedTextParser::DelimitedTextParser(
       ++num_tuple_delims_;
     }
     xmm_tuple_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(search_chars));
-  }
-
-  if (field_delim != '\0' || collection_item_delim != '\0') {
+    if (field_delim_ != tuple_delim_) search_chars[num_delims_++] = field_delim_;
+  } else {
     search_chars[num_delims_++] = field_delim_;
-    search_chars[num_delims_++] = collection_item_delim_;
   }
 
+  if (collection_item_delim != '\0') search_chars[num_delims_++] = collection_item_delim_;
+
   DCHECK_GT(num_delims_, 0);
   xmm_delim_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(search_chars));
 
   ParserReset();
 }
 
-void DelimitedTextParser::ParserReset() {
+template
+DelimitedTextParser<true>::DelimitedTextParser(
+    int num_cols, int num_partition_keys, const bool* is_materialized_col,
+    char tuple_delim, char field_delim, char collection_item_delim, char escape_char);
+
+template
+DelimitedTextParser<false>::DelimitedTextParser(
+    int num_cols, int num_partition_keys, const bool* is_materialized_col,
+    char tuple_delim, char field_delim, char collection_item_delim, char escape_char);
+
+template<bool DELIMITED_TUPLES>
+void DelimitedTextParser<DELIMITED_TUPLES>::ParserReset() {
   current_column_has_escape_ = false;
   last_char_is_escape_ = false;
   last_row_delim_offset_ = -1;
   column_idx_ = num_partition_keys_;
 }
 
+template void DelimitedTextParser<true>::ParserReset();
+
 // Parsing raw csv data into FieldLocation descriptors.
-Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remaining_len,
-    char** byte_buffer_ptr, char** row_end_locations,
+template<bool DELIMITED_TUPLES>
+Status DelimitedTextParser<DELIMITED_TUPLES>::ParseFieldLocations(int max_tuples,
+    int64_t remaining_len, char** byte_buffer_ptr, char** row_end_locations,
     FieldLocation* field_locations,
     int* num_tuples, int* num_fields, char** next_column_start) {
   // Start of this batch.
@@ -133,10 +148,10 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
   while (remaining_len > 0) {
     bool new_tuple = false;
     bool new_col = false;
-    unfinished_tuple_ = true;
+    if (DELIMITED_TUPLES) unfinished_tuple_ = true;
 
     if (!last_char_is_escape_) {
-      if (tuple_delim_ != '\0' && (**byte_buffer_ptr == tuple_delim_ ||
+      if (DELIMITED_TUPLES && (**byte_buffer_ptr == tuple_delim_ ||
            (tuple_delim_ == '\n' && **byte_buffer_ptr == '\r'))) {
         new_tuple = true;
         new_col = true;
@@ -166,6 +181,7 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
         row_end_locations[*num_tuples] = *byte_buffer_ptr;
         ++(*num_tuples);
       }
+      DCHECK(DELIMITED_TUPLES);
       unfinished_tuple_ = false;
       last_row_delim_offset_ = **byte_buffer_ptr == '\r' ? remaining_len - 1 : -1;
       if (*num_tuples == max_tuples) {
@@ -185,7 +201,7 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
 
   // For formats that store the length of the row, the row is not delimited:
   // e.g. Sequence files.
-  if (tuple_delim_ == '\0') {
+  if (!DELIMITED_TUPLES) {
     DCHECK_EQ(remaining_len, 0);
     RETURN_IF_ERROR(AddColumn<true>(*byte_buffer_ptr - *next_column_start,
         next_column_start, num_fields, field_locations));
@@ -193,18 +209,30 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
     DCHECK(status.ok());
     column_idx_ = num_partition_keys_;
     ++(*num_tuples);
-    unfinished_tuple_ = false;
   }
   return Status::OK();
 }
 
-// Find the first instance of the tuple delimiter. This will find the start of the first
-// full tuple in buffer by looking for the end of the previous tuple.
-int64_t DelimitedTextParser::FindFirstInstance(const char* buffer, int64_t len) {
+template
+Status DelimitedTextParser<true>::ParseFieldLocations(int max_tuples,
+    int64_t remaining_len, char** byte_buffer_ptr, char** row_end_locations,
+    FieldLocation* field_locations,
+    int* num_tuples, int* num_fields, char** next_column_start);
+
+template
+Status DelimitedTextParser<false>::ParseFieldLocations(int max_tuples,
+    int64_t remaining_len, char** byte_buffer_ptr, char** row_end_locations,
+    FieldLocation* field_locations,
+    int* num_tuples, int* num_fields, char** next_column_start);
+
+template<bool DELIMITED_TUPLES>
+int64_t DelimitedTextParser<DELIMITED_TUPLES>::FindFirstInstance(const char* buffer,
+    int64_t len) {
   int64_t tuple_start = 0;
   const char* buffer_start = buffer;
   bool found = false;
 
+  DCHECK(DELIMITED_TUPLES);
   // If the last char in the previous buffer was \r then either return the start of
   // this buffer or skip a \n at the beginning of the buffer.
   if (last_row_delim_offset_ != -1) {
@@ -226,13 +254,10 @@ restart:
       int tuple_mask = _mm_extract_epi16(xmm_tuple_mask, 0);
       if (tuple_mask != 0) {
         found = true;
-        for (int i = 0; i < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++i) {
-          if ((tuple_mask & SSEUtil::SSE_BITMASK[i]) != 0) {
-            tuple_start += i + 1;
-            buffer += i + 1;
-            break;
-          }
-        }
+        // Find first set bit (1-based)
+        int i = ffs(tuple_mask);
+        tuple_start += i;
+        buffer += i;
         break;
       }
       tuple_start += SSEUtil::CHARS_PER_128_BIT_REGISTER;
@@ -295,3 +320,6 @@ restart:
   }
   return tuple_start;
 }
+
+template
+int64_t DelimitedTextParser<true>::FindFirstInstance(const char* buffer, int64_t len);
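
(Editorial aside: the hunk above swaps a 16-iteration bit scan for ffs()
from <strings.h>, which returns 1 + the index of the least significant set
bit, or 0 for an empty mask. A standalone sanity sketch, not Impala code,
showing the two are equivalent for every non-zero 16-bit mask:)

  #include <cassert>
  #include <strings.h>

  // The loop ffs() replaces: the 1-based position of the lowest set bit.
  int first_set_bit_loop(int mask) {
    for (int i = 0; i < 16; ++i) {
      if ((mask & (1 << i)) != 0) return i + 1;
    }
    return 0;
  }

  int main() {
    for (int mask = 1; mask < (1 << 16); ++mask) {
      assert(ffs(mask) == first_set_bit_loop(mask));
    }
    return 0;
  }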

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/delimited-text-parser.h
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser.h b/be/src/exec/delimited-text-parser.h
index b966081..9b89127 100644
--- a/be/src/exec/delimited-text-parser.h
+++ b/be/src/exec/delimited-text-parser.h
@@ -25,22 +25,27 @@
 
 namespace impala {
 
+template <bool DELIMITED_TUPLES>
 class DelimitedTextParser {
  public:
 
   /// The Delimited Text Parser parses text rows that are delimited by specific
   /// characters:
-  ///   tuple_delim: delimits tuples
+  ///   tuple_delim: delimits tuples.  Only used if DELIMITED_TUPLES is true.
   ///   field_delim: delimits fields
   ///   collection_item_delim: delimits collection items
   ///   escape_char: escape delimiters, make them part of the data.
-  //
+  ///
+  /// If the template parameter DELIMITED_TUPLES is false there is no support
+  /// for tuple delimiters and we do not need to search for them.  Any value
+  /// may be passed for tuple_delim, as it is ignored.
+  ///
   /// 'num_cols' is the total number of columns including partition keys.
-  //
+  ///
   /// 'is_materialized_col' should be initialized to an array of length 'num_cols', with
   /// is_materialized_col[i] = <true if column i should be materialized, false otherwise>
   /// Owned by caller.
-  //
+  ///
   /// The main method is ParseData which fills in a vector of pointers and lengths to the
   /// fields.  It also can handle an escape character which masks a tuple or field
   /// delimiter that occurs in the data.
@@ -91,14 +96,14 @@ class DelimitedTextParser {
   /// This function is used to parse sequence file records which do not need to
   /// parse for tuple delimiters. Returns an error status if any column exceeds the
   /// size limit. See AddColumn() for details.
-  template <bool process_escapes>
+  /// This function is disabled for non-sequence file parsing.
+  template <bool PROCESS_ESCAPES>
   Status ParseSingleTuple(int64_t len, char* buffer, FieldLocation* field_locations,
       int* num_fields);
 
   /// FindFirstInstance returns the position after the first non-escaped tuple
   /// delimiter from the starting offset.
   /// Used to find the start of a tuple if jumping into the middle of a text file.
-  /// Also used to find the sync marker for Sequenced and RC files.
   /// If no tuple delimiter is found within the buffer, return -1;
   int64_t FindFirstInstance(const char* buffer, int64_t len);
 
@@ -119,13 +124,16 @@ class DelimitedTextParser {
   /// by the number fields added.
   /// 'field_locations' will be updated with the start and length of the fields.
   /// Returns an error status if 'len' exceeds the size limit specified in AddColumn().
-  template <bool process_escapes>
+  template <bool PROCESS_ESCAPES>
   Status FillColumns(int64_t len, char** last_column, int* num_fields,
       impala::FieldLocation* field_locations);
 
   /// Return true if we have not seen a tuple delimiter for the current tuple being
   /// parsed (i.e., the last byte read was not a tuple delimiter).
-  bool HasUnfinishedTuple() { return unfinished_tuple_; }
+  bool HasUnfinishedTuple() {
+    DCHECK(DELIMITED_TUPLES);
+    return unfinished_tuple_;
+  }
 
  private:
   /// Initialize the parser state.
@@ -133,7 +141,7 @@ class DelimitedTextParser {
 
   /// Helper routine to add a column to the field_locations vector.
   /// Template parameter:
-  ///   process_escapes -- if true the the column may have escape characters
+  ///   PROCESS_ESCAPES -- if true the column may have escape characters
   ///                      and the negative of the len will be stored.
   ///   len: length of the current column. The length of a column must fit in a 32-bit
   ///        signed integer (i.e. <= 2147483647 bytes). If a column is larger than that,
@@ -144,23 +152,29 @@ class DelimitedTextParser {
   /// Output:
   ///   field_locations: updated with start and length of current field.
   /// Return an error status if 'len' exceeds the size limit specified above.
-  template <bool process_escapes>
+  template <bool PROCESS_ESCAPES>
   Status AddColumn(int64_t len, char** next_column_start, int* num_fields,
       FieldLocation* field_locations);
 
   /// Helper routine to parse delimited text using SSE instructions.
   /// Identical arguments as ParseFieldLocations.
-  /// If the template argument, 'process_escapes' is true, this function will handle
+  /// If the template argument, 'PROCESS_ESCAPES' is true, this function will handle
   /// escapes, otherwise, it will assume the text is unescaped.  By using templates,
   /// we can special case the un-escaped path for better performance.  The unescaped
   /// path is optimized away by the compiler. Returns an error status if the length
   /// of any column exceeds the size limit. See AddColumn() for details.
-  template <bool process_escapes>
+  template <bool PROCESS_ESCAPES>
   Status ParseSse(int max_tuples, int64_t* remaining_len,
       char** byte_buffer_ptr, char** row_end_locations_,
       FieldLocation* field_locations,
       int* num_tuples, int* num_fields, char** next_column_start);
 
+  bool IsFieldOrCollectionItemDelimiter(char c) {
+    return (!DELIMITED_TUPLES && c == field_delim_) ||
+      (DELIMITED_TUPLES && field_delim_ != tuple_delim_ && c == field_delim_) ||
+      (collection_item_delim_ != '\0' && c == collection_item_delim_);
+  }
+
   /// SSE(xmm) register containing the tuple search character(s).
   __m128i xmm_tuple_search_;
 
@@ -214,7 +228,7 @@ class DelimitedTextParser {
   /// Character delimiting collection items (to become slots).
   char collection_item_delim_;
 
-  /// Character delimiting tuples.
+  /// Character delimiting tuples.  Only used if DELIMITED_TUPLES is true.
   char tuple_delim_;
 
   /// Whether or not the current column has an escape character in it
@@ -228,5 +242,8 @@ class DelimitedTextParser {
   bool unfinished_tuple_;
 };
 
+using TupleDelimitedTextParser = DelimitedTextParser<true>;
+using SequenceDelimitedTextParser = DelimitedTextParser<false>;
+
 }// namespace impala
 #endif// IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
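
(Editorial aside: the key design choice above is making DELIMITED_TUPLES a
compile-time bool so that the disabled tuple-delimiter path costs nothing
at runtime. A minimal sketch of the pattern, standalone C++ rather than
Impala's actual class:)

  #include <iostream>

  template <bool DELIMITED_TUPLES>
  struct Sketch {
    // When DELIMITED_TUPLES is false, the first comparison is folded away
    // at compile time; the tuple delimiter is never even inspected.
    int Classify(char c, char tuple_delim, char field_delim) {
      if (DELIMITED_TUPLES && c == tuple_delim) return 1;  // tuple boundary
      if (c == field_delim) return 2;                      // field boundary
      return 0;                                            // ordinary byte
    }
  };

  int main() {
    Sketch<true> with_tuples;
    Sketch<false> fields_only;
    std::cout << with_tuples.Classify('|', '|', ',') << "\n";  // prints 1
    std::cout << fields_only.Classify('|', '|', ',') << "\n";  // prints 0
    return 0;
  }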

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/delimited-text-parser.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser.inline.h b/be/src/exec/delimited-text-parser.inline.h
index 02fa132..9fe737e 100644
--- a/be/src/exec/delimited-text-parser.inline.h
+++ b/be/src/exec/delimited-text-parser.inline.h
@@ -52,9 +52,10 @@ inline void ProcessEscapeMask(uint16_t escape_mask, bool* last_char_is_escape,
   *delim_mask &= ~escape_mask;
 }
 
-template <bool process_escapes>
-inline Status DelimitedTextParser::AddColumn(int64_t len, char** next_column_start,
-    int* num_fields, FieldLocation* field_locations) {
+template <bool DELIMITED_TUPLES>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<DELIMITED_TUPLES>::AddColumn(int64_t len,
+    char** next_column_start, int* num_fields, FieldLocation* field_locations) {
   if (UNLIKELY(!BitUtil::IsNonNegative32Bit(len))) {
     return Status(TErrorCode::TEXT_PARSER_TRUNCATED_COLUMN, len);
   }
@@ -62,26 +63,27 @@ inline Status DelimitedTextParser::AddColumn(int64_t len, char** next_column_sta
     // Found a column that needs to be parsed, write the start/len to 'field_locations'
     field_locations[*num_fields].start = *next_column_start;
     int64_t field_len = len;
-    if (process_escapes && current_column_has_escape_) {
+    if (PROCESS_ESCAPES && current_column_has_escape_) {
       field_len = -len;
     }
     field_locations[*num_fields].len = static_cast<int32_t>(field_len);
     ++(*num_fields);
   }
-  if (process_escapes) current_column_has_escape_ = false;
+  if (PROCESS_ESCAPES) current_column_has_escape_ = false;
   *next_column_start += len + 1;
   ++column_idx_;
   return Status::OK();
 }
 
-template <bool process_escapes>
-inline Status DelimitedTextParser::FillColumns(int64_t len, char** last_column,
-    int* num_fields, FieldLocation* field_locations) {
+template <bool DELIMITED_TUPLES>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<DELIMITED_TUPLES>::FillColumns(int64_t len,
+    char** last_column, int* num_fields, FieldLocation* field_locations) {
   // Fill in any columns missing from the end of the tuple.
   char* dummy = NULL;
   if (last_column == NULL) last_column = &dummy;
   while (column_idx_ < num_cols_) {
-    RETURN_IF_ERROR(AddColumn<process_escapes>(len, last_column,
+    RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(len, last_column,
         num_fields, field_locations));
     // The rest of the columns will be null.
     last_column = &dummy;
@@ -103,8 +105,9 @@ inline Status DelimitedTextParser::FillColumns(int64_t len, char** last_column,
 ///  Needle   = 'abcd000000000000' (we're searching for any a's, b's, c's or d's)
 ///  Haystack = 'asdfghjklhjbdwwc' (the raw string)
 ///  Result   = '1010000000011001'
-template <bool process_escapes>
-inline Status DelimitedTextParser::ParseSse(int max_tuples,
+template <bool DELIMITED_TUPLES>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<DELIMITED_TUPLES>::ParseSse(int max_tuples,
     int64_t* remaining_len, char** byte_buffer_ptr,
     char** row_end_locations, FieldLocation* field_locations,
     int* num_tuples, int* num_fields, char** next_column_start) {
@@ -146,7 +149,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 
     uint16_t escape_mask = 0;
     // If the table does not use escape characters, skip processing for it.
-    if (process_escapes) {
+    if (PROCESS_ESCAPES) {
       DCHECK(escape_char_ != '\0');
       xmm_escape_mask = SSE4_cmpestrm<SSEUtil::STRCHR_MODE>(xmm_escape_search_, 1,
           xmm_buffer, SSEUtil::CHARS_PER_128_BIT_REGISTER);
@@ -156,8 +159,10 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 
     char* last_char = *byte_buffer_ptr + 15;
     bool last_char_is_unescaped_delim = delim_mask >> 15;
-    unfinished_tuple_ = !(last_char_is_unescaped_delim &&
-        (*last_char == tuple_delim_ || (tuple_delim_ == '\n' && *last_char == '\r')));
+    if (DELIMITED_TUPLES) {
+      unfinished_tuple_ = !(last_char_is_unescaped_delim &&
+          (*last_char == tuple_delim_ || (tuple_delim_ == '\n' && *last_char == '\r')));
+    }
 
     int last_col_idx = 0;
     // Process all non-zero bits in the delim_mask from lsb->msb.  If a bit
@@ -170,7 +175,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
       // clear current bit
       delim_mask &= ~(SSEUtil::SSE_BITMASK[n]);
 
-      if (process_escapes) {
+      if (PROCESS_ESCAPES) {
         // Determine if there was an escape character between [last_col_idx, n]
         bool escaped = (escape_mask & low_mask_[last_col_idx] & high_mask_[n]) != 0;
         current_column_has_escape_ |= escaped;
@@ -179,13 +184,14 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 
       char* delim_ptr = *byte_buffer_ptr + n;
 
-      if (*delim_ptr == field_delim_ || *delim_ptr == collection_item_delim_) {
-        RETURN_IF_ERROR(AddColumn<process_escapes>(delim_ptr - *next_column_start,
+      if (IsFieldOrCollectionItemDelimiter(*delim_ptr)) {
+        RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(delim_ptr - *next_column_start,
             next_column_start, num_fields, field_locations));
         continue;
       }
 
-      if (*delim_ptr == tuple_delim_ || (tuple_delim_ == '\n' && *delim_ptr == '\r')) {
+      if (DELIMITED_TUPLES &&
+          (*delim_ptr == tuple_delim_ || (tuple_delim_ == '\n' && *delim_ptr == '\r'))) {
         if (UNLIKELY(
                 last_row_delim_offset_ == *remaining_len - n && *delim_ptr == '\n')) {
           // If the row ended in \r\n then move the next start past the \n
@@ -193,7 +199,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
           last_row_delim_offset_ = -1;
           continue;
         }
-        RETURN_IF_ERROR(AddColumn<process_escapes>(delim_ptr - *next_column_start,
+        RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(delim_ptr - *next_column_start,
             next_column_start, num_fields, field_locations));
         Status status = FillColumns<false>(0, NULL, num_fields, field_locations);
         DCHECK(status.ok());
@@ -204,7 +210,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
         last_row_delim_offset_ = *delim_ptr == '\r' ? *remaining_len - n - 1 : -1;
         if (UNLIKELY(*num_tuples == max_tuples)) {
           (*byte_buffer_ptr) += (n + 1);
-          if (process_escapes) last_char_is_escape_ = false;
+          if (PROCESS_ESCAPES) last_char_is_escape_ = false;
           *remaining_len -= (n + 1);
           // If the last character we processed was \r then set the offset to 0
           // so that we will use it at the beginning of the next batch.
@@ -214,7 +220,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
       }
     }
 
-    if (process_escapes) {
+    if (PROCESS_ESCAPES) {
       // Determine if there was an escape character between (last_col_idx, 15)
       bool unprocessed_escape = escape_mask & low_mask_[last_col_idx] & high_mask_[15];
       current_column_has_escape_ |= unprocessed_escape;
@@ -227,9 +233,10 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 }
 
 /// Simplified version of ParseSSE which does not handle tuple delimiters.
-template <bool process_escapes>
-inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char* buffer,
-    FieldLocation* field_locations, int* num_fields) {
+template<>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<false>::ParseSingleTuple(int64_t remaining_len,
+    char* buffer, FieldLocation* field_locations, int* num_fields) {
   char* next_column_start = buffer;
   __m128i xmm_buffer, xmm_delim_mask, xmm_escape_mask;
 
@@ -246,7 +253,7 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
 
       uint16_t escape_mask = 0;
       // If the table does not use escape characters, skip processing for it.
-      if (process_escapes) {
+      if (PROCESS_ESCAPES) {
         DCHECK(escape_char_ != '\0');
         xmm_escape_mask = SSE4_cmpestrm<SSEUtil::STRCHR_MODE>(xmm_escape_search_, 1,
             xmm_buffer, SSEUtil::CHARS_PER_128_BIT_REGISTER);
@@ -263,7 +270,7 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
         DCHECK_GE(n, 0);
         DCHECK_LT(n, 16);
 
-        if (process_escapes) {
+        if (PROCESS_ESCAPES) {
           // Determine if there was an escape character between [last_col_idx, n]
           bool escaped = (escape_mask & low_mask_[last_col_idx] & high_mask_[n]) != 0;
           current_column_has_escape_ |= escaped;
@@ -273,11 +280,11 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
         // clear current bit
         delim_mask &= ~(SSEUtil::SSE_BITMASK[n]);
 
-        RETURN_IF_ERROR(AddColumn<process_escapes>(buffer + n - next_column_start,
+        RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(buffer + n - next_column_start,
             &next_column_start, num_fields, field_locations));
       }
 
-      if (process_escapes) {
+      if (PROCESS_ESCAPES) {
         // Determine if there was an escape character between (last_col_idx, 15)
         bool unprocessed_escape = escape_mask & low_mask_[last_col_idx] & high_mask_[15];
         current_column_has_escape_ |= unprocessed_escape;
@@ -296,9 +303,8 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
       last_char_is_escape_ = false;
     }
 
-    if (!last_char_is_escape_ &&
-          (*buffer == field_delim_ || *buffer == collection_item_delim_)) {
-      RETURN_IF_ERROR(AddColumn<process_escapes>(buffer - next_column_start,
+    if (!last_char_is_escape_ && IsFieldOrCollectionItemDelimiter(*buffer)) {
+      RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(buffer - next_column_start,
           &next_column_start, num_fields, field_locations));
     }
 
@@ -308,7 +314,7 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
 
   // Last column does not have a delimiter after it.  Add that column and also
   // pad with empty cols if the input is ragged.
-  return FillColumns<process_escapes>(buffer - next_column_start,
+  return FillColumns<PROCESS_ESCAPES>(buffer - next_column_start,
       &next_column_start, num_fields, field_locations);
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/hdfs-sequence-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-sequence-scanner.cc b/be/src/exec/hdfs-sequence-scanner.cc
index 346a18a..8a9151e 100644
--- a/be/src/exec/hdfs-sequence-scanner.cc
+++ b/be/src/exec/hdfs-sequence-scanner.cc
@@ -73,7 +73,7 @@ Status HdfsSequenceScanner::InitNewRange() {
   text_converter_.reset(new TextConverter(hdfs_partition->escape_char(),
       scan_node_->hdfs_table()->null_column_value()));
 
-  delimited_text_parser_.reset(new DelimitedTextParser(
+  delimited_text_parser_.reset(new SequenceDelimitedTextParser(
       scan_node_->hdfs_table()->num_cols(), scan_node_->num_partition_keys(),
       scan_node_->is_materialized_col(), '\0', hdfs_partition->field_delim(),
       hdfs_partition->collection_delim(), hdfs_partition->escape_char()));

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/hdfs-sequence-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-sequence-scanner.h b/be/src/exec/hdfs-sequence-scanner.h
index 4845edb..463ffc7 100644
--- a/be/src/exec/hdfs-sequence-scanner.h
+++ b/be/src/exec/hdfs-sequence-scanner.h
@@ -153,6 +153,7 @@
 
 namespace impala {
 
+template <bool>
 class DelimitedTextParser;
 
 class HdfsSequenceScanner : public BaseSequenceScanner {
@@ -222,7 +223,7 @@ class HdfsSequenceScanner : public BaseSequenceScanner {
   Status GetRecord(uint8_t** record_ptr, int64_t* record_len) WARN_UNUSED_RESULT;
 
   /// Helper class for picking fields and rows from delimited text.
-  boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_;
+  boost::scoped_ptr<DelimitedTextParser<false>> delimited_text_parser_;
   std::vector<FieldLocation> field_locations_;
 
   /// Data that is fixed across headers.  This struct is shared between scan ranges.

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/hdfs-text-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.cc b/be/src/exec/hdfs-text-scanner.cc
index 253bcc8..b78115d 100644
--- a/be/src/exec/hdfs-text-scanner.cc
+++ b/be/src/exec/hdfs-text-scanner.cc
@@ -203,7 +203,7 @@ Status HdfsTextScanner::InitNewRange() {
     collection_delim = '\0';
   }
 
-  delimited_text_parser_.reset(new DelimitedTextParser(
+  delimited_text_parser_.reset(new TupleDelimitedTextParser(
       scan_node_->hdfs_table()->num_cols(), scan_node_->num_partition_keys(),
       scan_node_->is_materialized_col(), hdfs_partition->line_delim(),
       field_delim, collection_delim, hdfs_partition->escape_char()));

http://git-wip-us.apache.org/repos/asf/impala/blob/dc1922f4/be/src/exec/hdfs-text-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.h b/be/src/exec/hdfs-text-scanner.h
index 610c612..25886ba 100644
--- a/be/src/exec/hdfs-text-scanner.h
+++ b/be/src/exec/hdfs-text-scanner.h
@@ -25,6 +25,7 @@
 
 namespace impala {
 
+template<bool>
 class DelimitedTextParser;
 class ScannerContext;
 struct HdfsFileDesc;
@@ -237,7 +238,7 @@ class HdfsTextScanner : public HdfsScanner {
   int slot_idx_;
 
   /// Helper class for picking fields and rows from delimited text.
-  boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_;
+  boost::scoped_ptr<DelimitedTextParser<true>> delimited_text_parser_;
 
   /// Return field locations from the Delimited Text Parser.
   std::vector<FieldLocation> field_locations_;


[04/28] impala git commit: IMPALA-6773: Remove Ubuntu 14.04 from supported platforms in bootstrap_*.sh

Posted by ta...@apache.org.
IMPALA-6773: Remove Ubuntu 14.04 from supported platforms in bootstrap_*.sh

Since it's not straightforward to get Java 8 for Ubuntu 14.04, I'm
removing it from the setups supported by the bootstrap scripts.

Change-Id: I4b6d63c9713cc93a786f82e086feca1a91937e16
Reviewed-on: http://gerrit.cloudera.org:8080/9873
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/12d74448
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/12d74448
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/12d74448

Branch: refs/heads/2.x
Commit: 12d744481486f9e5b29a72022eb5d149119360f7
Parents: b96cbfd
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Fri Mar 30 13:44:21 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 bin/bootstrap_development.sh |  2 +-
 bin/bootstrap_system.sh      | 20 +++++---------------
 2 files changed, 6 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/12d74448/bin/bootstrap_development.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_development.sh b/bin/bootstrap_development.sh
index b6ea369..8ff6f48 100755
--- a/bin/bootstrap_development.sh
+++ b/bin/bootstrap_development.sh
@@ -18,7 +18,7 @@
 # under the License.
 
 # This script bootstraps a development environment from almost nothing; it is known to
-# work on Ubuntu 14.04 and 16.04. It clobbers some local environment and system
+# work on Ubuntu 16.04. It clobbers some local environment and system
 # configurations, so it is best to run this in a fresh install. It also sets up the
 # ~/.bashrc for the calling user and impala-config-local.sh with some environment
 # variables to make Impala compile and run after this script is complete.

http://git-wip-us.apache.org/repos/asf/impala/blob/12d74448/bin/bootstrap_system.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index 08a93aa..3a9c42b 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -18,7 +18,7 @@
 # under the License.
 
 # This script bootstraps a system for Impala development from almost nothing; it is known
-# to work on Ubuntu 14.04 and 16.04. It clobbers some local environment and system
+# to work on Ubuntu 16.04. It clobbers some local environment and system
 # configurations, so it is best to run this in a fresh install. It also sets up the
 # ~/.bashrc for the calling user and impala-config-local.sh with some environment
 # variables to make Impala compile and run after this script is complete.
@@ -69,9 +69,9 @@ then
   exit 1
 fi
 
-if ! [[ $DISTRIB_RELEASE = 14.04 || $DISTRIB_RELEASE = 16.04 ]]
+if ! [[ $DISTRIB_RELEASE = 16.04 ]]
 then
-  echo "This script only supports Ubuntu 14.04 and 16.04" >&2
+  echo "This script only supports 16.04" >&2
   exit 1
 fi
 
@@ -107,7 +107,7 @@ apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
         krb5-admin-server krb5-kdc krb5-user libsasl2-dev libsasl2-modules \
         libsasl2-modules-gssapi-mit libssl-dev make maven ninja-build ntp \
         ntpdate python-dev python-setuptools postgresql ssh wget vim-common psmisc \
-        lsof
+        lsof openjdk-8-jdk openjdk-8-source openjdk-8-dbg
 
 if ! { service --status-all | grep -E '^ \[ \+ \]  ssh$'; }
 then
@@ -118,14 +118,7 @@ fi
 # TODO: check that there is enough space on disk to do a build and data load
 # TODO: make this work with non-bash shells
 
-JDK_VERSION=8
-if [[ $DISTRIB_RELEASE = 14.04 ]]
-then
-  JDK_VERSION=7
-fi
-apt-get --yes install openjdk-${JDK_VERSION}-jdk openjdk-${JDK_VERSION}-source \
-  openjdk-${JDK_VERSION}-dbg
-SET_JAVA_HOME="export JAVA_HOME=/usr/lib/jvm/java-${JDK_VERSION}-openjdk-amd64"
+SET_JAVA_HOME="export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64"
 echo "$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
 eval "$SET_JAVA_HOME"
 
@@ -148,9 +141,6 @@ sudo service ntp start || grep docker /proc/1/cgroup
 if [[ $DISTRIB_RELEASE = 16.04 ]]
 then
   SET_LD_LIBRARY_PATH='export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}'
-elif [[ $DISTRIB_RELEASE = 14.04 ]]
-then
-  SET_LD_LIBRARY_PATH="unset LD_LIBRARY_PATH"
 fi
 echo "$SET_LD_LIBRARY_PATH" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
 eval "$SET_LD_LIBRARY_PATH"


[23/28] impala git commit: IMPALA-6771: Fix in-predicate set up bug

Posted by ta...@apache.org.
IMPALA-6771: Fix in-predicate set up bug

Fixes a bug that introduced default-initialized values into the set data
structure used to check for set membership, which could cause wrong results.
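
(Editorial aside: the root cause is vector's count constructor, which
creates default-constructed elements where only capacity was intended. A
standalone C++ sketch of the bug class, with std::set and std::string
standing in for Impala's flat_set and SetType:)

  #include <cassert>
  #include <set>
  #include <string>
  #include <vector>

  int main() {
    const int num_args = 4;  // e.g. col IN ('a','b','c'): 1 expr + 3 constants
    // Buggy: vector(n) creates n empty strings up front, so the real values
    // are appended after them and "" leaks into the membership set.
    std::vector<std::string> buggy(num_args);
    for (int i = 1; i < num_args; ++i) buggy.push_back("v" + std::to_string(i));
    std::set<std::string> bad(buggy.begin(), buggy.end());
    assert(bad.count("") == 1);  // wrong: "" now matches the IN list

    // Fixed: reserve() allocates capacity without constructing elements.
    std::vector<std::string> fixed;
    fixed.reserve(num_args - 1);
    for (int i = 1; i < num_args; ++i) fixed.push_back("v" + std::to_string(i));
    std::set<std::string> good(fixed.begin(), fixed.end());
    assert(good.count("") == 0);
    return 0;
  }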

Testing:
Added a test case that exercises this scenario.
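For readers unfamiliar with the pitfall: constructing a std::vector with a
size argument creates that many default-initialized elements, while
reserve() only allocates capacity. A standalone C++ illustration of the
difference (not Impala code):

  #include <cassert>
  #include <string>
  #include <vector>

  int main() {
    // Constructing with a size creates default-initialized elements up front.
    std::vector<std::string> wrong(3);  // already holds {"", "", ""}
    wrong.push_back("a");               // size is now 4; the "" stay behind
    assert(wrong.size() == 4 && wrong[0].empty());

    // reserve() allocates capacity without creating any elements.
    std::vector<std::string> right;
    right.reserve(3);
    right.push_back("a");
    assert(right.size() == 1 && right[0] == "a");
    return 0;
  }

In the in-predicate code, those stray default-initialized values ended up in
the membership set, so, for a string column, an empty string could spuriously
match the IN list.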

Change-Id: I7e776dbcb7ee4a9b64e1295134a27d332f5415b6
Reviewed-on: http://gerrit.cloudera.org:8080/9891
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/bd63208b
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/bd63208b
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/bd63208b

Branch: refs/heads/2.x
Commit: bd63208bfcfcfa893b979e76358cd40f71114979
Parents: 4c6e1db
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Mon Apr 2 13:55:48 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/in-predicate.h                             |  3 ++-
 .../functional-query/queries/QueryTest/exprs.test       | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/bd63208b/be/src/exprs/in-predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/in-predicate.h b/be/src/exprs/in-predicate.h
index 2d439ba..87ae8b8 100644
--- a/be/src/exprs/in-predicate.h
+++ b/be/src/exprs/in-predicate.h
@@ -350,7 +350,8 @@ void InPredicate::SetLookupPrepare(
   state->contains_null = false;
   // Collect all values in a vector to use the bulk insert API to avoid N^2 behavior
   // with flat_set.
-  vector<SetType> element_list(ctx->GetNumArgs());
+  std::vector<SetType> element_list;
+  element_list.reserve(ctx->GetNumArgs() - 1);
   for (int i = 1; i < ctx->GetNumArgs(); ++i) {
     DCHECK(ctx->IsArgConstant(i));
     T* arg = reinterpret_cast<T*>(ctx->GetConstantArg(i));

http://git-wip-us.apache.org/repos/asf/impala/blob/bd63208b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 2902c97..1e5a589 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2994,3 +2994,15 @@ select cast('2001-1-2' as timestamp)
 ---- TYPES
 timestamp
 ====
+---- QUERY
+# IMPALA-6771: Test that the in-predicate set does not have default initialized
+# values that can result in wrong results. For a string column the default initialized
+# value is an empty string.
+select count(*) from functional.alltypes
+where regexp_replace(string_col, '1', '')
+in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
+---- RESULTS
+6570
+---- TYPES
+bigint
+====


[03/28] impala git commit: IMPALA-6719: Reset metadata database name case sensitivity

Posted by ta...@apache.org.
IMPALA-6719: Reset metadata database name case sensitivity

Fix an issue with database name case sensitivity in the reset metadata
statement.

Testing:
- Created end-to-end reset metadata tests.
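A minimal sketch of the general technique: key the database cache by a
canonical (lower-cased) form of the name and canonicalize again on lookup.
The Catalog class below is hypothetical and assumes that the getDb() call in
the diff below canonicalizes the name; it is not the Impala implementation:

  #include <algorithm>
  #include <cctype>
  #include <string>
  #include <unordered_map>

  static std::string ToLower(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
  }

  class Catalog {
   public:
    void AddDb(const std::string& name) { dbs_.emplace(ToLower(name), name); }
    // "FUNCTIONAL" and "functional" resolve to the same entry.
    const std::string* GetDb(const std::string& name) const {
      auto it = dbs_.find(ToLower(name));
      return it == dbs_.end() ? nullptr : &it->second;
    }
   private:
    std::unordered_map<std::string, std::string> dbs_;
  };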

Change-Id: Id880aa559cec0afe8fbb7d33ccce83f7b5e474cb
Reviewed-on: http://gerrit.cloudera.org:8080/9788
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6a7f07de
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6a7f07de
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6a7f07de

Branch: refs/heads/2.x
Commit: 6a7f07deaf8e641be5b23513c19d0696e49ca133
Parents: 851f622
Author: Fredy wijaya <fw...@cloudera.com>
Authored: Fri Mar 23 11:23:21 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 .../impala/catalog/CatalogServiceCatalog.java   |  2 +-
 tests/metadata/test_reset_metadata.py           | 34 ++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/6a7f07de/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 5bef242..20a3e2f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -977,7 +977,7 @@ public class CatalogServiceCatalog extends Catalog {
       // Load Java UDFs from HMS into the temporary db.
       loadJavaFunctions(tmpDb, javaFns);
 
-      Db db = dbCache_.get().get(dbName);
+      Db db = getDb(dbName);
       if (db == null) {
         throw new DatabaseNotFoundException("Database does not exist: " + dbName);
       }

http://git-wip-us.apache.org/repos/asf/impala/blob/6a7f07de/tests/metadata/test_reset_metadata.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_reset_metadata.py b/tests/metadata/test_reset_metadata.py
new file mode 100644
index 0000000..07ee7f1
--- /dev/null
+++ b/tests/metadata/test_reset_metadata.py
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from test_ddl_base import TestDdlBase
+
+class TestResetMetadata(TestDdlBase):
+  def test_reset_metadata_case_sensitivity(self, unique_database):
+    # IMPALA-6719: Issue with database name case sensitivity in reset metadata.
+    table = 'newtable'
+    self.client.execute('create table %s.%s (i int)' % (unique_database, table))
+
+    self.client.execute('refresh %s.%s' % (unique_database, table))
+    self.client.execute('refresh %s.%s' % (unique_database.upper(), table.upper()))
+
+    self.client.execute('invalidate metadata %s.%s' % (unique_database, table))
+    self.client.execute('invalidate metadata %s.%s' % (unique_database.upper(),
+                                                       table.upper()))
+
+    self.client.execute('refresh functions %s' % unique_database)
+    self.client.execute('refresh functions %s' % unique_database.upper())


[26/28] impala git commit: Revert "Remove Yarn from minicluster by default."

Posted by ta...@apache.org.
Revert "Remove Yarn from minicluster by default."

This reverts commit c05df104570fa2cb7067599bbe3b87740ca9f09e.

Change-Id: I00151795581d22a9852cceaca1d21325d68dbe59
Reviewed-on: http://gerrit.cloudera.org:8080/9969
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Philip Zeyliger <ph...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f09f68a3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f09f68a3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f09f68a3

Branch: refs/heads/2.x
Commit: f09f68a3aee437226bf33db1fd969069f54ef446
Parents: 6bee662
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Tue Apr 10 16:19:52 2018 +0000
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:01 2018 +0000

----------------------------------------------------------------------
 testdata/cluster/admin | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f09f68a3/testdata/cluster/admin
----------------------------------------------------------------------
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 74b5a9c..cb33e21 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -18,7 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This will create/control/destroy a local hdfs/yarn/kms/kudu cluster.
+# This will create/control/destroy a local hdfs+yarn cluster.
 #
 # All roles run on 127.0.0.1, just like the standard mini cluster included with hadoop.
 # The difference is with this cluster, each role runs in its own process and has its own
@@ -31,14 +31,12 @@ set -euo pipefail
 trap 'echo Error in $0 at line $LINENO: $(awk "NR == $LINENO" $0)' ERR
 
 : ${IMPALA_KERBERIZE=}
-: ${INCLUDE_YARN=}
 
-while getopts vky OPT; do
+while getopts vk OPT; do
   case $OPT in
     v) set -x;;
     k) export IMPALA_KERBERIZE=1;;
-    y) export INCLUDE_YARN=1;;
-    ?) echo "Usage: $0 [-v (verbose) -k (kerberize) -y (yarn)] ACTION (see source...)"; exit 1;;
+    ?) echo "Usage: $0 [-v (verbose) -k (kerberize)] ACTION (see source...)"; exit 1;;
   esac
 done
 shift $(($OPTIND-1))
@@ -56,10 +54,7 @@ export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster
 
 if [[ "$TARGET_FILESYSTEM" == "hdfs" ]]; then
   # The check above indicates that the regular mini-cluster is in use.
-  SUPPORTED_SERVICES=(hdfs kms)
-  if [ -n "${INCLUDE_YARN}" ]; then
-    SUPPORTED_SERVICES+=(yarn)
-  fi
+  SUPPORTED_SERVICES=(hdfs yarn kms)
 else
   # Either a remote distributed file system or a local non-distributed file system is
   # in use. Currently the only service that is expected to work is Kudu, though in theory


[11/28] impala git commit: IMPALA-4168: [DOCS] Adds Oracle-style hint placement for INSERT/UPSERT

Posted by ta...@apache.org.
IMPALA-4168: [DOCS] Adds Oracle-style hint placement for INSERT/UPSERT

Change-Id: I43e0a782087c2e67f2e012424fb9261be445efc9
Reviewed-on: http://gerrit.cloudera.org:8080/9030
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Reviewed-by: Kim Jin Chul <ji...@gmail.com>
Reviewed-by: John Russell <jr...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e1123eb1
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e1123eb1
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e1123eb1

Branch: refs/heads/2.x
Commit: e1123eb186ddc556bfb8db1d3165b1f287057ffe
Parents: 80e385a
Author: Jinchul <ji...@gmail.com>
Authored: Tue Jan 16 21:13:43 2018 +0900
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_hints.xml  | 42 ++++++++++++++++++++++++++++++++++++++
 docs/topics/impala_insert.xml |  9 ++++----
 docs/topics/impala_upsert.xml |  2 +-
 3 files changed, 48 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e1123eb1/docs/topics/impala_hints.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_hints.xml b/docs/topics/impala_hints.xml
index d489048..6cafcfb 100644
--- a/docs/topics/impala_hints.xml
+++ b/docs/topics/impala_hints.xml
@@ -77,6 +77,12 @@ INSERT <varname>insert_clauses</varname>
   [{ /* +SHUFFLE */ | /* +NOSHUFFLE */ }]
   [<ph rev="IMPALA-2522 2.8.0">/* +CLUSTERED */</ph>]
   SELECT <varname>remainder_of_query</varname>;
+
+<ph rev="2.12.0 IMPALA-4168">
+UPSERT [{ /* +SHUFFLE */ | /* +NOSHUFFLE */ }]
+  [<ph rev="IMPALA-2522 2.8.0">/* +CLUSTERED */</ph>]
+  <varname>upsert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
 </codeblock>
 
     <p rev="2.0.0">
@@ -112,6 +118,26 @@ INSERT <varname>insert_clauses</varname>
   -- +SHUFFLE|NOSHUFFLE
   SELECT <varname>remainder_of_query</varname>;
 
+<ph rev="2.12.0 IMPALA-4168">
+INSERT /* +SHUFFLE|NOSHUFFLE */
+  <varname>insert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
+
+<ph rev="2.12.0 IMPALA-4168">
+INSERT -- +SHUFFLE|NOSHUFFLE
+  <varname>insert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
+
+<ph rev="2.12.0 IMPALA-4168">
+UPSERT /* +SHUFFLE|NOSHUFFLE */
+  <varname>upsert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
+
+<ph rev="2.12.0 IMPALA-4168">
+UPSERT -- +SHUFFLE|NOSHUFFLE
+  <varname>upsert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
+
 <ph rev="IMPALA-2924">SELECT <varname>select_list</varname> FROM
 <varname>table_ref</varname>
   /* +{SCHEDULE_CACHE_LOCAL | SCHEDULE_DISK_LOCAL | SCHEDULE_REMOTE}
@@ -125,6 +151,22 @@ INSERT <varname>insert_clauses</varname>
 INSERT <varname>insert_clauses</varname>
   /* +CLUSTERED */
   SELECT <varname>remainder_of_query</varname>;</ph>
+
+<ph rev="2.12.0 IMPALA-4168">INSERT -- +CLUSTERED
+  <varname>insert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;
+
+INSERT /* +CLUSTERED */
+  <varname>insert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;
+
+UPSERT -- +CLUSTERED
+  <varname>upsert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;
+
+UPSERT /* +CLUSTERED */
+  <varname>upsert_clauses</varname>
+  SELECT <varname>remainder_of_query</varname>;</ph>
 </codeblock>
 
     <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/e1123eb1/docs/topics/impala_insert.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_insert.xml b/docs/topics/impala_insert.xml
index a83692d..3880a70 100644
--- a/docs/topics/impala_insert.xml
+++ b/docs/topics/impala_insert.xml
@@ -49,7 +49,7 @@ under the License.
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
 <codeblock>[<varname>with_clause</varname>]
-INSERT { INTO | OVERWRITE } [TABLE] <varname>table_name</varname>
+  INSERT <ph rev="2.12.0 IMPALA-4168">[<varname>hint_clause</varname>]</ph> { INTO | OVERWRITE } [TABLE] <varname>table_name</varname>
   [(<varname>column_list</varname>)]
   [ PARTITION (<varname>partition_clause</varname>)]
 {
@@ -204,9 +204,10 @@ hint_with_brackets ::= [SHUFFLE] | [NOSHUFFLE]
       </li>
 
       <li rev="1.2.2">
-        An optional hint clause immediately before the <codeph>SELECT</codeph> keyword, to fine-tune the behavior
-        when doing an <codeph>INSERT ... SELECT</codeph> operation into partitioned Parquet tables. The hint
-        keywords are <codeph>[SHUFFLE]</codeph> and <codeph>[NOSHUFFLE]</codeph>, including the square brackets.
+        An optional hint clause either immediately before the <codeph>SELECT</codeph> keyword or immediately
+        after the <codeph>INSERT</codeph> keyword, to fine-tune the behavior when doing an <codeph>INSERT ... SELECT</codeph>
+        operation into partitioned Parquet tables. The hint clause cannot be specified in multiple places.
+        The hint keywords are <codeph>[SHUFFLE]</codeph> and <codeph>[NOSHUFFLE]</codeph>, including the square brackets.
         Inserting into partitioned Parquet tables can be a resource-intensive operation because it potentially
         involves many files being written to HDFS simultaneously, and separate
         <ph rev="parquet_block_size">large</ph> memory buffers being allocated to buffer the data for each

http://git-wip-us.apache.org/repos/asf/impala/blob/e1123eb1/docs/topics/impala_upsert.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_upsert.xml b/docs/topics/impala_upsert.xml
index 5830675..a95b0f8 100644
--- a/docs/topics/impala_upsert.xml
+++ b/docs/topics/impala_upsert.xml
@@ -68,7 +68,7 @@ under the License.
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
 <codeblock>
-UPSERT INTO [TABLE] [<varname>db_name</varname>.]<varname>table_name</varname>
+UPSERT <ph rev="2.12.0 IMPALA-4168">[<varname>hint_clause</varname>]</ph> INTO [TABLE] [<varname>db_name</varname>.]<varname>table_name</varname>
   [(<varname>column_list</varname>)]
 {
     [<varname>hint_clause</varname>] <varname>select_statement</varname>


[14/28] impala git commit: IMPALA-6785: reset failed heartbeat count when re-registering

Posted by ta...@apache.org.
IMPALA-6785: reset failed heartbeat count when re-registering

When a subscriber re-registers with the same subscriber ID, we need
to reset failure detection info so that we don't erroneously think
that the subscriber has failed.

The bug is a mix-up between whether the subscriber or registration ID is
the key for the failure detector. The symptom was that, if the first
topic update won the race with the first heartbeat, no subsequent topic
updates were delivered. It was introduced when IMPALA-3613 updated some,
but not all, places to use the subscriber ID as the key. It wasn't
detected because we had no tests directly testing this code path, and
it appears that the race almost never happens on smaller clusters.

This patch fixes the problem in two places. Fixing either place is
actually sufficient to fix the bug.

Testing:
Add a regression test that reliably reproduces the issue by delaying the
heartbeat and checking that topic updates continue to be sent. The
test reliably fails before the fix and passes after.
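To make the keying issue concrete, here is a toy missed-heartbeat failure
detector (illustrative only, not Impala's MissedHeartbeatFailureDetector):
keying the state by subscriber ID and evicting it on unregister guarantees
exactly one entry per logical subscriber, so a re-registration cannot be
judged by failure counts accumulated under an old registration.

  #include <string>
  #include <unordered_map>

  class MissedHeartbeatDetector {
   public:
    // Record a heartbeat outcome; a success resets the miss counter.
    void UpdateHeartbeat(const std::string& subscriber_id, bool ok) {
      if (ok) {
        misses_[subscriber_id] = 0;
      } else {
        ++misses_[subscriber_id];
      }
    }
    bool IsFailed(const std::string& subscriber_id) const {
      auto it = misses_.find(subscriber_id);
      return it != misses_.end() && it->second >= kMaxMisses;
    }
    // Called on unregister so stale state never carries over to a new
    // registration of the same subscriber ID.
    void EvictPeer(const std::string& subscriber_id) {
      misses_.erase(subscriber_id);
    }
   private:
    static constexpr int kMaxMisses = 3;
    std::unordered_map<std::string, int> misses_;
  };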

Change-Id: I2ad409e2a8e22d081fce97b085b9469ab046bf07
Reviewed-on: http://gerrit.cloudera.org:8080/9913
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/bf8694fd
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/bf8694fd
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/bf8694fd

Branch: refs/heads/2.x
Commit: bf8694fdaf2e37c0043932433bbda771e787e773
Parents: 100fb2c
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Apr 3 14:13:28 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/statestore/statestore.cc     |  8 +++++---
 be/src/statestore/statestore.h      |  6 +++++-
 tests/statestore/test_statestore.py | 28 +++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/bf8694fd/be/src/statestore/statestore.cc
----------------------------------------------------------------------
diff --git a/be/src/statestore/statestore.cc b/be/src/statestore/statestore.cc
index 1072dee..02363fe 100644
--- a/be/src/statestore/statestore.cc
+++ b/be/src/statestore/statestore.cc
@@ -548,8 +548,7 @@ Status Statestore::RegisterSubscriber(const SubscriberId& subscriber_id,
     shared_ptr<Subscriber> current_registration(
         new Subscriber(subscriber_id, *registration_id, location, topic_registrations));
     subscribers_.emplace(subscriber_id, current_registration);
-    failure_detector_->UpdateHeartbeat(
-        PrintId(current_registration->registration_id()), true);
+    failure_detector_->UpdateHeartbeat(subscriber_id, true);
     num_subscribers_metric_->SetValue(subscribers_.size());
     subscriber_set_metric_->Add(subscriber_id);
 
@@ -878,6 +877,9 @@ void Statestore::DoSubscriberUpdate(UpdateKind update_kind, int thread_id,
                   << "or re-registered (last known registration ID: "
                   << update.registration_id << ")";
         UnregisterSubscriber(subscriber.get());
+      } else {
+        LOG(INFO) << "Failure was already detected for subscriber '" << subscriber->id()
+                  << "'. Won't send another " << update_kind_str;
       }
     } else {
       // Schedule the next message.
@@ -907,7 +909,7 @@ void Statestore::UnregisterSubscriber(Subscriber* subscriber) {
   heartbeat_client_cache_->CloseConnections(subscriber->network_address());
 
   // Prevent the failure detector from growing without bound
-  failure_detector_->EvictPeer(PrintId(subscriber->registration_id()));
+  failure_detector_->EvictPeer(subscriber->id());
 
   // Delete all transient entries
   {

http://git-wip-us.apache.org/repos/asf/impala/blob/bf8694fd/be/src/statestore/statestore.h
----------------------------------------------------------------------
diff --git a/be/src/statestore/statestore.h b/be/src/statestore/statestore.h
index 3058e94..482e48b 100644
--- a/be/src/statestore/statestore.h
+++ b/be/src/statestore/statestore.h
@@ -517,7 +517,11 @@ class Statestore : public CacheLineAligned {
 
   /// Failure detector for subscribers. If a subscriber misses a configurable number of
   /// consecutive heartbeat messages, it is considered failed and a) its transient topic
-  /// entries are removed and b) its entry in the subscriber map is erased.
+  /// entries are removed and b) its entry in the subscriber map is erased. The
+  /// subscriber ID is used to identify peers for failure detection purposes. Subscriber
+  /// state is evicted from the failure detector when the subscriber is unregistered,
+  /// so old subscribers do not occupy memory and the failure detection state does not
+  /// carry over to any new registrations of the previous subscriber.
   boost::scoped_ptr<MissedHeartbeatFailureDetector> failure_detector_;
 
   /// Metric that track the registered, non-failed subscribers.

http://git-wip-us.apache.org/repos/asf/impala/blob/bf8694fd/tests/statestore/test_statestore.py
----------------------------------------------------------------------
diff --git a/tests/statestore/test_statestore.py b/tests/statestore/test_statestore.py
index ba51c8d..737ff9c 100644
--- a/tests/statestore/test_statestore.py
+++ b/tests/statestore/test_statestore.py
@@ -17,6 +17,7 @@
 
 from collections import defaultdict
 import json
+import logging
 import socket
 import threading
 import traceback
@@ -39,6 +40,8 @@ from Status.ttypes import TStatus
 
 from tests.common.environ import specific_build_type_timeout
 
+LOG = logging.getLogger('test_statestore')
+
 # Tests for the statestore. The StatestoreSubscriber class is a skeleton implementation of
 # a Python-based statestore subscriber with additional hooks to allow testing. Each
 # StatestoreSubscriber runs its own server so that the statestore may contact it.
@@ -174,6 +177,7 @@ class StatestoreSubscriber(object):
     # Variables to notify for updates on each topic.
     self.update_event = threading.Condition()
     self.heartbeat_cb, self.update_cb = heartbeat_cb, update_cb
+    self.subscriber_id = "python-test-client-%s" % uuid.uuid1()
     self.exception = None
 
   def Heartbeat(self, args):
@@ -250,7 +254,6 @@ class StatestoreSubscriber(object):
 
   def register(self, topics=None):
     """Call the Register() RPC"""
-    self.subscriber_id = "python-test-client-%s" % uuid.uuid1()
     if topics is None: topics = []
     request = Subscriber.TRegisterSubscriberRequest(
       topic_registrations=topics,
@@ -515,3 +518,26 @@ class TestStatestore():
           .wait_for_update(persistent_topic_name, 1)
           .wait_for_update(transient_topic_name, 1)
     )
+
+  def test_heartbeat_failure_reset(self):
+    """Regression test for IMPALA-6785: the heartbeat failure count for the subscriber ID
+    should be reset when it resubscribes, not after the first successful heartbeat. Delay
+    the heartbeat to force the topic update to finish first."""
+
+    sub = StatestoreSubscriber(heartbeat_cb=lambda sub, args: time.sleep(0.5))
+    topic_name = "test_heartbeat_failure_reset"
+    reg = TTopicRegistration(topic_name=topic_name, is_transient=True)
+    sub.start()
+    sub.register(topics=[reg])
+    LOG.info("Registered with id {0}".format(sub.subscriber_id))
+    sub.wait_for_heartbeat(1)
+    sub.kill()
+    LOG.info("Killed, waiting for statestore to detect failure via heartbeats")
+    sub.wait_for_failure()
+  # IMPALA-6785 caused only one topic update to be sent. Wait for multiple updates to
+    # be received to confirm that the subsequent updates are being scheduled repeatedly.
+    target_updates = sub.update_counts[topic_name] + 5
+    sub.start()
+    sub.register(topics=[reg])
+    LOG.info("Re-registered with id {0}, waiting for update".format(sub.subscriber_id))
+    sub.wait_for_update(topic_name, target_updates)


[18/28] impala git commit: Fixing typo in SimpleLogger::Flush()

Posted by ta...@apache.org.
Fixing typo in SimpleLogger::Flush()

Change-Id: I5ce0525e8db133f4b1df653b995e3eb108619050
Reviewed-on: http://gerrit.cloudera.org:8080/9893
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e9109156
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e9109156
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e9109156

Branch: refs/heads/2.x
Commit: e91091564e72fa6e1a62ccb31eb72b31db50497d
Parents: 93543cf
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Sat Mar 31 20:55:11 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/util/simple-logger.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e9109156/be/src/util/simple-logger.h
----------------------------------------------------------------------
diff --git a/be/src/util/simple-logger.h b/be/src/util/simple-logger.h
index dd87722..7018ac6 100644
--- a/be/src/util/simple-logger.h
+++ b/be/src/util/simple-logger.h
@@ -45,7 +45,7 @@ class SimpleLogger {
   Status AppendEntry(const std::string& entry);
 
   /// Flushes the log file to disk by closing and re-opening the file. This function is
-  /// thread safe and blocks while a WriteEntry() is in progress
+  /// thread safe and blocks while an AppendEntry() is in progress
   Status Flush();
 
  private:


[05/28] impala git commit: IMPALA-6779: [DOCS] Improve the REPLICA_PREFERENCE doc

Posted by ta...@apache.org.
IMPALA-6779: [DOCS] Improve the REPLICA_PREFERENCE doc

Added detailed usage notes for REPLICA_PREFERENCE.

Change-Id: If38f9c881f553568c2516ecc23ec501f23ee1f28
Reviewed-on: http://gerrit.cloudera.org:8080/9877
Reviewed-by: John Russell <jr...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b96cbfd0
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b96cbfd0
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b96cbfd0

Branch: refs/heads/2.x
Commit: b96cbfd09a76ad1e14b970e1e450ac3935042db2
Parents: a0450d2
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Mar 30 14:54:39 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_replica_preference.xml | 49 ++++++++++++++++++++------
 1 file changed, 38 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b96cbfd0/docs/topics/impala_replica_preference.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_replica_preference.xml b/docs/topics/impala_replica_preference.xml
index 45a5dbd..6c0d3ab 100644
--- a/docs/topics/impala_replica_preference.xml
+++ b/docs/topics/impala_replica_preference.xml
@@ -21,7 +21,13 @@ under the License.
 <concept id="replica_preference" rev="2.7.0">
 
   <title>REPLICA_PREFERENCE Query Option (<keyword keyref="impala27"/> or higher only)</title>
-  <titlealts audience="PDF"><navtitle>REPLICA_PREFERENCE</navtitle></titlealts>
+
+  <titlealts audience="PDF">
+
+    <navtitle>REPLICA_PREFERENCE</navtitle>
+
+  </titlealts>
+
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -38,29 +44,50 @@ under the License.
     </p>
 
     <p>
-      The <codeph>REPLICA_PREFERENCE</codeph> query option
-      lets you spread the load more evenly if hotspots and bottlenecks persist, by allowing hosts to do local reads,
-      or even remote reads, to retrieve the data for cached blocks if Impala can determine that it would be
-      too expensive to do all such processing on a particular host.
+      The <codeph>REPLICA_PREFERENCE</codeph> query option lets you distribute the work more
+      evenly if hotspots and bottlenecks persist. It causes the access cost of all replicas of a
+      data block to be considered equal to or worse than the configured value. This allows
+      Impala to schedule reads to suboptimal replicas (e.g. local in the presence of cached
+      ones) in order to distribute the work across more executor nodes.
     </p>
 
     <p>
-      <b>Type:</b> numeric (0, 2, 4)
-      or corresponding mnemonic strings (<codeph>CACHE_LOCAL</codeph>, <codeph>DISK_LOCAL</codeph>, <codeph>REMOTE</codeph>).
-      The gaps in the numeric sequence are to accomodate other intermediate
-      values that might be added in the future.
+      Allowed values are: <codeph>CACHE_LOCAL</codeph> (<codeph>0</codeph>),
+      <codeph>DISK_LOCAL</codeph> (<codeph>2</codeph>), <codeph>REMOTE</codeph>
+      (<codeph>4</codeph>)
     </p>
 
     <p>
-      <b>Default:</b> 0 (equivalent to <codeph>CACHE_LOCAL</codeph>)
+      <b>Type:</b> Enum
+    </p>
+
+    <p>
+      <b>Default:</b> <codeph>CACHE_LOCAL (0)</codeph>
     </p>
 
     <p conref="../shared/impala_common.xml#common/added_in_270"/>
 
+    <p>
+      <b>Usage Notes:</b>
+    </p>
+
+    <p>
+      By default Impala selects the best replica it can find in terms of access cost. The
+      preferred order is cached, local, and remote. With <codeph>REPLICA_PREFERENCE</codeph>,
+      the preference of all replicas is capped at the selected value. For example, when
+      <codeph>REPLICA_PREFERENCE</codeph> is set to <codeph>DISK_LOCAL</codeph>, cached and
+      local replicas are treated with equal preference. When set to
+      <codeph>REMOTE</codeph>, all three types of replicas, cached, local, remote, are treated
+      with equal preference.
+    </p>
+
     <p conref="../shared/impala_common.xml#common/related_info"/>
+
     <p>
-      <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/>, <xref href="impala_schedule_random_replica.xml#schedule_random_replica"/>
+      <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/>,
+      <xref href="impala_schedule_random_replica.xml#schedule_random_replica"/>
     </p>
 
   </conbody>
+
 </concept>
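As a toy illustration of the capping behavior described in the usage notes
above (an assumed cost model, not Impala's scheduler code), the option acts
as a floor on every replica's effective access cost:

  #include <algorithm>
  #include <cstdio>

  enum ReplicaCost { CACHE_LOCAL = 0, DISK_LOCAL = 2, REMOTE = 4 };

  // REPLICA_PREFERENCE raises each replica's cost to at least its value.
  static int EffectiveCost(int actual, int preference) {
    return std::max(actual, preference);
  }

  int main() {
    // With REPLICA_PREFERENCE=DISK_LOCAL, cached (0) and local (2) tie at 2,
    // so the scheduler can pick either and spread the load.
    std::printf("%d %d %d\n",
                EffectiveCost(CACHE_LOCAL, DISK_LOCAL),  // 2
                EffectiveCost(DISK_LOCAL, DISK_LOCAL),   // 2
                EffectiveCost(REMOTE, DISK_LOCAL));      // 4
    return 0;
  }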


[08/28] impala git commit: IMPALA-6792: Fail status reporting if coordinator refuses connections

Posted by ta...@apache.org.
IMPALA-6792: Fail status reporting if coordinator refuses connections

The ReportExecStatusAux() function is run on a dedicated thread per
fragment instance. This thread will run until the fragment instance
completes executing.

On every attempt to send a report to the coordinator, it will attempt
to send up to 3 RPCs. If all 3 of them fail, then the fragment instance
will cancel itself.

However, there is one case where a failure to send the RPC is not
considered a failed RPC: if, when we attempt to obtain a connection, we
end up creating a new connection (via ClientCache::CreateClient()) instead
of getting a previously cached one, and this new connection fails to even
Open(), it is not counted as an RPC failure.

This patch counts such an error as a failed RPC too.

This patch also clarifies some of the error log messages and introduces
a flag to control the sleep interval between failed ReportExecStatus RPC
retries.
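A simplified sketch of the resulting retry policy (the helper names are
stubs, not real Impala APIs; the actual logic lives in
QueryState::ReportExecStatusAux() in the diff below): a failure to even
create a connection now consumes the same retry budget as a failed RPC.

  #include <chrono>
  #include <thread>

  // Stubs standing in for connection setup and the ReportExecStatus RPC.
  static bool TryOpenConnection() { return false; }
  static bool TrySendReport() { return false; }

  // Returns true if a report reached the coordinator within the budget.
  static bool ReportWithRetries(int max_attempts = 3, int retry_ms = 100) {
    for (int i = 0; i < max_attempts; ++i) {
      // Failing to open a connection counts as a failed attempt, too.
      if (TryOpenConnection() && TrySendReport()) return true;
      if (i + 1 < max_attempts) {
        std::this_thread::sleep_for(std::chrono::milliseconds(retry_ms));
      }
    }
    return false;  // the caller then cancels the fragment instances
  }

  int main() { return ReportWithRetries() ? 0 : 1; }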

Change-Id: If668838f99f78b5ffa713488178b2eb5799ba220
Reviewed-on: http://gerrit.cloudera.org:8080/9916
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/da3437a3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/da3437a3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/da3437a3

Branch: refs/heads/2.x
Commit: da3437a31b28c7fe598baf0f81e780e7f1dc82d5
Parents: bd63208
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Tue Apr 3 14:24:21 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/query-state.cc | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/da3437a3/be/src/runtime/query-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/query-state.cc b/be/src/runtime/query-state.cc
index ad5748f..04a4283 100644
--- a/be/src/runtime/query-state.cc
+++ b/be/src/runtime/query-state.cc
@@ -38,6 +38,10 @@
 
 #include "common/names.h"
 
+DEFINE_int32(report_status_retry_interval_ms, 100,
+    "The interval in milliseconds to wait before retrying a failed status report RPC to "
+    "the coordinator.");
+
 using namespace impala;
 
 QueryState::ScopedRef::ScopedRef(const TUniqueId& query_id) {
@@ -249,28 +253,40 @@ void QueryState::ReportExecStatusAux(bool done, const Status& status,
   DCHECK_EQ(res.status.status_code, TErrorCode::OK);
   // Try to send the RPC 3 times before failing. Sleep for 100ms between retries.
   // It's safe to retry the RPC as the coordinator handles duplicate RPC messages.
+  Status client_status;
   for (int i = 0; i < 3; ++i) {
-    Status client_status;
     ImpalaBackendConnection client(ExecEnv::GetInstance()->impalad_client_cache(),
         query_ctx().coord_address, &client_status);
     if (client_status.ok()) {
       rpc_status = client.DoRpc(&ImpalaBackendClient::ReportExecStatus, params, &res);
       if (rpc_status.ok()) break;
     }
-    if (i < 2) SleepForMs(100);
+    if (i < 2) SleepForMs(FLAGS_report_status_retry_interval_ms);
   }
   Status result_status(res.status);
-  if ((!rpc_status.ok() || !result_status.ok()) && instances_started) {
+  if ((!client_status.ok() || !rpc_status.ok() || !result_status.ok()) &&
+      instances_started) {
     // TODO: should we try to keep rpc_status for the final report? (but the final
     // report, following this Cancel(), may not succeed anyway.)
     // TODO: not keeping an error status here means that all instances might
     // abort with CANCELLED status, despite there being an error
-    if (!rpc_status.ok()) {
-      // TODO: Fix IMPALA-2990. Cancelling fragment instances here may cause query to
-      // hang as the coordinator may not be aware of the cancellation. Remove the log
-      // statement once IMPALA-2990 is fixed.
-      LOG(ERROR) << "Cancelling fragment instances due to failure to report status. "
-                 << "Query " << PrintId(query_id()) << " may hang. See IMPALA-2990.";
+    // TODO: Fix IMPALA-2990. Cancelling fragment instances without sending the
+    // ReportExecStatus RPC may cause query to hang as the coordinator may not be aware
+    // of the cancellation. Remove the log statements once IMPALA-2990 is fixed.
+    if (!client_status.ok()) {
+      LOG(ERROR) << "Cancelling fragment instances due to failure to obtain a connection "
+                 << "to the coordinator. (" << client_status.GetDetail()
+                 << "). Query " << PrintId(query_id()) << " may hang. See IMPALA-2990.";
+    } else if (!rpc_status.ok()) {
+      LOG(ERROR) << "Cancelling fragment instances due to failure to reach the "
+                 << "coordinator. (" << rpc_status.GetDetail()
+                 << "). Query " << PrintId(query_id()) << " may hang. See IMPALA-2990.";
+    } else if (!result_status.ok()) {
+      // If the ReportExecStatus RPC succeeded in reaching the coordinator and we get
+      // back a non-OK status, it means that the coordinator expects us to cancel the
+      // fragment instances for this query.
+      LOG(INFO) << "Cancelling fragment instances as directed by the coordinator. "
+                << "Returned status: " << result_status.GetDetail();
     }
     Cancel();
   }


[19/28] impala git commit: [DOCS] Removed old files no longer in use

Posted by ta...@apache.org.
[DOCS] Removed old files no longer in use

Change-Id: Ia0aaa756bf0fc0092186ebc293543408b403aa5e
Reviewed-on: http://gerrit.cloudera.org:8080/9938
Reviewed-by: John Russell <jr...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e0845ce1
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e0845ce1
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e0845ce1

Branch: refs/heads/2.x
Commit: e0845ce13cd27f893e29d6f892f5638852c43c1f
Parents: aab4946
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Apr 5 14:48:49 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 docs/Cloudera-Impala-Release-Notes.ditamap | 28 -----------------
 docs/impala.ditamap                        |  2 +-
 docs/impala_release_notes.ditamap          | 28 +++++++++++++++++
 docs/topics/impala_alter_function.xml      | 39 -----------------------
 docs/topics/impala_window_functions.xml    | 41 -------------------------
 5 files changed, 29 insertions(+), 109 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e0845ce1/docs/Cloudera-Impala-Release-Notes.ditamap
----------------------------------------------------------------------
diff --git a/docs/Cloudera-Impala-Release-Notes.ditamap b/docs/Cloudera-Impala-Release-Notes.ditamap
deleted file mode 100644
index 4f86e42..0000000
--- a/docs/Cloudera-Impala-Release-Notes.ditamap
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
-<map audience="standalone">
-  <title>Cloudera Impala Release Notes</title>
-  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>
-  <topicref href="topics/impala_new_features.xml"/>
-  <topicref href="topics/impala_incompatible_changes.xml"/>
-  <topicref href="topics/impala_known_issues.xml"/>
-  <topicref href="topics/impala_fixed_issues.xml"/>
-</map>

http://git-wip-us.apache.org/repos/asf/impala/blob/e0845ce1/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 3168b3d..757b54f 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -306,7 +306,7 @@ under the License.
 <!-- End of former contents of Installing-and-Using-Impala_xi42979.ditamap. -->
   <topicref audience="standalone" href="topics/impala_faq.xml"/>
   <topicref audience="standalone" href="topics/impala_release_notes.xml">
-    <mapref href="Cloudera-Impala-Release-Notes.ditamap" format="ditamap"
+    <mapref href="impala_release_notes.ditamap" format="ditamap"
       audience="standalone"/>
   </topicref>
 

http://git-wip-us.apache.org/repos/asf/impala/blob/e0845ce1/docs/impala_release_notes.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_release_notes.ditamap b/docs/impala_release_notes.ditamap
new file mode 100644
index 0000000..58ad5ab
--- /dev/null
+++ b/docs/impala_release_notes.ditamap
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
+<map audience="standalone">
+  <title>Apache Impala Release Notes</title>
+  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>
+  <topicref href="topics/impala_new_features.xml"/>
+  <topicref href="topics/impala_incompatible_changes.xml"/>
+  <topicref href="topics/impala_known_issues.xml"/>
+  <topicref href="topics/impala_fixed_issues.xml"/>
+</map>

http://git-wip-us.apache.org/repos/asf/impala/blob/e0845ce1/docs/topics/impala_alter_function.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_function.xml b/docs/topics/impala_alter_function.xml
deleted file mode 100644
index 243222f..0000000
--- a/docs/topics/impala_alter_function.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept audience="hidden" rev="1.x" id="alter_function">
-
-  <title>ALTER FUNCTION Statement</title>
-  <titlealts audience="PDF"><navtitle>ALTER FUNCTION</navtitle></titlealts>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-      <data name="Category" value="SQL"/>
-      <data name="Category" value="DDL"/>
-      <data name="Category" value="Developers"/>
-      <data name="Category" value="Data Analysts"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p/>
-  </conbody>
-</concept>

http://git-wip-us.apache.org/repos/asf/impala/blob/e0845ce1/docs/topics/impala_window_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_window_functions.xml b/docs/topics/impala_window_functions.xml
deleted file mode 100644
index 9716403..0000000
--- a/docs/topics/impala_window_functions.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept rev="1.3.0" id="window_functions">
-
-  <title>Window Functions</title>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p>
-      <indexterm audience="hidden">window functions</indexterm>
-      Window functions are a special category of built-in functions. They produce one output value for each input
-      row, like scalar functions such as <codeph>length()</codeph> or <codeph>substr()</codeph>. Yet like aggregate
-      functions, they also examine the contents of multiple input rows to compute each output value.
-    </p>
-
-    <p outputclass="toc inpage"/>
-  </conbody>
-</concept>


[25/28] impala git commit: Remove Yarn from minicluster by default.

Posted by ta...@apache.org.
Remove Yarn from minicluster by default.

Turns out that we start Yarn as part of the minicluster, but we never use it.
(HiveServer2 is configured to run MR jobs "locally" in process.) Likely, this
Yarn integration is a vestige of Yarn/Llama integration.  We can save memory by
not starting it by default.

There are some less-common tooks like tests/comparison/cluster.py which use
Yarn (and Hadoop Streaming). In deference to those tools, I've left a mechanism
to start Yarn rather than excising it altogether. After running
buildall the regular way, add Yarn to the cluster by running:
  testdata/cluster/admin -y start_cluster

I tested by running core tests. I did not test the kerberized minicluster.

Change-Id: I5504cc40b89e3c6d53fac0b7aa4b395fa63e8d79


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6bee6621
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6bee6621
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6bee6621

Branch: refs/heads/2.x
Commit: 6bee6621d2f26d2a370823e1ba654a1b2d9d7aab
Parents: d8e53d0
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Mon Apr 9 15:10:43 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:01 2018 +0000

----------------------------------------------------------------------
 testdata/cluster/admin | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/6bee6621/testdata/cluster/admin
----------------------------------------------------------------------
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index cb33e21..74b5a9c 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -18,7 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This will create/control/destroy a local hdfs+yarn cluster.
+# This will create/control/destroy a local hdfs/yarn/kms/kudu cluster.
 #
 # All roles run on 127.0.0.1, just like the standard mini cluster included with hadoop.
 # The difference is with this cluster, each role runs in its own process and has its own
@@ -31,12 +31,14 @@ set -euo pipefail
 trap 'echo Error in $0 at line $LINENO: $(awk "NR == $LINENO" $0)' ERR
 
 : ${IMPALA_KERBERIZE=}
+: ${INCLUDE_YARN=}
 
-while getopts vk OPT; do
+while getopts vky OPT; do
   case $OPT in
     v) set -x;;
     k) export IMPALA_KERBERIZE=1;;
-    ?) echo "Usage: $0 [-v (verbose) -k (kerberize)] ACTION (see source...)"; exit 1;;
+    y) export INCLUDE_YARN=1;;
+    ?) echo "Usage: $0 [-v (verbose) -k (kerberize) -y (yarn)] ACTION (see source...)"; exit 1;;
   esac
 done
 shift $(($OPTIND-1))
@@ -54,7 +56,10 @@ export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster
 
 if [[ "$TARGET_FILESYSTEM" == "hdfs" ]]; then
   # The check above indicates that the regular mini-cluster is in use.
-  SUPPORTED_SERVICES=(hdfs yarn kms)
+  SUPPORTED_SERVICES=(hdfs kms)
+  if [ -n "${INCLUDE_YARN}" ]; then
+    SUPPORTED_SERVICES+=(yarn)
+  fi
 else
   # Either a remote distributed file system or a local non-distributed file system is
   # in use. Currently the only service that is expected to work is Kudu, though in theory


[17/28] impala git commit: IMPALA-6711: loosen shell test prompt regex

Posted by ta...@apache.org.
IMPALA-6711: loosen shell test prompt regex

We have seen this test fail because the fully-qualified domain name
differed between the python test process and the impala shell process
(see JIRA for details). The exact domain name is irrelevant to the test;
we only really care about whether the prompt appeared or not.
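A standalone check of the loosened pattern (illustrative; the test itself
uses Python's pexpect): '[^:]+' accepts whatever hostname the shell prints.

  #include <cassert>
  #include <regex>
  #include <string>

  int main() {
    const std::regex prompt(R"(\[[^:]+:2100[0-9]\])");
    assert(std::regex_search(std::string("[localhost:21000]"), prompt));
    assert(std::regex_search(std::string("[host.example.com:21009]"), prompt));
    assert(!std::regex_search(std::string("[no-port-here]"), prompt));
    return 0;
  }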

Change-Id: I24078ef97d56e5bb32fd866af861e3a1d19c8c44
Reviewed-on: http://gerrit.cloudera.org:8080/9831
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d73eb3b2
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d73eb3b2
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d73eb3b2

Branch: refs/heads/2.x
Commit: d73eb3b297c07bdecb48fe29b2702628be6484ca
Parents: caaa137
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Mar 27 15:03:04 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 tests/shell/test_shell_interactive.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d73eb3b2/tests/shell/test_shell_interactive.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index e23acc4..deb75ea 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -43,6 +43,9 @@ SHELL_CMD = "%s/bin/impala-shell.sh" % os.environ['IMPALA_HOME']
 SHELL_HISTORY_FILE = os.path.expanduser("~/.impalahistory")
 QUERY_FILE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'tests', 'shell')
 
+# Regex to match the interactive shell prompt that is expected after each command.
+PROMPT_REGEX = r'\[[^:]+:2100[0-9]\]'
+
 class TestImpalaShellInteractive(object):
   """Test the impala shell interactively"""
 
@@ -226,8 +229,6 @@ class TestImpalaShellInteractive(object):
     Ensure that multiline queries are preserved when they're read back from history.
     Additionally, also test that comments are preserved.
     """
-    # regex for pexpect, a shell prompt is expected after each command..
-    prompt_regex = '\[{0}:2100[0-9]\]'.format(socket.getfqdn())
     # readline gets its input from tty, so using stdin does not work.
     child_proc = pexpect.spawn(SHELL_CMD)
     # List of (input query, expected text in output).
@@ -240,10 +241,10 @@ class TestImpalaShellInteractive(object):
         ("select /*comment*/\n1;", "[4]: select /*comment*/\n1;"),
         ("select\n/*comm\nent*/\n1;", "[5]: select\n/*comm\nent*/\n1;")]
     for query, _ in queries:
-      child_proc.expect(prompt_regex)
+      child_proc.expect(PROMPT_REGEX)
       child_proc.sendline(query)
       child_proc.expect("Fetched 1 row\(s\) in .*s")
-    child_proc.expect(prompt_regex)
+    child_proc.expect(PROMPT_REGEX)
     child_proc.sendline('quit;')
     p = ImpalaShell()
     p.send_cmd('history')


[24/28] impala git commit: Remove Yarn from minicluster by default. (2nd try)

Posted by ta...@apache.org.
Remove Yarn from minicluster by default. (2nd try)

Remove Yarn from minicluster by default.

Turns out that we start Yarn as part of the minicluster, but we never use it.
(HiveServer2 is configured to run MR jobs "locally" in process.) Likely, this
Yarn integration is a vestige of Yarn/Llama integration.  We can save memory by
not starting it by default.

There are some less-common tools, like tests/comparison/cluster.py, which use
Yarn (and Hadoop Streaming). In deference to those tools, I've left a mechanism
to start Yarn rather than excising it altogether. After running
buildall the regular way, add Yarn to the cluster by running:
  testdata/cluster/admin -y start_cluster

I tested by running core tests. I did not test the kerberized minicluster.

[Due to a git mishap, a version of this was previously checked in and reverted.]

Change-Id: I97053a44bbe32048e6c35cc28680d1c7696af13f
Reviewed-on: http://gerrit.cloudera.org:8080/9970
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c2e4c43b
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c2e4c43b
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c2e4c43b

Branch: refs/heads/2.x
Commit: c2e4c43b6e3265cd9e92f6afaa4a10a307919ca7
Parents: f09f68a
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Tue Apr 10 09:21:20 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:01 2018 +0000

----------------------------------------------------------------------
 testdata/cluster/admin | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/c2e4c43b/testdata/cluster/admin
----------------------------------------------------------------------
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index cb33e21..74b5a9c 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -18,7 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This will create/control/destroy a local hdfs+yarn cluster.
+# This will create/control/destroy a local hdfs/yarn/kms/kudu cluster.
 #
 # All roles run on 127.0.0.1, just like the standard mini cluster included with hadoop.
 # The difference is with this cluster, each role runs in its own process and has its own
@@ -31,12 +31,14 @@ set -euo pipefail
 trap 'echo Error in $0 at line $LINENO: $(awk "NR == $LINENO" $0)' ERR
 
 : ${IMPALA_KERBERIZE=}
+: ${INCLUDE_YARN=}
 
-while getopts vk OPT; do
+while getopts vky OPT; do
   case $OPT in
     v) set -x;;
     k) export IMPALA_KERBERIZE=1;;
-    ?) echo "Usage: $0 [-v (verbose) -k (kerberize)] ACTION (see source...)"; exit 1;;
+    y) export INCLUDE_YARN=1;;
+    ?) echo "Usage: $0 [-v (verbose) -k (kerberize) -y (yarn)] ACTION (see source...)"; exit 1;;
   esac
 done
 shift $(($OPTIND-1))
@@ -54,7 +56,10 @@ export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster
 
 if [[ "$TARGET_FILESYSTEM" == "hdfs" ]]; then
   # The check above indicates that the regular mini-cluster is in use.
-  SUPPORTED_SERVICES=(hdfs yarn kms)
+  SUPPORTED_SERVICES=(hdfs kms)
+  if [ -n "${INCLUDE_YARN}" ]; then
+    SUPPORTED_SERVICES+=(yarn)
+  fi
 else
   # Either a remote distributed file system or a local non-distributed file system is
   # in use. Currently the only service that is expected to work is Kudu, though in theory


[22/28] impala git commit: IMPALA-6457: [DOCS] DECIMAL support for Kudu tables

Posted by ta...@apache.org.
IMPALA-6457: [DOCS] DECIMAL support for Kudu tables

Removed DECIMAL from the unsupported data types for Kudu.

Change-Id: I5a2498b4a28c2a53a3fec9b634a770e42dcac499
Reviewed-on: http://gerrit.cloudera.org:8080/9918
Reviewed-by: Grant Henke <gr...@apache.org>
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/adecae3e
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/adecae3e
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/adecae3e

Branch: refs/heads/2.x
Commit: adecae3e2a12b390a8e783ab9f276164af47aa37
Parents: bf8694f
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Tue Apr 3 15:04:20 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 docs/shared/impala_common.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/adecae3e/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 985dcb7..6c25b8e 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -3854,7 +3854,7 @@ sudo pip-python install ssl</codeblock>
       </p>
 
       <p id="kudu_unsupported_data_type" rev="kudu">
-        Currently, the data types <codeph>DECIMAL</codeph>, <codeph>CHAR</codeph>, <codeph>VARCHAR</codeph>,
+        Currently, the data types <codeph>CHAR</codeph>, <codeph>VARCHAR</codeph>,
         <codeph>ARRAY</codeph>, <codeph>MAP</codeph>, and <codeph>STRUCT</codeph> cannot be used with Kudu tables.
       </p>
 


[27/28] impala git commit: IMPALA-6824: Fix crash in RuntimeProfile::EventSequence::AddNewerEvents()

Posted by ta...@apache.org.
IMPALA-6824: Fix crash in RuntimeProfile::EventSequence::AddNewerEvents()

When events_ is empty at runtime-profile-counters.h:347, calling
events_.back() is undefined behavior, which can lead to a crash. The fix is to
check events_.empty() before calling back().

This change adds a backend test to make sure that updating empty event
sequences doesn't crash.
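
For readers skimming past the diff, here is a minimal standalone sketch of the
fixed logic (simplified: no locking, and written as a hypothetical free
function rather than the actual RuntimeProfile::EventSequence method):

  #include <cstdint>
  #include <string>
  #include <utility>
  #include <vector>

  using Event = std::pair<std::string, int64_t>;

  // Calling back() on an empty vector is undefined behavior, so an empty
  // event sequence is treated as having a last timestamp of 0.
  void AddNewerEvents(std::vector<Event>* events,
      const std::vector<std::string>& labels,
      const std::vector<int64_t>& timestamps) {
    int64_t last_timestamp = events->empty() ? 0 : events->back().second;
    for (size_t i = 0; i < timestamps.size(); ++i) {
      if (timestamps[i] <= last_timestamp) continue;  // not newer; skip it
      events->emplace_back(labels[i], timestamps[i]);
    }
  }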

Change-Id: I76d30517544e8aa40e7d041f6a65a5dd361ae3c1
Reviewed-on: http://gerrit.cloudera.org:8080/9951
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d8e53d03
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d8e53d03
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d8e53d03

Branch: refs/heads/2.x
Commit: d8e53d03932dd4dd14b7d4f976fb552d2d621ed6
Parents: d73eb3b
Author: Lars Volker <lv...@cloudera.com>
Authored: Sun Apr 8 22:58:39 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:01 2018 +0000

----------------------------------------------------------------------
 be/src/util/runtime-profile-counters.h |  4 ++--
 be/src/util/runtime-profile-test.cc    | 32 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d8e53d03/be/src/util/runtime-profile-counters.h
----------------------------------------------------------------------
diff --git a/be/src/util/runtime-profile-counters.h b/be/src/util/runtime-profile-counters.h
index c8d8bcd..52410b5 100644
--- a/be/src/util/runtime-profile-counters.h
+++ b/be/src/util/runtime-profile-counters.h
@@ -344,10 +344,10 @@ class RuntimeProfile::EventSequence {
     DCHECK_EQ(timestamps.size(), labels.size());
     DCHECK(std::is_sorted(timestamps.begin(), timestamps.end()));
     boost::lock_guard<SpinLock> event_lock(lock_);
-    int64_t last_timestamp = events_.back().second;
+    int64_t last_timestamp = events_.empty() ? 0 : events_.back().second;
     for (int64_t i = 0; i < timestamps.size(); ++i) {
       if (timestamps[i] <= last_timestamp) continue;
-      events_.push_back(make_pair(labels[i], timestamps[i]));
+      events_.emplace_back(labels[i], timestamps[i]);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/d8e53d03/be/src/util/runtime-profile-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/runtime-profile-test.cc b/be/src/util/runtime-profile-test.cc
index 0955203..b194cc9 100644
--- a/be/src/util/runtime-profile-test.cc
+++ b/be/src/util/runtime-profile-test.cc
@@ -613,6 +613,38 @@ TEST(CountersTest, EventSequences) {
   }
 }
 
+TEST(CountersTest, UpdateEmptyEventSequence) {
+  // IMPALA-6824: This test makes sure that adding events to an empty event sequence does
+  // not crash.
+  ObjectPool pool;
+
+  // Create the profile to send in the update and add some events.
+  RuntimeProfile* profile = RuntimeProfile::Create(&pool, "Profile");
+  RuntimeProfile::EventSequence* seq = profile->AddEventSequence("event sequence");
+  seq->Start();
+  // Sleep for 10ms to make sure the events are logged at a time > 0.
+  SleepForMs(10);
+  seq->MarkEvent("aaaa");
+  seq->MarkEvent("bbbb");
+
+  vector<RuntimeProfile::EventSequence::Event> events;
+  seq->GetEvents(&events);
+  EXPECT_EQ(2, events.size());
+
+  TRuntimeProfileTree thrift_profile;
+  profile->ToThrift(&thrift_profile);
+
+  // Create the profile that will be updated and add the empty event sequence to it.
+  RuntimeProfile* updated_profile = RuntimeProfile::Create(&pool, "Updated Profile");
+  seq = updated_profile->AddEventSequence("event sequence");
+  updated_profile->Update(thrift_profile);
+
+  // Verify that the events have been updated successfully.
+  events.clear();
+  seq->GetEvents(&events);
+  EXPECT_EQ(2, events.size());
+}
+
 void ValidateSampler(const StreamingSampler<int, 10>& sampler, int expected_num,
     int expected_period, int expected_delta) {
   const int* samples = NULL;


[02/28] impala git commit: IMPALA-6791: distcc server setup script

Posted by ta...@apache.org.
IMPALA-6791: distcc server setup script

Adds a script that can be run to configure a working distcc server with
ccache. This is based roughly on the manual steps in the README.

Testing:
Tested the script on Ubuntu and CentOS 6 systems; it resulted in a working
distcc server on both.

Change-Id: I9a145911f095eb8e173694475cc2dac65eb7c7bb
Reviewed-on: http://gerrit.cloudera.org:8080/9901
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0a8cac2a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0a8cac2a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0a8cac2a

Branch: refs/heads/2.x
Commit: 0a8cac2a64acdbfe1f7742ccdca31537e43adbac
Parents: 12d7444
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Apr 2 17:35:05 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:55:59 2018 +0000

----------------------------------------------------------------------
 bin/distcc/README.md              |  19 +++++-
 bin/distcc/distcc_server_setup.sh | 113 +++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0a8cac2a/bin/distcc/README.md
----------------------------------------------------------------------
diff --git a/bin/distcc/README.md b/bin/distcc/README.md
index 4ebca45..a8487b2 100644
--- a/bin/distcc/README.md
+++ b/bin/distcc/README.md
@@ -26,6 +26,14 @@ The rest of the setup is done for you; here is a short description of what they
 # Usage
 
 ### First time
+1. Symlink /opt/Impala-Toolchain to a directory writable by your user and point
+  IMPALA_TOOLCHAIN at that directory. This ensures toolchain binaries are at the
+  same path locally as on the distcc servers
+
+        mkdir -p "$IMPALA_TOOLCHAIN"/toolchain
+        sudo ln -s "$IMPALA_TOOLCHAIN"/toolchain /opt/Impala-Toolchain
+        echo 'export IMPALA_TOOLCHAIN=/opt/Impala-Toolchain' >> bin/impala-config-local.sh
+
 1. Source bin/impala-config.sh in the Impala repo. Step #2 depends on this.
 
         source "$IMPALA_HOME"/bin/impala-config.sh
@@ -70,7 +78,16 @@ source "$IMPALA_HOME"/bin/impala-config.sh   # Skip if already done
 source "$IMPALA_HOME"/bin/distcc/distcc_env.sh
 ```
 
-# Setting up a new distcc server
+# Setting up a new distcc server (automatic)
+1. Run distcc_server_setup.sh to do initial setup for a server that accepts connections
+   from the 172.\* private IP address block:
+  sudo ./bin/distcc/distcc_server_setup.sh 172.16.0.0/12
+1. Download toolchain packages. This is run initially and can be rerun to download
+  new packages that a new version of Impala depends on.
+  (. ./bin/impala-config.sh && ./infra/python/deps/download_requirements &&
+   DOWNLOAD_CDH_COMPONENTS=false ./bin/bootstrap_toolchain.py)
+
+# Setting up a new distcc server (manual)
 
 1. Install "distccd" and "ccache".
 1. Configure distccd (edit /etc/sysconfig/distccd on a RHEL server) with the options

http://git-wip-us.apache.org/repos/asf/impala/blob/0a8cac2a/bin/distcc/distcc_server_setup.sh
----------------------------------------------------------------------
diff --git a/bin/distcc/distcc_server_setup.sh b/bin/distcc/distcc_server_setup.sh
new file mode 100755
index 0000000..893a82d
--- /dev/null
+++ b/bin/distcc/distcc_server_setup.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script automates setup of distcc servers on Ubuntu and CentOS. It has been tested
+# on Ubuntu 14.04 and 16.04 and CentOS 6 and 7. See bin/distcc/README.md for manual setup
+# instructions and background.
+#
+# Usage:
+# ------
+# The script expects to be run as root and requires that the user specify a range of
+# IPs that it should accept connections from. E.g. To configure a distcc server that
+# accepts connections from the 172.* private IP address block. The IP address argument
+# is forwarded to the --allow flag of distccd.
+#
+#   sudo ./bin/distcc/distcc_server_setup.sh 172.16.0.0/12
+#
+# Environment overrides:
+# ---------------------
+# CCACHE_DIR: directory to use for distccd's ccache.
+# CCACHE_SIZE: size of ccache, passed to ccache's -M option
+set -eu -o pipefail
+trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
+
+if [[ $# != 1 ]]; then
+  echo "Usage: $0 <allowed IP address range>"
+  exit 1
+fi
+set -x
+ALLOWED_NETS=$1
+
+# Find the absolute path to repo root.
+IMPALA_HOME=$(cd "$(dirname "$0")/../.."; pwd)
+
+LSB_ID=$(lsb_release -is)
+LSB_VERSION=$(lsb_release -rs)
+if [[ "$LSB_ID" == Ubuntu ]]; then
+  if ! [[ $LSB_VERSION == 14.04 || $LSB_VERSION == 16.04 ]]; then
+    echo "This script only supports Ubuntu 14.04 and 16.04" >&2
+    exit 1
+  fi
+  LINUX_FLAVOUR=ubuntu
+  DISTCCD_USER=distccd
+  DISTCCD_SERVICE=distcc
+elif [[ "$LSB_ID" == CentOS ]]; then
+  if ! [[ $LSB_VERSION == 6.* || $LSB_VERSION = 7.* ]]; then
+    echo "This script only supports CentOS 6 and 7" >&2
+    exit 1
+  fi
+  LINUX_FLAVOUR=redhat
+  DISTCCD_USER=nobody
+  DISTCCD_SERVICE=distccd
+else
+  echo "This script only supports Ubuntu and CentOS" >&2
+  exit 1
+fi
+
+echo "Installing required packages"
+if [[ $LINUX_FLAVOUR = ubuntu ]]; then
+  apt-get install --yes distcc ccache
+else
+  yum install -y distcc-server ccache
+fi
+
+echo "Configuring ccache for distccd user"
+export CCACHE_DIR=${CCACHE_DIR-/opt/ccache}
+CCACHE_SIZE=${CCACHE_SIZE:-25G}
+mkdir -p "${CCACHE_DIR}"
+chown ${DISTCCD_USER} "${CCACHE_DIR}"
+HOME=${CCACHE_DIR} sudo -E -u ${DISTCCD_USER} ccache -M "${CCACHE_SIZE}"
+
+echo "Configuring distcc"
+if [[ $LINUX_FLAVOUR = ubuntu ]]; then
+  cat << EOF >> /etc/default/distcc
+# BEGIN: Settings automatically generated by distcc_server_setup.sh
+STARTDISTCC="true"
+ALLOWEDNETS="${ALLOWED_NETS}"
+LISTENER="0.0.0.0"
+JOBS=$(nproc)
+# CCACHE_DIR is picked up by ccache from environment.
+export CCACHE_DIR=${CCACHE_DIR}
+# END: Settings automatically generated by distcc_server_setup.sh
+EOF
+else
+  cat << EOF >> /etc/sysconfig/distccd
+# BEGIN: Settings automatically generated by distcc_server_setup.sh
+OPTIONS="--jobs $(($(nproc) * 2)) --allow ${ALLOWED_NETS} --log-level=warn --nice=-15"
+# CCACHE_DIR is picked up by ccache from environment. CentOS 6 requires CCACHE_DIR to
+# be exported while CentOS 7 seems to ignore the "export VAR=val" syntax.
+CCACHE_DIR=${CCACHE_DIR}
+export CCACHE_DIR
+# END: Settings automatically generated by distcc_server_setup.sh
+EOF
+fi
+service ${DISTCCD_SERVICE} restart
+
+echo "Symlinking /opt/Impala-Toolchain to default toolchain location"
+ln -f -s -T "${IMPALA_HOME}/toolchain" /opt/Impala-Toolchain


[28/28] impala git commit: IMPALA-6805: Show current database in Impala shell prompt

Posted by ta...@apache.org.
IMPALA-6805: Show current database in Impala shell prompt

Prompt format:
[host:port] db_name>

Testing:
- Added new shell tests
- Ran end-to-end shell tests

Change-Id: Ifb0ae58507321e426e5f0f16518671420974a3fc
Reviewed-on: http://gerrit.cloudera.org:8080/9927
Reviewed-by: Fredy Wijaya <fw...@cloudera.com>
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/10024
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Philip Zeyliger <ph...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/bbe53429
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/bbe53429
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/bbe53429

Branch: refs/heads/2.x
Commit: bbe534298df1a23267c4109c69dc2645a0c9ecaa
Parents: c2e4c43
Author: Fredy wijaya <fw...@cloudera.com>
Authored: Wed Apr 4 13:05:31 2018 -0700
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Thu Apr 12 02:59:17 2018 +0000

----------------------------------------------------------------------
 shell/impala_shell.py                 | 15 +++++++++--
 tests/shell/test_shell_interactive.py | 42 ++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/bbe53429/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index f809e46..67391bf 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -104,6 +104,7 @@ class ImpalaShell(object, cmd.Cmd):
 
   # If not connected to an impalad, the server version is unknown.
   UNKNOWN_SERVER_VERSION = "Not Connected"
+  PROMPT_FORMAT = "[{host}:{port}] {db}> "
   DISCONNECTED_PROMPT = "[Not connected] > "
   UNKNOWN_WEBSERVER = "0.0.0.0"
   # Message to display in shell when cancelling a query
@@ -768,7 +769,8 @@ class ImpalaShell(object, cmd.Cmd):
     if self.imp_client.connected:
       self._print_if_verbose('Connected to %s:%s' % self.impalad)
       self._print_if_verbose('Server version: %s' % self.server_version)
-      self.prompt = "[%s:%s] > " % self.impalad
+      self.prompt = ImpalaShell.PROMPT_FORMAT.format(
+        host=self.impalad[0], port=self.impalad[1], db=ImpalaShell.DEFAULT_DB)
       if self.refresh_after_connect:
         self.cmdqueue.append('invalidate metadata' + ImpalaShell.CMD_DELIM)
         print_to_stderr("Invalidating Metadata")
@@ -1138,7 +1140,16 @@ class ImpalaShell(object, cmd.Cmd):
     """Executes a USE... query"""
     query = self._create_beeswax_query(args)
     if self._execute_stmt(query) is CmdStatus.SUCCESS:
-      self.current_db = args
+      self.current_db = args.strip('`').strip()
+      self.prompt = ImpalaShell.PROMPT_FORMAT.format(host=self.impalad[0],
+                                                     port=self.impalad[1],
+                                                     db=self.current_db)
+    elif args.strip('`') == self.current_db:
+      # args == current_db means -d option was passed but the "use [db]" operation failed.
+      # We need to set the current_db to None so that it does not show a database, which
+      # may not exist.
+      self.current_db = None
+      return CmdStatus.ERROR
     else:
       return CmdStatus.ERROR
 

http://git-wip-us.apache.org/repos/asf/impala/blob/bbe53429/tests/shell/test_shell_interactive.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index deb75ea..5c3ee2c 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -58,11 +58,11 @@ class TestImpalaShellInteractive(object):
   def teardown_class(cls):
     restore_shell_history(cls.tempfile_name)
 
-  def _expect_with_cmd(self, proc, cmd, expectations=()):
+  def _expect_with_cmd(self, proc, cmd, expectations=(), db="default"):
     """Executes a command on the expect process instance and verifies a set of
     assertions defined by the expections."""
     proc.sendline(cmd + ";")
-    proc.expect(":21000] >")
+    proc.expect(":21000] {db}>".format(db=db))
     if not expectations: return
     for e in expectations:
       assert e in proc.before
@@ -71,7 +71,7 @@ class TestImpalaShellInteractive(object):
   def test_local_shell_options(self):
     """Test that setting the local shell options works"""
     proc = pexpect.spawn(SHELL_CMD)
-    proc.expect(":21000] >")
+    proc.expect(":21000] default>")
     self._expect_with_cmd(proc, "set", ("LIVE_PROGRESS: False", "LIVE_SUMMARY: False"))
     self._expect_with_cmd(proc, "set live_progress=true")
     self._expect_with_cmd(proc, "set", ("LIVE_PROGRESS: True", "LIVE_SUMMARY: False"))
@@ -179,11 +179,14 @@ class TestImpalaShellInteractive(object):
     assert get_num_open_sessions(initial_impala_service) == num_sessions_initial + 1, \
         "Not connected to %s:21000" % hostname
     p.send_cmd("connect %s:21001" % hostname)
+
     # Wait for a little while
     sleep(2)
     # The number of sessions on the target impalad should have been incremented.
     assert get_num_open_sessions(target_impala_service) == num_sessions_target + 1, \
         "Not connected to %s:21001" % hostname
+    assert "[%s:21001] default>" % hostname in p.get_result().stdout
+
     # The number of sessions on the initial impalad should have been decremented.
     assert get_num_open_sessions(initial_impala_service) == num_sessions_initial, \
         "Connection to %s:21000 should have been closed" % hostname
@@ -260,7 +263,7 @@ class TestImpalaShellInteractive(object):
       os.remove(SHELL_HISTORY_FILE)
     assert not os.path.exists(SHELL_HISTORY_FILE)
     child_proc = pexpect.spawn(SHELL_CMD)
-    child_proc.expect(":21000] >")
+    child_proc.expect(":21000] default>")
     self._expect_with_cmd(child_proc, "@1", ("Command index out of range"))
     self._expect_with_cmd(child_proc, "rerun -1", ("Command index out of range"))
     self._expect_with_cmd(child_proc, "select 'first_command'", ("first_command"))
@@ -268,7 +271,7 @@ class TestImpalaShellInteractive(object):
     self._expect_with_cmd(child_proc, "@ -1", ("first_command"))
     self._expect_with_cmd(child_proc, "select 'second_command'", ("second_command"))
     child_proc.sendline('history;')
-    child_proc.expect(":21000] >")
+    child_proc.expect(":21000] default>")
     assert '[1]: select \'first_command\';' in child_proc.before;
     assert '[2]: select \'second_command\';' in child_proc.before;
     assert '[3]: history;' in child_proc.before;
@@ -494,6 +497,35 @@ class TestImpalaShellInteractive(object):
     result = run_impala_shell_interactive(query)
     assert '| id   |' in result.stdout
 
+  @pytest.mark.execute_serially
+  def test_shell_prompt(self):
+    proc = pexpect.spawn(SHELL_CMD)
+    proc.expect(":21000] default>")
+    self._expect_with_cmd(proc, "use foo", (), 'default')
+    self._expect_with_cmd(proc, "use functional", (), 'functional')
+    self._expect_with_cmd(proc, "use foo", (), 'functional')
+    self._expect_with_cmd(proc, 'use `tpch`', (), 'tpch')
+    self._expect_with_cmd(proc, 'use ` tpch `', (), 'tpch')
+
+    proc = pexpect.spawn(SHELL_CMD, ['-d', 'functional'])
+    proc.expect(":21000] functional>")
+    self._expect_with_cmd(proc, "use foo", (), 'functional')
+    self._expect_with_cmd(proc, "use tpch", (), 'tpch')
+    self._expect_with_cmd(proc, "use foo", (), 'tpch')
+
+    proc = pexpect.spawn(SHELL_CMD, ['-d', ' functional '])
+    proc.expect(":21000] functional>")
+
+    proc = pexpect.spawn(SHELL_CMD, ['-d', '` functional `'])
+    proc.expect(":21000] functional>")
+
+    # Start an Impala shell with an invalid DB.
+    proc = pexpect.spawn(SHELL_CMD, ['-d', 'foo'])
+    proc.expect(":21000] default>")
+    self._expect_with_cmd(proc, "use foo", (), 'default')
+    self._expect_with_cmd(proc, "use functional", (), 'functional')
+    self._expect_with_cmd(proc, "use foo", (), 'functional')
+
 def run_impala_shell_interactive(input_lines, shell_args=None):
   """Runs a command in the Impala shell interactively."""
   # if argument "input_lines" is a string, makes it into a list


[09/28] impala git commit: IMPALA-6801: Cleanup request_pool

Posted by ta...@apache.org.
IMPALA-6801: Cleanup request_pool

Eliminate QuerySchedule's copy of the request_pool field. Instead, set it
directly in the TQueryCtx early on. Then the TQueryCtx no longer needs to
be copied by the coordinator.
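
As a rough illustration of the resulting pattern (hypothetical minimal types
below, not the real Thrift-generated structs), the schedule now reads the pool
through the TQueryCtx that already owns it instead of keeping its own copy:

  #include <string>

  // Hypothetical stand-ins for the Thrift-generated types.
  struct TQueryCtx { std::string request_pool; };
  struct TQueryExecRequest { TQueryCtx query_ctx; };

  class QuerySchedule {
   public:
    explicit QuerySchedule(const TQueryExecRequest& request)
      : request_(request) {}
    // Valid once the resolved pool has been set on the TQueryCtx; there is
    // no separate request_pool_ member to keep in sync.
    const std::string& request_pool() const {
      return request_.query_ctx.request_pool;
    }
   private:
    const TQueryExecRequest& request_;
  };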

Change-Id: I3bee843ef7d72ba14d487fdb56e55fa3660aafd3
Reviewed-on: http://gerrit.cloudera.org:8080/9909
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3fd345d3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3fd345d3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3fd345d3

Branch: refs/heads/2.x
Commit: 3fd345d325792e4af87c1f232f56f0638a73afe8
Parents: da3437a
Author: Dan Hecht <dh...@cloudera.com>
Authored: Tue Apr 3 10:42:39 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 11 22:56:00 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/coordinator.cc              | 12 +++++-------
 be/src/runtime/coordinator.h               |  3 ---
 be/src/scheduling/query-schedule.h         |  8 +++-----
 be/src/scheduling/scheduler.cc             |  9 ++++++---
 be/src/service/client-request-state.h      |  7 +++----
 be/src/service/impala-beeswax-server.cc    |  2 +-
 be/src/service/impala-hs2-server.cc        |  2 +-
 be/src/service/impala-http-handler.cc      |  2 +-
 be/src/service/impala-server.cc            |  9 +++++++--
 be/src/service/impala-server.h             | 16 ++++++++--------
 common/thrift/ImpalaInternalService.thrift |  4 ++--
 11 files changed, 37 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index e6b3bca..5a3de5d 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -90,11 +90,9 @@ Status Coordinator::Exec() {
   const TQueryExecRequest& request = schedule_.request();
   DCHECK(request.plan_exec_info.size() > 0);
 
-  VLOG_QUERY << "Exec() query_id=" << schedule_.query_id()
+  VLOG_QUERY << "Exec() query_id=" << query_id()
              << " stmt=" << request.query_ctx.client_request.stmt;
   stmt_type_ = request.stmt_type;
-  query_ctx_ = request.query_ctx;
-  query_ctx_.__set_request_pool(schedule_.request_pool());
 
   query_profile_ =
       RuntimeProfile::Create(obj_pool(), "Execution Profile " + PrintId(query_id()));
@@ -117,7 +115,7 @@ Status Coordinator::Exec() {
   // TODO: revisit this, it may not be true anymore
   lock_guard<mutex> l(lock_);
 
-  query_state_ = ExecEnv::GetInstance()->query_exec_mgr()->CreateQueryState(query_ctx_);
+  query_state_ = ExecEnv::GetInstance()->query_exec_mgr()->CreateQueryState(query_ctx());
   query_state_->AcquireExecResourceRefcount(); // Decremented in ReleaseExecResources().
   filter_mem_tracker_ = query_state_->obj_pool()->Add(new MemTracker(
       -1, "Runtime Filter (Coordinator)", query_state_->query_mem_tracker(), false));
@@ -355,7 +353,7 @@ void Coordinator::StartBackendExec() {
   for (BackendState* backend_state: backend_states_) {
     ExecEnv::GetInstance()->exec_rpc_thread_pool()->Offer(
         [backend_state, this, &debug_options]() {
-          backend_state->Exec(query_ctx_, debug_options, filter_routing_table_,
+          backend_state->Exec(query_ctx(), debug_options, filter_routing_table_,
             exec_complete_barrier_.get());
         });
   }
@@ -783,7 +781,7 @@ void Coordinator::ReleaseAdmissionControlResources() {
 void Coordinator::ReleaseAdmissionControlResourcesLocked() {
   if (released_admission_control_resources_) return;
   LOG(INFO) << "Release admission control resources for query_id="
-            << PrintId(query_ctx_.query_id);
+            << PrintId(query_ctx().query_id);
   AdmissionController* admission_controller =
       ExecEnv::GetInstance()->admission_controller();
   if (admission_controller != nullptr) admission_controller->ReleaseQuery(schedule_);
@@ -941,7 +939,7 @@ void Coordinator::FilterState::Disable(MemTracker* tracker) {
 }
 
 const TUniqueId& Coordinator::query_id() const {
-  return query_ctx_.query_id;
+  return query_ctx().query_id;
 }
 
 void Coordinator::GetTExecSummary(TExecSummary* exec_summary) {

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/runtime/coordinator.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index 6665c08..8e556ec 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -187,9 +187,6 @@ class Coordinator { // NOLINT: The member variables could be re-ordered to save
   /// owned by the ClientRequestState that owns this coordinator
   const QuerySchedule& schedule_;
 
-  /// copied from TQueryExecRequest; constant across all fragments
-  TQueryCtx query_ctx_;
-
   /// copied from TQueryExecRequest, governs when to call ReportQuerySummary
   TStmtType::type stmt_type_;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/scheduling/query-schedule.h
----------------------------------------------------------------------
diff --git a/be/src/scheduling/query-schedule.h b/be/src/scheduling/query-schedule.h
index 28ccd6f..be219a8 100644
--- a/be/src/scheduling/query-schedule.h
+++ b/be/src/scheduling/query-schedule.h
@@ -143,8 +143,9 @@ class QuerySchedule {
   const TUniqueId& query_id() const { return query_id_; }
   const TQueryExecRequest& request() const { return request_; }
   const TQueryOptions& query_options() const { return query_options_; }
-  const std::string& request_pool() const { return request_pool_; }
-  void set_request_pool(const std::string& pool_name) { request_pool_ = pool_name; }
+
+  // Valid after Schedule() succeeds.
+  const std::string& request_pool() const { return request().query_ctx.request_pool; }
 
   /// Gets the estimated memory (bytes) per-node. Returns the user specified estimate
   /// (MEM_LIMIT query parameter) if provided or the estimate from planning if available,
@@ -250,9 +251,6 @@ class QuerySchedule {
   /// Used to generate consecutive fragment instance ids.
   TUniqueId next_instance_id_;
 
-  /// Request pool to which the request was submitted for admission.
-  std::string request_pool_;
-
   /// Indicates if the query has been admitted for execution.
   bool is_admitted_;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/scheduling/scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index 2ba1563..60302d3 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -700,10 +700,13 @@ Status Scheduler::Schedule(QuerySchedule* schedule) {
 
   // TODO: Move to admission control, it doesn't need to be in the Scheduler.
   string resolved_pool;
+  // Re-resolve the pool name to propagate any resolution errors now that this request
+  // is known to require a valid pool.
   RETURN_IF_ERROR(request_pool_service_->ResolveRequestPool(
-      schedule->request().query_ctx, &resolved_pool));
-  schedule->set_request_pool(resolved_pool);
-  schedule->summary_profile()->AddInfoString("Request Pool", resolved_pool);
+          schedule->request().query_ctx, &resolved_pool));
+  // Resolved pool name should have been set in the TQueryCtx and shouldn't have changed.
+  DCHECK_EQ(resolved_pool, schedule->request_pool());
+  schedule->summary_profile()->AddInfoString("Request Pool", schedule->request_pool());
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/service/client-request-state.h
----------------------------------------------------------------------
diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h
index 657f3de..05cd762 100644
--- a/be/src/service/client-request-state.h
+++ b/be/src/service/client-request-state.h
@@ -159,12 +159,11 @@ class ClientRequestState {
   Coordinator* coord() const { return coord_.get(); }
   QuerySchedule* schedule() { return schedule_.get(); }
 
-  /// Resource pool associated with this query, or an empty string if the schedule has not
-  /// been created and had the pool set yet, or this StmtType doesn't go through admission
-  /// control.
+  /// Admission control resource pool associated with this query.
   std::string request_pool() const {
-    return schedule_ == nullptr ? "" : schedule_->request_pool();
+    return query_ctx_.__isset.request_pool ? query_ctx_.request_pool : "";
   }
+
   int num_rows_fetched() const { return num_rows_fetched_; }
   void set_fetched_rows() { fetched_rows_ = true; }
   bool fetched_rows() const { return fetched_rows_; }

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/service/impala-beeswax-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index c441285..4875adb 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -458,7 +458,7 @@ Status ImpalaServer::QueryToTQueryContext(const Query& query,
 
   // Only query options not set in the session or confOverlay can be overridden by the
   // pool options.
-  AddPoolQueryOptions(query_ctx, ~set_query_options_mask);
+  AddPoolConfiguration(query_ctx, ~set_query_options_mask);
   VLOG_QUERY << "TClientRequest.queryOptions: "
              << ThriftDebugString(query_ctx->client_request.query_options);
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/service/impala-hs2-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index 80ace87..765fccf 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -256,7 +256,7 @@ Status ImpalaServer::TExecuteStatementReqToTQueryContext(
   }
   // Only query options not set in the session or confOverlay can be overridden by the
   // pool options.
-  AddPoolQueryOptions(query_ctx, ~set_query_options_mask);
+  AddPoolConfiguration(query_ctx, ~set_query_options_mask);
   VLOG_QUERY << "TClientRequest.queryOptions: "
              << ThriftDebugString(query_ctx->client_request.query_options);
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/service/impala-http-handler.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc
index 3841bfe..9b8d597 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -361,7 +361,7 @@ void ImpalaHttpHandler::QueryStateToJson(const ImpalaServer::QueryStateRecord& r
   Value val_waiting_time(waiting_time_str.c_str(), document->GetAllocator());
   value->AddMember("waiting_time", val_waiting_time, document->GetAllocator());
 
-  Value resource_pool(record.request_pool.c_str(), document->GetAllocator());
+  Value resource_pool(record.resource_pool.c_str(), document->GetAllocator());
   value->AddMember("resource_pool", resource_pool, document->GetAllocator());
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 9b1c2f5..829ac79 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -787,7 +787,7 @@ void ImpalaServer::ArchiveQuery(const ClientRequestState& query) {
 
 ImpalaServer::~ImpalaServer() {}
 
-void ImpalaServer::AddPoolQueryOptions(TQueryCtx* ctx,
+void ImpalaServer::AddPoolConfiguration(TQueryCtx* ctx,
     const QueryOptionsMask& override_options_mask) {
   // Errors are not returned and are only logged (at level 2) because some incoming
   // requests are not expected to be mapped to a pool and will not have query options,
@@ -801,6 +801,7 @@ void ImpalaServer::AddPoolQueryOptions(TQueryCtx* ctx,
              << " ResolveRequestPool status: " << status.GetDetail();
     return;
   }
+  ctx->__set_request_pool(resolved_pool);
 
   TPoolConfig config;
   status = exec_env_->request_pool_service()->GetPoolConfig(resolved_pool, &config);
@@ -1722,7 +1723,11 @@ ImpalaServer::QueryStateRecord::QueryStateRecord(const ClientRequestState& reque
   }
   all_rows_returned = request_state.eos();
   last_active_time_ms = request_state.last_active_ms();
-  request_pool = request_state.request_pool();
+  // For statement types other than QUERY/DML, show an empty string for resource pool
+  // to indicate that they are not subjected to admission control.
+  if (stmt_type == TStmtType::QUERY || stmt_type == TStmtType::DML) {
+    resource_pool = request_state.request_pool();
+  }
   user_has_profile_access = request_state.user_has_profile_access();
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/be/src/service/impala-server.h
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h
index 2af89fd..fb3f261 100644
--- a/be/src/service/impala-server.h
+++ b/be/src/service/impala-server.h
@@ -706,9 +706,9 @@ class ImpalaServer : public ImpalaServiceIf,
     // The most recent time this query was actively being processed, in Unix milliseconds.
     int64_t last_active_time_ms;
 
-    /// Request pool to which the request was submitted for admission, or an empty string
-    /// if this request doesn't have a pool.
-    std::string request_pool;
+    /// Resource pool to which the request was submitted for admission, or an empty
+    /// string if this request doesn't go through admission control.
+    std::string resource_pool;
 
     /// Initialise from an exec_state. If copy_profile is true, print the query
     /// profile to a string and copy that into this.profile (which is expensive),
@@ -789,11 +789,11 @@ class ImpalaServer : public ImpalaServiceIf,
   void CancelFromThreadPool(uint32_t thread_id,
       const CancellationWork& cancellation_work);
 
-  /// Helper method to add any pool query options to the query_ctx. Must be called before
-  /// ExecuteInternal() at which point the TQueryCtx is const and cannot be mutated.
-  /// override_options_mask indicates which query options can be overridden by the pool
-  /// default query options.
-  void AddPoolQueryOptions(TQueryCtx* query_ctx,
+  /// Helper method to add the pool name and query options to the query_ctx. Must be
+  /// called before ExecuteInternal() at which point the TQueryCtx is const and cannot
+  /// be mutated. override_options_mask indicates which query options can be overridden
+  /// by the pool default query options.
+  void AddPoolConfiguration(TQueryCtx* query_ctx,
       const QueryOptionsMask& override_options_mask);
 
   /// Register timeout value upon opening a new session. This will wake up

http://git-wip-us.apache.org/repos/asf/impala/blob/3fd345d3/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index da7a554..591b804 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -418,8 +418,8 @@ struct TQueryCtx {
   // List of tables with scan ranges that map to blocks with missing disk IDs.
   15: optional list<CatalogObjects.TTableName> tables_missing_diskids
 
-  // The pool to which this request has been submitted. Used to update pool statistics
-  // for admission control.
+  // The resolved admission control pool to which this request will be submitted. May be
+  // unset for statements that aren't subjected to admission control (e.g. USE, SET).
   16: optional string request_pool
 
   // String containing a timestamp (in UTC) set as the query submission time. It