You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2019/02/13 03:38:56 UTC

[impala] branch master updated (dbe9fef -> 9492d45)

This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from dbe9fef  IMPALA-8186: script to configure docker network
     new 5b32a0d  IMPALA-7214: [DOCS] More on decoupling impala and DataNodes
     new d2b8b7b  Add support for compiling using OpenSSL 1.1
     new 6938831  Turn off shell debug tracing for create-load-data.sh
     new 9492d45  IMPALA-8183: fix test_reportexecstatus_retry flakiness

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/util/openssl-util.cc              | 45 +++++++++++++++++++++++---------
 docs/topics/impala_processes.xml         | 10 +++----
 docs/topics/impala_troubleshooting.xml   | 39 +++++++++++++--------------
 testdata/bin/create-load-data.sh         |  1 -
 tests/custom_cluster/test_rpc_timeout.py |  4 +--
 5 files changed, 57 insertions(+), 42 deletions(-)


[impala] 02/04: Add support for compiling using OpenSSL 1.1

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit d2b8b7b9b0f3a02e2418d9182007b736bb739a1b
Author: Hector Acosta <he...@cloudera.com>
AuthorDate: Fri Feb 8 14:50:17 2019 -0800

    Add support for compiling using OpenSSL 1.1
    
    Change-Id: Iaccf1b2dedf0d957a2665df8f9afca4139754264
    Reviewed-on: http://gerrit.cloudera.org:8080/12420
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/openssl-util.cc | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/be/src/util/openssl-util.cc b/be/src/util/openssl-util.cc
index 2b66b86..da583cf 100644
--- a/be/src/util/openssl-util.cc
+++ b/be/src/util/openssl-util.cc
@@ -25,6 +25,7 @@
 #include <openssl/evp.h>
 #include <openssl/rand.h>
 #include <openssl/sha.h>
+#include <openssl/tls1.h>
 
 #include "common/atomic.h"
 #include "gutil/port.h" // ATTRIBUTE_WEAK
@@ -70,7 +71,13 @@ static const int RNG_RESEED_INTERVAL = 128;
 static const int RNG_RESEED_BYTES = 512;
 
 int MaxSupportedTlsVersion() {
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
   return SSLv23_method()->version;
+#else
+  // OpenSSL 1.1+ doesn't let us detect the supported TLS version at runtime. Assume
+  // that the OpenSSL library we're linked against supports only up to TLS1.2
+  return TLS1_2_VERSION;
+#endif
 }
 
 bool IsInternalTlsConfigured() {
@@ -97,13 +104,25 @@ struct ScopedEVPCipherCtx {
   DISALLOW_COPY_AND_ASSIGN(ScopedEVPCipherCtx);
 
   explicit ScopedEVPCipherCtx(int padding) {
-    EVP_CIPHER_CTX_init(&ctx);
-    EVP_CIPHER_CTX_set_padding(&ctx, padding);
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+    ctx = static_cast<EVP_CIPHER_CTX*>(malloc(sizeof(*ctx)));
+    EVP_CIPHER_CTX_init(ctx);
+#else
+    ctx = EVP_CIPHER_CTX_new();
+#endif
+    EVP_CIPHER_CTX_set_padding(ctx, padding);
   }
 
-  ~ScopedEVPCipherCtx() { EVP_CIPHER_CTX_cleanup(&ctx); }
+  ~ScopedEVPCipherCtx() {
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+    EVP_CIPHER_CTX_cleanup(ctx);
+    free(ctx);
+#else
+    EVP_CIPHER_CTX_free(ctx);
+#endif
+  }
 
-  EVP_CIPHER_CTX ctx;
+  EVP_CIPHER_CTX* ctx;
 };
 
 // Callback used by OpenSSLErr() - write the error given to us through buf to the
@@ -170,13 +189,13 @@ Status EncryptionKey::EncryptInternal(
   // mode is well-optimized(instruction level parallelism) with hardware acceleration
   // on x86 and PowerPC
   const EVP_CIPHER* evpCipher = GetCipher();
-  int success = encrypt ? EVP_EncryptInit_ex(&ctx.ctx, evpCipher, NULL, key_, iv_) :
-                          EVP_DecryptInit_ex(&ctx.ctx, evpCipher, NULL, key_, iv_);
+  int success = encrypt ? EVP_EncryptInit_ex(ctx.ctx, evpCipher, NULL, key_, iv_) :
+                          EVP_DecryptInit_ex(ctx.ctx, evpCipher, NULL, key_, iv_);
   if (success != 1) {
     return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex", err_context);
   }
   if (IsGcmMode()) {
-    if (EVP_CIPHER_CTX_ctrl(&ctx.ctx, EVP_CTRL_GCM_SET_IVLEN, AES_BLOCK_SIZE, NULL)
+    if (EVP_CIPHER_CTX_ctrl(ctx.ctx, EVP_CTRL_GCM_SET_IVLEN, AES_BLOCK_SIZE, NULL)
         != 1) {
       return OpenSSLErr("EVP_CIPHER_CTX_ctrl", err_context);
     }
@@ -189,8 +208,8 @@ Status EncryptionKey::EncryptInternal(
     int in_len = static_cast<int>(min<int64_t>(len - offset, numeric_limits<int>::max()));
     int out_len;
     success = encrypt ?
-        EVP_EncryptUpdate(&ctx.ctx, out + offset, &out_len, data + offset, in_len) :
-        EVP_DecryptUpdate(&ctx.ctx, out + offset, &out_len, data + offset, in_len);
+        EVP_EncryptUpdate(ctx.ctx, out + offset, &out_len, data + offset, in_len) :
+        EVP_DecryptUpdate(ctx.ctx, out + offset, &out_len, data + offset, in_len);
     if (success != 1) {
       return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate", err_context);
     }
@@ -201,7 +220,7 @@ Status EncryptionKey::EncryptInternal(
 
   if (IsGcmMode() && !encrypt) {
     // Set expected tag value
-    if (EVP_CIPHER_CTX_ctrl(&ctx.ctx, EVP_CTRL_GCM_SET_TAG, AES_BLOCK_SIZE, gcm_tag_)
+    if (EVP_CIPHER_CTX_ctrl(ctx.ctx, EVP_CTRL_GCM_SET_TAG, AES_BLOCK_SIZE, gcm_tag_)
         != 1) {
       return OpenSSLErr("EVP_CIPHER_CTX_ctrl", err_context);
     }
@@ -209,14 +228,14 @@ Status EncryptionKey::EncryptInternal(
 
   // Finalize encryption or decryption.
   int final_out_len;
-  success = encrypt ? EVP_EncryptFinal_ex(&ctx.ctx, out + offset, &final_out_len) :
-                      EVP_DecryptFinal_ex(&ctx.ctx, out + offset, &final_out_len);
+  success = encrypt ? EVP_EncryptFinal_ex(ctx.ctx, out + offset, &final_out_len) :
+                      EVP_DecryptFinal_ex(ctx.ctx, out + offset, &final_out_len);
   if (success != 1) {
     return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal", err_context);
   }
 
   if (IsGcmMode() && encrypt) {
-    if (EVP_CIPHER_CTX_ctrl(&ctx.ctx, EVP_CTRL_GCM_GET_TAG, AES_BLOCK_SIZE, gcm_tag_)
+    if (EVP_CIPHER_CTX_ctrl(ctx.ctx, EVP_CTRL_GCM_GET_TAG, AES_BLOCK_SIZE, gcm_tag_)
         != 1) {
       return OpenSSLErr("EVP_CIPHER_CTX_ctrl", err_context);
     }


[impala] 01/04: IMPALA-7214: [DOCS] More on decoupling impala and DataNodes

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5b32a0d60110be7c21184819c2dffbb7cbff750f
Author: Alex Rodoni <ar...@cloudera.com>
AuthorDate: Tue Feb 12 12:40:42 2019 -0800

    IMPALA-7214: [DOCS] More on decoupling impala and DataNodes
    
    Change-Id: I4b6f1c704c1e328af9f0beec73f8b6b61fba992e
    Reviewed-on: http://gerrit.cloudera.org:8080/12457
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 docs/topics/impala_processes.xml       | 10 +++------
 docs/topics/impala_troubleshooting.xml | 39 +++++++++++++++++-----------------
 2 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/docs/topics/impala_processes.xml b/docs/topics/impala_processes.xml
index 71986d3..70366dd 100644
--- a/docs/topics/impala_processes.xml
+++ b/docs/topics/impala_processes.xml
@@ -55,10 +55,7 @@ under the License.
         Start one instance of the Impala catalog service.
       </li>
 
-      <li>
-        Start the main Impala service on one or more DataNodes, ideally on all DataNodes to maximize local
-        processing and avoid network traffic due to remote reads.
-      </li>
+      <li> Start the main Impala daemon services. </li>
     </ol>
 
     <p>
@@ -101,9 +98,8 @@ under the License.
 
 <codeblock rev="1.2">$ sudo service impala-catalog start</codeblock>
 
-      <p>
-        Start the Impala service on each DataNode using a command similar to the following:
-      </p>
+      <p> Start the Impala daemon services using a command similar to the
+        following: </p>
 
       <p>
 <codeblock>$ sudo service impala-server start</codeblock>
diff --git a/docs/topics/impala_troubleshooting.xml b/docs/topics/impala_troubleshooting.xml
index 250c899..80b7363 100644
--- a/docs/topics/impala_troubleshooting.xml
+++ b/docs/topics/impala_troubleshooting.xml
@@ -123,17 +123,17 @@ terminate called after throwing an instance of 'boost::exception_detail::clone_i
   <concept id="trouble_io" rev="">
     <title>Troubleshooting I/O Capacity Problems</title>
     <conbody>
-      <p>
-        Impala queries are typically I/O-intensive. If there is an I/O problem with storage devices,
-        or with HDFS itself, Impala queries could show slow response times with no obvious cause
-        on the Impala side. Slow I/O on even a single DataNode could result in an overall slowdown, because
-        queries involving clauses such as <codeph>ORDER BY</codeph>, <codeph>GROUP BY</codeph>, or <codeph>JOIN</codeph>
-        do not start returning results until all DataNodes have finished their work.
-      </p>
-      <p>
-        To test whether the Linux I/O system itself is performing as expected, run Linux commands like
-        the following on each DataNode:
-      </p>
+      <p> Impala queries are typically I/O-intensive. If there is an I/O problem
+        with storage devices, or with HDFS itself, Impala queries could show
+        slow response times with no obvious cause on the Impala side. Slow I/O
+        on even a single Impala daemon could result in an overall slowdown,
+        because queries involving clauses such as <codeph>ORDER BY</codeph>,
+          <codeph>GROUP BY</codeph>, or <codeph>JOIN</codeph> do not start
+        returning results until all executor Impala daemons have finished their
+        work. </p>
+      <p> To test whether the Linux I/O system itself is performing as expected,
+        run Linux commands like the following on each host where an Impala
+        daemon is running: </p>
 <codeblock>
 $ sudo sysctl -w vm.drop_caches=3 vm.drop_caches=0
 vm.drop_caches = 3
@@ -265,14 +265,15 @@ $ sudo dd if=/dev/sdd bs=1M of=/dev/null count=1k
                 </p>
 
                 <p>
-                  <note>
-                    Replace <varname>hostname</varname> and <varname>port</varname> with the hostname and port of
-                    your Impala state store host machine and web server port. The default port is 25010.
-                  </note>
-                  The number of <codeph>impalad</codeph> instances listed should match the expected number of
-                  <codeph>impalad</codeph> instances installed in the cluster. There should also be one
-                  <codeph>impalad</codeph> instance installed on each DataNode
-                </p>
+                  <note> Replace <varname>hostname</varname> and
+                      <varname>port</varname> with the hostname and port of your
+                    Impala state store host machine and web server port. The
+                    default port is 25010. </note> The number of
+                    <codeph>impalad</codeph> instances listed should match the
+                  expected number of <codeph>impalad</codeph> instances
+                  installed in the cluster. There should also be one
+                    <codeph>impalad</codeph> instance installed on each
+                  DataNode.</p>
               </entry>
               <entry>
                 <p>


[impala] 04/04: IMPALA-8183: fix test_reportexecstatus_retry flakiness

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9492d451d5d5a82bfc6f4c93c3a0c6e6d0cc4981
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Tue Feb 12 22:47:52 2019 +0000

    IMPALA-8183: fix test_reportexecstatus_retry flakiness
    
    The test is designed to cause ReportExecStatus() rpcs to fail by
    backing up the control service queue. Previously, after a failed
    ReportExecStatus() we would wait 'report_status_retry_interval_ms'
    between retries, which was 100ms by default and wasn't touched by the
    test. That 100ms was right on the edge of being enough time for the
    coordinator to keep up with processing the reports, so that some would
    fail but most would succeed. It was always possible that we could hit
    IMPALA-2990 in this setup, but it was unlikely.
    
    Now, with IMPALA-4555, 'report_status_retry_interval_ms' has been removed
    and we instead wait 'status_report_interval_ms' between retries. By
    default, this is 5000ms, so it should give the coordinator even more
    time and make these issues less likely. However, the test sets
    'status_report_interval_ms' to 10ms, which isn't nearly enough time
    for the coordinator to do its processing, causing lots of the
    ReportExecStatus() rpcs to fail and making us hit IMPALA-2990 pretty
    often.
    
    The solution is to set 'status_report_interval_ms' to 100ms in the
    test, which roughly achieves the same retry frequency as before. The
    same change is made to a similar test test_reportexecstatus_timeout.
    
    Testing:
    - Ran test_reportexecstatus_retry in a loop 400 times without seeing a
      failure. It previously repro-ed for me about once per 50 runs.
    - Manually verified that both tests are still hitting the error paths
      that they are supposed to be testing.
    
    Change-Id: I7027a6e099c543705e5845ee0e5268f1f9a3fb05
    Reviewed-on: http://gerrit.cloudera.org:8080/12461
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_rpc_timeout.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/custom_cluster/test_rpc_timeout.py b/tests/custom_cluster/test_rpc_timeout.py
index d007ef4..e1a959c 100644
--- a/tests/custom_cluster/test_rpc_timeout.py
+++ b/tests/custom_cluster/test_rpc_timeout.py
@@ -128,7 +128,7 @@ class TestRPCTimeout(CustomClusterTestSuite):
 
   # Inject jitter into the RPC handler of ReportExecStatus() to trigger RPC timeout.
   @pytest.mark.execute_serially
-  @CustomClusterTestSuite.with_args("--status_report_interval_ms=10"
+  @CustomClusterTestSuite.with_args("--status_report_interval_ms=100"
       " --backend_client_rpc_timeout_ms=1000")
   def test_reportexecstatus_timeout(self, vector):
     query_options = {'debug_action': 'REPORT_EXEC_STATUS_DELAY:JITTER@1500@0.5'}
@@ -137,7 +137,7 @@ class TestRPCTimeout(CustomClusterTestSuite):
   # Use a small service queue memory limit and a single service thread to exercise
   # the retry paths in the ReportExecStatus() RPC
   @pytest.mark.execute_serially
-  @CustomClusterTestSuite.with_args("--status_report_interval_ms=10"
+  @CustomClusterTestSuite.with_args("--status_report_interval_ms=100"
       " --control_service_queue_mem_limit=1 --control_service_num_svc_threads=1")
   def test_reportexecstatus_retry(self, vector):
     self.execute_query_verify_metrics(self.TEST_QUERY, None, 10)


[impala] 03/04: Turn off shell debug tracing for create-load-data.sh

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6938831ae98f3e9cdb63ecbf63a608c17bdc0b6b
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Thu Feb 7 14:48:07 2019 -0800

    Turn off shell debug tracing for create-load-data.sh
    
    This removes a "set -x" from testdata/bin/create-load-data.sh.
    
    Change-Id: I524ec48d0264f6180a13d6d068832809bcc86596
    Reviewed-on: http://gerrit.cloudera.org:8080/12398
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 testdata/bin/create-load-data.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index a32d44c..44452ab 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -31,7 +31,6 @@
 set -euo pipefail
 . $IMPALA_HOME/bin/report_build_error.sh
 setup_report_build_error
-set -x
 
 . ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
 . ${IMPALA_HOME}/testdata/bin/run-step.sh