You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2018/10/17 17:03:06 UTC

[11/11] impala git commit: IMPALA-7678: Reapply "IMPALA-7660: Support ECDH ciphers for debug webserver"

IMPALA-7678: Reapply "IMPALA-7660: Support ECDH ciphers for debug webserver"

This patch reverses the revert of IMPALA-7660.

The problem with IMPALA-7660 was that urllib.urlopen added the
'context' parameter in 2.7.9, so it isn't present on rhel7, which uses
2.7.5

The fix is to switch to using the 'requests' library, which supports
ssl connections on all the platforms Impala is supported on.

This patch also adds more info to the error message printed by
start-impala-cluster.py when the debug webserver cannot be reached yet
to help with debugging these issues in the future.

Testing:
- Ran full builds on rhel7, rhel6, and ubuntu16.

Change-Id: I679469ed7f27944f75004ec4b16d513e6ea6b544
Reviewed-on: http://gerrit.cloudera.org:8080/11625
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5e92d139
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5e92d139
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5e92d139

Branch: refs/heads/master
Commit: 5e92d139b951e77d3f9f355c1e46736454a654b0
Parents: 0cd9151
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Mon Oct 8 15:43:53 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Wed Oct 17 05:39:32 2018 +0000

----------------------------------------------------------------------
 be/src/thirdparty/squeasel/squeasel.c   | 36 ++++++++++++++++++++++++----
 tests/common/impala_cluster.py          | 27 +++++++++++++--------
 tests/common/impala_service.py          | 36 +++++++++++++++++-----------
 tests/custom_cluster/test_client_ssl.py | 35 ++++++++++++++++++++++-----
 tests/custom_cluster/test_redaction.py  | 22 +++++++----------
 tests/run-tests.py                      |  6 ++---
 6 files changed, 110 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/be/src/thirdparty/squeasel/squeasel.c
----------------------------------------------------------------------
diff --git a/be/src/thirdparty/squeasel/squeasel.c b/be/src/thirdparty/squeasel/squeasel.c
index 2149497..045740d 100644
--- a/be/src/thirdparty/squeasel/squeasel.c
+++ b/be/src/thirdparty/squeasel/squeasel.c
@@ -4298,11 +4298,37 @@ static int set_ssl_option(struct sq_context *ctx) {
     (void) SSL_CTX_use_certificate_chain_file(ctx->ssl_ctx, pem);
   }
 
-  if (ctx->config[SSL_CIPHERS] != NULL &&
-      (SSL_CTX_set_cipher_list(ctx->ssl_ctx, ctx->config[SSL_CIPHERS]) == 0)) {
-    cry(fc(ctx), "SSL_CTX_set_cipher_list: error setting ciphers (%s): %s",
-        ctx->config[SSL_CIPHERS], ssl_error());
-    return 0;
+  if (ctx->config[SSL_CIPHERS] != NULL) {
+    if (SSL_CTX_set_cipher_list(ctx->ssl_ctx, ctx->config[SSL_CIPHERS]) == 0) {
+      cry(fc(ctx), "SSL_CTX_set_cipher_list: error setting ciphers (%s): %s",
+          ctx->config[SSL_CIPHERS], ssl_error());
+      return 0;
+    }
+#ifndef OPENSSL_NO_ECDH
+#if OPENSSL_VERSION_NUMBER < 0x10002000L
+    // OpenSSL 1.0.1 and below only support setting a single ECDH curve at once.
+    // We choose prime256v1 because it's the first curve listed in the "modern
+    // compatibility" section of the Mozilla Server Side TLS recommendations,
+    // accessed Feb. 2017.
+    EC_KEY* ecdh = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
+    if (ecdh == NULL) {
+      cry(fc(ctx), "EC_KEY_new_by_curve_name: %s", ssl_error());
+    }
+
+    int rc = SSL_CTX_set_tmp_ecdh(ctx->ssl_ctx, ecdh);
+    if (rc <= 0) {
+      cry(fc(ctx), "SSL_CTX_set_tmp_ecdh: %s", ssl_error());
+    }
+#elif OPENSSL_VERSION_NUMBER < 0x10100000L
+    // OpenSSL 1.0.2 provides the set_ecdh_auto API which internally figures out
+    // the best curve to use.
+    int rc = SSL_CTX_set_ecdh_auto(ctx->ssl_ctx, 1);
+    if (rc <= 0) {
+      cry(fc(ctx), "SSL_CTX_set_ecdh_auto: %s", ssl_error());
+    }
+#endif
+#endif
+
   }
 
   return 1;

http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/common/impala_cluster.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index f25c8ed..f0c1f8f 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -215,13 +215,19 @@ class Process(object):
 
 # Base class for all Impala processes
 class BaseImpalaProcess(Process):
-  def __init__(self, cmd, hostname):
+  def __init__(self, cmd):
     super(BaseImpalaProcess, self).__init__(cmd)
-    self.hostname = hostname
+    self.hostname = self._get_hostname()
 
   def _get_webserver_port(self, default=None):
     return int(self._get_arg_value('webserver_port', default))
 
+  def _get_webserver_certificate_file(self):
+    return self._get_arg_value("webserver_certificate_file", "")
+
+  def _get_hostname(self):
+    return self._get_arg_value("hostname", socket.gethostname())
+
   def _get_arg_value(self, arg_name, default=None):
     """Gets the argument value for given argument name"""
     for arg in self.cmd:
@@ -235,11 +241,12 @@ class BaseImpalaProcess(Process):
 # Represents an impalad process
 class ImpaladProcess(BaseImpalaProcess):
   def __init__(self, cmd):
-    super(ImpaladProcess, self).__init__(cmd, socket.gethostname())
+    super(ImpaladProcess, self).__init__(cmd)
     self.service = ImpaladService(self.hostname, self._get_webserver_port(default=25000),
                                   self.__get_beeswax_port(default=21000),
                                   self.__get_be_port(default=22000),
-                                  self.__get_hs2_port(default=21050))
+                                  self.__get_hs2_port(default=21050),
+                                  self._get_webserver_certificate_file())
 
   def __get_beeswax_port(self, default=None):
     return int(self._get_arg_value('beeswax_port', default))
@@ -263,17 +270,17 @@ class ImpaladProcess(BaseImpalaProcess):
 # Represents a statestored process
 class StateStoreProcess(BaseImpalaProcess):
   def __init__(self, cmd):
-    super(StateStoreProcess, self).__init__(cmd, socket.gethostname())
-    self.service =\
-        StateStoredService(self.hostname, self._get_webserver_port(default=25010))
+    super(StateStoreProcess, self).__init__(cmd)
+    self.service = StateStoredService(self.hostname,
+        self._get_webserver_port(default=25010), self._get_webserver_certificate_file())
 
 
 # Represents a catalogd process
 class CatalogdProcess(BaseImpalaProcess):
   def __init__(self, cmd):
-    super(CatalogdProcess, self).__init__(cmd, socket.gethostname())
-    self.service = CatalogdService(self.hostname,
-        self._get_webserver_port(default=25020), self.__get_port(default=26000))
+    super(CatalogdProcess, self).__init__(cmd)
+    self.service = CatalogdService(self.hostname, self._get_webserver_port(default=25020),
+        self._get_webserver_certificate_file(), self.__get_port(default=26000))
 
   def __get_port(self, default=None):
     return int(self._get_arg_value('catalog_service_port', default))

http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/common/impala_service.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index 16419ae..cfd4b55 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -22,7 +22,7 @@
 import json
 import logging
 import re
-import urllib
+import requests
 from time import sleep, time
 
 from tests.common.impala_connection import create_connection, create_ldap_connection
@@ -41,31 +41,36 @@ LOG.setLevel(level=logging.DEBUG)
 # Base class for all Impala services
 # TODO: Refactor the retry/timeout logic into a common place.
 class BaseImpalaService(object):
-  def __init__(self, hostname, webserver_port):
+  def __init__(self, hostname, webserver_port, webserver_certificate_file):
     self.hostname = hostname
     self.webserver_port = webserver_port
+    self.webserver_certificate_file = webserver_certificate_file
 
   def open_debug_webpage(self, page_name, timeout=10, interval=1):
     start_time = time()
 
     while (time() - start_time < timeout):
       try:
-        return urllib.urlopen("http://%s:%d/%s" %
-            (self.hostname, int(self.webserver_port), page_name))
-      except Exception:
-        LOG.info("Debug webpage not yet available.")
+        protocol = "http"
+        if self.webserver_certificate_file != "":
+          protocol = "https"
+        url = "%s://%s:%d/%s" % \
+            (protocol, self.hostname, int(self.webserver_port), page_name)
+        return requests.get(url, verify=self.webserver_certificate_file)
+      except Exception as e:
+        LOG.info("Debug webpage not yet available: %s", str(e))
       sleep(interval)
     assert 0, 'Debug webpage did not become available in expected time.'
 
   def read_debug_webpage(self, page_name, timeout=10, interval=1):
-    return self.open_debug_webpage(page_name, timeout=timeout, interval=interval).read()
+    return self.open_debug_webpage(page_name, timeout=timeout, interval=interval).text
 
   def get_thrift_profile(self, query_id, timeout=10, interval=1):
     """Returns thrift profile of the specified query ID, if available"""
     page_name = "query_profile_encoded?query_id=%s" % (query_id)
     try:
       response = self.open_debug_webpage(page_name, timeout=timeout, interval=interval)
-      tbuf = response.read()
+      tbuf = response.text
     except Exception as e:
       LOG.info("Thrift profile for query %s not yet available: %s", query_id, str(e))
       return None
@@ -166,8 +171,9 @@ class BaseImpalaService(object):
 # new connections or accessing the debug webpage.
 class ImpaladService(BaseImpalaService):
   def __init__(self, hostname, webserver_port=25000, beeswax_port=21000, be_port=22000,
-               hs2_port=21050):
-    super(ImpaladService, self).__init__(hostname, webserver_port)
+               hs2_port=21050, webserver_certificate_file=""):
+    super(ImpaladService, self).__init__(
+        hostname, webserver_port, webserver_certificate_file)
     self.beeswax_port = beeswax_port
     self.be_port = be_port
     self.hs2_port = hs2_port
@@ -327,8 +333,9 @@ class ImpaladService(BaseImpalaService):
 # Allows for interacting with the StateStore service to perform operations such as
 # accessing the debug webpage.
 class StateStoredService(BaseImpalaService):
-  def __init__(self, hostname, webserver_port):
-    super(StateStoredService, self).__init__(hostname, webserver_port)
+  def __init__(self, hostname, webserver_port, webserver_certificate_file):
+    super(StateStoredService, self).__init__(
+        hostname, webserver_port, webserver_certificate_file)
 
   def wait_for_live_subscribers(self, num_subscribers, timeout=15, interval=1):
     self.wait_for_metric_value('statestore.live-backends', num_subscribers,
@@ -338,8 +345,9 @@ class StateStoredService(BaseImpalaService):
 # Allows for interacting with the Catalog service to perform operations such as
 # accessing the debug webpage.
 class CatalogdService(BaseImpalaService):
-  def __init__(self, hostname, webserver_port, service_port):
-    super(CatalogdService, self).__init__(hostname, webserver_port)
+  def __init__(self, hostname, webserver_port, webserver_certificate_file, service_port):
+    super(CatalogdService, self).__init__(
+        hostname, webserver_port, webserver_certificate_file)
     self.service_port = service_port
 
   def get_catalog_version(self, timeout=10, interval=1):

http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/custom_cluster/test_client_ssl.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_client_ssl.py b/tests/custom_cluster/test_client_ssl.py
index b6f7e04..edea640 100644
--- a/tests/custom_cluster/test_client_ssl.py
+++ b/tests/custom_cluster/test_client_ssl.py
@@ -19,6 +19,7 @@
 import logging
 import os
 import pytest
+import requests
 import signal
 import ssl
 import socket
@@ -110,13 +111,28 @@ class TestClientSsl(CustomClusterTestSuite):
     assert "Query Status: Cancelled" in result.stdout
     assert impalad.wait_for_num_in_flight_queries(0)
 
+  WEBSERVER_SSL_ARGS = ("--webserver_certificate_file=%(cert_dir)s/server-cert.pem "
+                        "--webserver_private_key_file=%(cert_dir)s/server-key.pem "
+                        "--hostname=localhost"  # Must match hostname in certificate
+                        % {'cert_dir': CERT_DIR})
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(impalad_args=WEBSERVER_SSL_ARGS,
+                                    statestored_args=WEBSERVER_SSL_ARGS,
+                                    catalogd_args=WEBSERVER_SSL_ARGS)
+  def test_webserver_ssl(self):
+    "Tests that the debug web pages are reachable when run with ssl."
+    self._verify_ssl_webserver()
+
   # Test that the shell can connect to a ECDH only cluster.
-  TLS_ECDH_ARGS = ("--ssl_client_ca_certificate=%s/server-cert.pem "
-                  "--ssl_server_certificate=%s/server-cert.pem "
-                  "--ssl_private_key=%s/server-key.pem "
-                  "--hostname=localhost "  # Required to match hostname in certificate"
-                  "--ssl_cipher_list=ECDHE-RSA-AES128-GCM-SHA256 "
-                  % (CERT_DIR, CERT_DIR, CERT_DIR))
+  TLS_ECDH_ARGS = ("--ssl_client_ca_certificate=%(cert_dir)s/server-cert.pem "
+                   "--ssl_server_certificate=%(cert_dir)s/server-cert.pem "
+                   "--ssl_private_key=%(cert_dir)s/server-key.pem "
+                   "--hostname=localhost "  # Must match hostname in certificate
+                   "--ssl_cipher_list=ECDHE-RSA-AES128-GCM-SHA256 "
+                   "--webserver_certificate_file=%(cert_dir)s/server-cert.pem "
+                   "--webserver_private_key_file=%(cert_dir)s/server-key.pem "
+                   % {'cert_dir': CERT_DIR})
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(impalad_args=TLS_ECDH_ARGS,
@@ -128,6 +144,7 @@ class TestClientSsl(CustomClusterTestSuite):
   def test_tls_ecdh(self, vector):
     self._verify_negative_cases()
     self._validate_positive_cases("%s/server-cert.pem" % self.CERT_DIR)
+    self._verify_ssl_webserver()
 
   # Test that the shell can connect to a TLS1.2 only cluster, and for good measure
   # restrict the cipher suite to just one choice.
@@ -209,3 +226,9 @@ class TestClientSsl(CustomClusterTestSuite):
       result = run_impala_shell_cmd(shell_options)
       for msg in [self.SSL_ENABLED, self.CONNECTED, self.FETCHED]:
         assert msg in result.stderr
+
+  def _verify_ssl_webserver(self):
+    for port in ["25000", "25010", "25020"]:
+      url = "https://localhost:%s" % port
+      response = requests.get(url, verify="%s/server-cert.pem" % self.CERT_DIR)
+      assert response.status_code == requests.codes.ok, url

http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/custom_cluster/test_redaction.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_redaction.py b/tests/custom_cluster/test_redaction.py
index 7789e06..3bf831e 100644
--- a/tests/custom_cluster/test_redaction.py
+++ b/tests/custom_cluster/test_redaction.py
@@ -109,10 +109,9 @@ class TestRedaction(CustomClusterTestSuite, unittest.TestCase):
     # TODO: The HS2 interface may be better about exposing the query handle even if a
     #       query fails. Maybe investigate that after the switch to HS2.
     regex = re.compile(r'query_id=(\w+:\w+)')
-    for line in self.create_impala_service().open_debug_webpage('queries'):
-      match = regex.search(line)
-      if match:
-        return match.group(1)
+    match = regex.search(self.create_impala_service().read_debug_webpage('queries'))
+    if match:
+      return match.group(1)
     raise Exception('Unable to find any query id')
 
   def assert_server_fails_to_start(self, rules, start_options, expected_error_message):
@@ -147,9 +146,8 @@ class TestRedaction(CustomClusterTestSuite, unittest.TestCase):
       for response_format in ('html', 'json'):
         # The 'html' param is actually ignored by the server.
         url = page + '?query_id=' + query_id + "&" + response_format
-        results = grep_file(impala_service.open_debug_webpage(url), unredacted_value)
-        assert not results, "Web page %s should not contain '%s' but does" \
-            % (url, unredacted_value)
+        assert unredacted_value not in impala_service.read_debug_webpage(url), \
+            "Web page %s should not contain '%s' but does" % (url, unredacted_value)
     # But the redacted value should be shown.
     self.assert_web_ui_contains(query_id, redacted_value)
 
@@ -159,17 +157,15 @@ class TestRedaction(CustomClusterTestSuite, unittest.TestCase):
     impala_service = self.create_impala_service()
     for page in ('queries', 'query_stmt', 'query_plan_text', 'query_profile'):
       url = '%s?query_id=%s' % (page, query_id)
-      results = grep_file(impala_service.open_debug_webpage(url), search)
-      assert results, "Web page %s should contain '%s' but does not" \
-          % (url, search)
+      assert search in impala_service.read_debug_webpage(url), \
+          "Web page %s should contain '%s' but does not" % (url, search)
 
   def assert_query_profile_contains(self, query_id, search):
     ''' Asserts that the query profile for 'query_id' contains 'search' string'''
     impala_service = self.create_impala_service()
     url = 'query_profile?query_id=%s' % query_id
-    results = grep_file(impala_service.open_debug_webpage(url), search)
-    assert results, "Query profile %s should contain '%s' but does not" \
-        % (url, search)
+    assert search in impala_service.read_debug_webpage(url), \
+        "Query profile %s should contain '%s' but does not" % (url, search)
 
   @pytest.mark.execute_serially
   def test_bad_rules(self):

http://git-wip-us.apache.org/repos/asf/impala/blob/5e92d139/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index c2fd420..9aaeaa5 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -214,10 +214,8 @@ def print_metrics(substring):
     print ">" * 80
     port = impalad._get_webserver_port()
     print "connections metrics for impalad at port {0}:".format(port)
-    debug_info = json.loads(ImpaladService(
-            impalad.hostname,
-            webserver_port=port)
-            .open_debug_webpage('metrics?json').read())
+    debug_info = json.loads(ImpaladService(impalad.hostname, webserver_port=port)
+        .read_debug_webpage('metrics?json'))
     for metric in debug_info['metric_group']['metrics']:
       if substring in metric['name']:
         print json.dumps(metric, indent=1)