You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/05/01 22:43:44 UTC

[1/2] impala git commit: IMPALA-5690: Part 2: Upgrade thrift to 0.9.3-p4

Repository: impala
Updated Branches:
  refs/heads/master 202807e2f -> 583dcc31b


IMPALA-5690: Part 2: Upgrade thrift to 0.9.3-p4

Dependency changes:
- BE and python use thrift 0.9.3-p4 from native-toolchain.
- FE uses thrift 0.9.3 from apache maven repo.
- Fb303 and http components dependencies are no longer needed in FE and
  are removed.
- The minimum openssl version requirement is increased to 1.0.1.

Configuration change:
- Thrift codegen option movable_type is enabled. New code no longer
  needs to use std::swap to avoid copying.

Cherry-picks: not for 2.x

Change-Id: I639227721502eaa10398d9490ff6ac63aa71b3a6
Reviewed-on: http://gerrit.cloudera.org:8080/9300
Reviewed-by: Tianyi Wang <tw...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0b6be850
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0b6be850
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0b6be850

Branch: refs/heads/master
Commit: 0b6be850ca640f17aba3212f96993ca7f77fc0a7
Parents: 202807e
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Mon Feb 12 16:08:26 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue May 1 22:18:54 2018 +0000

----------------------------------------------------------------------
 CMakeLists.txt                              |  2 +-
 be/src/common/init.cc                       |  6 ++
 be/src/rpc/TAcceptQueueServer.cpp           | 60 ++++++++--------
 be/src/rpc/TAcceptQueueServer.h             | 90 ++----------------------
 be/src/rpc/authentication.cc                |  2 +-
 be/src/rpc/thrift-server-test.cc            | 18 ++---
 be/src/rpc/thrift-server.cc                 | 15 ++--
 be/src/rpc/thrift-server.h                  |  2 +-
 be/src/rpc/thrift-thread.h                  |  7 --
 be/src/rpc/thrift-util.cc                   |  4 +-
 bin/impala-config.sh                        |  6 +-
 buildall.sh                                 |  3 +
 common/thrift/CMakeLists.txt                | 24 ++++---
 fe/pom.xml                                  | 21 ------
 infra/python/deps/compiled-requirements.txt |  3 -
 15 files changed, 82 insertions(+), 181 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 43cd258..0246602 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -156,7 +156,7 @@ message(STATUS "Boost libraries: " ${Boost_LIBRARIES})
 # platform that this build might ultimately be deployed on. If no suitable OpenSSL is
 # found, use the toolchain version - but beware that Impala will then fail to start if
 # deployed on a system with a lower version.
-find_package(OpenSSL 1.0.0 QUIET)
+find_package(OpenSSL 1.0.1 QUIET)
 if (NOT ${OPENSSL_FOUND})
   set_dep_root(OPENSSL)
   set(OPENSSL_FOUND ON)

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/common/init.cc
----------------------------------------------------------------------
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index 745112a..25bfbda 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -17,6 +17,7 @@
 
 #include "common/init.h"
 
+#include <csignal>
 #include <gperftools/heap-profiler.h>
 #include <gperftools/malloc_extension.h>
 
@@ -232,6 +233,11 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,
   LOG(INFO) << "Using hostname: " << FLAGS_hostname;
   impala::LogCommandLineFlags();
 
+  // When a process calls send(2) on a socket closed on the other end, linux generates
+  // SIGPIPE. MSG_NOSIGNAL can be passed to send(2) to disable it, which thrift does. But
+  // OpenSSL doesn't have place for this parameter so the signal must be disabled
+  // manually.
+  signal(SIGPIPE, SIG_IGN);
   InitThriftLogging();
 
   LOG(INFO) << CpuInfo::DebugString();

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/TAcceptQueueServer.cpp
----------------------------------------------------------------------
diff --git a/be/src/rpc/TAcceptQueueServer.cpp b/be/src/rpc/TAcceptQueueServer.cpp
index 5c1b1da..0f89a7e 100644
--- a/be/src/rpc/TAcceptQueueServer.cpp
+++ b/be/src/rpc/TAcceptQueueServer.cpp
@@ -22,16 +22,7 @@
 #include "rpc/TAcceptQueueServer.h"
 
 #include <thrift/concurrency/PlatformThreadFactory.h>
-#include <thrift/transport/TTransportException.h>
 
-#include <iostream>
-#include <string>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include "common/status.h"
 #include "util/thread-pool.h"
 
 DEFINE_int32(accepted_cnxn_queue_depth, 10000,
@@ -56,22 +47,22 @@ class TAcceptQueueServer::Task : public Runnable {
       shared_ptr<TProtocol> input, shared_ptr<TProtocol> output,
       shared_ptr<TTransport> transport)
     : server_(server),
-      processor_(processor),
-      input_(input),
-      output_(output),
-      transport_(transport) {}
+      processor_(std::move(processor)),
+      input_(std::move(input)),
+      output_(std::move(output)),
+      transport_(std::move(transport)) {}
 
-  ~Task() {}
+  ~Task() override = default;
 
-  void run() {
+  void run() override {
     boost::shared_ptr<TServerEventHandler> eventHandler = server_.getEventHandler();
-    void* connectionContext = NULL;
-    if (eventHandler != NULL) {
+    void* connectionContext = nullptr;
+    if (eventHandler != nullptr) {
       connectionContext = eventHandler->createContext(input_, output_);
     }
     try {
       for (;;) {
-        if (eventHandler != NULL) {
+        if (eventHandler != nullptr) {
           eventHandler->processContext(connectionContext, transport_);
         }
         if (!processor_->process(input_, output_, connectionContext)
@@ -90,7 +81,7 @@ class TAcceptQueueServer::Task : public Runnable {
     } catch (...) {
       GlobalOutput("TAcceptQueueServer uncaught exception.");
     }
-    if (eventHandler != NULL) {
+    if (eventHandler != nullptr) {
       eventHandler->deleteContext(connectionContext, input_, output_);
     }
 
@@ -125,18 +116,27 @@ class TAcceptQueueServer::Task : public Runnable {
   shared_ptr<TTransport> transport_;
 };
 
+TAcceptQueueServer::TAcceptQueueServer(const boost::shared_ptr<TProcessor>& processor,
+    const boost::shared_ptr<TServerTransport>& serverTransport,
+    const boost::shared_ptr<TTransportFactory>& transportFactory,
+    const boost::shared_ptr<TProtocolFactory>& protocolFactory,
+    const boost::shared_ptr<ThreadFactory>& threadFactory,
+    int32_t maxTasks)
+    : TServer(processor, serverTransport, transportFactory, protocolFactory),
+      threadFactory_(threadFactory), maxTasks_(maxTasks) {
+  init();
+}
+
 void TAcceptQueueServer::init() {
   stop_ = false;
   metrics_enabled_ = false;
-  queue_size_metric_ = NULL;
+  queue_size_metric_ = nullptr;
 
   if (!threadFactory_) {
     threadFactory_.reset(new PlatformThreadFactory);
   }
 }
 
-TAcceptQueueServer::~TAcceptQueueServer() {}
-
 // New.
 void TAcceptQueueServer::SetupConnection(boost::shared_ptr<TTransport> client) {
   if (metrics_enabled_) queue_size_metric_->Increment(-1);
@@ -174,25 +174,25 @@ void TAcceptQueueServer::SetupConnection(boost::shared_ptr<TTransport> client) {
     // Start the thread!
     thread->start();
   } catch (TException& tx) {
-    if (inputTransport != NULL) {
+    if (inputTransport != nullptr) {
       inputTransport->close();
     }
-    if (outputTransport != NULL) {
+    if (outputTransport != nullptr) {
       outputTransport->close();
     }
-    if (client != NULL) {
+    if (client != nullptr) {
       client->close();
     }
     string errStr = string("TAcceptQueueServer: Caught TException: ") + tx.what();
     GlobalOutput(errStr.c_str());
   } catch (string s) {
-    if (inputTransport != NULL) {
+    if (inputTransport != nullptr) {
       inputTransport->close();
     }
-    if (outputTransport != NULL) {
+    if (outputTransport != nullptr) {
       outputTransport->close();
     }
-    if (client != NULL) {
+    if (client != nullptr) {
       client->close();
     }
     string errStr = "TAcceptQueueServer: Unknown exception: " + s;
@@ -205,7 +205,7 @@ void TAcceptQueueServer::serve() {
   serverTransport_->listen();
 
   // Run the preServe event
-  if (eventHandler_ != NULL) {
+  if (eventHandler_ != nullptr) {
     eventHandler_->preServe();
   }
 
@@ -283,7 +283,7 @@ void TAcceptQueueServer::serve() {
 }
 
 void TAcceptQueueServer::InitMetrics(MetricGroup* metrics, const string& key_prefix) {
-  DCHECK(metrics != NULL);
+  DCHECK(metrics != nullptr);
   stringstream queue_size_ss;
   queue_size_ss << key_prefix << ".connection-setup-queue-size";
   queue_size_metric_ = metrics->AddGauge(queue_size_ss.str(), 0);

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/TAcceptQueueServer.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/TAcceptQueueServer.h b/be/src/rpc/TAcceptQueueServer.h
index 61335f9..4d066ca 100644
--- a/be/src/rpc/TAcceptQueueServer.h
+++ b/be/src/rpc/TAcceptQueueServer.h
@@ -52,47 +52,18 @@ using apache::thrift::concurrency::ThreadFactory;
 class TAcceptQueueServer : public TServer {
  public:
   class Task;
-
-  // TODO: Determine which c'tors are used and remove unused ones.
-  template <typename ProcessorFactory>
-  TAcceptQueueServer(const boost::shared_ptr<ProcessorFactory>& processorFactory,
-      const boost::shared_ptr<TServerTransport>& serverTransport,
-      const boost::shared_ptr<TTransportFactory>& transportFactory,
-      const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-      int32_t maxTasks = 0,
-      THRIFT_OVERLOAD_IF(ProcessorFactory, TProcessorFactory));
-
-  template <typename ProcessorFactory>
-  TAcceptQueueServer(const boost::shared_ptr<ProcessorFactory>& processorFactory,
-      const boost::shared_ptr<TServerTransport>& serverTransport,
-      const boost::shared_ptr<TTransportFactory>& transportFactory,
-      const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-      const boost::shared_ptr<ThreadFactory>& threadFactory,
-      int32_t maxTasks = 0,
-      THRIFT_OVERLOAD_IF(ProcessorFactory, TProcessorFactory));
-
-  template <typename Processor>
-  TAcceptQueueServer(const boost::shared_ptr<Processor>& processor,
-      const boost::shared_ptr<TServerTransport>& serverTransport,
-      const boost::shared_ptr<TTransportFactory>& transportFactory,
-      const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-      int32_t maxTasks = 0,
-      THRIFT_OVERLOAD_IF(Processor, TProcessor));
-
-  template <typename Processor>
-  TAcceptQueueServer(const boost::shared_ptr<Processor>& processor,
+  TAcceptQueueServer(const boost::shared_ptr<TProcessor>& processor,
       const boost::shared_ptr<TServerTransport>& serverTransport,
       const boost::shared_ptr<TTransportFactory>& transportFactory,
       const boost::shared_ptr<TProtocolFactory>& protocolFactory,
       const boost::shared_ptr<ThreadFactory>& threadFactory,
-      int32_t maxTasks = 0,
-      THRIFT_OVERLOAD_IF(Processor, TProcessor));
+      int32_t maxTasks = 0);
 
-  virtual ~TAcceptQueueServer();
+  ~TAcceptQueueServer() override = default;
 
-  virtual void serve();
+  void serve() override;
 
-  void stop() {
+  void stop() override {
     stop_ = true;
     serverTransport_->interrupt();
   }
@@ -126,57 +97,6 @@ class TAcceptQueueServer : public TServer {
   impala::IntGauge* queue_size_metric_;
 };
 
-template <typename ProcessorFactory>
-TAcceptQueueServer::TAcceptQueueServer(
-    const boost::shared_ptr<ProcessorFactory>& processorFactory,
-    const boost::shared_ptr<TServerTransport>& serverTransport,
-    const boost::shared_ptr<TTransportFactory>& transportFactory,
-    const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-    int32_t maxTasks,
-    THRIFT_OVERLOAD_IF_DEFN(ProcessorFactory, TProcessorFactory))
-  : TServer(processorFactory, serverTransport, transportFactory, protocolFactory),
-    maxTasks_(maxTasks) {
-  init();
-}
-
-template <typename ProcessorFactory>
-TAcceptQueueServer::TAcceptQueueServer(
-    const boost::shared_ptr<ProcessorFactory>& processorFactory,
-    const boost::shared_ptr<TServerTransport>& serverTransport,
-    const boost::shared_ptr<TTransportFactory>& transportFactory,
-    const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-    const boost::shared_ptr<ThreadFactory>& threadFactory,
-    int32_t maxTasks,
-    THRIFT_OVERLOAD_IF_DEFN(ProcessorFactory, TProcessorFactory))
-  : TServer(processorFactory, serverTransport, transportFactory, protocolFactory),
-    threadFactory_(threadFactory), maxTasks_(maxTasks) {
-  init();
-}
-
-template <typename Processor>
-TAcceptQueueServer::TAcceptQueueServer(const boost::shared_ptr<Processor>& processor,
-    const boost::shared_ptr<TServerTransport>& serverTransport,
-    const boost::shared_ptr<TTransportFactory>& transportFactory,
-    const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-    int32_t maxTasks,
-    THRIFT_OVERLOAD_IF_DEFN(Processor, TProcessor))
-  : TServer(processor, serverTransport, transportFactory, protocolFactory),
-    maxTasks_(maxTasks) {
-  init();
-}
-
-template <typename Processor>
-TAcceptQueueServer::TAcceptQueueServer(const boost::shared_ptr<Processor>& processor,
-    const boost::shared_ptr<TServerTransport>& serverTransport,
-    const boost::shared_ptr<TTransportFactory>& transportFactory,
-    const boost::shared_ptr<TProtocolFactory>& protocolFactory,
-    const boost::shared_ptr<ThreadFactory>& threadFactory,
-    int32_t maxTasks,
-    THRIFT_OVERLOAD_IF_DEFN(Processor, TProcessor))
-  : TServer(processor, serverTransport, transportFactory, protocolFactory),
-    threadFactory_(threadFactory), maxTasks_(maxTasks) {
-  init();
-}
 } // namespace server
 } // namespace thrift
 } // namespace apache

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/authentication.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/authentication.cc b/be/src/rpc/authentication.cc
index 3eb77bb..461f155 100644
--- a/be/src/rpc/authentication.cc
+++ b/be/src/rpc/authentication.cc
@@ -904,7 +904,7 @@ Status NoAuthProvider::WrapClientTransport(const string& hostname,
 }
 
 Status AuthManager::Init() {
-  ssl_socket_factory_.reset(new TSSLSocketFactory());
+  ssl_socket_factory_.reset(new TSSLSocketFactory(TLSv1_0));
 
   bool use_ldap = false;
   const string excl_msg = "--$0 and --$1 are mutually exclusive "

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/thrift-server-test.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-server-test.cc b/be/src/rpc/thrift-server-test.cc
index 03bd295..8df75f1 100644
--- a/be/src/rpc/thrift-server-test.cc
+++ b/be/src/rpc/thrift-server-test.cc
@@ -331,7 +331,7 @@ TEST(SslTest, MismatchedCiphers) {
 TEST(SslTest, StringToProtocol) {
   SSLProtocol version;
   map<string, SSLProtocol> TEST_CASES = {
-      {"tlsv1", TLSv1_0_plus}, {"tlsv1.1", TLSv1_1_plus}, {"tlsv1.2", TLSv1_2_plus}};
+      {"tlsv1", TLSv1_0}, {"tlsv1.1", TLSv1_1}, {"tlsv1.2", TLSv1_2}};
   for (auto p : TEST_CASES) {
     EXPECT_OK(SSLProtoVersions::StringToProtocol(p.first, &version));
     EXPECT_EQ(p.second, version) << "TLS version: " << p.first;
@@ -351,16 +351,12 @@ TEST(SslTest, TLSVersionControl) {
     set<SSLProtocol> whitelist;
   };
 
-  // Test all configurations supported by Thrift, even if some won't work with the linked
-  // OpenSSL(). We catch those by checking IsSupported() for both the client and ther
-  // server.
-  vector<Config> configs = {{TLSv1_0, {TLSv1_0, TLSv1_0_plus}},
-      {TLSv1_0_plus,
-          {TLSv1_0, TLSv1_1, TLSv1_2, TLSv1_0_plus, TLSv1_1_plus, TLSv1_2_plus}},
-      {TLSv1_1, {TLSv1_1_plus, TLSv1_1, TLSv1_0_plus}},
-      {TLSv1_1_plus, {TLSv1_1, TLSv1_2, TLSv1_0_plus, TLSv1_1_plus, TLSv1_2_plus}},
-      {TLSv1_2, {TLSv1_2, TLSv1_0_plus, TLSv1_1_plus, TLSv1_2_plus}},
-      {TLSv1_2_plus, {TLSv1_2, TLSv1_0_plus, TLSv1_1_plus, TLSv1_2_plus}}};
+  // All configurations supported by linked OpenSSL should work. We catch unsupported
+  // protocols by checking IsSupported() for both the client and the server.
+  vector<Config> configs = {
+      {TLSv1_0, {TLSv1_0, TLSv1_1, TLSv1_2}},
+      {TLSv1_1, {TLSv1_0, TLSv1_1, TLSv1_2}},
+      {TLSv1_2, {TLSv1_0, TLSv1_1, TLSv1_2}}};
 
   for (const auto& config : configs) {
     // For each config, start a server with the requested protocol spec, and then try to

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/thrift-server.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-server.cc b/be/src/rpc/thrift-server.cc
index 8f948cd..891a334 100644
--- a/be/src/rpc/thrift-server.cc
+++ b/be/src/rpc/thrift-server.cc
@@ -65,10 +65,11 @@ namespace impala {
 
 // Specifies the allowed set of values for --ssl_minimum_version. To keep consistent with
 // Apache Kudu, specifying a single version enables all versions including and succeeding
-// that one (e.g. TLSv1.1 enables v1.1 and v1.2). Specifying TLSv1.1_only enables only
-// v1.1.
+// that one (e.g. TLSv1.1 enables v1.1 and v1.2).
 map<string, SSLProtocol> SSLProtoVersions::PROTO_MAP = {
-    {"tlsv1.2", TLSv1_2_plus}, {"tlsv1.1", TLSv1_1_plus}, {"tlsv1", TLSv1_0_plus}};
+    {"tlsv1.2", TLSv1_2},
+    {"tlsv1.1", TLSv1_1},
+    {"tlsv1", TLSv1_0}};
 
 Status SSLProtoVersions::StringToProtocol(const string& in, SSLProtocol* protocol) {
   for (const auto& proto : SSLProtoVersions::PROTO_MAP) {
@@ -82,15 +83,15 @@ Status SSLProtoVersions::StringToProtocol(const string& in, SSLProtocol* protoco
 }
 
 bool SSLProtoVersions::IsSupported(const SSLProtocol& protocol) {
-  DCHECK_LE(protocol, TLSv1_2_plus);
+  DCHECK_LE(protocol, TLSv1_2);
   int max_supported_tls_version = MaxSupportedTlsVersion();
   DCHECK_GE(max_supported_tls_version, TLS1_VERSION);
 
   switch (max_supported_tls_version) {
     case TLS1_VERSION:
-      return protocol == TLSv1_0_plus || protocol == TLSv1_0;
+      return protocol == TLSv1_0;
     case TLS1_1_VERSION:
-      return protocol != TLSv1_2_plus && protocol != TLSv1_2;
+      return protocol == TLSv1_0 || protocol == TLSv1_1;
     default:
       DCHECK_GE(max_supported_tls_version, TLS1_2_VERSION);
       return true;
@@ -437,7 +438,7 @@ Status ThriftServer::Start() {
   RETURN_IF_ERROR(CreateSocket(&server_socket));
   RETURN_IF_ERROR(auth_provider_->GetServerTransportFactory(&transport_factory));
   server_.reset(new TAcceptQueueServer(processor_, server_socket, transport_factory,
-        protocol_factory, thread_factory, max_concurrent_connections_));
+      protocol_factory, thread_factory, max_concurrent_connections_));
   if (metrics_ != NULL) {
     (static_cast<TAcceptQueueServer*>(server_.get()))->InitMetrics(metrics_,
         Substitute("impala.thrift-server.$0", name_));

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/thrift-server.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-server.h b/be/src/rpc/thrift-server.h
index d95d90e..76afa3a 100644
--- a/be/src/rpc/thrift-server.h
+++ b/be/src/rpc/thrift-server.h
@@ -320,7 +320,7 @@ class ThriftServerBuilder {
 
   bool enable_ssl_ = false;
   apache::thrift::transport::SSLProtocol version_ =
-      apache::thrift::transport::SSLProtocol::TLSv1_0_plus;
+      apache::thrift::transport::SSLProtocol::TLSv1_0;
   std::string certificate_;
   std::string private_key_;
   std::string pem_password_cmd_;

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/thrift-thread.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-thread.h b/be/src/rpc/thrift-thread.h
index 14943b2..468964b 100644
--- a/be/src/rpc/thrift-thread.h
+++ b/be/src/rpc/thrift-thread.h
@@ -18,13 +18,6 @@
 #ifndef IMPALA_RPC_THRIFT_THREAD_H
 #define IMPALA_RPC_THRIFT_THREAD_H
 
-/// This is required for thrift's Thread.h to compile. Thrift also includes an experimental
-/// Boost-based thread implementation that is enabled by #define USE_BOOST_THREAD. It is
-/// important that USE_BOOST_THREAD is defined if and only if it was defined when compiling
-/// Thrift itself, as it causes the size of id_t to change, which will cause crashes if
-/// it's different sizes in Impala and in Thrift.
-#define HAVE_PTHREAD_H
-
 #include <thrift/concurrency/Thread.h>
 
 #include "common/logging.h"

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/be/src/rpc/thrift-util.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-util.cc b/be/src/rpc/thrift-util.cc
index c0ba537..a9fd0dc 100644
--- a/be/src/rpc/thrift-util.cc
+++ b/be/src/rpc/thrift-util.cc
@@ -57,14 +57,14 @@ using namespace apache::thrift::server;
 using namespace apache::thrift::protocol;
 using namespace apache::thrift::concurrency;
 
-// IsRecvTimeoutTException() and IsSendFailTException() make assumption about the
+// IsRecvTimeoutTException() and IsConnResetTException() make assumption about the
 // implementation of read(), write() and write_partial() in TSocket.cpp and those
 // functions may change between different versions of Thrift.
 static_assert(PACKAGE_VERSION[0] == '0', "");
 static_assert(PACKAGE_VERSION[1] == '.', "");
 static_assert(PACKAGE_VERSION[2] == '9', "");
 static_assert(PACKAGE_VERSION[3] == '.', "");
-static_assert(PACKAGE_VERSION[4] == '0', "");
+static_assert(PACKAGE_VERSION[4] == '3', "");
 static_assert(PACKAGE_VERSION[5] == '\0', "");
 
 // Thrift defines operator< but does not implement it. This is a stub

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 6404c57..941beb1 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -79,7 +79,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=102-02a8e245df
+export IMPALA_TOOLCHAIN_BUILD_ID=107-764a0ddc79
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -146,9 +146,9 @@ export IMPALA_TPC_DS_VERSION=2.1.0
 unset IMPALA_TPC_DS_URL
 export IMPALA_TPC_H_VERSION=2.17.0
 unset IMPALA_TPC_H_URL
-export IMPALA_THRIFT_VERSION=0.9.0-p11
+export IMPALA_THRIFT_VERSION=0.9.3-p4
 unset IMPALA_THRIFT_URL
-export IMPALA_THRIFT_JAVA_VERSION=0.9.0
+export IMPALA_THRIFT_JAVA_VERSION=0.9.3
 unset IMPALA_THRIFT_JAVA_URL
 export IMPALA_ZLIB_VERSION=1.2.8
 unset IMPALA_ZLIB_URL

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index 56cdb9a..1805a16 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -347,6 +347,9 @@ bootstrap_dependencies() {
     echo ">>> Downloading and extracting toolchain dependencies."
     "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     echo "Toolchain bootstrap complete."
+
+    # Reset python path to include thrift
+    . "$IMPALA_HOME/bin/set-pythonpath.sh"
   fi
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/common/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/common/thrift/CMakeLists.txt b/common/thrift/CMakeLists.txt
index 53e5e46..f2cbdd6 100644
--- a/common/thrift/CMakeLists.txt
+++ b/common/thrift/CMakeLists.txt
@@ -58,14 +58,18 @@ function(THRIFT_GEN VAR)
     # It depends on hive_meta_store, which in turn depends on fb303.
     # The java dependency is handled by maven.
     # We need to generate C++ src file for the parent dependencies using the "-r" option.
-    set(CPP_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen cpp -o ${BE_OUTPUT_DIR})
+    set(CPP_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen cpp:moveable_types -o
+        ${BE_OUTPUT_DIR})
     IF (FIL STREQUAL "beeswax.thrift")
-      set(CPP_ARGS -r ${THRIFT_INCLUDE_DIR_OPTION} --gen cpp -o ${BE_OUTPUT_DIR})
+      set(CPP_ARGS -r ${CPP_ARGS})
     ENDIF(FIL STREQUAL "beeswax.thrift")
 
-    IF (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift")
-      # HiveServer2 and Parquet thrift generation
-      # Do not generate Java source because we should use the jar from Hive or Parquet
+    IF (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift" OR
+        FIL STREQUAL "ImpalaService.thrift")
+      # Do not generate Java HiveServer2 and Parquet files because we should use the jar
+      # from Hive or Parquet.
+      # Also do not generate ImpalaService.thrift because the generated code doesn't
+      # compile with hive if the thrift version in hive is 0.9.0
       add_custom_command(
         OUTPUT ${OUTPUT_BE_FILE}
         COMMAND ${THRIFT_COMPILER} ${CPP_ARGS} ${FIL}
@@ -74,7 +78,8 @@ function(THRIFT_GEN VAR)
         COMMENT "Running thrift compiler on ${FIL}"
         VERBATIM
       )
-    ELSE (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift")
+    ELSE (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift" OR
+        FIL STREQUAL "ImpalaService.thrift")
       add_custom_command(
         OUTPUT ${OUTPUT_BE_FILE}
         COMMAND ${THRIFT_COMPILER} ${CPP_ARGS} ${FIL}
@@ -84,7 +89,8 @@ function(THRIFT_GEN VAR)
         COMMENT "Running thrift compiler on ${FIL}"
         VERBATIM
     )
-    ENDIF(FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift")
+    ENDIF (FIL STREQUAL ${TCLI_SERVICE_THRIFT} OR FIL STREQUAL "parquet.thrift" OR
+        FIL STREQUAL "ImpalaService.thrift")
   endforeach(FIL)
 
   set(${VAR} ${${VAR}} PARENT_SCOPE)
@@ -134,8 +140,8 @@ file(MAKE_DIRECTORY ${PYTHON_OUTPUT_DIR})
 file(MAKE_DIRECTORY ${HIVE_THRIFT_SOURCE_DIR})
 
 # Args passed to thrift for Java gen
-set(JAVA_FE_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java:hashcode -o ${FE_OUTPUT_DIR})
-set(JAVA_EXT_DS_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java:hashcode -o ${EXT_DS_OUTPUT_DIR})
+set(JAVA_FE_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java -o ${FE_OUTPUT_DIR})
+set(JAVA_EXT_DS_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java -o ${EXT_DS_OUTPUT_DIR})
 set(PYTHON_ARGS ${THRIFT_INCLUDE_DIR_OPTION} -r --gen py -o ${PYTHON_OUTPUT_DIR})
 
 set (EXT_DATA_SRC_FILES

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/fe/pom.xml
----------------------------------------------------------------------
diff --git a/fe/pom.xml b/fe/pom.xml
index 0abd088..fc41581 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -167,27 +167,6 @@ under the License.
       <groupId>org.apache.thrift</groupId>
       <artifactId>libthrift</artifactId>
       <version>${thrift.version}</version>
-      <!-- libthrift depends httpcore 4.1.3 which does not work with KMS. To workaround
-           this problem the dependency is excluded here and we explicitly add a newer
-           httpcore dependency version. See IMPALA-4210. TODO: Find a better fix. -->
-      <exclusions>
-        <exclusion>
-          <groupId>org.apache.httpcomponents</groupId>
-          <artifactId>httpcore</artifactId>
-      </exclusion>
-    </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.httpcomponents</groupId>
-      <artifactId>httpcore</artifactId>
-      <version>${httpcomponents.core.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.thrift</groupId>
-      <artifactId>libfb303</artifactId>
-      <version>${thrift.version}</version>
     </dependency>
 
     <dependency>

http://git-wip-us.apache.org/repos/asf/impala/blob/0b6be850/infra/python/deps/compiled-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/compiled-requirements.txt b/infra/python/deps/compiled-requirements.txt
index 289a78e..2c5590e 100644
--- a/infra/python/deps/compiled-requirements.txt
+++ b/infra/python/deps/compiled-requirements.txt
@@ -27,9 +27,6 @@ impyla == 0.14.0
   bitarray == 0.8.1
   sasl == 0.1.3
   six == 1.11.0
-  # Thrift usually comes from the thirdparty dir but in case the virtualenv is needed
-  # before thirdparty is built thrift will be installed anyways.
-  thrift == 0.9.0
   thrift-sasl == 0.1.0
 psutil == 0.7.1
 # Required for Kudu:


[2/2] impala git commit: IMPALA-6882: prevent instr. hoist from CpuInfo::IsSupported()

Posted by ta...@apache.org.
IMPALA-6882: prevent instr. hoist from CpuInfo::IsSupported()

Marking the __asm__ with __volatile__ *should* prevent the compiler from
speculatively executing the instruction before the branch.

Testing:
Added a regression test that tries to emulate the problematic pattern,
but I was unable to reproduce a crash on a system with popcnt support -
there's probably some instruction scheduling differences.

Manually checked that popcnt was inside the expected branch:

  (gdb) disassemble /s impala::HdfsScanNodeBase::StopAndFinalizeCounters
  ...
  160         if (LIKELY(CpuInfo::IsSupported(CpuInfo::POPCNT))) {
     0x0000000000e7261b <+491>:   and    $0x10,%esi
     0x0000000000e7261e <+494>:   je     0xe73031 <impala::HdfsScanNodeBase::StopAndFinalizeCounters()+3073>

  be/src/util/sse-util.h:
  147       __asm__ __volatile__("popcntq %1, %0" : "=r"(result) : "mr"(a) : "cc");
     0x0000000000e72624 <+500>:   popcnt %rdx,%rdx
     0x0000000000e72629 <+505>:   movslq %edx,%rsi
  ...

Change-Id: I9ec51bdfcb9455c93ff69827929a59fcccb81b80
Reviewed-on: http://gerrit.cloudera.org:8080/10243
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/583dcc31
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/583dcc31
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/583dcc31

Branch: refs/heads/master
Commit: 583dcc31b0c1e6a4c63d72732db8cb7b86163604
Parents: 0b6be85
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Apr 30 10:09:04 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue May 1 22:41:18 2018 +0000

----------------------------------------------------------------------
 be/src/util/bit-util-test.cc | 23 +++++++++++++++++++++++
 be/src/util/sse-util.h       | 26 +++++++++++++++-----------
 2 files changed, 38 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/583dcc31/be/src/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc
index 6f70727..7c70e6e 100644
--- a/be/src/util/bit-util-test.cc
+++ b/be/src/util/bit-util-test.cc
@@ -304,6 +304,29 @@ TEST(BitUtil, RoundUpDown) {
   EXPECT_EQ(BitUtil::RoundDownNumi64(65), 1);
 }
 
+// Prevent inlining so that the compiler can't optimize out the check.
+__attribute__((noinline))
+int CpuInfoIsSupportedHoistHelper(int64_t cpu_info_flag, int arg) {
+  if (CpuInfo::IsSupported(cpu_info_flag)) {
+    // Assembly follows similar pattern to popcnt instruction but executes
+    // illegal instruction.
+    int64_t result;
+    __asm__ __volatile__("ud2" : "=a"(result): "mr"(arg): "cc");
+    return result;
+  } else {
+    return 12345;
+  }
+}
+
+// Regression test for IMPALA-6882 - make sure illegal instruction isn't hoisted out of
+// CpuInfo::IsSupported() checks. This doesn't test the bug precisely but is a canary for
+// this kind of optimization happening.
+TEST(BitUtil, CpuInfoIsSupportedHoist) {
+  constexpr int64_t CPU_INFO_FLAG = CpuInfo::SSSE3;
+  CpuInfo::TempDisable disable_sssse3(CPU_INFO_FLAG);
+  EXPECT_EQ(12345, CpuInfoIsSupportedHoistHelper(CPU_INFO_FLAG, 0));
+}
+
 }
 
 IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/impala/blob/583dcc31/be/src/util/sse-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/sse-util.h b/be/src/util/sse-util.h
index e55a248..8837718 100644
--- a/be/src/util/sse-util.h
+++ b/be/src/util/sse-util.h
@@ -72,9 +72,13 @@ namespace SSEUtil {
   };
 }
 
-/// Define the SSE 4.2 intrinsics.  The caller must first verify at runtime (or codegen
-/// IR load time) that the processor supports SSE 4.2 before calling these.  These are
-/// defined outside the namespace because the IR w/ SSE 4.2 case needs to use macros.
+/// Define the SSE 4.2 intrinsics. The caller must first verify at runtime (or codegen
+/// IR load time) that the processor supports SSE 4.2 before calling these. All __asm__
+/// blocks are marked __volatile__ to prevent hoisting the ASM out of checks for CPU
+/// support (e.g. IMPALA-6882).
+///
+/// These intrinsics are defined outside the namespace because the IR w/ SSE 4.2 case
+/// needs to use macros.
 #ifndef IR_COMPILE
 /// When compiling to native code (i.e. not IR), we cannot use the -msse4.2 compiler
 /// flag.  Otherwise, the compiler will emit SSE 4.2 instructions outside of the runtime
@@ -99,11 +103,11 @@ static inline __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int le
   /// Use asm reg rather than Yz output constraint to workaround LLVM bug 13199 -
   /// clang doesn't support Y-prefixed asm constraints.
   register volatile __m128i result asm ("xmm0");
-  __asm__ volatile ("pcmpestrm %5, %2, %1"
+  __asm__ __volatile__ ("pcmpestrm %5, %2, %1"
       : "=x"(result) : "x"(str1), "xm"(str2), "a"(len1), "d"(len2), "i"(MODE) : "cc");
 #else
   __m128i result;
-  __asm__ volatile ("pcmpestrm %5, %2, %1"
+  __asm__ __volatile__ ("pcmpestrm %5, %2, %1"
       : "=Yz"(result) : "x"(str1), "xm"(str2), "a"(len1), "d"(len2), "i"(MODE) : "cc");
 #endif
   return result;
@@ -112,35 +116,35 @@ static inline __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int le
 template<int MODE>
 static inline int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2) {
   int result;
-  __asm__("pcmpestri %5, %2, %1"
+  __asm__ __volatile__("pcmpestri %5, %2, %1"
       : "=c"(result) : "x"(str1), "xm"(str2), "a"(len1), "d"(len2), "i"(MODE) : "cc");
   return result;
 }
 
 static inline uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v) {
-  __asm__("crc32b %1, %0" : "+r"(crc) : "rm"(v));
+  __asm__ __volatile__("crc32b %1, %0" : "+r"(crc) : "rm"(v));
   return crc;
 }
 
 static inline uint32_t SSE4_crc32_u16(uint32_t crc, uint16_t v) {
-  __asm__("crc32w %1, %0" : "+r"(crc) : "rm"(v));
+  __asm__ __volatile__("crc32w %1, %0" : "+r"(crc) : "rm"(v));
   return crc;
 }
 
 static inline uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v) {
-  __asm__("crc32l %1, %0" : "+r"(crc) : "rm"(v));
+  __asm__ __volatile__("crc32l %1, %0" : "+r"(crc) : "rm"(v));
   return crc;
 }
 
 static inline uint32_t SSE4_crc32_u64(uint32_t crc, uint64_t v) {
   uint64_t result = crc;
-  __asm__("crc32q %1, %0" : "+r"(result) : "rm"(v));
+  __asm__ __volatile__("crc32q %1, %0" : "+r"(result) : "rm"(v));
   return result;
 }
 
 static inline int64_t POPCNT_popcnt_u64(uint64_t a) {
   int64_t result;
-  __asm__("popcntq %1, %0" : "=r"(result) : "mr"(a) : "cc");
+  __asm__ __volatile__("popcntq %1, %0" : "=r"(result) : "mr"(a) : "cc");
   return result;
 }