You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2016/12/07 06:41:44 UTC

[1/4] incubator-impala git commit: IMPALA-2864: Ensure that client connections are closed after a failed Open()

Repository: incubator-impala
Updated Branches:
  refs/heads/master 534999382 -> 6098ac716


IMPALA-2864: Ensure that client connections are closed after a failed Open()

When a client tries to Open() a socket and fails, we
previously assumed that the socket was never opened and therefore did
not close it. However, if Kerberos is enabled, the
ThriftClientImpl::Open() calls TSaslTransport::Open(), which not only
opens the socket but also does the initial handshake. If there was an
error during the handshake, we just returned with an error without
closing the socket, causing the server side to wait on the same
connection.

This patch closes the client side socket, thereby terminating the
connection to the server in the above scenario, so that the server
side doesn't need to hold on to a connection until a timeout
terminates the connection.

A thrift-server-test is added to test the RPC failure path.

Change-Id: Ia7e883d8224304ad13a051f595d0e8abf4f9671e
Reviewed-on: http://gerrit.cloudera.org:8080/5385
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/a65864a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/a65864a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/a65864a4

Branch: refs/heads/master
Commit: a65864a40ddc73aebeaf04abb5c5ca6fa70ba0ee
Parents: 5349993
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Tue Dec 8 13:07:49 2015 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 7 04:25:21 2016 +0000

----------------------------------------------------------------------
 be/src/rpc/thrift-client.cc        |  6 ++++++
 be/src/rpc/thrift-client.h         |  1 +
 be/src/rpc/thrift-server-test.cc   | 25 +++++++++++++++++++++++++
 be/src/runtime/client-cache.h      |  2 +-
 be/src/runtime/data-stream-test.cc |  2 +-
 be/src/testutil/bad-cert.pem       | 22 ++++++++++++++++++++++
 be/src/testutil/bad-key.pem        | 28 ++++++++++++++++++++++++++++
 7 files changed, 84 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/rpc/thrift-client.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-client.cc b/be/src/rpc/thrift-client.cc
index bfe54fc..e0b9a6f 100644
--- a/be/src/rpc/thrift-client.cc
+++ b/be/src/rpc/thrift-client.cc
@@ -42,6 +42,12 @@ Status ThriftClientImpl::Open() {
       transport_->open();
     }
   } catch (const TException& e) {
+    try {
+      transport_->close();
+    } catch (const TException& e) {
+      VLOG(1) << "Error closing socket to: " << address_ << ", ignoring (" << e.what()
+                << ")";
+    }
     return Status(Substitute("Couldn't open transport for $0 ($1)",
         lexical_cast<string>(address_), e.what()));
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/rpc/thrift-client.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-client.h b/be/src/rpc/thrift-client.h
index a194551..26982a1 100644
--- a/be/src/rpc/thrift-client.h
+++ b/be/src/rpc/thrift-client.h
@@ -52,6 +52,7 @@ class ThriftClientImpl {
 
   /// Open the connection to the remote server. May be called repeatedly, is idempotent
   /// unless there is a failure to connect.
+  /// If Open() fails, the connection remains closed.
   Status Open();
 
   /// Retry the Open num_retries time waiting wait_ms milliseconds between retries.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/rpc/thrift-server-test.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-server-test.cc b/be/src/rpc/thrift-server-test.cc
index 244097d..7be286e 100644
--- a/be/src/rpc/thrift-server-test.cc
+++ b/be/src/rpc/thrift-server-test.cc
@@ -42,6 +42,10 @@ const string& SERVER_CERT =
     Substitute("$0/be/src/testutil/server-cert.pem", IMPALA_HOME);
 const string& PRIVATE_KEY =
     Substitute("$0/be/src/testutil/server-key.pem", IMPALA_HOME);
+const string& BAD_SERVER_CERT =
+    Substitute("$0/be/src/testutil/bad-cert.pem", IMPALA_HOME);
+const string& BAD_PRIVATE_KEY =
+    Substitute("$0/be/src/testutil/bad-key.pem", IMPALA_HOME);
 const string& PASSWORD_PROTECTED_PRIVATE_KEY =
     Substitute("$0/be/src/testutil/server-key-password.pem", IMPALA_HOME);
 
@@ -193,4 +197,25 @@ TEST(ConcurrencyTest, DISABLED_ManyConcurrentConnections) {
   pool.DrainAndShutdown();
 }
 
+TEST(NoPasswordPemFile, BadServerCertificate) {
+  ThriftServer* server = new ThriftServer("DummyStatestore", MakeProcessor(),
+      FLAGS_state_store_port + 5, NULL, NULL, 5);
+  EXPECT_OK(server->EnableSsl(BAD_SERVER_CERT, BAD_PRIVATE_KEY));
+  EXPECT_OK(server->Start());
+  FLAGS_ssl_client_ca_certificate = SERVER_CERT;
+  ThriftClient<StatestoreServiceClient> ssl_client(
+      "localhost", FLAGS_state_store_port + 5, "", NULL, true);
+  EXPECT_OK(ssl_client.Open());
+  TRegisterSubscriberResponse resp;
+  EXPECT_THROW({
+    ssl_client.iface()->RegisterSubscriber(resp, TRegisterSubscriberRequest());
+  }, TSSLException);
+  // Close and reopen the socket
+  ssl_client.Close();
+  EXPECT_OK(ssl_client.Open());
+  EXPECT_THROW({
+    ssl_client.iface()->RegisterSubscriber(resp, TRegisterSubscriberRequest());
+  }, TSSLException);
+}
+
 IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/runtime/client-cache.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/client-cache.h b/be/src/runtime/client-cache.h
index e57b766..8f97b67 100644
--- a/be/src/runtime/client-cache.h
+++ b/be/src/runtime/client-cache.h
@@ -89,7 +89,7 @@ class ClientCacheHelper {
   Status ReopenClient(ClientFactory factory_method, ClientKey* client_key);
 
   /// Returns a client to the cache. Upon return, *client_key will be NULL, and the
-  /// associated client will be available in the per-host cache..
+  /// associated client will be available in the per-host cache.
   void ReleaseClient(ClientKey* client_key);
 
   /// Close all connections to a host (e.g., in case of failure) so that on their

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/runtime/data-stream-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/data-stream-test.cc b/be/src/runtime/data-stream-test.cc
index f5eb783..d53a146 100644
--- a/be/src/runtime/data-stream-test.cc
+++ b/be/src/runtime/data-stream-test.cc
@@ -618,7 +618,7 @@ TEST_F(DataStreamTest, CloseRecvrWhileReferencesRemain) {
   Status rpc_status;
   ImpalaBackendConnection client(exec_env_.impalad_client_cache(),
       MakeNetworkAddress("localhost", FLAGS_port), &rpc_status);
-  EXPECT_TRUE(rpc_status.ok());
+  EXPECT_OK(rpc_status);
   TTransmitDataParams params;
   params.protocol_version = ImpalaInternalServiceVersion::V1;
   params.__set_eos(true);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/testutil/bad-cert.pem
----------------------------------------------------------------------
diff --git a/be/src/testutil/bad-cert.pem b/be/src/testutil/bad-cert.pem
new file mode 100644
index 0000000..e656963
--- /dev/null
+++ b/be/src/testutil/bad-cert.pem
@@ -0,0 +1,22 @@
+-----BEGIN CERTIFICATE-----
+MIIDrTCCApWgAwIBAgIJAMcMGMuKLPyIMA0GCSqGSIb3DQEBCwUAMG0xCzAJBgNV
+BAYTAlVTMRMwEQYDVQQIDApTb21lLVN0YXRlMREwDwYDVQQKDAhDbG91ZGVyYTER
+MA8GA1UEAwwIYmFkLWhvc3QxIzAhBgkqhkiG9w0BCQEWFHNhaWxlc2hAY2xvdWRl
+cmEuY29tMB4XDTE1MTIwNzIzMDI0MVoXDTE2MDEwNjIzMDI0MVowbTELMAkGA1UE
+BhMCVVMxEzARBgNVBAgMClNvbWUtU3RhdGUxETAPBgNVBAoMCENsb3VkZXJhMREw
+DwYDVQQDDAhiYWQtaG9zdDEjMCEGCSqGSIb3DQEJARYUc2FpbGVzaEBjbG91ZGVy
+YS5jb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC7+wbcxCdWsINS
+BeCMaI2Bv5Z7poPMCSDdC/dvv3LRNF40w86qEZPs7o6Dw7JEUy40eDdDWcfZU4bT
+B24ukgtdjBvXE4JlgZMOojUX1s/qgtMPvi20qo9bOYT+jI20/wAtaIiKo05f+gm8
+kFWYbqCUOYMKwkMlhhvOjsiZDSRDcep27zturbbF1rXtZWL4HNnpaDNvRBJEg+Y+
+m67uUNFGt8wcP+Ytku2vqNxvzdYTVccxIzfNYQEt49pQ6RJgE+cFePKYWuz7IzJk
+YlZt/WjIMyzR7WRkhlSAc1llQXJwGFKRIkRj3R6M+fdiYR9zWJUKgv1176Wb6+lw
+EJaw1f6FAgMBAAGjUDBOMB0GA1UdDgQWBBQsKWaEDS8lAHCgjMI6a/xfUswb0DAf
+BgNVHSMEGDAWgBQsKWaEDS8lAHCgjMI6a/xfUswb0DAMBgNVHRMEBTADAQH/MA0G
+CSqGSIb3DQEBCwUAA4IBAQBzWjquoS7Q1raZPFuYDLmlXa3CxUjqggfk40Ovja0r
+ZedwScgWd8/NVfXDDWPTJLlT+wEIRrbFkQw65dVNLA4hSwLGVSmG10JgxP+uhv8O
+kzGMCmVEhJDkpp0sdYdz3bGzxZX74BXe8pOjhHbv//Kv94k3Tu9LdKMi7V4Kqlct
+3Xpjjks2kKG1KMYy8aBmWlDw3RmI0hL79bdGG53oIeunEA7chPOwz+nAN6fgFYCg
+swDe17iFRhw9yw2d7yRWfq3dph6ao/Z65t3IHsMTWL+P/pAjRvAgj+RR/LS6BHL3
+TAmsDl93SMh/51kh0Zq035iv7+2YgS/NdRG9d0kdcrZX
+-----END CERTIFICATE-----

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/a65864a4/be/src/testutil/bad-key.pem
----------------------------------------------------------------------
diff --git a/be/src/testutil/bad-key.pem b/be/src/testutil/bad-key.pem
new file mode 100644
index 0000000..0db3da9
--- /dev/null
+++ b/be/src/testutil/bad-key.pem
@@ -0,0 +1,28 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQC7+wbcxCdWsINS
+BeCMaI2Bv5Z7poPMCSDdC/dvv3LRNF40w86qEZPs7o6Dw7JEUy40eDdDWcfZU4bT
+B24ukgtdjBvXE4JlgZMOojUX1s/qgtMPvi20qo9bOYT+jI20/wAtaIiKo05f+gm8
+kFWYbqCUOYMKwkMlhhvOjsiZDSRDcep27zturbbF1rXtZWL4HNnpaDNvRBJEg+Y+
+m67uUNFGt8wcP+Ytku2vqNxvzdYTVccxIzfNYQEt49pQ6RJgE+cFePKYWuz7IzJk
+YlZt/WjIMyzR7WRkhlSAc1llQXJwGFKRIkRj3R6M+fdiYR9zWJUKgv1176Wb6+lw
+EJaw1f6FAgMBAAECggEAc0znyqWuE2g1RCxCrRy8Hydqn/FkydOXir36SVq+jD94
+wRiRPJOHjj5Mv9lbELmMj7Zk/zSkdlLbUbkvBfWibwCvWt6mjqhJkSJBOpwR75/K
+4c8ercAoKiY/wvpnOOtoKnIBvjeorQnqyvQk7Fh+uiwEiqbZFL0LdUjzFZ2P7qVz
+nRvMsyXEbQ/ycA6Ji34viNgOFNjWwemDtCutHZdUPYsD/JfuGg6ivGQRqyEFlY9g
+Tw/l9idCx7JTAMzH8hMzJNbLEWE0Sa1TxPFXGj+WYjeXFuAZ42D6jZsGNU675Wa+
+SFTxXvkXk8sXybnYY67vd6hNUUiCzMq09AJpYZ5qSQKBgQD4ewuaBRisFkIImWO9
+vYBcpcma3MDyF7FHpyApIEzQYUQbKR+91yr5r51Q1z9UwwX3stA58hPUkXNu3iBA
+PQqO8aPmTGo9yPRpqPzzt4Gh4Pzzmcik81zF2BIkCLvzrHZrB8chM+Az3JKOxI9T
+Lb2PNKKY81Rq9UIkPkJsFAXL/wKBgQDBq0sh2IfjHp6AHKh9QpyfdM/u3ALkf2hL
+OvpcnjyLPVDvYEGgN8GafbcpoN6XLep2oz9Od+7GVwqLjRMspyZM1Js8UFhtz8eN
+/MsQfFW9ivIMCdMMU1ozOmlcfLoq4/etVaYfLQRtlMKvL8zRKneoXjBZV68V4kZ2
+PxGMfu8FewKBgQCj2A7IWn/wSST1op9AJ8qSTMdpFBMuDy1Yf/0W4TOFW/2aoz1I
+4q51wbTL74LVE1vF/uSKsPMegWJKQrGlahqiMvfODakoYG+5lDJnSiNyaHai8k55
+ZfdQha9Aj3nPrXLQFGrbm+dEizcgaL/RKyIJYb2teRW7CUm5uEv4FCPWZQKBgQCJ
+YtVyliOXt5Hi+fGAom9vIrObE5ItvEAlFhqi91GlyQKQPW1wlf0Odl4n9snQ3y6z
+qIzxQl0tcHO3mYVfqNefqzbQa4K/q6U5kXoQINPGGTop1hJUbRDQxIAXrxd187Aw
+01B8TzgT8HLHShZ2zzSBSQftaSl4UcOAgK8XRriS3wKBgQDav8s2vEzedic608My
+sORIRNvNdtOTl3bDpKt2ho9yR3no3zGwGga45O2uD+MIo8VbddXhrOtB5VATF0Ar
+YowZmXULv2nqZFV5vr5btXD1NF31YMnqXHCRqyxUvldIGzGeCaDW1SVZp8VoDyTz
+jvKjQamR7uMxgVFxa3O2Si1fCQ==
+-----END PRIVATE KEY-----


[2/4] incubator-impala git commit: IMPALA-3788: Add flag for Kudu read-your-writes

Posted by sa...@apache.org.
IMPALA-3788: Add flag for Kudu read-your-writes

The previous attempt to support for Kudu 'read-your-writes'
consistency successfully captured the latest observed ts
from the Kudu client after a write, and to propagate it to
future Kudu clients within the same session. That alone made
writes within a session linearizable, but it did not fully
address 'read-your-writes' semantics because the Kudu client
in the KuduScanner needed further configuration.

The Kudu client exposes an option to set the 'ReadMode',
which can be either READ_LATEST or READ_AT_SNAPSHOT. The
former is the default and allows the client to read the
latest known value for every row, and there is no
consistency among the version of the rows read within that
scan. When READ_AT_SNAPSHOT is enabled, the client will pick a
ts that is after the latest observed session ts (propagated
and set with SetLatestObservedTimestamp() by the previous
commit for IMPALA-3788) and perform a snapshot read at that
time. This timestamp is still determined per-client, so that
does not mean that the entire query performs a snapshot read
at the same timestamp-- doing that requires further work
in Kudu and will require another change in Impala as well.

That said, this behavior is sufficient to satisfy
'read-your-writes' consistency in all cases _except_ when a
DML statement is reading and writing the same table, e.g.
  INSERT INTO foo SELECT ... from foo
This case may result in reading rows that were inserted by a
different node of the same query. This case will be handled
when a global snapshot timestamp is supported and configured
by Impala.

Because this is performing a snapshot read, some rows may be
read from lagging replicas and thus those replicas will have
to wait before returning rows. This has implications for
the query execution behavior (e.g. queries may be more
likely to time out, may affect number of queries that can be
run), so the behavior is not yet enabled by default. It can
be enabled with the flag --kudu_read_mode READ_AT_SNAPSHOT
The goal is to make this the default behavior after
sufficient testing.

Change-Id: I003aba410548bc9158d1e11abbdcf710c31a82ff
Reviewed-on: http://gerrit.cloudera.org:8080/5288
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0d4bdc1b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0d4bdc1b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0d4bdc1b

Branch: refs/heads/master
Commit: 0d4bdc1b70464e71cd3dc44f6fbaf0aa619932e0
Parents: a65864a
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Tue Nov 29 15:25:40 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 7 05:01:01 2016 +0000

----------------------------------------------------------------------
 be/src/exec/kudu-scanner.cc | 11 +++++++++++
 1 file changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0d4bdc1b/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index c13b6a8..ff9ca27 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -20,6 +20,7 @@
 #include <kudu/client/row_result.h>
 #include <thrift/protocol/TDebugProtocol.h>
 #include <vector>
+#include <string>
 
 #include "exprs/expr.h"
 #include "exprs/expr-context.h"
@@ -43,6 +44,9 @@ using kudu::client::KuduScanBatch;
 using kudu::client::KuduSchema;
 using kudu::client::KuduTable;
 
+DEFINE_string(kudu_read_mode, "READ_LATEST", "(Advanced) Sets the Kudu scan ReadMode. "
+    "Supported Kudu read modes are READ_LATEST and READ_AT_SNAPSHOT. Invalid values "
+    "result in using READ_LATEST.");
 DEFINE_bool(pick_only_leaders_for_tests, false,
             "Whether to pick only leader replicas, for tests purposes only.");
 DEFINE_int32(kudu_scanner_keep_alive_period_sec, 15,
@@ -53,6 +57,8 @@ DECLARE_int32(kudu_operation_timeout_ms);
 
 namespace impala {
 
+const string MODE_READ_AT_SNAPSHOT = "READ_AT_SNAPSHOT";
+
 KuduScanner::KuduScanner(KuduScanNode* scan_node, RuntimeState* state)
   : scan_node_(scan_node),
     state_(state),
@@ -132,6 +138,11 @@ Status KuduScanner::OpenNextScanToken(const string& scan_token)  {
     KUDU_RETURN_IF_ERROR(scanner_->SetSelection(kudu::client::KuduClient::LEADER_ONLY),
                          "Could not set replica selection.");
   }
+  kudu::client::KuduScanner::ReadMode mode =
+      MODE_READ_AT_SNAPSHOT.compare(FLAGS_kudu_read_mode) ?
+          kudu::client::KuduScanner::READ_AT_SNAPSHOT :
+          kudu::client::KuduScanner::READ_LATEST;
+  KUDU_RETURN_IF_ERROR(scanner_->SetReadMode(mode), "Could not set scanner ReadMode");
 
   KUDU_RETURN_IF_ERROR(scanner_->SetTimeoutMillis(FLAGS_kudu_operation_timeout_ms),
       "Could not set scanner timeout");


[3/4] incubator-impala git commit: IMPALA-4477: Bump Kudu version to latest master (60aa54e)

Posted by sa...@apache.org.
IMPALA-4477: Bump Kudu version to latest master (60aa54e)

Bumps the toolchain version to get a newer Kudu build.

Also fixes test failures resulting from changes in Kudu.
Notably error strings have changed (IMPALA-4590) and the
number of replicas must be odd (IMPALA-4589).

Note: The toolchain binaries starting with this build are
now using the toolchain binutils rather than the system
binutils.

Testing: private exhaustive build.

Change-Id: If1912f058c240fbe82b06f77e31add7755289be1
Reviewed-on: http://gerrit.cloudera.org:8080/5369
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5188f879
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5188f879
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5188f879

Branch: refs/heads/master
Commit: 5188f879a7940f96dcaf3892f08c0aa76f58abe2
Parents: 0d4bdc1
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Sat Dec 3 09:58:20 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 7 05:11:13 2016 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                                         | 4 ++--
 .../functional-query/queries/QueryTest/kudu_alter.test       | 2 +-
 .../functional-query/queries/QueryTest/kudu_create.test      | 4 ++--
 .../queries/QueryTest/kudu_partition_ddl.test                | 6 +++---
 .../functional-query/queries/QueryTest/kudu_stats.test       | 8 ++++----
 5 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5188f879/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 834f216..a3fdb57 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -57,7 +57,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-: ${IMPALA_TOOLCHAIN_BUILD_ID=267-98c642ffcb}
+: ${IMPALA_TOOLCHAIN_BUILD_ID=289-f12b0dd2e9}
 
 # This flag is used in $IMPALA_HOME/cmake_modules/toolchain.cmake.
 # If it's 0, Impala will be built with the compiler in the toolchain directory.
@@ -270,7 +270,7 @@ export IMPALA_GFLAGS_VERSION=2.0
 export IMPALA_GLOG_VERSION=0.3.2-p2
 export IMPALA_GPERFTOOLS_VERSION=2.5
 export IMPALA_GTEST_VERSION=1.6.0
-export IMPALA_KUDU_VERSION=f2aeba
+export IMPALA_KUDU_VERSION=60aa54e
 export IMPALA_LLVM_VERSION=3.8.0-p1
 export IMPALA_LLVM_ASAN_VERSION=3.8.0-p1
 # Debug builds should use the release+asserts build to get additional coverage.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5188f879/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
index 8722f8a..ac90e07 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
@@ -98,7 +98,7 @@ alter table tbl_to_alter add range partition 1000 < values
 # Try to insert a partition that overlaps with an existing partition
 alter table tbl_to_alter add range partition 10 < values <= 30
 ---- CATCH
-NonRecoverableException: New range partition conflicts with existing range partition: [(int32 id=11), (int32 id=31))
+NonRecoverableException: New range partition conflicts with existing range partition: 11 <= VALUES < 31
 ====
 ---- QUERY
 # Try to insert a partition that overlaps with an existing partition, use IF NOT EXISTS

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5188f879/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
index 651979d..d67103b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
@@ -21,14 +21,14 @@ INT,INT
 create table tab (x int, y boolean, primary key(x, y))
   partition by hash (x) into 3 buckets stored as kudu
 ---- CATCH
-NonRecoverableException: Key column may not have type of BOOL, FLOAT, or DOUBLE
+NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
 ====
 ---- QUERY
 # Float primary key column
 create table tab (x int, y float, primary key(x, y))
   partition by hash (x) into 3 buckets stored as kudu
 ---- CATCH
-NonRecoverableException: Key column may not have type of BOOL, FLOAT, or DOUBLE
+NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
 ====
 ---- QUERY
 # Primary keys should be declared first

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5188f879/testdata/workloads/functional-query/queries/QueryTest/kudu_partition_ddl.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_partition_ddl.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_partition_ddl.test
index 5e9c477..496492b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_partition_ddl.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_partition_ddl.test
@@ -231,7 +231,7 @@ create table simple_range_with_overlapping (id int, name string, valf float, val
   primary key (id, name)) partition by range (id)
   (partition values <= 10, partition values < 20, partition value = 5) stored as kudu
 ---- CATCH
-NonRecoverableException: overlapping range partitions: first range partition: [<start>, (int32 id=11)), second range partition: [<start>, (int32 id=20))
+NonRecoverableException: overlapping range partitions: first range partition: VALUES < 11, second range partition: VALUES < 20
 ====
 ---- QUERY
 -- Test range partitioning with the same partition specified multiple times
@@ -239,7 +239,7 @@ create table simple_range_duplicate_parts (id int, name string, valf float, vali
   primary key(id, name)) partition by range (id)
   (partition 10 < values <= 20, partition 10 < values <= 20) stored as kudu
 ---- CATCH
-NonRecoverableException: overlapping range partitions: first range partition: [(int32 id=11), (int32 id=21)), second range partition: [(int32 id=11), (int32 id=21))
+NonRecoverableException: overlapping range partitions: first range partition: 11 <= VALUES < 21, second range partition: 11 <= VALUES < 21
 ====
 ---- QUERY
 -- Test multi-column range partitioning with the same partition specified multiple times
@@ -247,5 +247,5 @@ create table range_multi_col_duplicate_parts (id int, name string, valf float,
   vali bigint, primary key (id, name)) partition by range (id, name)
   (partition value = (10, 'dimitris'), partition value = (10, 'dimitris')) stored as kudu
 ---- CATCH
-NonRecoverableException: overlapping range partitions: first range partition: [(int32 id=10, string name=dimitris), (int32 id=10, string name=dimitris\000)), second range partition: [(int32 id=10, string name=dimitris), (int32 id=10, string name=dimitris\000))
+NonRecoverableException: overlapping range partitions: first range partition: (10, "dimitris") <= VALUES < (10, "dimitris\000"), second range partition: (10, "dimitris") <= VALUES < (10, "dimitris\000")
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5188f879/testdata/workloads/functional-query/queries/QueryTest/kudu_stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_stats.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_stats.test
index d3b7bd3..697a3a3 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_stats.test
@@ -2,15 +2,15 @@
 ---- QUERY
 create table simple (id int primary key, name string, valf float, vali bigint)
   partition by range (partition values < 10, partition 10 <= values < 30,
-  partition 30 <= values) stored as kudu tblproperties('kudu.num_tablet_replicas' = '2')
+  partition 30 <= values) stored as kudu tblproperties('kudu.num_tablet_replicas' = '1')
 ---- RESULTS
 ====
 ---- QUERY
 show table stats simple
 ---- RESULTS
--1,'','8000000A',regex:.*?:\d+,2
--1,'8000000A','8000001E',regex:.*?:\d+,2
--1,'8000001E','',regex:.*?:\d+,2
+-1,'','8000000A',regex:.*?:\d+,1
+-1,'8000000A','8000001E',regex:.*?:\d+,1
+-1,'8000001E','',regex:.*?:\d+,1
 ---- TYPES
 INT,STRING,STRING,STRING,INT
 ---- LABELS


[4/4] incubator-impala git commit: IMPALA-4592: Improve error msg for non-deterministic predicates.

Posted by sa...@apache.org.
IMPALA-4592: Improve error msg for non-deterministic predicates.

Impala cannot correctly evaluate or assign some non-deterministic
predicates. This patch improves the error message shown when
trying to evaluate such unsupported predicates for the purpose
of partition pruning.

Change-Id: I94765f62bde94f4faa7fc5c26d928099ca1496d1
Reviewed-on: http://gerrit.cloudera.org:8080/5386
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6098ac71
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6098ac71
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6098ac71

Branch: refs/heads/master
Commit: 6098ac7162742c11350de708188ce6c3f7ce11a7
Parents: 5188f87
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Dec 6 14:30:43 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Dec 7 06:27:51 2016 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/PartitionSet.java  |  5 +++--
 .../impala/planner/HdfsPartitionFilter.java       | 18 +++++++++++-------
 .../impala/planner/HdfsPartitionPruner.java       |  6 +++---
 .../queries/PlannerTest/hdfs.test                 | 11 +++++++++++
 4 files changed, 28 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6098ac71/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java b/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
index 6de2d93..3ba2ad2 100644
--- a/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
+++ b/fe/src/main/java/org/apache/impala/analysis/PartitionSet.java
@@ -25,7 +25,7 @@ import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.HdfsPartition;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.common.AnalysisException;
-import org.apache.impala.common.InternalException;
+import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.Reference;
 import org.apache.impala.planner.HdfsPartitionPruner;
 import org.apache.impala.thrift.TPartitionKeyValue;
@@ -87,7 +87,8 @@ public class PartitionSet extends PartitionSpecBase {
     try {
       HdfsPartitionPruner pruner = new HdfsPartitionPruner(desc);
       partitions_ = pruner.prunePartitions(analyzer, transformedConjuncts, true);
-    } catch (InternalException e) {
+    } catch (ImpalaException e) {
+      if (e instanceof AnalysisException) throw (AnalysisException) e;
       throw new AnalysisException("Partition expr evaluation failed in the backend.", e);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6098ac71/fe/src/main/java/org/apache/impala/planner/HdfsPartitionFilter.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionFilter.java b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionFilter.java
index 7368358..3c0fb15 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionFilter.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionFilter.java
@@ -21,9 +21,6 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.ExprSubstitutionMap;
@@ -33,10 +30,14 @@ import org.apache.impala.analysis.SlotRef;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.HdfsPartition;
 import org.apache.impala.catalog.HdfsTable;
-import org.apache.impala.common.InternalException;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.NotImplementedException;
 import org.apache.impala.service.FeSupport;
 import org.apache.impala.thrift.TColumnValue;
 import org.apache.impala.thrift.TResultRow;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -83,7 +84,7 @@ public class HdfsPartitionFilter {
    * that pass the filter.
    */
   public HashSet<Long> getMatchingPartitionIds(ArrayList<HdfsPartition> partitions,
-      Analyzer analyzer) throws InternalException {
+      Analyzer analyzer) throws ImpalaException {
     HashSet<Long> result = new HashSet<Long>();
     // List of predicates to evaluate
     ArrayList<Expr> predicates = new ArrayList<Expr>(partitions.size());
@@ -110,7 +111,7 @@ public class HdfsPartitionFilter {
    * for the partition cols with the respective partition-key values.
    */
   private Expr buildPartitionPredicate(HdfsPartition partition, Analyzer analyzer)
-      throws InternalException {
+      throws ImpalaException {
     // construct smap
     ExprSubstitutionMap sMap = new ExprSubstitutionMap();
     for (int i = 0; i < refdKeys_.size(); ++i) {
@@ -123,7 +124,10 @@ public class HdfsPartitionFilter {
       LOG.trace("buildPartitionPredicate: " + literalPredicate.toSql() + " " +
           literalPredicate.debugString());
     }
-    Preconditions.checkState(literalPredicate.isConstant());
+    if (!literalPredicate.isConstant()) {
+      throw new NotImplementedException(
+          "Unsupported non-deterministic predicate: " + predicate_.toSql());
+    }
     return literalPredicate;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6098ac71/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
index b8eb7ed..046240f 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
@@ -42,7 +42,7 @@ import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.catalog.HdfsPartition;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.common.AnalysisException;
-import org.apache.impala.common.InternalException;
+import org.apache.impala.common.ImpalaException;
 import org.apache.impala.rewrite.BetweenToCompoundRule;
 import org.apache.impala.rewrite.ExprRewriter;
 import org.slf4j.Logger;
@@ -94,7 +94,7 @@ public class HdfsPartitionPruner {
    */
   public List<HdfsPartition> prunePartitions(
       Analyzer analyzer, List<Expr> conjuncts, boolean allowEmpty)
-      throws InternalException, AnalysisException {
+      throws ImpalaException {
     // Start with creating a collection of partition filters for the applicable conjuncts.
     List<HdfsPartitionFilter> partitionFilters = Lists.newArrayList();
     // Conjuncts that can be evaluated from the partition key values.
@@ -438,7 +438,7 @@ public class HdfsPartitionPruner {
    * filters that could not be evaluated from the partition key values.
    */
   private void evalPartitionFiltersInBe(List<HdfsPartitionFilter> filters,
-      HashSet<Long> matchingPartitionIds, Analyzer analyzer) throws InternalException {
+      HashSet<Long> matchingPartitionIds, Analyzer analyzer) throws ImpalaException {
     Map<Long, HdfsPartition> partitionMap = tbl_.getPartitionMap();
     // Set of partition ids that pass a filter
     HashSet<Long> matchingIds = Sets.newHashSet();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6098ac71/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
index 606425b..83f1b46 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test
@@ -1019,3 +1019,14 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional.alltypes]
    partitions=0/24 files=0 size=0B
 ====
+# IMPALA-4592: Test that we bail on evaluating non-deterministic predicates when trying
+# to prune partitions.
+select * from functional.alltypes where rand() > 100
+---- PLAN
+not implemented: Unsupported non-deterministic predicate: rand() > 100
+====
+# IMPALA-4592: Same as above but the predicate references a partition column.
+select * from functional.alltypes where rand() > year
+---- PLAN
+not implemented: Unsupported non-deterministic predicate: rand() > year
+====