You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2017/04/07 21:38:40 UTC

[1/2] kudu git commit: [python] Fix flaky test test_connect_timeouts

Repository: kudu
Updated Branches:
  refs/heads/master 31f4589ba -> 795f5ee94


[python] Fix flaky test test_connect_timeouts

It only tests passing the timeout, but it's low enough that on our poor
jenkins workers it might actually time out connecting to the cluster.

Change-Id: I9df059724ae77c404bccc594e4fa01ac75d905a5
Reviewed-on: http://gerrit.cloudera.org:8080/6573
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/960a061c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/960a061c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/960a061c

Branch: refs/heads/master
Commit: 960a061c40a6e25797078e3fb16890e887ec67c3
Parents: 31f4589
Author: Jean-Daniel Cryans <jd...@apache.org>
Authored: Thu Apr 6 15:43:15 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Apr 7 21:04:51 2017 +0000

----------------------------------------------------------------------
 python/kudu/tests/test_client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/960a061c/python/kudu/tests/test_client.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_client.py b/python/kudu/tests/test_client.py
index 779cd25..0a3d47e 100644
--- a/python/kudu/tests/test_client.py
+++ b/python/kudu/tests/test_client.py
@@ -258,8 +258,8 @@ class TestClient(KuduTestBase, unittest.TestCase):
     def test_connect_timeouts(self):
         # it works! any other way to check
         kudu.connect(self.master_hosts, self.master_ports,
-                     admin_timeout_ms=100,
-                     rpc_timeout_ms=100)
+                     admin_timeout_ms=1000,
+                     rpc_timeout_ms=1000)
 
     def test_capture_kudu_error(self):
         pass


[2/2] kudu git commit: [security] fixed shortened TSK validity interval

Posted by al...@apache.org.
[security] fixed shortened TSK validity interval

The TSK validity interval should be (authn_token_validity_interval +
tsk_propagation_interval + tsk_rotation_interval), which is
(auth_token_validity_interval + 2 * tsk_rotation_interval) since the
propagation interval is set equal to the rotation interval now.

Prior to this fix, as spotted by Dan, the TSK validity interval was
missing the rotation interval delta, which could lead to situations when
a valid authn token could not be verified due to already expired TSK.

Added an integration test to cover the fixed issue and exercise token
verification during and past token and its TSK lifecycle.

Change-Id: I84f9789276c4b48a3ba5274393fe30c8bf3ea85d
Reviewed-on: http://gerrit.cloudera.org:8080/6536
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <da...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/795f5ee9
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/795f5ee9
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/795f5ee9

Branch: refs/heads/master
Commit: 795f5ee948e525941c575b231e2c1f9456c160ac
Parents: 960a061
Author: Alexey Serbin <as...@cloudera.com>
Authored: Mon Apr 3 12:09:35 2017 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Fri Apr 7 21:38:03 2017 +0000

----------------------------------------------------------------------
 .../integration-tests/token_signer-itest.cc     | 175 ++++++++++++++++---
 src/kudu/security/token-test.cc                 |   6 +-
 src/kudu/security/token_signer.cc               |   3 +-
 src/kudu/security/token_signer.h                |  45 +++--
 4 files changed, 190 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/795f5ee9/src/kudu/integration-tests/token_signer-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/token_signer-itest.cc b/src/kudu/integration-tests/token_signer-itest.cc
index 20e960d..4e4c828 100644
--- a/src/kudu/integration-tests/token_signer-itest.cc
+++ b/src/kudu/integration-tests/token_signer-itest.cc
@@ -15,8 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <algorithm>
+#include <functional>
 #include <memory>
 #include <string>
+#include <vector>
 
 #include <gflags/gflags_declare.h>
 #include <gtest/gtest.h>
@@ -34,19 +37,23 @@
 #include "kudu/security/token.pb.h"
 #include "kudu/security/token_signer.h"
 #include "kudu/security/token_verifier.h"
+#include "kudu/tserver/mini_tablet_server.h"
+#include "kudu/tserver/tablet_server.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/test_util.h"
 
 DECLARE_int64(authn_token_validity_seconds);
 DECLARE_int64(tsk_rotation_seconds);
+DECLARE_int32(heartbeat_interval_ms);
 
 using std::string;
 using std::unique_ptr;
 using std::vector;
+using kudu::security::TokenPB;
 using kudu::security::TokenSigningPublicKeyPB;
 using kudu::security::SignedTokenPB;
-using kudu::security::TokenPB;
+using kudu::security::VerificationResult;
 
 namespace kudu {
 namespace master {
@@ -54,8 +61,8 @@ namespace master {
 class TokenSignerITest : public KuduTest {
  public:
   TokenSignerITest() {
-    FLAGS_authn_token_validity_seconds = 60;
-    FLAGS_tsk_rotation_seconds = 20;
+    FLAGS_authn_token_validity_seconds = authn_token_validity_seconds_;
+    FLAGS_tsk_rotation_seconds = tsk_rotation_seconds_;
 
     // Hard-coded ports for the masters. This is safe, as this unit test
     // runs under a resource lock (see CMakeLists.txt in this directory).
@@ -63,6 +70,7 @@ class TokenSignerITest : public KuduTest {
     opts_.master_rpc_ports = { 11010, 11011, 11012 };
 
     opts_.num_masters = num_masters_ = opts_.master_rpc_ports.size();
+    opts_.num_tablet_servers = num_tablet_servers_;
   }
 
   void SetUp() override {
@@ -89,20 +97,13 @@ class TokenSignerITest : public KuduTest {
     return cluster_->GetLeaderMasterIndex(nullptr);
   }
 
-  static SignedTokenPB MakeToken() {
-    SignedTokenPB ret;
-    TokenPB token;
-    token.set_expire_unix_epoch_seconds(WallTime_Now() + 600);
-    CHECK(token.SerializeToString(ret.mutable_token_data()));
-    return ret;
-  }
-
-  Status SignToken(SignedTokenPB* token) {
+  Status MakeSignedToken(SignedTokenPB* token) {
+    static const string kUserName = "test";
     int leader_idx;
     RETURN_NOT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
     MiniMaster* leader = cluster_->mini_master(leader_idx);
     Master* master = leader->master();
-    RETURN_NOT_OK(master->token_signer()->SignToken(token));
+    RETURN_NOT_OK(master->token_signer()->GenerateAuthnToken(kUserName, token));
     return Status::OK();
   }
 
@@ -123,7 +124,11 @@ class TokenSignerITest : public KuduTest {
   }
 
  protected:
+  const int64_t authn_token_validity_seconds_ = 20;
+  const int64_t tsk_rotation_seconds_ = 20;
+
   int num_masters_;
+  const int num_tablet_servers_ = 3;
   MiniClusterOptions opts_;
   unique_ptr<MiniCluster> cluster_;
 };
@@ -135,8 +140,8 @@ class TokenSignerITest : public KuduTest {
 TEST_F(TokenSignerITest, TskAtLeaderMaster) {
   // Check the leader can sign tokens: this guarantees at least one TSK has been
   // generated and is available for token signing.
-  SignedTokenPB t(MakeToken());
-  ASSERT_OK(SignToken(&t));
+  SignedTokenPB t;
+  ASSERT_OK(MakeSignedToken(&t));
 
   // Get the public part of the signing key from the leader.
   vector<TokenSigningPublicKeyPB> leader_public_keys;
@@ -152,8 +157,8 @@ TEST_F(TokenSignerITest, TskAtLeaderMaster) {
 // the removal of expired TSK info is not covered by this scenario).
 TEST_F(TokenSignerITest, TskClusterRestart) {
   // Check the leader can sign tokens just after start.
-  SignedTokenPB t_pre(MakeToken());
-  ASSERT_OK(SignToken(&t_pre));
+  SignedTokenPB t_pre;
+  ASSERT_OK(MakeSignedToken(&t_pre));
 
   vector<TokenSigningPublicKeyPB> public_keys_before;
   ASSERT_OK(GetLeaderPublicKeys(&public_keys_before));
@@ -162,8 +167,8 @@ TEST_F(TokenSignerITest, TskClusterRestart) {
   ASSERT_OK(RestartCluster());
 
   // Check the leader can sign tokens after the restart.
-  SignedTokenPB t_post(MakeToken());
-  ASSERT_OK(SignToken(&t_post));
+  SignedTokenPB t_post;
+  ASSERT_OK(MakeSignedToken(&t_post));
   EXPECT_EQ(t_post.signing_key_seq_num(), t_pre.signing_key_seq_num());
 
   vector<TokenSigningPublicKeyPB> public_keys_after;
@@ -177,8 +182,8 @@ TEST_F(TokenSignerITest, TskClusterRestart) {
 // for token verification as the former leader
 // (it's assumed no new TSK generation happened in between).
 TEST_F(TokenSignerITest, TskMasterLeadershipChange) {
-  SignedTokenPB t_former_leader(MakeToken());
-  ASSERT_OK(SignToken(&t_former_leader));
+  SignedTokenPB t_former_leader;
+  ASSERT_OK(MakeSignedToken(&t_former_leader));
 
   vector<TokenSigningPublicKeyPB> public_keys;
   ASSERT_OK(GetLeaderPublicKeys(&public_keys));
@@ -188,8 +193,8 @@ TEST_F(TokenSignerITest, TskMasterLeadershipChange) {
   ASSERT_OK(WaitForNewLeader());
 
   // The new leader should use the same signing key.
-  SignedTokenPB t_new_leader(MakeToken());
-  ASSERT_OK(SignToken(&t_new_leader));
+  SignedTokenPB t_new_leader;
+  ASSERT_OK(MakeSignedToken(&t_new_leader));
   EXPECT_EQ(t_new_leader.signing_key_seq_num(),
             t_former_leader.signing_key_seq_num());
 
@@ -200,6 +205,130 @@ TEST_F(TokenSignerITest, TskMasterLeadershipChange) {
             public_keys_new_leader[0].SerializeAsString());
 }
 
+// Check for authn token verification results during and past its lifetime.
+//
+// Tablet servers should be able to successfully verify the authn token
+// up to the very end of the token validity interval. Past the duration of authn
+// token's validity interval the token is considered invalid and an attempt to
+// verify such a token should fail.
+//
+// This test exercises the edge case for the token validity interval:
+//   * Generate an authn token at the very end of TSK activity interval.
+//   * Make sure the TSK stays valid and can be used for token verification
+//     up to the very end of the token validity interval.
+TEST_F(TokenSignerITest, AuthnTokenLifecycle) {
+  using std::all_of;
+  using std::bind;
+  using std::equal_to;
+  using std::placeholders::_1;
+
+  if (!AllowSlowTests()) {
+    LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
+    return;
+  }
+  vector<TokenSigningPublicKeyPB> public_keys;
+  ASSERT_OK(GetLeaderPublicKeys(&public_keys));
+  ASSERT_EQ(1, public_keys.size());
+  const TokenSigningPublicKeyPB& public_key = public_keys[0];
+  ASSERT_TRUE(public_key.has_key_seq_num());
+  const int64_t key_seq_num = public_key.key_seq_num();
+  ASSERT_TRUE(public_key.has_expire_unix_epoch_seconds());
+  const int64_t key_expire = public_key.expire_unix_epoch_seconds();
+  const int64_t key_active_end = key_expire - authn_token_validity_seconds_;
+
+  SignedTokenPB stoken;
+  ASSERT_OK(MakeSignedToken(&stoken));
+  ASSERT_EQ(key_seq_num, stoken.signing_key_seq_num());
+
+  for (int i = 0; i < num_tablet_servers_; ++i) {
+    const tserver::TabletServer* ts = cluster_->mini_tablet_server(i)->server();
+    ASSERT_NE(nullptr, ts);
+    TokenPB token;
+    // There might be some delay due to parallel OS activity, but the public
+    // part of TSK should reach all tablet servers in a few heartbeat intervals.
+    AssertEventually([&] {
+        const VerificationResult res = ts->messenger()->token_verifier().
+            VerifyTokenSignature(stoken, &token);
+        ASSERT_EQ(VerificationResult::VALID, res);
+    }, MonoDelta::FromMilliseconds(5L * FLAGS_heartbeat_interval_ms));
+  }
+
+  // Get closer to the very end of the TSK's activity interval and generate
+  // another authn token. Such token has the expiration time close to the
+  // expiration time of the TSK itself. Make sure the authn token can be
+  // successfully verified up to its expiry.
+  //
+  // It's necessary to keep some margin due to double-to-int truncation issues
+  // (that's about WallTime_Now()) and to allow generating authn token before
+  // current TSK is replaced with the next one (that's to make this test stable
+  // on slow VMs). The margin should be relatively small compared with the
+  // TSK rotation interval.
+  const int64_t margin = tsk_rotation_seconds_ / 5;
+  while (true) {
+    if (key_active_end - margin <= WallTime_Now()) {
+      break;
+    }
+    SleepFor(MonoDelta::FromMilliseconds(500));
+  }
+
+  SignedTokenPB stoken_eotai; // end-of-TSK-activity-interval authn token
+  ASSERT_OK(MakeSignedToken(&stoken_eotai));
+  const int64_t key_seq_num_token_eotai = stoken_eotai.signing_key_seq_num();
+  ASSERT_EQ(key_seq_num, key_seq_num_token_eotai);
+
+  vector<bool> expired_at_tserver(num_tablet_servers_, false);
+  vector<bool> valid_at_tserver(num_tablet_servers_, false);
+  while (true) {
+    for (int i = 0; i < num_tablet_servers_; ++i) {
+      if (expired_at_tserver[i]) {
+        continue;
+      }
+      const tserver::TabletServer* ts = cluster_->mini_tablet_server(i)->server();
+      ASSERT_NE(nullptr, ts);
+      const int64_t time_pre = WallTime_Now();
+      TokenPB token;
+      const VerificationResult res = ts->messenger()->token_verifier().
+          VerifyTokenSignature(stoken_eotai, &token);
+      if (res == VerificationResult::VALID) {
+        // Both authn token and its TSK should be valid.
+        valid_at_tserver[i] = true;
+        ASSERT_GE(token.expire_unix_epoch_seconds(), time_pre);
+        ASSERT_GE(key_expire, time_pre);
+      } else {
+        expired_at_tserver[i] = true;
+        // The only expected error here is EXPIRED_TOKEN.
+        ASSERT_EQ(VerificationResult::EXPIRED_TOKEN, res);
+        const int64_t time_post = WallTime_Now();
+        ASSERT_LT(token.expire_unix_epoch_seconds(), time_post);
+      }
+    }
+    if (all_of(expired_at_tserver.begin(), expired_at_tserver.end(),
+               bind(equal_to<bool>(), _1, true))) {
+      break;
+    }
+    SleepFor(MonoDelta::FromMilliseconds(500));
+  }
+
+  // The end-of-TSK-activity-interval authn token should have been successfully
+  // validated by all tablet servers.
+  ASSERT_TRUE(all_of(valid_at_tserver.begin(), valid_at_tserver.end(),
+              bind(equal_to<bool>(), _1, true)));
+
+  while (WallTime_Now() < key_expire) {
+    SleepFor(MonoDelta::FromMilliseconds(500));
+  }
+  // Wait until current TSK expires and try to verify the token again.
+  // The token verification result should be EXPIRED_TOKEN.
+  for (int i = 0; i < num_tablet_servers_; ++i) {
+    const tserver::TabletServer* ts = cluster_->mini_tablet_server(i)->server();
+    ASSERT_NE(nullptr, ts);
+    TokenPB token;
+    ASSERT_EQ(VerificationResult::EXPIRED_TOKEN,
+              ts->messenger()->token_verifier().
+              VerifyTokenSignature(stoken_eotai, &token));
+  }
+}
+
 // TODO(aserbin): add a test case which corresponds to multiple signing keys
 // right after cluster start-up.
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/795f5ee9/src/kudu/security/token-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/security/token-test.cc b/src/kudu/security/token-test.cc
index 31bb7a2..6fc2772 100644
--- a/src/kudu/security/token-test.cc
+++ b/src/kudu/security/token-test.cc
@@ -126,7 +126,7 @@ TEST_F(TokenTest, TestIsCurrentKeyValid) {
   static const int64_t kAuthnTokenValiditySeconds = 1;
   static const int64_t kKeyRotationSeconds = 1;
   static const int64_t kKeyValiditySeconds =
-      kAuthnTokenValiditySeconds + kKeyRotationSeconds;
+      kAuthnTokenValiditySeconds + 2 * kKeyRotationSeconds;
 
   TokenSigner signer(kAuthnTokenValiditySeconds, kKeyRotationSeconds);
   EXPECT_FALSE(signer.IsCurrentKeyValid());
@@ -276,9 +276,9 @@ TEST_F(TokenTest, TestTokenSignerSignVerifyExport) {
 TEST_F(TokenTest, TestExportKeys) {
   // Test that the exported public keys don't contain private key material,
   // and have an appropriate expiration.
-  const int64_t key_exp_seconds = 20;
+  const int64_t key_exp_seconds = 30;
   const int64_t key_rotation_seconds = 10;
-  TokenSigner signer(key_exp_seconds - key_rotation_seconds,
+  TokenSigner signer(key_exp_seconds - 2 * key_rotation_seconds,
                      key_rotation_seconds);
   int64_t key_seq_num;
   {

http://git-wip-us.apache.org/repos/asf/kudu/blob/795f5ee9/src/kudu/security/token_signer.cc
----------------------------------------------------------------------
diff --git a/src/kudu/security/token_signer.cc b/src/kudu/security/token_signer.cc
index 5b4fcd6..3cfa382 100644
--- a/src/kudu/security/token_signer.cc
+++ b/src/kudu/security/token_signer.cc
@@ -58,7 +58,8 @@ TokenSigner::TokenSigner(int64_t authn_token_validity_seconds,
                          : std::make_shared<TokenVerifier>()),
       authn_token_validity_seconds_(authn_token_validity_seconds),
       key_rotation_seconds_(key_rotation_seconds),
-      key_validity_seconds_(key_rotation_seconds_ + authn_token_validity_seconds_),
+      // The TSK propagation interval is equal to the rotation interval.
+      key_validity_seconds_(2 * key_rotation_seconds_ + authn_token_validity_seconds_),
       next_key_seq_num_(0) {
   CHECK_GE(key_rotation_seconds_, 0);
   CHECK_GE(authn_token_validity_seconds_, 0);

http://git-wip-us.apache.org/repos/asf/kudu/blob/795f5ee9/src/kudu/security/token_signer.h
----------------------------------------------------------------------
diff --git a/src/kudu/security/token_signer.h b/src/kudu/security/token_signer.h
index 768ff53..a09a075 100644
--- a/src/kudu/security/token_signer.h
+++ b/src/kudu/security/token_signer.h
@@ -94,6 +94,10 @@ class TokenVerifier;
 // NOTE: The very first key created on the system bootstrap does not have
 //       propagation interval -- it turns active immediately.
 //
+// NOTE: One other result of the above is that the first key (Key 1) is actually
+//       active for longer than the rest. This has some potential security
+//       implications, so it's worth considering rolling twice at startup.
+//
 // For example, consider the following configuration for token signing keys:
 //   validity period:      4 days
 //   rotation interval:    1 days
@@ -106,6 +110,8 @@ class TokenVerifier;
 // Key 3:             <----AAAAA==========>
 // Key 4:                  <----AAAAA==========>
 //                              ...............
+// authn token:                     <**********>
+//
 // 'A' indicates the 'Originator Usage Period' (a.k.a. 'Activity Interval'),
 // i.e. the period in which the key is being used to sign tokens.
 //
@@ -113,20 +119,33 @@ class TokenVerifier;
 // the verifier may get tokens signed by the TSK and should consider them
 // for verification. The start of the recipient usage period is not crucial
 // in that regard, but the end of that period is -- after the TSK is expired,
-// a verifier should consider tokens signed by that TSK invalid
-// and stop accepting them even if the token signature is correct.
+// a verifier should consider tokens signed by that TSK invalid and stop
+// accepting them even if the token signature is correct and the expiration.
 //
-// When configuring the rotation and validity, consider the following constraint:
+// '<***>' indicates the validity interval for an authn token.
 //
-//   max_token_validity < tsk_validity_period - tsk_propagation_interval
+// When configuring key rotation and authn token validity interval durations,
+// consider the following constraint:
 //
-// In the example above, this means that no token may be issued with a validity
-// period longer than or equal to 3 days, without risking that the
-// signing/verification key would expire before the token.
+//   max_token_validity < tsk_validity_period -
+//       (tsk_propagation_interval + tsk_rotation_interval)
 //
-// NOTE: One other result of the above is that the first key (Key 1) is actually
-//       active for longer than the rest. This has some potential security
-//       implications, so it's worth considering rolling twice at startup.
+// The idea is that the token validity interval should be contained in the
+// corresponding TSK's validity interval. If the TSK is already expired at the
+// time of token verification, the token is considered invalid and the
+// verification of the token fails. This means that no token may be issued with
+// a validity period longer than or equal to TSK inactivity interval, without
+// risking that the signing/verification key would expire before the token
+// itself. The edge case is demonstrated by the following scenario:
+//
+// * A TSK is issued at 00:00:00 on day 4.
+// * An authn token generated and signed by current/active TSK at 23:59:59 on
+//   day 6. That's at the very end of the TSK's activity interval.
+// * From the diagram above it's clear that if the authn token validity
+//   interval were set to something longer than TSK inactivity interval
+//   (which is 2 days with for the specified parameters), an attempt to verify
+//   the token at 00:00:00 on day 8 or later would fail due to the expiration
+//   the corresponding TSK.
 //
 // NOTE: Current implementation of TokenSigner assumes the propagation
 //       interval is equal to the rotation interval.
@@ -172,7 +191,7 @@ class TokenSigner {
   // The 'authn_token_validity_seconds' parameter is used to specify validity
   // interval for the generated authn tokens and with 'key_rotation_seconds'
   // it defines validity interval of the newly generated TSK:
-  //   key_validity = key_rotation + authn_token_validity.
+  //   key_validity = 2 * key_rotation + authn_token_validity.
   //
   // That corresponds to the maximum possible token lifetime for the effective
   // TSK validity and rotation intervals: see the class comment above for
@@ -251,7 +270,9 @@ class TokenSigner {
   const int64_t authn_token_validity_seconds_;
 
   // TSK rotation interval: number of seconds between consecutive activations
-  // of new token signing keys.
+  // of new token signing keys. Note that in current implementation it defines
+  // the propagation interval as well, i.e. the TSK propagation interval is
+  // equal to the TSK rotation interval.
   const int64_t key_rotation_seconds_;
 
   // Period of validity for newly created token signing keys. In other words,