You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/02/02 07:55:32 UTC

[9/9] incubator-kudu git commit: KUDU-1317 (part 2): decay the replica creation load on TS lazily

KUDU-1317 (part 2): decay the replica creation load on TS lazily

This is a follow-up in response to a review by Binglin[1] suggesting
that the load value for TS Descriptors should be decayed on access
instead of in a background loop. This also adds a new test verifying
the behavior.

[1] http://gerrit.cloudera.org:8080/1654

Change-Id: I15899b8bf221f334fdec5983e1b5d93cc57fab3b
Reviewed-on: http://gerrit.cloudera.org:8080/1979
Reviewed-by: Jean-Daniel Cryans
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Todd Lipcon <to...@apache.org>
Reviewed-by: Binglin Chang <de...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/0b5545bc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/0b5545bc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/0b5545bc

Branch: refs/heads/master
Commit: 0b5545bc30518f895dd4c6936188831453252a7f
Parents: e078f77
Author: Todd Lipcon <to...@apache.org>
Authored: Mon Feb 1 14:55:33 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Feb 2 06:54:55 2016 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager-test.cc | 23 +++++++++++++++++++++++
 src/kudu/master/catalog_manager.cc      | 23 +++++------------------
 src/kudu/master/master-test.cc          |  1 -
 src/kudu/master/ts_descriptor.cc        | 27 ++++++++++++++++++++++++++-
 src/kudu/master/ts_descriptor.h         | 24 +++++++++++-------------
 5 files changed, 65 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/0b5545bc/src/kudu/master/catalog_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager-test.cc b/src/kudu/master/catalog_manager-test.cc
index b3dd302..bd48309 100644
--- a/src/kudu/master/catalog_manager-test.cc
+++ b/src/kudu/master/catalog_manager-test.cc
@@ -18,6 +18,7 @@
 
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/catalog_manager.h"
+#include "kudu/master/ts_descriptor.h"
 #include "kudu/util/test_util.h"
 
 namespace kudu {
@@ -83,5 +84,27 @@ TEST(TableInfoTest, TestAssignmentRanges) {
   }
 }
 
+TEST(TestTSDescriptor, TestReplicaCreationsDecay) {
+  TSDescriptor ts("test");
+  ASSERT_EQ(0, ts.RecentReplicaCreations());
+  ts.IncrementRecentReplicaCreations();
+
+  // The load should start at close to 1.0.
+  double val_a = ts.RecentReplicaCreations();
+  ASSERT_NEAR(1.0, val_a, 0.05);
+
+  // After 10ms it should have dropped a bit, but still be close to 1.0.
+  SleepFor(MonoDelta::FromMilliseconds(10));
+  double val_b = ts.RecentReplicaCreations();
+  ASSERT_LT(val_b, val_a);
+  ASSERT_NEAR(0.99, val_a, 0.05);
+
+  if (AllowSlowTests()) {
+    // After 10 seconds, we should have dropped to 0.5^(10/60) = 0.891
+    SleepFor(MonoDelta::FromSeconds(10));
+    ASSERT_NEAR(0.891, ts.RecentReplicaCreations(), 0.05);
+  }
+}
+
 } // namespace master
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/0b5545bc/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index e1ecadb..9a9344a 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -346,19 +346,7 @@ void CatalogManagerBgTasks::Shutdown() {
 }
 
 void CatalogManagerBgTasks::Run() {
-  MonoTime last_process_time = MonoTime::Now(MonoTime::FINE);
   while (!NoBarrier_Load(&closing_)) {
-    MonoTime now = MonoTime::Now(MonoTime::FINE);
-    double since_last_process = now.GetDeltaSince(last_process_time).ToSeconds();
-    last_process_time = now;
-
-    // Decay load estimates on tablet servers.
-    TSDescriptorVector ts_descs;
-    catalog_manager_->master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
-    for (const auto& ts : ts_descs) {
-      ts->DecayRecentReplicaCreations(since_last_process);
-    }
-
     // Perform assignment processing.
     if (!catalog_manager_->IsInitialized()) {
       LOG(WARNING) << "Catalog manager is not initialized!";
@@ -2852,8 +2840,8 @@ shared_ptr<TSDescriptor> CatalogManager::PickBetterReplicaLocation(
   //
   // TODO: in the future we may want to factor in other items such as available disk space,
   // actual request load, etc.
-  double load_a = a->recent_replica_creations() + a->num_live_replicas();
-  double load_b = b->recent_replica_creations() + b->num_live_replicas();
+  double load_a = a->RecentReplicaCreations() + a->num_live_replicas();
+  double load_b = b->RecentReplicaCreations() + b->num_live_replicas();
   if (load_a < load_b) {
     return a;
   } else if (load_b < load_a) {
@@ -2922,10 +2910,9 @@ void CatalogManager::SelectReplicas(const TSDescriptorVector& ts_descs,
     InsertOrDie(&already_selected, ts);
 
     // Increment the number of pending replicas so that we take this selection into
-    // account when assigning replicas for other tablets of the same table.
-    //
-    // This value gets decayed by the catalog manager background task.
-    ts->increment_recent_replica_creations();
+    // account when assigning replicas for other tablets of the same table. This
+    // value decays back to 0 over time.
+    ts->IncrementRecentReplicaCreations();
 
     TSRegistrationPB reg;
     ts->GetRegistration(&reg);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/0b5545bc/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index 9abf32f..9fb52db 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -271,7 +271,6 @@ TEST_F(MasterTest, TestCatalog) {
   ASSERT_EQ(1, tables.tables_size());
   ASSERT_EQ(kTableName, tables.tables(0).name());
 
-
   // Delete the table
   {
     DeleteTableRequestPB req;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/0b5545bc/src/kudu/master/ts_descriptor.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc
index 8fa477b..d92a600 100644
--- a/src/kudu/master/ts_descriptor.cc
+++ b/src/kudu/master/ts_descriptor.cc
@@ -49,6 +49,7 @@ TSDescriptor::TSDescriptor(std::string perm_id)
       last_heartbeat_(MonoTime::Now(MonoTime::FINE)),
       has_tablet_report_(false),
       recent_replica_creations_(0),
+      last_replica_creations_decay_(MonoTime::Now(MonoTime::FINE)),
       num_live_replicas_(0) {
 }
 
@@ -110,9 +111,33 @@ void TSDescriptor::set_has_tablet_report(bool has_report) {
   has_tablet_report_ = has_report;
 }
 
-void TSDescriptor::DecayRecentReplicaCreations(double secs_since_last_decay) {
+void TSDescriptor::DecayRecentReplicaCreationsUnlocked() {
+  // In most cases, we won't have any recent replica creations, so
+  // we don't need to bother calling the clock, etc.
+  if (recent_replica_creations_ == 0) return;
+
   const double kHalflifeSecs = 60;
+  MonoTime now = MonoTime::Now(MonoTime::FINE);
+  double secs_since_last_decay = now.GetDeltaSince(last_replica_creations_decay_).ToSeconds();
   recent_replica_creations_ *= pow(0.5, secs_since_last_decay / kHalflifeSecs);
+
+  // If sufficiently small, reset down to 0 to take advantage of the fast path above.
+  if (recent_replica_creations_ < 1e-12) {
+    recent_replica_creations_ = 0;
+  }
+  last_replica_creations_decay_ = now;
+}
+
+void TSDescriptor::IncrementRecentReplicaCreations() {
+  lock_guard<simple_spinlock> l(&lock_);
+  DecayRecentReplicaCreationsUnlocked();
+  recent_replica_creations_ += 1;
+}
+
+double TSDescriptor::RecentReplicaCreations() {
+  boost::lock_guard<simple_spinlock> l(lock_);
+  DecayRecentReplicaCreationsUnlocked();
+  return recent_replica_creations_;
 }
 
 void TSDescriptor::GetRegistration(TSRegistrationPB* reg) const {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/0b5545bc/src/kudu/master/ts_descriptor.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h
index 404e8f4..b0327fc 100644
--- a/src/kudu/master/ts_descriptor.h
+++ b/src/kudu/master/ts_descriptor.h
@@ -90,22 +90,15 @@ class TSDescriptor {
   Status GetConsensusProxy(const std::shared_ptr<rpc::Messenger>& messenger,
                            std::shared_ptr<consensus::ConsensusServiceProxy>* proxy);
 
-  void increment_recent_replica_creations() {
-    lock_guard<simple_spinlock> l(&lock_);
-    recent_replica_creations_ += 1;
-  }
-
-  // Decay the accounting of how many replicas have been recently
-  // created on this host.
-  void DecayRecentReplicaCreations(double secs_since_last_decay);
+  // Increment the accounting of the number of replicas recently created on this
+  // server. This value will automatically decay over time.
+  void IncrementRecentReplicaCreations();
 
   // Return the number of replicas which have recently been created on this
   // TS. This number is incremented when replicas are placed on the TS, and
-  // then decayed over time.
-  double recent_replica_creations() const {
-    lock_guard<simple_spinlock> l(&lock_);
-    return recent_replica_creations_;
-  }
+  // then decayed over time. This method is not 'const' because each call
+  // actually performs the time-based decay.
+  double RecentReplicaCreations();
 
   // Set the number of live replicas (i.e. running or bootstrapping).
   void set_num_live_replicas(int n) {
@@ -121,11 +114,15 @@ class TSDescriptor {
   }
 
  private:
+  FRIEND_TEST(TestTSDescriptor, TestReplicaCreationsDecay);
+
   explicit TSDescriptor(std::string perm_id);
 
   // Uses DNS to resolve registered hosts to a single Sockaddr.
   Status ResolveSockaddr(Sockaddr* addr) const;
 
+  void DecayRecentReplicaCreationsUnlocked();
+
   mutable simple_spinlock lock_;
 
   const std::string permanent_uuid_;
@@ -140,6 +137,7 @@ class TSDescriptor {
   // The number of times this tablet server has recently been selected to create a
   // tablet replica. This value decays back to 0 over time.
   double recent_replica_creations_;
+  MonoTime last_replica_creations_decay_;
 
   // The number of live replicas on this host, from the last heartbeat.
   int num_live_replicas_;