You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2019/07/16 05:07:48 UTC

[kudu] 01/02: KUDU-2823 Place tablet replicas based on dimension

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 32cbf9d6bf37ee2df21837e0f62bd15064b1b4ed
Author: oclarms <oc...@gmail.com>
AuthorDate: Wed Jun 12 20:32:25 2019 +0800

    KUDU-2823 Place tablet replicas based on dimension
    
    When we add a new range to the fact table, we expect replicas
    of the newly created tablet to be evenly distributed across all
    available tablet servers.
    
    This is especially important when we use time as the range key.
    The more recent the data, the hotter it gets. We expect hot tablets
    on the cluster to be evenly distributed.
    
    Unfortunately, after we add some new tablet servers to the cluster,
    creating a new tablet replica will prioritize the new tablet servers for
    placement according to the current placement policy. This is because
    we prefer to select the tablet server which has a smaller number of
    tablet replicas. This will cause hot tablets to be concentrated on these
    new tablet servers.
    
    So, I added a new placement policy. When creating a new tablet replica,
    we prefer to select the tablet server which has a smaller number of
    tablet replicas in the dimension. You can set dimensions when creating
    tables and adding partitions; this will ensure that the new tablets are
    evenly distributed in the cluster based on dimension.
    
    Change-Id: I48a225e221eb42ef2f5489687e80a151d8dc1a42
    Reviewed-on: http://gerrit.cloudera.org:8080/13632
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/client/client.cc                        |  21 +-
 src/kudu/client/client.h                         |  54 +++++
 src/kudu/client/table_alterer-internal.cc        |   4 +
 src/kudu/client/table_alterer-internal.h         |   3 +
 src/kudu/client/table_creator-internal.h         |   2 +
 src/kudu/integration-tests/create-table-itest.cc | 144 +++++++++++-
 src/kudu/master/catalog_manager.cc               |  48 +++-
 src/kudu/master/catalog_manager.h                |   3 +-
 src/kudu/master/master.proto                     |  18 ++
 src/kudu/master/master_service.cc                |   3 +
 src/kudu/master/placement_policy-test.cc         | 283 ++++++++++++++++-------
 src/kudu/master/placement_policy.cc              |  26 ++-
 src/kudu/master/placement_policy.h               |  37 ++-
 src/kudu/master/sys_catalog.cc                   |   1 +
 src/kudu/master/ts_descriptor.cc                 |   6 +-
 src/kudu/master/ts_descriptor.h                  |  27 ++-
 src/kudu/tablet/metadata.proto                   |   4 +
 src/kudu/tablet/tablet-harness.h                 |   1 +
 src/kudu/tablet/tablet_bootstrap-test.cc         |   1 +
 src/kudu/tablet/tablet_metadata.cc               |  25 +-
 src/kudu/tablet/tablet_metadata.h                |  11 +-
 src/kudu/tools/kudu-tool-test.cc                 |   2 +
 src/kudu/tserver/heartbeater.cc                  |   4 +
 src/kudu/tserver/mini_tablet_server.cc           |   2 +-
 src/kudu/tserver/tablet_copy_client.cc           |   1 +
 src/kudu/tserver/tablet_server-test.cc           |   2 +-
 src/kudu/tserver/tablet_service.cc               |   1 +
 src/kudu/tserver/ts_tablet_manager-test.cc       |  10 +-
 src/kudu/tserver/ts_tablet_manager.cc            |  18 ++
 src/kudu/tserver/ts_tablet_manager.h             |   7 +
 src/kudu/tserver/tserver_admin.proto             |   4 +
 31 files changed, 643 insertions(+), 130 deletions(-)

diff --git a/src/kudu/client/client.cc b/src/kudu/client/client.cc
index cc4ee3f..62f8ef6 100644
--- a/src/kudu/client/client.cc
+++ b/src/kudu/client/client.cc
@@ -724,6 +724,11 @@ KuduTableCreator& KuduTableCreator::num_replicas(int num_replicas) {
   return *this;
 }
 
+KuduTableCreator& KuduTableCreator::dimension_label(const std::string& dimension_label) {
+  data_->dimension_label_ = dimension_label;
+  return *this;
+}
+
 KuduTableCreator& KuduTableCreator::extra_configs(const map<string, string>& extra_configs) {
   data_->extra_configs_ = extra_configs;
   return *this;
@@ -760,6 +765,9 @@ Status KuduTableCreator::Create() {
   if (data_->num_replicas_ != boost::none) {
     req.set_num_replicas(data_->num_replicas_.get());
   }
+  if (data_->dimension_label_) {
+    req.set_dimension_label(data_->dimension_label_.get());
+  }
   if (data_->extra_configs_) {
     req.mutable_extra_configs()->insert(data_->extra_configs_->begin(),
                                         data_->extra_configs_->end());
@@ -1132,6 +1140,16 @@ KuduTableAlterer* KuduTableAlterer::AddRangePartition(
     KuduPartialRow* upper_bound,
     KuduTableCreator::RangePartitionBound lower_bound_type,
     KuduTableCreator::RangePartitionBound upper_bound_type) {
+  return AddRangePartitionWithDimension(
+      lower_bound, upper_bound, "", lower_bound_type, upper_bound_type);
+}
+
+KuduTableAlterer* KuduTableAlterer::AddRangePartitionWithDimension(
+    KuduPartialRow* lower_bound,
+    KuduPartialRow* upper_bound,
+    const std::string& dimension_label,
+    KuduTableCreator::RangePartitionBound lower_bound_type,
+    KuduTableCreator::RangePartitionBound upper_bound_type) {
 
   if (lower_bound == nullptr || upper_bound == nullptr) {
     data_->status_ = Status::InvalidArgument("range partition bounds may not be null");
@@ -1153,7 +1171,8 @@ KuduTableAlterer* KuduTableAlterer::AddRangePartition(
                  unique_ptr<KuduPartialRow>(lower_bound),
                  unique_ptr<KuduPartialRow>(upper_bound),
                  lower_bound_type,
-                 upper_bound_type };
+                 upper_bound_type,
+                 dimension_label.empty() ? boost::none : boost::make_optional(dimension_label) };
   data_->steps_.emplace_back(std::move(s));
   data_->has_alter_partitioning_steps = true;
   return this;
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index fdc408d..513aa36 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -874,6 +874,19 @@ class KUDU_EXPORT KuduTableCreator {
   /// @return Reference to the modified table creator.
   KuduTableCreator& num_replicas(int n_replicas);
 
+  /// Set the dimension label for all tablets created at table creation time.
+  ///
+  /// @note By default, the master will try to place newly created tablet replicas on tablet
+  /// servers with a small number of tablet replicas. If the dimension label is provided,
+  /// newly created replicas will be evenly distributed in the cluster based on the dimension
+  /// label. In other words, the master will try to place newly created tablet replicas on
+  /// tablet servers with a small number of tablet replicas belonging to this dimension label.
+  ///
+  /// @param [in] dimension_label
+  ///   The dimension label for the tablet to be created.
+  /// @return Reference to the modified table creator.
+  KuduTableCreator& dimension_label(const std::string& dimension_label);
+
   /// Sets the table's extra configuration properties.
   ///
   /// If the value of the kv pair is empty, the property will be ignored.
@@ -1204,6 +1217,47 @@ class KUDU_EXPORT KuduTableAlterer {
       KuduTableCreator::RangePartitionBound lower_bound_type = KuduTableCreator::INCLUSIVE_BOUND,
       KuduTableCreator::RangePartitionBound upper_bound_type = KuduTableCreator::EXCLUSIVE_BOUND);
 
+  /// Add a range partition to the table with dimension label.
+  ///
+  /// @note The table alterer takes ownership of the rows.
+  ///
+  /// @note Multiple range partitions may be added as part of a single alter
+  ///   table transaction by calling this method multiple times on the table
+  ///   alterer.
+  ///
+  /// @note This client may immediately write and scan the new tablets when
+  ///   Alter() returns success, however other existing clients may have to wait
+  ///   for a timeout period to elapse before the tablets become visible. This
+  ///   period is configured by the master's 'table_locations_ttl_ms' flag, and
+  ///   defaults to 5 minutes.
+  ///
+  /// @note See KuduTableCreator::dimension_label() for details on dimension label.
+  ///
+  /// @param [in] lower_bound
+  ///   The lower bound of the range partition to add. If the row is empty, then
+  ///   the lower bound is unbounded. If any of the columns are unset, the
+  ///   logical minimum value for the column's type will be used by default.
+  /// @param [in] upper_bound
+  ///   The upper bound of the range partition to add. If the row is empty, then
+  ///   the upper bound is unbounded. If any of the individual columns are
+  ///   unset, the logical minimum value for the column's type will be used by
+  ///   default.
+  /// @param [in] dimension_label
+  ///   The dimension label for the tablet to be created.
+  /// @param [in] lower_bound_type
+  ///   The type of the lower bound, either inclusive or exclusive. Defaults to
+  ///   inclusive.
+  /// @param [in] upper_bound_type
+  ///   The type of the upper bound, either inclusive or exclusive. Defaults to
+  ///   exclusive.
+  /// @return Raw pointer to this alterer object.
+  KuduTableAlterer* AddRangePartitionWithDimension(
+      KuduPartialRow* lower_bound,
+      KuduPartialRow* upper_bound,
+      const std::string& dimension_label,
+      KuduTableCreator::RangePartitionBound lower_bound_type = KuduTableCreator::INCLUSIVE_BOUND,
+      KuduTableCreator::RangePartitionBound upper_bound_type = KuduTableCreator::EXCLUSIVE_BOUND);
+
   /// Drop the range partition from the table with the specified lower bound and
   /// upper bound. The bounds must match an existing range partition exactly,
   /// and may not span multiple range partitions.
diff --git a/src/kudu/client/table_alterer-internal.cc b/src/kudu/client/table_alterer-internal.cc
index 7b5302d..fd77fcc 100644
--- a/src/kudu/client/table_alterer-internal.cc
+++ b/src/kudu/client/table_alterer-internal.cc
@@ -152,6 +152,10 @@ Status KuduTableAlterer::Data::ToRequest(AlterTableRequestPB* req) {
 
         encoder.Add(lower_bound_type, *s.lower_bound);
         encoder.Add(upper_bound_type, *s.upper_bound);
+
+        if (s.dimension_label) {
+          pb_step->mutable_add_range_partition()->set_dimension_label(s.dimension_label.get());
+        }
         break;
       }
       case AlterTableRequestPB::DROP_RANGE_PARTITION:
diff --git a/src/kudu/client/table_alterer-internal.h b/src/kudu/client/table_alterer-internal.h
index df480a7..2cfa1de 100644
--- a/src/kudu/client/table_alterer-internal.h
+++ b/src/kudu/client/table_alterer-internal.h
@@ -63,6 +63,9 @@ class KuduTableAlterer::Data {
     std::unique_ptr<KuduPartialRow> upper_bound;
     KuduTableCreator::RangePartitionBound lower_bound_type;
     KuduTableCreator::RangePartitionBound upper_bound_type;
+
+    // The dimension label for tablet. Only set when the StepType is ADD_RANGE_PARTITION.
+    boost::optional<std::string> dimension_label;
   };
   std::vector<Step> steps_;
 
diff --git a/src/kudu/client/table_creator-internal.h b/src/kudu/client/table_creator-internal.h
index f631845..5caee98 100644
--- a/src/kudu/client/table_creator-internal.h
+++ b/src/kudu/client/table_creator-internal.h
@@ -62,6 +62,8 @@ class KuduTableCreator::Data {
 
   boost::optional<int> num_replicas_;
 
+  boost::optional<std::string> dimension_label_;
+
   boost::optional<std::map<std::string, std::string>> extra_configs_;
 
   MonoDelta timeout_;
diff --git a/src/kudu/integration-tests/create-table-itest.cc b/src/kudu/integration-tests/create-table-itest.cc
index b9e574b..ba7fc39 100644
--- a/src/kudu/integration-tests/create-table-itest.cc
+++ b/src/kudu/integration-tests/create-table-itest.cc
@@ -33,6 +33,7 @@
 #include "kudu/client/client.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h"
+#include "kudu/common/common.pb.h"
 #include "kudu/common/partial_row.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/wire_protocol-test-util.h"
@@ -58,6 +59,7 @@
 using std::multimap;
 using std::set;
 using std::string;
+using std::unique_ptr;
 using std::vector;
 
 METRIC_DECLARE_entity(server);
@@ -65,6 +67,7 @@ METRIC_DECLARE_histogram(handler_latency_kudu_tserver_TabletServerAdminService_C
 
 namespace kudu {
 
+using client::KuduClient;
 using client::KuduSchema;
 using cluster::ClusterNodes;
 
@@ -215,7 +218,146 @@ TEST_F(CreateTableITest, TestSpreadReplicasEvenly) {
   ASSERT_GE(avg_num_peers, kNumServers / 2);
 }
 
-static void LookUpRandomKeysLoop(std::shared_ptr<master::MasterServiceProxy> master,
+// Regression test for KUDU-2823. Ensure that, after we add some new tablet servers
+// to the cluster, the new tablets are evenly distributed in the cluster based on
+// dimensions.
+TEST_F(CreateTableITest, TestSpreadReplicasEvenlyWithDimension) {
+  const int kNumServers = 10;
+  const int kNumTablets = 20;
+  vector<int32_t> num_new_replicas(kNumServers, 0);
+  vector<string> master_flags = {
+      "--tserver_last_replica_creations_halflife_ms=10",
+  };
+  // We have five tablet servers.
+  NO_FATALS(StartCluster({}, master_flags, kNumServers / 2));
+
+  Schema schema = Schema({ ColumnSchema("key1", INT32),
+                           ColumnSchema("key2", INT32),
+                           ColumnSchema("int_val", INT32),
+                           ColumnSchema("string_val", STRING, true) }, 2);
+  auto client_schema = KuduSchema::FromSchema(schema);
+
+  auto create_table_func = [](KuduClient* client,
+                              KuduSchema* client_schema,
+                              const string& table_name,
+                              int32_t range_lower_bound,
+                              int32_t range_upper_bound,
+                              const string& dimension_label) -> Status {
+    gscoped_ptr<client::KuduTableCreator> table_creator(client->NewTableCreator());
+    unique_ptr<KuduPartialRow> lower_bound(client_schema->NewRow());
+    RETURN_NOT_OK(lower_bound->SetInt32("key2", range_lower_bound));
+    unique_ptr<KuduPartialRow> upper_bound(client_schema->NewRow());
+    RETURN_NOT_OK(upper_bound->SetInt32("key2", range_upper_bound));
+    return table_creator->table_name(table_name)
+        .schema(client_schema)
+        .add_hash_partitions({ "key1" }, kNumTablets)
+        .set_range_partition_columns({ "key2" })
+        .add_range_partition(lower_bound.release(), upper_bound.release())
+        .num_replicas(3)
+        .dimension_label(dimension_label)
+        .Create();
+  };
+
+  auto alter_table_func = [](KuduClient* client,
+                             KuduSchema* client_schema,
+                             const string& table_name,
+                             int32_t range_lower_bound,
+                             int32_t range_upper_bound,
+                             const string& dimension_label) -> Status {
+    gscoped_ptr<client::KuduTableAlterer> table_alterer(client->NewTableAlterer(table_name));
+    unique_ptr<KuduPartialRow> lower_bound(client_schema->NewRow());
+    RETURN_NOT_OK(lower_bound->SetInt32("key2", range_lower_bound));
+    unique_ptr<KuduPartialRow> upper_bound(client_schema->NewRow());
+    RETURN_NOT_OK(upper_bound->SetInt32("key2", range_upper_bound));
+    return table_alterer->AddRangePartitionWithDimension(lower_bound.release(),
+                                                         upper_bound.release(),
+                                                         dimension_label)
+                        ->Alter();
+  };
+
+  auto calc_stddev_func = [](const vector<int32_t>& num_replicas,
+                             double mean_per_ts,
+                             int32_t ts_idx_start,
+                             int32_t ts_idx_end) {
+    double sum_squared_deviation = 0;
+    for (int ts_idx = ts_idx_start; ts_idx < ts_idx_end; ts_idx++) {
+      int num_ts = num_replicas[ts_idx];
+      LOG(INFO) << "TS " << ts_idx << " has " << num_ts << " tablets";
+      double deviation = static_cast<double>(num_ts) - mean_per_ts;
+      sum_squared_deviation += deviation * deviation;
+    }
+    return sqrt(sum_squared_deviation / (mean_per_ts - 1));
+  };
+
+  {
+    for (int ts_idx = 0; ts_idx < kNumServers / 2; ts_idx++) {
+      num_new_replicas[ts_idx] = inspect_->ListTabletsOnTS(ts_idx).size();
+    }
+    // create the 'test-table1' table with 'label1'.
+    ASSERT_OK(create_table_func(client_.get(), &client_schema, "test-table1", 0, 100, "label1"));
+    for (int ts_idx = 0; ts_idx < kNumServers / 2; ts_idx++) {
+      int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size();
+      num_new_replicas[ts_idx] = num_replicas - num_new_replicas[ts_idx];
+    }
+    // Check that the replicas are fairly well spread by computing the standard
+    // deviation of the number of replicas per alive server.
+    const double kMeanPerServer = kNumTablets * 3.0 / kNumServers * 2;
+    double stddev = calc_stddev_func(
+        num_new_replicas, kMeanPerServer, 0, kNumServers / 2);
+    LOG(INFO) << "stddev = " << stddev;
+    ASSERT_LE(stddev, 3.0);
+  }
+
+  // Wait for the count of recently created replicas to decay to 0.
+  SleepFor(MonoDelta::FromMilliseconds(1000));
+
+  // Add five new tablet servers to cluster.
+  for (int ts_idx = kNumServers / 2; ts_idx < kNumServers; ts_idx++) {
+    ASSERT_OK(cluster_->AddTabletServer());
+  }
+  ASSERT_OK(cluster_->WaitForTabletServerCount(kNumServers, MonoDelta::FromSeconds(60)));
+
+  {
+    for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) {
+      num_new_replicas[ts_idx] = inspect_->ListTabletsOnTS(ts_idx).size();
+    }
+    // create the 'test-table2' table with 'label2'.
+    ASSERT_OK(create_table_func(client_.get(), &client_schema, "test-table2", 0, 100, "label2"));
+    for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) {
+      int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size();
+      num_new_replicas[ts_idx] = num_replicas - num_new_replicas[ts_idx];
+    }
+    // Check that the replicas are fairly well spread by computing the standard
+    // deviation of the number of replicas per server.
+    const double kMeanPerServer = kNumTablets * 3.0 / kNumServers;
+    double stddev = calc_stddev_func(num_new_replicas, kMeanPerServer, 0, kNumServers);
+    LOG(INFO) << "stddev = " << stddev;
+    ASSERT_LE(stddev, 3.0);
+  }
+
+  // Wait for the count of recently created replicas to decay to 0.
+  SleepFor(MonoDelta::FromMilliseconds(1000));
+
+  {
+    for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) {
+      num_new_replicas[ts_idx] = inspect_->ListTabletsOnTS(ts_idx).size();
+    }
+    // Add partition with 'label3' to 'test-table1'.
+    ASSERT_OK(alter_table_func(client_.get(), &client_schema, "test-table1", 100, 200, "label3"));
+    for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) {
+      int num_replicas = inspect_->ListTabletsOnTS(ts_idx).size();
+      num_new_replicas[ts_idx] = num_replicas - num_new_replicas[ts_idx];
+    }
+    // Check that the replicas are fairly well spread by computing the standard
+    // deviation of the number of replicas per server.
+    const double kMeanPerServer = kNumTablets * 3.0 / kNumServers;
+    double stddev = calc_stddev_func(num_new_replicas, kMeanPerServer, 0, kNumServers);
+    LOG(INFO) << "stddev = " << stddev;
+    ASSERT_LE(stddev, 3.0);
+  }
+}
+
+static void LookUpRandomKeysLoop(const std::shared_ptr<master::MasterServiceProxy>& master,
                                  const char* table_name,
                                  AtomicBool* quit) {
   Schema schema(GetSimpleTestSchema());
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 54b9592..1ea7408 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1635,10 +1635,12 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
       e->mutable_metadata()->AbortMutation();
     }
   });
+  optional<string> dimension_label =
+      req.has_dimension_label() ? make_optional<string>(req.dimension_label()) : none;
   for (const Partition& partition : partitions) {
     PartitionPB partition_pb;
     partition.ToPB(&partition_pb);
-    tablets.emplace_back(CreateTabletInfo(table, partition_pb));
+    tablets.emplace_back(CreateTabletInfo(table, partition_pb, dimension_label));
   }
   TRACE("Created new table and tablet info");
 
@@ -1779,14 +1781,19 @@ scoped_refptr<TableInfo> CatalogManager::CreateTableInfo(
   return table;
 }
 
-scoped_refptr<TabletInfo> CatalogManager::CreateTabletInfo(const scoped_refptr<TableInfo>& table,
-                                                           const PartitionPB& partition) {
+scoped_refptr<TabletInfo> CatalogManager::CreateTabletInfo(
+    const scoped_refptr<TableInfo>& table,
+    const PartitionPB& partition,
+    const optional<string>& dimension_label) {
   scoped_refptr<TabletInfo> tablet(new TabletInfo(table, GenerateId()));
   tablet->mutable_metadata()->StartMutation();
-  SysTabletsEntryPB *metadata = &tablet->mutable_metadata()->mutable_dirty()->pb;
+  SysTabletsEntryPB* metadata = &tablet->mutable_metadata()->mutable_dirty()->pb;
   metadata->set_state(SysTabletsEntryPB::PREPARING);
   metadata->mutable_partition()->CopyFrom(partition);
   metadata->set_table_id(table->id());
+  if (dimension_label) {
+    metadata->set_dimension_label(*dimension_label);
+  }
   return tablet;
 }
 
@@ -2259,7 +2266,10 @@ Status CatalogManager::ApplyAlterPartitioningSteps(
 
           PartitionPB partition_pb;
           partition.ToPB(&partition_pb);
-          new_tablets.emplace(lower_bound, CreateTabletInfo(table, partition_pb));
+          optional<string> dimension_label = step.add_range_partition().has_dimension_label() ?
+              make_optional<string>(step.add_range_partition().dimension_label()) : none;
+          new_tablets.emplace(lower_bound,
+                              CreateTabletInfo(table, partition_pb, dimension_label));
         }
         break;
       }
@@ -3328,6 +3338,7 @@ class AsyncCreateReplica : public RetrySpecificTSRpcTask {
         tablet_lock.data().pb.consensus_state().committed_config());
     req_.mutable_extra_config()->CopyFrom(
         table_lock.data().pb.extra_config());
+    req_.set_dimension_label(tablet_lock.data().pb.dimension_label());
   }
 
   string type_name() const override { return "Create Tablet"; }
@@ -3693,6 +3704,15 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) {
     TSDescriptorVector ts_descs;
     master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
 
+    // Get the dimension label of the tablet, if set; otherwise it stays none.
+    optional<string> dimension = none;
+    {
+      TabletMetadataLock l(tablet_.get(), LockMode::READ);
+      if (tablet_->metadata().state().pb.has_dimension_label()) {
+        dimension = tablet_->metadata().state().pb.dimension_label();
+      }
+    }
+
     // Some of the tablet servers hosting the current members of the config
     // (see the 'existing' populated above) might be presumably dead.
     // Inclusion of a presumably dead tablet server into 'existing' is OK:
@@ -3702,7 +3722,7 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) {
     // to host the extra replica is 'ts_descs' after blacklisting all elements
     // common with 'existing'.
     PlacementPolicy policy(std::move(ts_descs), rng_);
-    s = policy.PlaceExtraTabletReplica(std::move(existing), &extra_replica);
+    s = policy.PlaceExtraTabletReplica(std::move(existing), dimension, &extra_replica);
   }
   if (PREDICT_FALSE(!s.ok())) {
     auto msg = Substitute("no extra replica candidate found for tablet $0: $1",
@@ -4425,10 +4445,13 @@ void CatalogManager::HandleAssignCreatingTablet(const scoped_refptr<TabletInfo>&
 
   const PersistentTabletInfo& old_info = tablet->metadata().state();
 
+  optional<string> dimension_label = old_info.pb.has_dimension_label() ?
+      make_optional<string>(old_info.pb.dimension_label()) : none;
   // The "tablet creation" was already sent, but we didn't receive an answer
   // within the timeout. So the tablet will be replaced by a new one.
   scoped_refptr<TabletInfo> replacement = CreateTabletInfo(tablet->table(),
-                                                           old_info.pb.partition());
+                                                           old_info.pb.partition(),
+                                                           dimension_label);
   LOG_WITH_PREFIX(WARNING) << Substitute("Tablet $0 was not created within the "
       "allowed timeout. Replacing with a new tablet $1",
       tablet->ToString(), replacement->id());
@@ -4636,9 +4659,15 @@ Status CatalogManager::SelectReplicasForTablet(const PlacementPolicy& policy,
   config->set_obsolete_local(nreplicas == 1);
   config->set_opid_index(consensus::kInvalidOpIdIndex);
 
+  // Get the dimension label of the tablet, if set; otherwise it stays none.
+  optional<string> dimension = none;
+  if (tablet->metadata().state().pb.has_dimension_label()) {
+    dimension = tablet->metadata().state().pb.dimension_label();
+  }
+
   // Select the set of replicas for the tablet.
   TSDescriptorVector descriptors;
-  RETURN_NOT_OK_PREPEND(policy.PlaceTabletReplicas(nreplicas, &descriptors),
+  RETURN_NOT_OK_PREPEND(policy.PlaceTabletReplicas(nreplicas, dimension, &descriptors),
                         Substitute("failed to place replicas for tablet $0 "
                                    "(table '$1')",
                                    tablet->id(), table_guard.data().name()));
@@ -4824,6 +4853,9 @@ Status CatalogManager::ReplaceTablet(const string& tablet_id, ReplaceTabletRespo
   new_metadata->set_state(SysTabletsEntryPB::PREPARING);
   new_metadata->mutable_partition()->CopyFrom(replaced_pb.partition());
   new_metadata->set_table_id(table->id());
+  if (replaced_pb.has_dimension_label()) {
+    new_metadata->set_dimension_label(replaced_pb.dimension_label());
+  }
 
   const string replace_msg = Substitute("replaced by tablet $0", new_tablet->id());
   old_tablet->mutable_metadata()->mutable_dirty()->set_state(SysTabletsEntryPB::DELETED,
diff --git a/src/kudu/master/catalog_manager.h b/src/kudu/master/catalog_manager.h
index df20777..aa08b0d 100644
--- a/src/kudu/master/catalog_manager.h
+++ b/src/kudu/master/catalog_manager.h
@@ -861,7 +861,8 @@ class CatalogManager : public tserver::TabletReplicaLookupIf {
   // Leaves the tablet "write locked" with the new info in the
   // "dirty" state field.
   scoped_refptr<TabletInfo> CreateTabletInfo(const scoped_refptr<TableInfo>& table,
-                                             const PartitionPB& partition);
+                                             const PartitionPB& partition,
+                                             const boost::optional<std::string>& dimension_label);
 
 
   // Builds the TabletLocationsPB for a tablet based on the provided TabletInfo
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index dbf01a7..c74f3f1 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -142,6 +142,10 @@ message SysTabletsEntryPB {
 
   // The table id for the tablet.
   required bytes table_id = 6;
+
+  // The dimension label for the tablet. Used for dimension-specific
+  // placement of the tablet's replicas.
+  optional string dimension_label = 8;
 }
 
 // The on-disk entry in the sys.catalog table ("metadata" column) for
@@ -311,6 +315,11 @@ message TSHeartbeatRequestPB {
   // Replica management parameters that the tablet server is running with.
   // This field is set only if the registration field is present.
   optional consensus.ReplicaManagementInfoPB replica_management_info = 7;
+
+  // The number of tablets that are BOOTSTRAPPING or RUNNING in each dimension.
+  // Used by the master to determine load when creating new tablet replicas
+  // based on dimension.
+  map<string, int32> num_live_tablets_by_dimension = 8;
 }
 
 message TSHeartbeatResponsePB {
@@ -457,6 +466,11 @@ message CreateTableRequestPB {
 
   // The table's extra configuration properties.
   map<string, string> extra_configs = 9;
+
+  // The dimension label for tablets that were created during table creation. Used for
+  // dimension-specific placement of tablet replicas corresponding to the partitions of
+  // the newly created table.
+  optional string dimension_label = 10;
 }
 
 message CreateTableResponsePB {
@@ -586,6 +600,10 @@ message AlterTableRequestPB {
     // A set of row operations containing the lower and upper range bound for
     // the range partition to add or drop.
     optional RowOperationsPB range_bounds = 1;
+
+    // The dimension label for the tablet. Used for dimension-specific placement
+    // of the tablet's replicas.
+    optional string dimension_label = 2;
   }
   message DropRangePartition {
     // A set of row operations containing the lower and upper range bound for
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index 683809c..d1647ce 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -251,6 +251,9 @@ void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req,
   // 4. Update tserver soft state based on the heartbeat contents.
   ts_desc->UpdateHeartbeatTime();
   ts_desc->set_num_live_replicas(req->num_live_tablets());
+  ts_desc->set_num_live_replicas_by_dimension(
+      std::move(TabletNumByDimensionMap(req->num_live_tablets_by_dimension().begin(),
+                                        req->num_live_tablets_by_dimension().end())));
 
   // 5. Only leaders handle tablet reports.
   if (is_leader_master && req->has_tablet_report()) {
diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc
index 541d0eb..caf16d3 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -17,7 +17,9 @@
 
 #include "kudu/master/placement_policy.h"
 
+#include <cmath>
 #include <cstddef>
+#include <initializer_list>
 #include <map>
 #include <memory>
 #include <set>
@@ -36,10 +38,12 @@
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 
-using std::make_shared;
+using boost::make_optional;
+using boost::none;
+using boost::optional;
+using std::initializer_list;
 using std::map;
 using std::multiset;
-using std::set;
 using std::shared_ptr;
 using std::string;
 using std::vector;
@@ -52,8 +56,12 @@ namespace master {
 class PlacementPolicyTest : public ::testing::Test {
  public:
   struct TSInfo {
-    const string id;          // TS identifier
-    const size_t replica_num; // number of tablet replicas hosted by TS
+    // TS identifier
+    const string id;
+    // number of tablet replicas hosted by TS
+    const size_t replica_num;
+    // number of tablet replicas in each dimension
+    TabletNumByDimensionMap replica_num_by_dimension;
   };
 
   struct LocationInfo {
@@ -98,6 +106,7 @@ class PlacementPolicyTest : public ::testing::Test {
         shared_ptr<TSDescriptor> tsd(new TSDescriptor(ts.id));
         tsd->set_num_live_replicas(ts.replica_num);
         tsd->location_.emplace(location_info.id);
+        tsd->set_num_live_replicas_by_dimension(ts.replica_num_by_dimension);
         ts_descriptors.emplace_back(std::move(tsd));
       }
     }
@@ -249,40 +258,49 @@ TEST_F(PlacementPolicyTest, PlaceExtraTabletReplicaNoLoc) {
   // 'No location case': expecting backward compatible behavior with
   // non-location-aware logic.
   const vector<LocationInfo> cluster_info = {
-    { "", { { "ts0", 0 }, { "ts1", 10 }, { "ts2", 1 }, } },
+    {
+      "",
+      {
+        { "ts0", 0 },
+        { "ts1", 10, { { "labelA", 10 } } },
+        { "ts2", 1, { { "labelA", 1 } } },
+      }
+    },
   };
   ASSERT_OK(Prepare(cluster_info));
 
   const auto& all = descriptors();
   PlacementPolicy policy(all, rng());
 
-  {
-    TSDescriptorVector existing(all.begin(), all.end());
-    existing.pop_back();
-    shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
-    ASSERT_TRUE(extra_ts);
-    ASSERT_EQ("ts2", extra_ts->permanent_uuid());
-  }
+  for (const auto& label : initializer_list<optional<string>>{ none, string("labelA") }) {
+    {
+      TSDescriptorVector existing(all.begin(), all.end());
+      existing.pop_back();
+      shared_ptr<TSDescriptor> extra_ts;
+      ASSERT_OK(policy.PlaceExtraTabletReplica(existing, label, &extra_ts));
+      ASSERT_TRUE(extra_ts);
+      ASSERT_EQ("ts2", extra_ts->permanent_uuid());
+    }
 
-  {
-    TSDescriptorVector existing(all.begin(), all.end());
-    existing.pop_back();
-    existing.pop_back();
-    shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
-    ASSERT_TRUE(extra_ts);
-    ASSERT_EQ("ts2", extra_ts->permanent_uuid());
-  }
+    {
+      TSDescriptorVector existing(all.begin(), all.end());
+      existing.pop_back();
+      existing.pop_back();
+      shared_ptr<TSDescriptor> extra_ts;
+      ASSERT_OK(policy.PlaceExtraTabletReplica(existing, label, &extra_ts));
+      ASSERT_TRUE(extra_ts);
+      ASSERT_EQ("ts2", extra_ts->permanent_uuid());
+    }
 
-  {
-    TSDescriptorVector existing(all.begin(), all.end());
-    shared_ptr<TSDescriptor> extra_ts;
-    const auto s = policy.PlaceExtraTabletReplica(existing, &extra_ts);
-    ASSERT_TRUE(s.IsNotFound()) << s.ToString();
-    ASSERT_FALSE(extra_ts);
-    ASSERT_STR_CONTAINS(s.ToString(),
-                        "could not select location for extra replica");
+    {
+      TSDescriptorVector existing(all.begin(), all.end());
+      shared_ptr<TSDescriptor> extra_ts;
+      const auto s = policy.PlaceExtraTabletReplica(existing, label, &extra_ts);
+      ASSERT_TRUE(s.IsNotFound()) << s.ToString();
+      ASSERT_FALSE(extra_ts);
+      ASSERT_STR_CONTAINS(s.ToString(),
+                          "could not select location for extra replica");
+    }
   }
 }
 
@@ -290,47 +308,56 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasNoLoc) {
   // 'No location case': expecting backward-compatible behavior with the
   // legacy (i.e. non-location-aware) logic.
   const vector<LocationInfo> cluster_info = {
-    { "", { { "ts0", 0 }, { "ts1", 10 }, { "ts2", 1 }, } },
+    {
+      "",
+      {
+        { "ts0", 0 },
+        { "ts1", 10, { { "labelA", 10 } } },
+        { "ts2", 1, { { "labelA", 1 } } },
+      }
+    },
   };
   ASSERT_OK(Prepare(cluster_info));
 
   const auto& all = descriptors();
   PlacementPolicy policy(all, rng());
 
-  // Ask just for a single replica.
-  {
-    TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(1, &result));
-    ASSERT_EQ(1, result.size());
-    TSDescriptorsMap m;
-    ASSERT_OK(TSDescriptorVectorToMap(result, &m));
-    ASSERT_EQ(1, m.size());
-    // "Power of Two Choices" should select less loaded servers.
-    ASSERT_TRUE(m.count("ts0") == 1 || m.count("ts2") == 1);
-    ASSERT_EQ(0, m.count("ts1"));
-  }
+  for (const auto& label : initializer_list<optional<string>>{ none, string("labelA") }) {
+    // Ask just for a single replica.
+    {
+      TSDescriptorVector result;
+      ASSERT_OK(policy.PlaceTabletReplicas(1, label, &result));
+      ASSERT_EQ(1, result.size());
+      TSDescriptorsMap m;
+      ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+      ASSERT_EQ(1, m.size());
+      // "Power of Two Choices" should select less loaded servers.
+      ASSERT_TRUE(m.count("ts0") == 1 || m.count("ts2") == 1);
+      ASSERT_EQ(0, m.count("ts1"));
+    }
 
-  // Ask for number of replicas equal to the number of available tablet servers.
-  {
-    TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(3, &result));
-    ASSERT_EQ(3, result.size());
-    TSDescriptorsMap m;
-    ASSERT_OK(TSDescriptorVectorToMap(result, &m));
-    ASSERT_EQ(1, m.count("ts0"));
-    ASSERT_EQ(1, m.count("ts1"));
-    ASSERT_EQ(1, m.count("ts2"));
-  }
+    // Ask for number of replicas equal to the number of available tablet servers.
+    {
+      TSDescriptorVector result;
+      ASSERT_OK(policy.PlaceTabletReplicas(3, label, &result));
+      ASSERT_EQ(3, result.size());
+      TSDescriptorsMap m;
+      ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+      ASSERT_EQ(1, m.count("ts0"));
+      ASSERT_EQ(1, m.count("ts1"));
+      ASSERT_EQ(1, m.count("ts2"));
+    }
 
-  // Try to ask for too many replicas when too few tablet servers are available.
-  {
-    TSDescriptorVector result;
-    auto s = policy.PlaceTabletReplicas(4, &result);
-    ASSERT_TRUE(s.IsNotFound()) << s.ToString();
-    ASSERT_STR_CONTAINS(s.ToString(),
-                        "could not find next location after placing "
-                        "3 out of 4 tablet replicas");
-    ASSERT_TRUE(result.empty());
+    // Try to ask for too many replicas when too few tablet servers are available.
+    {
+      TSDescriptorVector result;
+      auto s = policy.PlaceTabletReplicas(4, label, &result);
+      ASSERT_TRUE(s.IsNotFound()) << s.ToString();
+      ASSERT_STR_CONTAINS(s.ToString(),
+                          "could not find next location after placing "
+                          "3 out of 4 tablet replicas");
+      ASSERT_TRUE(result.empty());
+    }
   }
 }
 
@@ -348,7 +375,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicas) {
   // Ask for number of replicas equal to the number of available locations.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(3, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(3, none, &result));
     ASSERT_EQ(3, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -363,7 +390,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicas) {
   // enough locations to spread the replicas.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(5, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(5, none, &result));
     ASSERT_EQ(5, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -377,7 +404,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicas) {
   // Ask for number of replicas greater than the number of tablet servers.
   {
     TSDescriptorVector result;
-    auto s = policy.PlaceTabletReplicas(8, &result);
+    auto s = policy.PlaceTabletReplicas(8, none, &result);
     ASSERT_TRUE(s.IsNotFound()) << s.ToString();
     ASSERT_STR_CONTAINS(s.ToString(),
                         "could not find next location after placing "
@@ -402,7 +429,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasOneTSPerLocation) {
   // Ask for number of replicas equal to the number of available locations.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(3, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(3, none, &result));
     ASSERT_EQ(3, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -416,7 +443,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasOneTSPerLocation) {
   // Ask for number of replicas equal to the number of available locations.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(5, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(5, none, &result));
     ASSERT_EQ(5, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -430,7 +457,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasOneTSPerLocation) {
   // Ask for number of replicas greater than the number of tablet servers.
   {
     TSDescriptorVector result;
-    auto s = policy.PlaceTabletReplicas(6, &result);
+    auto s = policy.PlaceTabletReplicas(6, none, &result);
     ASSERT_TRUE(s.IsNotFound()) << s.ToString();
     ASSERT_STR_CONTAINS(s.ToString(),
                         "could not find next location after placing "
@@ -455,7 +482,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasBalancingLocations) {
   // Make sure no location contains the majority of replicas.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(3, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(3, none, &result));
     ASSERT_EQ(3, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -471,7 +498,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasBalancingLocations) {
   // Current location selection algorithm loads the locations evenly.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(5, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(5, none, &result));
     ASSERT_EQ(5, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -486,7 +513,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasBalancingLocations) {
   // servers in the cluster.
   {
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(7, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(7, none, &result));
     ASSERT_EQ(7, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -519,7 +546,7 @@ TEST_F(PlacementPolicyTest, PlaceExtraTabletReplicaViolatedPolicy) {
     // in the same location.
     const auto existing = GetDescriptors({ "A_ts0", "A_ts1", "A_ts2", });
     shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, none, &extra_ts));
     ASSERT_TRUE(extra_ts);
     // Within location a replica is placed by the 'power of 2' algorithm.
     ASSERT_TRUE(extra_ts->permanent_uuid() == "C_ts0" ||
@@ -534,7 +561,7 @@ TEST_F(PlacementPolicyTest, PlaceExtraTabletReplicaViolatedPolicy) {
     // constraint would be violated.
     const auto existing = GetDescriptors({ "A_ts0", "A_ts1", "C_ts1", "C_ts2", });
     shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, none, &extra_ts));
     ASSERT_TRUE(extra_ts);
     // Within location a replica is placed by the 'power of 2' algorithm.
     ASSERT_TRUE(extra_ts->permanent_uuid() == "B_ts0" ||
@@ -596,7 +623,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L5_TS7) {
   {
     static constexpr auto num_replicas = 3;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     // Make sure the placement of replicas conforms with the main constraint:
@@ -618,7 +645,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L5_TS7) {
   {
     static constexpr auto num_replicas = 5;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -635,7 +662,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L5_TS7) {
   {
     static constexpr auto num_replicas = 7;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -649,7 +676,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L5_TS7) {
   {
     static constexpr auto num_replicas = 9;
     TSDescriptorVector result;
-    auto s = policy.PlaceTabletReplicas(num_replicas, &result);
+    auto s = policy.PlaceTabletReplicas(num_replicas, none, &result);
     ASSERT_TRUE(s.IsNotFound()) << s.ToString();
     const string ref_msg = Substitute(
         "could not find next location after placing 7 out of $0 tablet replicas",
@@ -681,7 +708,7 @@ TEST_F(PlacementPolicyTest, PlaceExtraTablet_L5_TS10_RF5) {
     const auto existing = GetDescriptors(
         { "A_ts0", "B_ts0", "C_ts0", "D_ts0", "E_ts0", });
     shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, none, &extra_ts));
     ASSERT_TRUE(extra_ts);
     // The location with lowest load is selected for the extra replica.
     ASSERT_EQ("A_ts2", extra_ts->permanent_uuid());
@@ -692,7 +719,7 @@ TEST_F(PlacementPolicyTest, PlaceExtraTablet_L5_TS10_RF5) {
     const auto existing = GetDescriptors(
         { "A_ts0", "A_ts1", "B_ts0", "B_ts1", "C_ts0", });
     shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, none, &extra_ts));
     ASSERT_TRUE(extra_ts);
     // The location with lowest load is selected for the extra replica.
     ASSERT_EQ("D_ts0", extra_ts->permanent_uuid());
@@ -703,7 +730,7 @@ TEST_F(PlacementPolicyTest, PlaceExtraTablet_L5_TS10_RF5) {
     const auto existing = GetDescriptors(
         { "A_ts0", "B_ts0", "B_ts1", "B_ts2", "E_ts0", });
     shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, none, &extra_ts));
     ASSERT_TRUE(extra_ts);
     // Among the locations where an additional replica can be placed,
     // location A and location C have the least load. As for the preferences
@@ -741,7 +768,7 @@ TEST_F(PlacementPolicyTest, PlaceExtraTablet_L5_TS16_RF5) {
   map<string, int> placement_stats;
   for (auto i = 0; i < 6000; ++i) {
     shared_ptr<TSDescriptor> extra_ts;
-    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, &extra_ts));
+    ASSERT_OK(policy.PlaceExtraTabletReplica(existing, none, &extra_ts));
     ASSERT_TRUE(extra_ts);
     const auto& ts_uuid = extra_ts->permanent_uuid();
     ASSERT_TRUE(ts_uuid == "A_ts1" ||
@@ -783,7 +810,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) {
   {
     static constexpr auto num_replicas = 2;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -793,7 +820,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) {
   {
     static constexpr auto num_replicas = 4;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -803,7 +830,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) {
   {
     static constexpr auto num_replicas = 6;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -831,7 +858,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
   {
     static constexpr auto num_replicas = 2;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -843,7 +870,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
   {
     static constexpr auto num_replicas = 4;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -858,7 +885,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
   {
     static constexpr auto num_replicas = 6;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -871,7 +898,7 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
   {
     static constexpr auto num_replicas = 8;
     TSDescriptorVector result;
-    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result));
+    ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result));
     ASSERT_EQ(num_replicas, result.size());
     TSDescriptorsMap m;
     ASSERT_OK(TSDescriptorVectorToMap(result, &m));
@@ -885,5 +912,83 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) {
   }
 }
 
+// In a Kudu cluster with newly added tablet servers, add tablet replicas with and without
+// a dimension label and verify the resulting distribution of the replicas. Check 1) the
+// overall distribution of replicas and 2) the distribution within the specified dimension.
+TEST_F(PlacementPolicyTest, PlaceTabletReplicasWithNewTabletServers) {
+  const vector<LocationInfo> cluster_info = {
+      {
+        "",
+        {
+          { "ts0", 0 }, // a new tablet server
+          { "ts1", 0 }, // a new tablet server
+          { "ts2", 1000, { { "label0", 1000 }, } },
+          { "ts3", 1000, { { "label0", 1000 }, } },
+          { "ts4", 2000, { { "label0", 2000 }, } },
+          { "ts5", 2000, { { "label0", 2000 }, } },
+        }
+      },
+  };
+
+  auto calc_stddev_func = [](const map<string, int>& placement_stats, double mean_per_ts) {
+    double sum_squared_deviation = 0;
+    for (const auto& stat : placement_stats) {
+      int num_ts = stat.second;
+      double deviation = static_cast<double>(num_ts) - mean_per_ts;
+      sum_squared_deviation += deviation * deviation;
+    }
+    return sqrt(sum_squared_deviation / (mean_per_ts - 1));
+  };
+
+  // Place three replicas a thousand times.
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+
+    map<string, int> placement_stats;
+    for (auto i = 0; i < 1000; ++i) {
+      TSDescriptorVector result;
+      // Place three replicas (no dimension label) and tally them per tablet server.
+      ASSERT_OK(policy.PlaceTabletReplicas(3, none, &result));
+      ASSERT_EQ(3, result.size());
+      for (const auto& ts : result) {
+        const auto& ts_uuid = ts->permanent_uuid();
+        ++placement_stats[ts_uuid];
+      }
+    }
+
+    ASSERT_EQ(6, placement_stats.size());
+    const double kMeanPerServer = 3000 / 6.0;
+    double stddev = calc_stddev_func(placement_stats, kMeanPerServer);
+    ASSERT_GE(stddev, 20.0);
+  }
+
+  // Place three replicas with dimension labels a thousand times.
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    const auto& all = descriptors();
+    PlacementPolicy policy(all, rng());
+
+    for (const auto& label : { "label1", "label2", "label3", "label4", "label5" }) {
+      map<string, int> placement_stats;
+      for (auto i = 0; i < 1000; ++i) {
+        TSDescriptorVector result;
+        ASSERT_OK(policy.PlaceTabletReplicas(3, make_optional(string(label)), &result));
+        ASSERT_EQ(3, result.size());
+        for (const auto& ts : result) {
+          const auto& ts_uuid = ts->permanent_uuid();
+          ++placement_stats[ts_uuid];
+        }
+      }
+
+      ASSERT_EQ(6, placement_stats.size());
+      const double kMeanPerServer = 3000 / 6.0;
+      double stddev = calc_stddev_func(placement_stats, kMeanPerServer);
+      ASSERT_LE(stddev, 3.0);
+    }
+  }
+}
+
 } // namespace master
 } // namespace kudu
diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc
index 6219597..c81a8ce 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -52,14 +52,17 @@ namespace master {
 
 namespace {
 
-double GetTSLoad(TSDescriptor* desc) {
-  return desc->RecentReplicaCreations() + desc->num_live_replicas();
+double GetTSLoad(const boost::optional<string>& dimension, TSDescriptor* desc) {
+  // TODO(oclarms): get the number of times this tablet server has recently been
+  //  selected to create a tablet replica in the given dimension.
+  return desc->RecentReplicaCreations() + desc->num_live_replicas(dimension);
 }
 
 // Given exactly two choices in 'two_choices', pick the better tablet server on
 // which to place a tablet replica. Ties are broken using 'rng'.
 shared_ptr<TSDescriptor> PickBetterReplica(
     const TSDescriptorVector& two_choices,
+    const boost::optional<std::string>& dimension,
     ThreadSafeRandom* rng) {
   CHECK_EQ(2, two_choices.size());
 
@@ -67,7 +70,8 @@ shared_ptr<TSDescriptor> PickBetterReplica(
   const auto& b = two_choices[1];
 
   // When creating replicas, we consider two aspects of load:
-  //   (1) how many tablet replicas are already on the server, and
+  //   (1) how many tablet replicas are already on the server (if 'dimension' is not none,
+  //       only the tablet replicas in that dimension are counted), and
   //   (2) how often we've chosen this server recently.
   //
   // The first factor will attempt to put more replicas on servers that
@@ -82,8 +86,8 @@ shared_ptr<TSDescriptor> PickBetterReplica(
   //
   // TODO(wdberkeley): in the future we may want to factor in other items such
   // as available disk space, actual request load, etc.
-  double load_a = GetTSLoad(a.get());
-  double load_b = GetTSLoad(b.get());
+  double load_a = GetTSLoad(dimension, a.get());
+  double load_b = GetTSLoad(dimension, b.get());
   if (load_a < load_b) {
     return a;
   }
@@ -110,6 +114,7 @@ PlacementPolicy::PlacementPolicy(TSDescriptorVector descs,
 }
 
 Status PlacementPolicy::PlaceTabletReplicas(int nreplicas,
+                                            const boost::optional<std::string>& dimension,
                                             TSDescriptorVector* ts_descs) const {
   DCHECK(ts_descs);
 
@@ -122,13 +127,14 @@ Status PlacementPolicy::PlaceTabletReplicas(int nreplicas,
     const auto& loc = elem.first;
     const auto loc_nreplicas = elem.second;
     const auto& ts_descriptors = FindOrDie(ltd_, loc);
-    RETURN_NOT_OK(SelectReplicas(ts_descriptors, loc_nreplicas, ts_descs));
+    RETURN_NOT_OK(SelectReplicas(ts_descriptors, loc_nreplicas, dimension, ts_descs));
   }
   return Status::OK();
 }
 
 Status PlacementPolicy::PlaceExtraTabletReplica(
     TSDescriptorVector existing,
+    const boost::optional<std::string>& dimension,
     shared_ptr<TSDescriptor>* ts_desc) const {
   DCHECK(ts_desc);
 
@@ -166,7 +172,7 @@ Status PlacementPolicy::PlaceExtraTabletReplica(
     return Status::IllegalState(
         Substitute("'$0': no info on tablet servers at location", location));
   }
-  auto replica = SelectReplica(*location_ts_descs_ptr, existing_set);
+  auto replica = SelectReplica(*location_ts_descs_ptr, dimension, existing_set);
   if (!replica) {
     return Status::NotFound("could not find tablet server for extra replica");
   }
@@ -226,6 +232,7 @@ Status PlacementPolicy::SelectReplicaLocations(
 
 Status PlacementPolicy::SelectReplicas(const TSDescriptorVector& source_ts_descs,
                                        int nreplicas,
+                                       const boost::optional<string>& dimension,
                                        TSDescriptorVector* result_ts_descs) const {
   if (nreplicas > source_ts_descs.size()) {
     return Status::InvalidArgument(
@@ -237,7 +244,7 @@ Status PlacementPolicy::SelectReplicas(const TSDescriptorVector& source_ts_descs
   // put two replicas on the same host.
   set<shared_ptr<TSDescriptor>> already_selected;
   for (auto i = 0; i < nreplicas; ++i) {
-    auto ts = SelectReplica(source_ts_descs, already_selected);
+    auto ts = SelectReplica(source_ts_descs, dimension, already_selected);
     CHECK(ts);
 
     // Increment the number of pending replicas so that we take this selection
@@ -276,6 +283,7 @@ Status PlacementPolicy::SelectReplicas(const TSDescriptorVector& source_ts_descs
 //
 shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
     const TSDescriptorVector& ts_descs,
+    const boost::optional<string>& dimension,
     const set<shared_ptr<TSDescriptor>>& excluded) const {
   // Pick two random servers, excluding those we've already picked.
   // If we've only got one server left, 'two_choices' will actually
@@ -286,7 +294,7 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
 
   if (two_choices.size() == 2) {
     // Pick the better of the two.
-    return PickBetterReplica(two_choices, rng_);
+    return PickBetterReplica(two_choices, dimension, rng_);
   }
   if (two_choices.size() == 1) {
     return two_choices.front();
diff --git a/src/kudu/master/placement_policy.h b/src/kudu/master/placement_policy.h
index 924c0d1..8996fe2 100644
--- a/src/kudu/master/placement_policy.h
+++ b/src/kudu/master/placement_policy.h
@@ -24,6 +24,7 @@
 #include <unordered_map>
 #include <unordered_set>
 
+#include <boost/optional/optional.hpp>
 #include <gtest/gtest_prod.h>
 
 #include "kudu/master/ts_descriptor.h"
@@ -69,15 +70,29 @@ class PlacementPolicy {
   size_t ts_num() const { return ts_num_; }
 
   // Select tablet servers to host the given number of replicas for a tablet.
-  // The 'nreplicas' parameter specifies the desired replication factor,
-  // the result set of tablet server descriptors is output into the 'ts_descs'
-  // placeholder (must not be null).
-  Status PlaceTabletReplicas(int nreplicas, TSDescriptorVector* ts_descs) const;
-
-  // Select tablet server to host an additional tablet replica. The 'existing'
-  // parameter lists current members of the tablet's Raft configuration,
-  // the new member is output into 'ts_desc' placeholer (must not be null).
+  //
+  // Parameters:
+  //   'nreplicas'  The 'nreplicas' parameter specifies the desired replication factor.
+  //   'dimension'  The 'dimension' parameter specifies the dimension information of the tablet.
+  //                If not none, place tablet replicas based on the number of tablets in a
+  //                dimension. Otherwise, based on the number of tablets at a tablet server.
+  //   'ts_descs'   The result set of tablet server descriptors is output into the 'ts_descs'
+  //                placeholder (must not be null).
+  Status PlaceTabletReplicas(int nreplicas,
+                             const boost::optional<std::string>& dimension,
+                             TSDescriptorVector* ts_descs) const;
+
+  // Select tablet server to host an additional tablet replica.
+  //
+  // Parameters:
+  //   'existing'  The 'existing' parameter lists current members of the tablet's
+  //               Raft configuration.
+  //   'dimension' The 'dimension' parameter specifies the dimension information of the tablet.
+  //               If not none, place tablet replicas based on the number of tablets in a
+  //               dimension. Otherwise, based on the number of tablets at a tablet server.
+  //   'ts_desc'   The new member is output into 'ts_desc' placeholder (must not be null).
   Status PlaceExtraTabletReplica(TSDescriptorVector existing,
+                                 const boost::optional<std::string>& dimension,
                                  std::shared_ptr<TSDescriptor>* ts_desc) const;
 
  private:
@@ -122,15 +137,17 @@ class PlacementPolicy {
 
   // Select the given number ('nreplicas') from the set of specified tablet
   // servers to place tablet replicas.
-  Status SelectReplicas(const TSDescriptorVector& source_ts_desc,
+  Status SelectReplicas(const TSDescriptorVector& source_ts_descs,
                         int nreplicas,
-                        TSDescriptorVector* result_ts_desc) const;
+                        const boost::optional<std::string>& dimension,
+                        TSDescriptorVector* result_ts_descs) const;
 
   // Given the tablet servers in 'ts_descs', pick a tablet server to host
   // a tablet replica, excluding tablet servers in 'excluded'. If there are no
   // servers in 'ts_descs' that are not in 'existing', return nullptr.
   std::shared_ptr<TSDescriptor> SelectReplica(
       const TSDescriptorVector& ts_descs,
+      const boost::optional<std::string>& dimension,
       const std::set<std::shared_ptr<TSDescriptor>>& excluded) const;
 
   // Select location for next replica of a tablet with the specified replication
diff --git a/src/kudu/master/sys_catalog.cc b/src/kudu/master/sys_catalog.cc
index e3ed8d9..fbc6e9e 100644
--- a/src/kudu/master/sys_catalog.cc
+++ b/src/kudu/master/sys_catalog.cc
@@ -254,6 +254,7 @@ Status SysCatalogTable::CreateNew(FsManager *fs_manager) {
                                                   /*tombstone_last_logged_opid=*/ boost::none,
                                                   /*supports_live_row_count=*/ true,
                                                   /*extra_config=*/ boost::none,
+                                                  /*dimension_label=*/ boost::none,
                                                   &metadata));
 
   RaftConfigPB config;
diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc
index 4c1c0a1..e89b27f 100644
--- a/src/kudu/master/ts_descriptor.cc
+++ b/src/kudu/master/ts_descriptor.cc
@@ -46,6 +46,10 @@ DEFINE_int32(tserver_unresponsive_timeout_ms, 60 * 1000,
              "selected when assigning replicas during table creation or re-replication.");
 TAG_FLAG(tserver_unresponsive_timeout_ms, advanced);
 
+DEFINE_double(tserver_last_replica_creations_halflife_ms, 60 * 1000,
+              "The half-life of last replica creations time. Only for testing!");
+TAG_FLAG(tserver_last_replica_creations_halflife_ms, hidden);
+
 using kudu::pb_util::SecureDebugString;
 using kudu::pb_util::SecureShortDebugString;
 using std::shared_ptr;
@@ -172,7 +176,7 @@ void TSDescriptor::DecayRecentReplicaCreationsUnlocked() {
   // we don't need to bother calling the clock, etc.
   if (recent_replica_creations_ == 0) return;
 
-  const double kHalflifeSecs = 60;
+  const double kHalflifeSecs = FLAGS_tserver_last_replica_creations_halflife_ms / 1000;
   MonoTime now = MonoTime::Now();
   double secs_since_last_decay = (now - last_replica_creations_decay_).ToSeconds();
   recent_replica_creations_ *= pow(0.5, secs_since_last_decay / kHalflifeSecs);
diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h
index 0b64019..800710f 100644
--- a/src/kudu/master/ts_descriptor.h
+++ b/src/kudu/master/ts_descriptor.h
@@ -21,6 +21,8 @@
 #include <memory>
 #include <mutex>
 #include <string>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <boost/optional/optional.hpp>
@@ -28,8 +30,10 @@
 #include <gtest/gtest_prod.h>
 
 #include "kudu/common/wire_protocol.pb.h"
+#include "kudu/gutil/basictypes.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/macros.h"
+#include "kudu/gutil/map-util.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/make_shared.h"
 #include "kudu/util/monotime.h"
@@ -54,6 +58,9 @@ class TabletServerAdminServiceProxy;
 
 namespace master {
 
+// Map of dimension label -> number of tablets in that dimension.
+typedef std::unordered_map<std::string, int32_t> TabletNumByDimensionMap;
+
 // Master-side view of a single tablet server.
 //
 // Tracks the last heartbeat, status, instance identifier, location, etc.
@@ -119,9 +126,24 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
     num_live_replicas_ = n;
   }
 
+  // Set the number of live replicas in each dimension, replacing any previous map.
+  void set_num_live_replicas_by_dimension(TabletNumByDimensionMap num_live_tablets_by_dimension) {
+    std::lock_guard<rw_spinlock> l(lock_);
+    num_live_tablets_by_dimension_ = std::move(num_live_tablets_by_dimension);
+  }
+
   // Return the number of live replicas (i.e running or bootstrapping).
-  int num_live_replicas() const {
+  // If dimension is none, return the total number of replicas in the tablet server.
+  // Otherwise, return the number of replicas in the dimension (0 if none are recorded).
+  int num_live_replicas(const boost::optional<std::string>& dimension = boost::none) const {
     shared_lock<rw_spinlock> l(lock_);
+    if (dimension) {
+      int32_t num_live_tablets = 0;
+      if (num_live_tablets_by_dimension_) {
+        ignore_result(FindCopy(*num_live_tablets_by_dimension_, *dimension, &num_live_tablets));
+      }
+      return num_live_tablets;
+    }
     return num_live_replicas_;
   }
 
@@ -167,6 +189,9 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
   // The number of live replicas on this host, from the last heartbeat.
   int num_live_replicas_;
 
+  // The number of live replicas in each dimension, from the last heartbeat.
+  boost::optional<TabletNumByDimensionMap> num_live_tablets_by_dimension_;
+
   // The tablet server's location, as determined by the master at registration.
   boost::optional<std::string> location_;
 
diff --git a/src/kudu/tablet/metadata.proto b/src/kudu/tablet/metadata.proto
index 861c8a3..8f19b2b 100644
--- a/src/kudu/tablet/metadata.proto
+++ b/src/kudu/tablet/metadata.proto
@@ -142,6 +142,10 @@ message TabletSuperBlockPB {
 
   // The table's extra-config.
   optional TableExtraConfigPB extra_config = 17;
+
+  // The dimension label for the tablet. Used by the master to determine load when
+  // creating new tablet replicas based on dimension.
+  optional string dimension_label = 18;
 }
 
 // Tablet states represent stages of a TabletReplica's object lifecycle and are
diff --git a/src/kudu/tablet/tablet-harness.h b/src/kudu/tablet/tablet-harness.h
index b4fd1a7..83ffc3b 100644
--- a/src/kudu/tablet/tablet-harness.h
+++ b/src/kudu/tablet/tablet-harness.h
@@ -102,6 +102,7 @@ class TabletHarness {
                                                TABLET_DATA_READY,
                                                /*tombstone_last_logged_opid=*/ boost::none,
                                                /*extra_config=*/ boost::none,
+                                               /*dimension_label=*/ boost::none,
                                                &metadata));
     if (options_.enable_metrics) {
       metrics_registry_.reset(new MetricRegistry());
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc
index 0da4e7c..ceb7d5a 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -128,6 +128,7 @@ class BootstrapTest : public LogTestBase {
                                                TABLET_DATA_READY,
                                                /*tombstone_last_logged_opid=*/ boost::none,
                                                /*extra_config=*/ boost::none,
+                                               /*dimension_label=*/ boost::none,
                                                meta));
     (*meta)->SetLastDurableMrsIdForTests(mrs_id);
     if ((*meta)->GetRowSetForTests(0) != nullptr) {
diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index 18f080c..006c231 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -93,6 +93,7 @@ Status TabletMetadata::CreateNew(FsManager* fs_manager,
                                  boost::optional<OpId> tombstone_last_logged_opid,
                                  bool supports_live_row_count,
                                  boost::optional<TableExtraConfigPB> extra_config,
+                                 boost::optional<string> dimension_label,
                                  scoped_refptr<TabletMetadata>* metadata) {
 
   // Verify that no existing tablet exists with the same ID.
@@ -115,7 +116,8 @@ Status TabletMetadata::CreateNew(FsManager* fs_manager,
                                                        initial_tablet_data_state,
                                                        std::move(tombstone_last_logged_opid),
                                                        supports_live_row_count,
-                                                       std::move(extra_config)));
+                                                       std::move(extra_config),
+                                                       std::move(dimension_label)));
   RETURN_NOT_OK(ret->Flush());
   dir_group_cleanup.cancel();
 
@@ -142,6 +144,7 @@ Status TabletMetadata::LoadOrCreate(FsManager* fs_manager,
                                     const TabletDataState& initial_tablet_data_state,
                                     boost::optional<OpId> tombstone_last_logged_opid,
                                     boost::optional<TableExtraConfigPB> extra_config,
+                                    boost::optional<string> dimension_label,
                                     scoped_refptr<TabletMetadata>* metadata) {
   Status s = Load(fs_manager, tablet_id, metadata);
   if (s.ok()) {
@@ -158,6 +161,7 @@ Status TabletMetadata::LoadOrCreate(FsManager* fs_manager,
                      std::move(tombstone_last_logged_opid),
                      /*supports_live_row_count=*/ true,
                      std::move(extra_config),
+                     std::move(dimension_label),
                      metadata);
   }
   return s;
@@ -277,7 +281,8 @@ TabletMetadata::TabletMetadata(FsManager* fs_manager, string tablet_id,
                                const TabletDataState& tablet_data_state,
                                boost::optional<OpId> tombstone_last_logged_opid,
                                bool supports_live_row_count,
-                               boost::optional<TableExtraConfigPB> extra_config)
+                               boost::optional<TableExtraConfigPB> extra_config,
+                               boost::optional<string> dimension_label)
     : state_(kNotWrittenYet),
       tablet_id_(std::move(tablet_id)),
       table_id_(std::move(table_id)),
@@ -292,6 +297,7 @@ TabletMetadata::TabletMetadata(FsManager* fs_manager, string tablet_id,
       tablet_data_state_(tablet_data_state),
       tombstone_last_logged_opid_(std::move(tombstone_last_logged_opid)),
       extra_config_(std::move(extra_config)),
+      dimension_label_(std::move(dimension_label)),
       num_flush_pins_(0),
       needs_flush_(false),
       flush_count_for_tests_(0),
@@ -464,6 +470,12 @@ Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock)
     } else {
       extra_config_ = boost::none;
     }
+
+    if (superblock.has_dimension_label()) {
+      dimension_label_ = superblock.dimension_label();
+    } else {
+      dimension_label_ = boost::none;
+    }
   }
 
   // Now is a good time to clean up any orphaned blocks that may have been
@@ -706,6 +718,10 @@ Status TabletMetadata::ToSuperBlockUnlocked(TabletSuperBlockPB* super_block,
     *pb.mutable_extra_config() = *extra_config_;
   }
 
+  if (dimension_label_) {
+    pb.set_dimension_label(*dimension_label_);
+  }
+
   super_block->Swap(&pb);
   return Status::OK();
 }
@@ -801,5 +817,10 @@ boost::optional<TableExtraConfigPB> TabletMetadata::extra_config() const {
   return extra_config_;
 }
 
+boost::optional<string> TabletMetadata::dimension_label() const {
+  std::lock_guard<LockType> l(data_lock_);  // Hold data_lock_ while copying the label out.
+  return dimension_label_;
+}
+
 } // namespace tablet
 } // namespace kudu
diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h
index efb124b..060c071 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -84,6 +84,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
                           boost::optional<consensus::OpId> tombstone_last_logged_opid,
                           bool supports_live_row_count,
                           boost::optional<TableExtraConfigPB> extra_config,
+                          boost::optional<std::string> dimension_label,
                           scoped_refptr<TabletMetadata>* metadata);
 
   // Load existing metadata from disk.
@@ -106,6 +107,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
                              const TabletDataState& initial_tablet_data_state,
                              boost::optional<consensus::OpId> tombstone_last_logged_opid,
                              boost::optional<TableExtraConfigPB> extra_config,
+                             boost::optional<std::string> dimension_label,
                              scoped_refptr<TabletMetadata>* metadata);
 
   static std::vector<BlockIdPB> CollectBlockIdPBs(
@@ -242,6 +244,9 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
   // Returns the table's extra configuration properties.
   boost::optional<TableExtraConfigPB> extra_config() const;
 
+  // Returns the tablet's dimension label, or boost::none if it has none.
+  boost::optional<std::string> dimension_label() const;
+
   // Loads the currently-flushed superblock from disk into the given protobuf.
   Status ReadSuperBlockFromDisk(TabletSuperBlockPB* superblock) const;
 
@@ -299,7 +304,8 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
                  const TabletDataState& tablet_data_state,
                  boost::optional<consensus::OpId> tombstone_last_logged_opid,
                  bool supports_live_row_count,
-                 boost::optional<TableExtraConfigPB> extra_config);
+                 boost::optional<TableExtraConfigPB> extra_config,
+                 boost::optional<std::string> dimension_label);
 
   // Constructor for loading an existing tablet.
   TabletMetadata(FsManager* fs_manager, std::string tablet_id);
@@ -390,6 +396,9 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
   // Table extra config.
   boost::optional<TableExtraConfigPB> extra_config_;
 
+  // Tablet's dimension label.
+  boost::optional<std::string> dimension_label_;
+
   // If this counter is > 0 then Flush() will not write any data to
   // disk.
   int32_t num_flush_pins_;
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 0732542..2ab104e 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1681,6 +1681,7 @@ TEST_F(ToolTest, TestLocalReplicaDumpDataDirs) {
       /*tombstone_last_logged_opid=*/ boost::none,
       /*supports_live_row_count=*/ true,
       /*extra_config=*/ boost::none,
+      /*dimension_label=*/ boost::none,
       &meta));
   string stdout;
   NO_FATALS(RunActionStdoutString(Substitute("local_replica dump data_dirs $0 "
@@ -1717,6 +1718,7 @@ TEST_F(ToolTest, TestLocalReplicaDumpMeta) {
                   /*tombstone_last_logged_opid=*/ boost::none,
                   /*supports_live_row_count=*/ true,
                   /*extra_config=*/ boost::none,
+                  /*dimension_label=*/ boost::none,
                   &meta);
   string stdout;
   NO_FATALS(RunActionStdoutString(Substitute("local_replica dump meta $0 "
diff --git a/src/kudu/tserver/heartbeater.cc b/src/kudu/tserver/heartbeater.cc
index b21a002..1a58333 100644
--- a/src/kudu/tserver/heartbeater.cc
+++ b/src/kudu/tserver/heartbeater.cc
@@ -460,7 +460,11 @@ Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* error,
                           master_address_.ToString());
     GenerateIncrementalTabletReport(req.mutable_tablet_report());
   }
+
   req.set_num_live_tablets(server_->tablet_manager()->GetNumLiveTablets());
+  auto num_live_tablets_by_dimension = server_->tablet_manager()->GetNumLiveTabletsByDimension();
+  req.mutable_num_live_tablets_by_dimension()->insert(num_live_tablets_by_dimension.begin(),
+                                                      num_live_tablets_by_dimension.end());
 
   VLOG(2) << "Sending heartbeat:\n" << SecureDebugString(req);
   master::TSHeartbeatResponsePB resp;
diff --git a/src/kudu/tserver/mini_tablet_server.cc b/src/kudu/tserver/mini_tablet_server.cc
index 56ab111..bb9b782 100644
--- a/src/kudu/tserver/mini_tablet_server.cc
+++ b/src/kudu/tserver/mini_tablet_server.cc
@@ -148,7 +148,7 @@ Status MiniTabletServer::AddTestTablet(const std::string& table_id,
 
   return server_->tablet_manager()->CreateNewTablet(
       table_id, tablet_id, partition.second, table_id,
-      schema_with_ids, partition.first, config, boost::none, nullptr);
+      schema_with_ids, partition.first, config, boost::none, boost::none, nullptr);
 }
 
 vector<string> MiniTabletServer::ListTablets() const {
diff --git a/src/kudu/tserver/tablet_copy_client.cc b/src/kudu/tserver/tablet_copy_client.cc
index daa2218..f724c7f 100644
--- a/src/kudu/tserver/tablet_copy_client.cc
+++ b/src/kudu/tserver/tablet_copy_client.cc
@@ -363,6 +363,7 @@ Status TabletCopyClient::Start(const HostPort& copy_source_addr,
                                             superblock_->tombstone_last_logged_opid(),
                                             remote_superblock_->supports_live_row_count(),
                                             superblock_->extra_config(),
+                                            superblock_->dimension_label(),
                                             &meta_));
     TRACE("Wrote new tablet metadata");
 
diff --git a/src/kudu/tserver/tablet_server-test.cc b/src/kudu/tserver/tablet_server-test.cc
index df14533..717aacb 100644
--- a/src/kudu/tserver/tablet_server-test.cc
+++ b/src/kudu/tserver/tablet_server-test.cc
@@ -3307,7 +3307,7 @@ TEST_F(TabletServerTest, TestWriteOutOfBounds) {
       "TestWriteOutOfBoundsTable", tabletId,
       partitions[1],
       tabletId, schema, partition_schema,
-      mini_server_->CreateLocalConfig(), boost::none, nullptr));
+      mini_server_->CreateLocalConfig(), boost::none, boost::none, nullptr));
 
   ASSERT_OK(WaitForTabletRunning(tabletId));
 
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index 1ad8dce..cbeae9b 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -1034,6 +1034,7 @@ void TabletServiceAdminImpl::CreateTablet(const CreateTabletRequestPB* req,
       partition_schema,
       req->config(),
       req->has_extra_config() ? boost::make_optional(req->extra_config()) : boost::none,
+      req->has_dimension_label() ? boost::make_optional(req->dimension_label()) : boost::none,
       nullptr);
   if (PREDICT_FALSE(!s.ok())) {
     TabletServerErrorPB::Code code;
diff --git a/src/kudu/tserver/ts_tablet_manager-test.cc b/src/kudu/tserver/ts_tablet_manager-test.cc
index 3cde294..0dd569f 100644
--- a/src/kudu/tserver/ts_tablet_manager-test.cc
+++ b/src/kudu/tserver/ts_tablet_manager-test.cc
@@ -100,6 +100,7 @@ class TsTabletManagerTest : public KuduTest {
   Status CreateNewTablet(const std::string& tablet_id,
                          const Schema& schema,
                          boost::optional<TableExtraConfigPB> extra_config,
+                         boost::optional<std::string> dimension_label,
                          scoped_refptr<tablet::TabletReplica>* out_tablet_replica) {
     Schema full_schema = SchemaBuilder(schema).Build();
     std::pair<PartitionSchema, Partition> partition = tablet::CreateDefaultPartition(full_schema);
@@ -110,6 +111,7 @@ class TsTabletManagerTest : public KuduTest {
                                                    full_schema, partition.first,
                                                    config_,
                                                    std::move(extra_config),
+                                                   std::move(dimension_label),
                                                    &tablet_replica));
     if (out_tablet_replica) {
       (*out_tablet_replica) = tablet_replica;
@@ -156,9 +158,9 @@ TEST_F(TsTabletManagerTest, TestCreateTablet) {
   extra_config.set_history_max_age_sec(7200);
 
   // Create a new tablet.
-  ASSERT_OK(CreateNewTablet(tablet1, schema_, boost::none, &replica1));
+  ASSERT_OK(CreateNewTablet(tablet1, schema_, boost::none, boost::none, &replica1));
   // Create a new tablet with extra config.
-  ASSERT_OK(CreateNewTablet(tablet2, schema_, extra_config, &replica2));
+  ASSERT_OK(CreateNewTablet(tablet2, schema_, extra_config, boost::none, &replica2));
   ASSERT_EQ(tablet1, replica1->tablet()->tablet_id());
   ASSERT_EQ(tablet2, replica2->tablet()->tablet_id());
   ASSERT_EQ(boost::none, replica1->tablet()->metadata()->extra_config());
@@ -238,7 +240,7 @@ TEST_F(TsTabletManagerTest, TestTabletReports) {
   MarkTabletReportAcknowledged(report);
 
   // Create a tablet and do another incremental report - should include the tablet.
-  ASSERT_OK(CreateNewTablet("tablet-1", schema_, boost::none, nullptr));
+  ASSERT_OK(CreateNewTablet("tablet-1", schema_, boost::none, boost::none, nullptr));
   int updated_tablets = 0;
   while (updated_tablets != 1) {
     GenerateIncrementalTabletReport(&report);
@@ -266,7 +268,7 @@ TEST_F(TsTabletManagerTest, TestTabletReports) {
   MarkTabletReportAcknowledged(report);
 
   // Create a second tablet, and ensure the incremental report shows it.
-  ASSERT_OK(CreateNewTablet("tablet-2", schema_, boost::none, nullptr));
+  ASSERT_OK(CreateNewTablet("tablet-2", schema_, boost::none, boost::none, nullptr));
 
   // Wait up to 10 seconds to get a tablet report from tablet-2.
   // TabletReplica does not mark tablets dirty until after it commits the
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index 9366c68..d4a404c 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -407,6 +407,7 @@ Status TSTabletManager::CreateNewTablet(const string& table_id,
                                         const PartitionSchema& partition_schema,
                                         RaftConfigPB config,
                                         boost::optional<TableExtraConfigPB> extra_config,
+                                        boost::optional<string> dimension_label,
                                         scoped_refptr<TabletReplica>* replica) {
   CHECK_EQ(state(), MANAGER_RUNNING);
   CHECK(IsRaftConfigMember(server_->instance_pb().permanent_uuid(), config));
@@ -446,6 +447,7 @@ Status TSTabletManager::CreateNewTablet(const string& table_id,
                               boost::none,
                               /*supports_live_row_count=*/ true,
                               std::move(extra_config),
+                              std::move(dimension_label),
                               &meta),
     "Couldn't create tablet metadata");
 
@@ -1241,6 +1243,22 @@ int TSTabletManager::GetNumLiveTablets() const {
   return count;
 }
 
+TabletNumByDimensionMap TSTabletManager::GetNumLiveTabletsByDimension() const {
+  TabletNumByDimensionMap result;
+  shared_lock<RWMutex> l(lock_);
+  for (const auto& entry : tablet_map_) {
+    tablet::TabletStatePB state = entry.second->state();
+    if (state == tablet::BOOTSTRAPPING ||
+        state == tablet::RUNNING) {  // Only these two states count as live.
+      boost::optional<string> dimension_label = entry.second->tablet_metadata()->dimension_label();
+      if (dimension_label) {  // Tablets without a dimension label are skipped.
+        result[*dimension_label]++;  // operator[] starts a missing count at 0.
+      }
+    }
+  }
+  return result;
+}
+
 void TSTabletManager::InitLocalRaftPeerPB() {
   DCHECK_EQ(state(), MANAGER_INITIALIZING);
   local_peer_pb_.set_permanent_uuid(fs_manager_->uuid());
diff --git a/src/kudu/tserver/ts_tablet_manager.h b/src/kudu/tserver/ts_tablet_manager.h
index 6ed049a..d33f3b3 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -82,6 +82,9 @@ class TabletServer;
 // Map of tablet id -> transition reason string.
 typedef std::unordered_map<std::string, std::string> TransitionInProgressMap;
 
+// Map of dimension label -> number of tablets in that dimension.
+typedef std::unordered_map<std::string, int32_t> TabletNumByDimensionMap;
+
 class TransitionInProgressDeleter;
 
 // Keeps track of the tablets hosted on the tablet server side.
@@ -125,6 +128,7 @@ class TSTabletManager : public tserver::TabletReplicaLookupIf {
                          const PartitionSchema& partition_schema,
                          consensus::RaftConfigPB config,
                          boost::optional<TableExtraConfigPB> extra_config,
+                         boost::optional<std::string> dimension_label,
                          scoped_refptr<tablet::TabletReplica>* replica);
 
   // Delete the specified tablet asynchronously with callback 'cb'.
@@ -197,6 +201,9 @@ class TSTabletManager : public tserver::TabletReplicaLookupIf {
   // Return the number of tablets in RUNNING or BOOTSTRAPPING state.
   int GetNumLiveTablets() const;
 
+  // Get the number of tablets in RUNNING or BOOTSTRAPPING state in each dimension.
+  TabletNumByDimensionMap GetNumLiveTabletsByDimension() const;
+
   Status RunAllLogGC();
 
   // Delete the tablet using the specified delete_type as the final metadata
diff --git a/src/kudu/tserver/tserver_admin.proto b/src/kudu/tserver/tserver_admin.proto
index 4d77441..e68c7f4 100644
--- a/src/kudu/tserver/tserver_admin.proto
+++ b/src/kudu/tserver/tserver_admin.proto
@@ -72,6 +72,10 @@ message CreateTabletRequestPB {
 
   // The table's extra-config.
   optional TableExtraConfigPB extra_config = 11;
+
+  // The dimension label for the tablet. Used by the master to determine load when
+  // creating new tablet replicas based on dimension.
+  optional string dimension_label = 12;
 }
 
 message CreateTabletResponsePB {