You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/08/13 18:46:32 UTC

[kudu] branch master updated: [ksck] Filter tables and tablets in KsckCluster

This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new b107b42  [ksck] Filter tables and tablets in KsckCluster
b107b42 is described below

commit b107b420f8824a789d8513e2e3ff5a756f8ab898
Author: zhangyifan27 <ch...@163.com>
AuthorDate: Thu Jul 25 19:22:46 2019 +0800

    [ksck] Filter tables and tablets in KsckCluster
    
    The ksck tool runs slowly when a cluster contains many tables, even if
    we only want to check a few specific tables and tablets. Filtering
    tables and tablets in KsckCluster, so that only the specified tables
    and tablets are tracked, speeds up the execution of the ksck tool when
    it is run with the '--tables=<tables>' and '--tablets=<tablets>' args.
    
    Change-Id: I23b6e6ef258d3498a42af7f92b63392a59c99761
    Reviewed-on: http://gerrit.cloudera.org:8080/13937
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/tools/ksck-test.cc           | 62 ++++++++++++++++++++++++++++++-----
 src/kudu/tools/ksck.cc                | 22 +++----------
 src/kudu/tools/ksck.h                 | 37 ++++++++++++++++++++-
 src/kudu/tools/ksck_checksum.cc       | 12 +++----
 src/kudu/tools/ksck_remote.cc         | 21 ++++++++++--
 src/kudu/tools/rebalancer.cc          |  2 +-
 src/kudu/tools/tool_action_cluster.cc |  4 +--
 src/kudu/tools/tool_replica_util.cc   |  2 +-
 8 files changed, 122 insertions(+), 40 deletions(-)

diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 528aa64..d0f3eb9 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -46,6 +46,7 @@
 #include "kudu/tablet/tablet.pb.h"
 #include "kudu/tools/ksck_checksum.h"
 #include "kudu/tools/ksck_results.h"
+#include "kudu/tools/tool_action_common.h"
 #include "kudu/util/jsonreader.h"
 #include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/status.h"
@@ -197,10 +198,32 @@ class MockKsckCluster : public KsckCluster {
   }
 
   virtual Status RetrieveTablesList() override {
+    filtered_tables_count_ = 0;
+    filtered_tablets_count_ = 0;
+    for (auto it = tables_.begin(); it != tables_.end();)  {
+      if (!MatchesAnyPattern(table_filters_, (*it)->name())) {
+        filtered_tables_count_++;
+        it = tables_.erase(it);
+        continue;
+      }
+      it++;
+    }
     return Status::OK();
   }
 
   virtual Status RetrieveAllTablets() override {
+    for (auto& table : tables_) {
+      vector<shared_ptr<KsckTablet>> tablets(table->tablets());
+      for (auto it = tablets.begin(); it != tablets.end();) {
+        if (!MatchesAnyPattern(tablet_id_filters_, (*it)->id())) {
+          filtered_tablets_count_++;
+          it = tablets.erase(it);
+          continue;
+        }
+        it++;
+      }
+      table->set_tablets(tablets);
+    }
     return Status::OK();
   }
 
@@ -1204,10 +1227,21 @@ TEST_F(KsckTest, TestOneSmallReplicatedTable) {
   CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
 }
 
+// Test filtering on a cluster with no table.
+TEST_F(KsckTest, TestFilterOnNoTableCluster) {
+  cluster_->tables_.clear();
+  cluster_->set_table_filters({"xyz"});
+  FLAGS_checksum_scan = true;
+  ASSERT_OK(RunKsck());
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "The cluster doesn't have any matching tables");
+  CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
+}
+
 // Test filtering on a non-matching table pattern.
 TEST_F(KsckTest, TestNonMatchingTableFilter) {
   CreateOneSmallReplicatedTable();
-  ksck_->set_table_filters({"xyz"});
+  cluster_->set_table_filters({"xyz"});
   FLAGS_checksum_scan = true;
   ASSERT_TRUE(RunKsck().IsRuntimeError());
   const vector<Status>& error_messages = ksck_->results().error_messages;
@@ -1223,7 +1257,7 @@ TEST_F(KsckTest, TestNonMatchingTableFilter) {
 // Test filtering with a matching table pattern.
 TEST_F(KsckTest, TestMatchingTableFilter) {
   CreateOneSmallReplicatedTable();
-  ksck_->set_table_filters({"te*"});
+  cluster_->set_table_filters({"te*"});
   FLAGS_checksum_scan = true;
   ASSERT_OK(RunKsck());
   ASSERT_STR_CONTAINS(err_stream_.str(),
@@ -1232,10 +1266,22 @@ TEST_F(KsckTest, TestMatchingTableFilter) {
   CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
 }
 
+// Test filtering on a table with no tablet.
+TEST_F(KsckTest, TestFilterOnNotabletTable) {
+  CreateAndAddTable("test", 0);
+  cluster_->set_table_filters({"te*"});
+  FLAGS_checksum_scan = true;
+  ASSERT_OK(RunKsck());
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "The cluster doesn't have any matching tablets");
+
+  CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
+}
+
 // Test filtering on a non-matching tablet id pattern.
 TEST_F(KsckTest, TestNonMatchingTabletIdFilter) {
   CreateOneSmallReplicatedTable();
-  ksck_->set_tablet_id_filters({"xyz"});
+  cluster_->set_tablet_id_filters({"xyz"});
   FLAGS_checksum_scan = true;
   ASSERT_TRUE(RunKsck().IsRuntimeError());
   const vector<Status>& error_messages = ksck_->results().error_messages;
@@ -1252,7 +1298,7 @@ TEST_F(KsckTest, TestNonMatchingTabletIdFilter) {
 // Test filtering with a matching tablet ID pattern.
 TEST_F(KsckTest, TestMatchingTabletIdFilter) {
   CreateOneSmallReplicatedTable();
-  ksck_->set_tablet_id_filters({"*-id-2"});
+  cluster_->set_tablet_id_filters({"*-id-2"});
   FLAGS_checksum_scan = true;
   ASSERT_OK(RunKsck());
   ASSERT_STR_CONTAINS(err_stream_.str(),
@@ -1557,7 +1603,7 @@ TEST_F(KsckTest, TestMasterNotReportingTabletServerWithConsensusConflict) {
 TEST_F(KsckTest, TestTableFiltersNoMatch) {
   CreateOneSmallReplicatedTable();
 
-  ksck_->set_table_filters({ "fake-table" });
+  cluster_->set_table_filters({ "fake-table" });
 
   // Every table we check is healthy ;).
   ASSERT_OK(RunKsck());
@@ -1573,7 +1619,7 @@ TEST_F(KsckTest, TestTableFilters) {
   CreateOneSmallReplicatedTable();
   CreateOneSmallReplicatedTable("other", "other-");
 
-  ksck_->set_table_filters({ "test" });
+  cluster_->set_table_filters({ "test" });
   ASSERT_OK(RunKsck());
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "                | Total Count\n"
@@ -1590,7 +1636,7 @@ TEST_F(KsckTest, TestTableFilters) {
 TEST_F(KsckTest, TestTabletFiltersNoMatch) {
   CreateOneSmallReplicatedTable();
 
-  ksck_->set_tablet_id_filters({ "tablet-id-fake" });
+  cluster_->set_tablet_id_filters({ "tablet-id-fake" });
 
   // Every tablet we check is healthy ;).
   ASSERT_OK(RunKsck());
@@ -1605,7 +1651,7 @@ TEST_F(KsckTest, TestTabletFiltersNoMatch) {
 TEST_F(KsckTest, TestTabletFilters) {
   CreateOneSmallReplicatedTable();
 
-  ksck_->set_tablet_id_filters({ "tablet-id-0", "tablet-id-1" });
+  cluster_->set_tablet_id_filters({ "tablet-id-0", "tablet-id-1" });
   ASSERT_OK(RunKsck());
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "                | Total Count\n"
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 134319f..3debfe0 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -42,7 +42,6 @@
 #include "kudu/tablet/tablet.pb.h"
 #include "kudu/tools/color.h"
 #include "kudu/tools/ksck_checksum.h"
-#include "kudu/tools/tool_action_common.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/string_case.h"
@@ -465,8 +464,8 @@ Status Ksck::Run() {
 
   if (FLAGS_checksum_scan) {
     // Copy the filters because they are passed by-value.
-    auto table_filters_for_checksum_opts = table_filters_;
-    auto tablet_id_filters_for_checksum_opts = tablet_id_filters_;
+    auto table_filters_for_checksum_opts = cluster_->table_filters();
+    auto tablet_id_filters_for_checksum_opts = cluster_->tablet_id_filters();
     PUSH_PREPEND_NOT_OK(
         ChecksumData(KsckChecksumOptions(std::move(table_filters_for_checksum_opts),
                                          std::move(tablet_id_filters_for_checksum_opts))),
@@ -565,10 +564,6 @@ Status Ksck::RunAndPrintResults() {
 Status Ksck::CheckTablesConsistency() {
   int bad_tables_count = 0;
   for (const shared_ptr<KsckTable> &table : cluster_->tables()) {
-    if (!MatchesAnyPattern(table_filters_, table->name())) {
-      VLOG(1) << "Skipping table " << table->name();
-      continue;
-    }
     if (!VerifyTable(table)) {
       bad_tables_count++;
     }
@@ -593,14 +588,7 @@ Status Ksck::ChecksumData(const KsckChecksumOptions& opts) {
 }
 
 bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table) {
-  const auto& all_tablets = table->tablets();
-  vector<shared_ptr<KsckTablet>> tablets;
-  std::copy_if(all_tablets.begin(), all_tablets.end(), std::back_inserter(tablets),
-                 [&](const shared_ptr<KsckTablet>& t) {
-                   return MatchesAnyPattern(tablet_id_filters_, t->id());
-                 });
-
-  if (tablets.empty()) {
+  if (table->tablets().empty()) {
     VLOG(1) << Substitute("Skipping table $0 as it has no matching tablets",
                           table->name());
     return true;
@@ -611,8 +599,8 @@ bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table) {
   ts.name = table->name();
   ts.replication_factor = table->num_replicas();
   VLOG(1) << Substitute("Verifying $0 tablet(s) for table $1 configured with num_replicas = $2",
-                        tablets.size(), table->name(), table->num_replicas());
-  for (const auto& tablet : tablets) {
+                        table->tablets().size(), table->name(), table->num_replicas());
+  for (const auto& tablet : table->tablets()) {
     auto tablet_result = VerifyTablet(tablet, table->num_replicas());
     switch (tablet_result) {
       case KsckCheckResult::HEALTHY:
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index 181af26..2a07b46 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -449,13 +449,48 @@ class KsckCluster {
     return nullptr;
   }
 
+  // Setters for filtering the tables/tablets to be checked.
+  // Equivalent to the same functions in class 'Ksck'.
+  void set_table_filters(std::vector<std::string> table_names) {
+    table_filters_ = std::move(table_names);
+  }
+
+  // See above.
+  void set_tablet_id_filters(std::vector<std::string> tablet_ids) {
+    tablet_id_filters_ = std::move(tablet_ids);
+  }
+
+  const std::vector<std::string>& table_filters() const {
+    return table_filters_;
+  }
+
+  const std::vector<std::string>& tablet_id_filters() const {
+    return tablet_id_filters_;
+  }
+
+  int filtered_tables_count() const {
+    return filtered_tables_count_;
+  }
+
+  int filtered_tablets_count() const {
+    return filtered_tablets_count_;
+  }
+
  protected:
-  KsckCluster() = default;
+  KsckCluster() : filtered_tables_count_(0), filtered_tablets_count_(0) {}
   MasterList masters_;
   TSMap tablet_servers_;
   std::vector<std::shared_ptr<KsckTable>> tables_;
   gscoped_ptr<ThreadPool> pool_;
 
+  std::vector<std::string> table_filters_;
+  std::vector<std::string> tablet_id_filters_;
+
+  // The count of tables/tablets filtered out.
+  // Used to determine whether all tables/tablets have been filtered out.
+  std::atomic<int> filtered_tables_count_;
+  std::atomic<int> filtered_tablets_count_;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(KsckCluster);
 };
diff --git a/src/kudu/tools/ksck_checksum.cc b/src/kudu/tools/ksck_checksum.cc
index 1217d05..55c6b59 100644
--- a/src/kudu/tools/ksck_checksum.cc
+++ b/src/kudu/tools/ksck_checksum.cc
@@ -40,7 +40,6 @@
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/periodic.h"
 #include "kudu/tools/ksck.h"
-#include "kudu/tools/tool_action_common.h"
 #include "kudu/util/fault_injection.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/scoped_cleanup.h"
@@ -478,12 +477,10 @@ Status KsckChecksummer::BuildTabletInfoMap(
   int num_replicas_tmp = 0;
   for (const shared_ptr<KsckTable>& table : cluster_->tables()) {
     VLOG(1) << "Table: " << table->name();
-    if (!MatchesAnyPattern(opts.table_filters, table->name())) continue;
     num_tables += 1;
     num_tablets += table->tablets().size();
     for (const shared_ptr<KsckTablet>& tablet : table->tablets()) {
       VLOG(1) << "Tablet: " << tablet->id();
-      if (!MatchesAnyPattern(opts.tablet_id_filters, tablet->id())) continue;
       EmplaceOrDie(&tablet_infos_tmp,
                    tablet->id(),
                    TabletChecksumInfo(tablet, table->schema()));
@@ -491,7 +488,8 @@ Status KsckChecksummer::BuildTabletInfoMap(
     }
   }
 
-  if (num_tables == 0) {
+  if (cluster_->filtered_tables_count() > 0 && num_tables == 0) {
+    // Warn if all tables filtered out.
     string msg = "No table found.";
     if (!opts.table_filters.empty()) {
       msg += " Filter: table_filters=" + JoinStrings(opts.table_filters, ",");
@@ -499,9 +497,9 @@ Status KsckChecksummer::BuildTabletInfoMap(
     return Status::NotFound(msg);
   }
 
-  if (num_tablets > 0 && num_replicas_tmp == 0) {
-    // Warn if the table has tablets, but no replicas. The table may have no
-    // tablets if all range partitions have been dropped.
+  if (cluster_->filtered_tablets_count() > 0 && num_tablets == 0) {
+    // Warn if all tablets filtered out.
+    // The table may have no tablets if all range partitions have been dropped.
     string msg = "No tablet replicas found.";
     if (!opts.table_filters.empty() || !opts.tablet_id_filters.empty()) {
       msg += " Filter:";
diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc
index efa2ba6..3338c77 100644
--- a/src/kudu/tools/ksck_remote.cc
+++ b/src/kudu/tools/ksck_remote.cc
@@ -57,6 +57,7 @@
 #include "kudu/tools/ksck.h"
 #include "kudu/tools/ksck_checksum.h"
 #include "kudu/tools/ksck_results.h"
+#include "kudu/tools/tool_action_common.h"
 #include "kudu/tserver/tablet_server.h"
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/tserver/tserver_service.pb.h"
@@ -521,8 +522,17 @@ Status RemoteKsckCluster::RetrieveTablesList() {
   vector<shared_ptr<KsckTable>> tables;
   tables.reserve(table_names.size());
   simple_spinlock tables_lock;
+  int tables_count = 0;
+  filtered_tables_count_ = 0;
+  filtered_tablets_count_ = 0;
 
   for (const auto& table_name : table_names) {
+    if (!MatchesAnyPattern(table_filters_, table_name)) {
+      filtered_tables_count_++;
+      VLOG(1) << "Skipping table " << table_name;
+      continue;
+    }
+    tables_count++;
     RETURN_NOT_OK(pool_->SubmitFunc([&]() {
       client::sp::shared_ptr<KuduTable> t;
       Status s = client_->OpenTable(table_name, &t);
@@ -545,10 +555,10 @@ Status RemoteKsckCluster::RetrieveTablesList() {
 
   tables_.swap(tables);
 
-  if (tables_.size() < table_names.size()) {
+  if (tables_.size() < tables_count) {
     return Status::NetworkError(
-        Substitute("failed to gather info from all tables: $0 of $1 had errors",
-                   table_names.size() - tables_.size(), table_names.size()));
+        Substitute("failed to gather info from all filtered tables: $0 of $1 had errors",
+                   tables_count - tables_.size(), tables_count));
   }
 
   return Status::OK();
@@ -580,6 +590,11 @@ Status RemoteKsckCluster::RetrieveTabletsList(const shared_ptr<KsckTable>& table
 
   vector<shared_ptr<KsckTablet>> tablets;
   for (const auto* t : tokens) {
+    if (!MatchesAnyPattern(tablet_id_filters_, t->tablet().id())) {
+      filtered_tablets_count_++;
+      VLOG(1) << "Skipping tablet " << t->tablet().id();
+      continue;
+    }
     shared_ptr<KsckTablet> tablet(
         new KsckTablet(table.get(), t->tablet().id()));
     vector<shared_ptr<KsckTabletReplica>> replicas;
diff --git a/src/kudu/tools/rebalancer.cc b/src/kudu/tools/rebalancer.cc
index 55422bc..6994885 100644
--- a/src/kudu/tools/rebalancer.cc
+++ b/src/kudu/tools/rebalancer.cc
@@ -949,8 +949,8 @@ Status Rebalancer::RefreshKsckResults() {
   RETURN_NOT_OK_PREPEND(
       RemoteKsckCluster::Build(config_.master_addresses, &cluster),
       "unable to build KsckCluster");
+  cluster->set_table_filters(config_.table_filters);
   ksck_.reset(new Ksck(cluster));
-  ksck_->set_table_filters(config_.table_filters);
   ignore_result(ksck_->Run());
   return Status::OK();
 }
diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc
index 28698bf..8a3c36a 100644
--- a/src/kudu/tools/tool_action_cluster.cc
+++ b/src/kudu/tools/tool_action_cluster.cc
@@ -178,10 +178,10 @@ Status RunKsck(const RunnerContext& context) {
   shared_ptr<KsckCluster> cluster;
   RETURN_NOT_OK_PREPEND(RemoteKsckCluster::Build(master_addresses, &cluster),
                         "unable to build KsckCluster");
+  cluster->set_table_filters(Split(FLAGS_tables, ",", strings::SkipEmpty()));
+  cluster->set_tablet_id_filters(Split(FLAGS_tablets, ",", strings::SkipEmpty()));
   shared_ptr<Ksck> ksck(new Ksck(cluster));
 
-  ksck->set_table_filters(Split(FLAGS_tables, ",", strings::SkipEmpty()));
-  ksck->set_tablet_id_filters(Split(FLAGS_tablets, ",", strings::SkipEmpty()));
   ksck->set_print_sections(Split(FLAGS_sections, ",", strings::SkipEmpty()));
 
   return ksck->RunAndPrintResults();
diff --git a/src/kudu/tools/tool_replica_util.cc b/src/kudu/tools/tool_replica_util.cc
index c3fc249..986986e 100644
--- a/src/kudu/tools/tool_replica_util.cc
+++ b/src/kudu/tools/tool_replica_util.cc
@@ -616,8 +616,8 @@ Status DoKsckForTablet(const vector<string>& master_addresses,
   // Print to an unopened ofstream to discard ksck output.
   // See https://stackoverflow.com/questions/8243743.
   std::ofstream null_stream;
+  cluster->set_tablet_id_filters({ tablet_id });
   Ksck ksck(cluster, &null_stream);
-  ksck.set_tablet_id_filters({ tablet_id });
   RETURN_NOT_OK(ksck.CheckMasterHealth());
   RETURN_NOT_OK(ksck.CheckMasterConsensus());
   RETURN_NOT_OK(ksck.CheckClusterRunning());