You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2019/02/27 02:15:00 UTC

[kudu] 03/07: Fix potential duplicate scanner on /scans page

This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit c1330bc2f71cd2e84ac16fdcdc4373841afc7bdb
Author: Will Berkeley <wd...@gmail.org>
AuthorDate: Tue Feb 26 13:15:08 2019 -0800

    Fix potential duplicate scanner on /scans page
    
    In rare scenarios, it is possible to see the same scanner listed twice
    on the /scans page. This can happen because we gather the list in two
    stages, first getting a list of in-flight scanners, and then getting a
    list of recently completed scanners. Each is done under a lock and so
    gives a consistent snapshot of in-flight or completed scanners,
    respectively, but if a scanner completes between gathering the list of
    in-flight scanners and gathering the list of completed scanners, it's
    possible to see that same scanner listed as both in-flight and also as
    completed.
    
    This patch changes the logic a bit so we deduplicate the list of
    scanners by scanner id, favoring keeping the information about a
    complete scanner since it must be more up-to-date than information
    about the scanner while it was in-flight.
    
    Change-Id: I680546d80315a1548337a504c45afa4f2f0350ad
    Reviewed-on: http://gerrit.cloudera.org:8080/12600
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Will Berkeley <wd...@gmail.com>
---
 src/kudu/tserver/scanners.cc | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc
index e1562e8..3c6342c 100644
--- a/src/kudu/tserver/scanners.cc
+++ b/src/kudu/tserver/scanners.cc
@@ -66,6 +66,7 @@ METRIC_DEFINE_gauge_size(server, active_scanners,
 
 using std::string;
 using std::unique_ptr;
+using std::unordered_map;
 using std::vector;
 using strings::Substitute;
 
@@ -221,31 +222,38 @@ void ScannerManager::ListScanners(std::vector<SharedScanner>* scanners) const {
 }
 
 vector<ScanDescriptor> ScannerManager::ListScans() const {
-  vector<ScanDescriptor> scans;
+  unordered_map<string, ScanDescriptor> scans;
   for (const ScannerMapStripe* stripe : scanner_maps_) {
     shared_lock<RWMutex> l(stripe->lock_);
     for (const auto& se : stripe->scanners_by_id_) {
       if (se.second->IsInitialized()) {
-        scans.emplace_back(se.second->descriptor());
-        scans.back().state = ScanState::kActive;
+        ScanDescriptor desc = se.second->descriptor();
+        desc.state = ScanState::kActive;
+        EmplaceOrDie(&scans, se.first, std::move(desc));
       }
     }
   }
 
   {
     shared_lock<RWMutex> l(completed_scans_lock_);
-    scans.insert(scans.end(), completed_scans_.begin(), completed_scans_.end());
+    // A scanner in 'scans' may have completed between the above loop and here.
+    // As we'd rather have the finalized descriptor of the completed scan,
+    // update over the old descriptor in this case.
+    for (const auto& scan : completed_scans_) {
+      InsertOrUpdate(&scans, scan.scanner_id, scan);
+    }
   }
 
-  // TODO(dan): It's possible for a descriptor to be included twice in the
-  // result set if its scanner is concurrently removed from the scanner map.
+  vector<ScanDescriptor> ret;
+  ret.reserve(scans.size());
+  AppendValuesFromMap(scans, &ret);
 
   // Sort oldest to newest, so that the ordering is consistent across calls.
-  std::sort(scans.begin(), scans.end(), [] (const ScanDescriptor& a, const ScanDescriptor& b) {
+  std::sort(ret.begin(), ret.end(), [] (const ScanDescriptor& a, const ScanDescriptor& b) {
       return a.start_time > b.start_time;
   });
 
-  return scans;
+  return ret;
 }
 
 void ScannerManager::RemoveExpiredScanners() {