You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/01/16 22:46:50 UTC

[kudu] branch master updated (b924d18 -> 5818a83)

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from b924d18  [build] Fix IWYU script
     new c8384a0  [tserver] replace gscoped_ptr with unique_ptr in Scanner
     new 6325c4e  tools: avoid extra 100ms sleep at end of table scan
     new 5818a83  [build] Fix print function usage in Python 2

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 build-support/clang_tidy_gerrit.py |  3 +++
 build-support/iwyu.py              |  2 ++
 src/kudu/scripts/graph-metrics.py  |  3 +++
 src/kudu/tools/table_scanner.cc    | 20 +++++-----------
 src/kudu/tools/table_scanner.h     |  1 -
 src/kudu/tserver/scanners.cc       |  4 ++--
 src/kudu/tserver/scanners.h        | 32 +++++++++++++-------------
 src/kudu/tserver/tablet_service.cc | 47 +++++++++++++++++---------------------
 8 files changed, 53 insertions(+), 59 deletions(-)


[kudu] 01/03: [tserver] replace gscoped_ptr with unique_ptr in Scanner

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit c8384a04029c3e1b4ce23888355a850721d0f8e7
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Wed Jan 15 19:32:42 2020 -0800

    [tserver] replace gscoped_ptr with unique_ptr in Scanner
    
    This is a small clean up to replace gscoped_ptr with std::unique_ptr
    and avoid extra call to operator new.  I also moved the
    Scanner::IsInitialized() method into the private section.
    
    Change-Id: Ieac533565f96773cc450de521703c21534020596
    Reviewed-on: http://gerrit.cloudera.org:8080/15048
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/tserver/scanners.cc       |  4 ++--
 src/kudu/tserver/scanners.h        | 32 +++++++++++++-------------
 src/kudu/tserver/tablet_service.cc | 47 +++++++++++++++++---------------------
 3 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc
index 77b851b..418bbdc 100644
--- a/src/kudu/tserver/scanners.cc
+++ b/src/kudu/tserver/scanners.cc
@@ -359,11 +359,11 @@ void Scanner::AddTimings(const CpuTimes& elapsed) {
 }
 
 void Scanner::Init(unique_ptr<RowwiseIterator> iter,
-                   gscoped_ptr<ScanSpec> spec) {
+                   unique_ptr<ScanSpec> spec) {
   std::lock_guard<simple_spinlock> l(lock_);
   CHECK(!iter_) << "Already initialized";
   iter_ = std::move(iter);
-  spec_.reset(spec.release());
+  spec_ = std::move(spec);
 }
 
 const ScanSpec& Scanner::spec() const {
diff --git a/src/kudu/tserver/scanners.h b/src/kudu/tserver/scanners.h
index f03e8ff..15eb86c 100644
--- a/src/kudu/tserver/scanners.h
+++ b/src/kudu/tserver/scanners.h
@@ -30,6 +30,7 @@
 
 #include "kudu/common/iterator_stats.h"
 #include "kudu/common/scan_spec.h"
+#include "kudu/common/schema.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/ref_counted.h"
@@ -50,7 +51,6 @@
 namespace kudu {
 
 class RowwiseIterator;
-class Schema;
 class Status;
 class Thread;
 
@@ -203,15 +203,7 @@ class Scanner {
   // Attach an actual iterator and a ScanSpec to this Scanner.
   // Takes ownership of 'iter' and 'spec'.
   void Init(std::unique_ptr<RowwiseIterator> iter,
-            gscoped_ptr<ScanSpec> spec);
-
-  // Return true if the scanner has been initialized (i.e has an iterator).
-  // Once a Scanner is initialized, it is safe to assume that iter() and spec()
-  // return non-NULL for the lifetime of the Scanner object.
-  bool IsInitialized() const {
-    std::lock_guard<simple_spinlock> l(lock_);
-    return iter_ != NULL;
-  }
+            std::unique_ptr<ScanSpec> spec);
 
   RowwiseIterator* iter() {
     return DCHECK_NOTNULL(iter_.get());
@@ -282,8 +274,8 @@ class Scanner {
   // projection is a subset of the iterator's schema -- the iterator's
   // schema needs to include all columns that have predicates, whereas
   // the client may not want to project all of them.
-  void set_client_projection_schema(gscoped_ptr<Schema> client_projection_schema) {
-    client_projection_schema_.swap(client_projection_schema);
+  void set_client_projection_schema(std::unique_ptr<Schema> client_projection_schema) {
+    client_projection_schema_ = std::move(client_projection_schema);
   }
 
   // Returns request's projection schema if it differs from the schema
@@ -332,6 +324,14 @@ class Scanner {
 
   static const std::string kNullTabletId;
 
+  // Return true if the scanner has been initialized (i.e has an iterator).
+  // Once a Scanner is initialized, it is safe to assume that iter() and spec()
+  // return non-NULL for the lifetime of the Scanner object.
+  bool IsInitialized() const {
+    std::lock_guard<simple_spinlock> l(lock_);
+    return iter_ != nullptr;
+  }
+
   // The unique ID of this scanner.
   const std::string id_;
 
@@ -364,13 +364,13 @@ class Scanner {
   IteratorStats already_reported_stats_;
 
   // The spec used by 'iter_'
-  gscoped_ptr<ScanSpec> spec_;
+  std::unique_ptr<ScanSpec> spec_;
+
+  std::unique_ptr<RowwiseIterator> iter_;
 
   // Stores the request's projection schema, if it differs from the
   // schema used by the iterator.
-  gscoped_ptr<Schema> client_projection_schema_;
-
-  std::unique_ptr<RowwiseIterator> iter_;
+  std::unique_ptr<Schema> client_projection_schema_;
 
   AutoReleasePool autorelease_pool_;
 
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index f642a7e..bde51f5 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -25,9 +25,7 @@
 #include <numeric>
 #include <ostream>
 #include <string>
-#include <type_traits>
 #include <unordered_set>
-#include <utility>
 #include <vector>
 
 #include <boost/optional/optional.hpp>
@@ -91,7 +89,6 @@
 #include "kudu/tserver/tserver_admin.pb.h"
 #include "kudu/tserver/tserver_service.pb.h"
 #include "kudu/util/auto_release_pool.h"
-#include "kudu/util/bitset.h"
 #include "kudu/util/crc.h"
 #include "kudu/util/debug/trace_event.h"
 #include "kudu/util/faststring.h"
@@ -2160,16 +2157,15 @@ static Status DecodeEncodedKeyRange(const NewScanRequestPB& scan_pb,
 
 static Status SetupScanSpec(const NewScanRequestPB& scan_pb,
                             const Schema& tablet_schema,
-                            gscoped_ptr<ScanSpec>* spec,
-                            const SharedScanner& scanner) {
-  gscoped_ptr<ScanSpec> ret(new ScanSpec);
-  ret->set_cache_blocks(scan_pb.cache_blocks());
+                            const SharedScanner& scanner,
+                            ScanSpec* spec) {
+  spec->set_cache_blocks(scan_pb.cache_blocks());
 
   // First the column predicates.
   for (const ColumnPredicatePB& pred_pb : scan_pb.column_predicates()) {
     boost::optional<ColumnPredicate> predicate;
     RETURN_NOT_OK(ColumnPredicateFromPB(tablet_schema, scanner->arena(), pred_pb, &predicate));
-    ret->AddPredicate(std::move(*predicate));
+    spec->AddPredicate(std::move(*predicate));
   }
 
   // Then the column range predicates.
@@ -2205,19 +2201,18 @@ static Status SetupScanSpec(const NewScanRequestPB& scan_pb,
     if (pred) {
       VLOG(3) << Substitute("Parsed predicate $0 from $1",
                             pred->ToString(), SecureShortDebugString(scan_pb));
-      ret->AddPredicate(*pred);
+      spec->AddPredicate(*pred);
     }
   }
 
   // Then any encoded key range predicates.
-  RETURN_NOT_OK(DecodeEncodedKeyRange(scan_pb, tablet_schema, scanner, ret.get()));
+  RETURN_NOT_OK(DecodeEncodedKeyRange(scan_pb, tablet_schema, scanner, spec));
 
   // If the scanner has a limit, set it now.
   if (scan_pb.has_limit()) {
-    ret->set_limit(scan_pb.limit());
+    spec->set_limit(scan_pb.limit());
   }
 
-  spec->swap(ret);
   return Status::OK();
 }
 
@@ -2325,17 +2320,16 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
     }
   }
 
-  gscoped_ptr<ScanSpec> spec(new ScanSpec);
-
-  s = SetupScanSpec(scan_pb, tablet_schema, &spec, scanner);
+  ScanSpec spec;
+  s = SetupScanSpec(scan_pb, tablet_schema, scanner, &spec);
   if (PREDICT_FALSE(!s.ok())) {
     *error_code = TabletServerErrorPB::INVALID_SCAN_SPEC;
     return s;
   }
 
-  VLOG(3) << "Before optimizing scan spec: " << spec->ToString(tablet_schema);
-  spec->OptimizeScan(tablet_schema, scanner->arena(), scanner->autorelease_pool(), true);
-  VLOG(3) << "After optimizing scan spec: " << spec->ToString(tablet_schema);
+  VLOG(3) << "Before optimizing scan spec: " << spec.ToString(tablet_schema);
+  spec.OptimizeScan(tablet_schema, scanner->arena(), scanner->autorelease_pool(), true);
+  VLOG(3) << "After optimizing scan spec: " << spec.ToString(tablet_schema);
 
   // Missing columns will contain the columns that are not mentioned in the
   // client projection but are actually needed for the scan, such as columns
@@ -2343,16 +2337,18 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   //
   // NOTE: We should build the missing column after optimizing scan which will
   // remove unnecessary predicates.
-  vector<ColumnSchema> missing_cols = spec->GetMissingColumns(projection);
-  if (spec->CanShortCircuit()) {
+  vector<ColumnSchema> missing_cols = spec.GetMissingColumns(projection);
+  if (spec.CanShortCircuit()) {
     VLOG(1) << "short-circuiting without creating a server-side scanner.";
     *has_more_results = false;
     return Status::OK();
   }
 
   // Store the original projection.
-  gscoped_ptr<Schema> orig_projection(new Schema(projection));
-  scanner->set_client_projection_schema(std::move(orig_projection));
+  {
+    unique_ptr<Schema> orig_projection(new Schema(projection));
+    scanner->set_client_projection_schema(std::move(orig_projection));
+  }
 
   // Build a new projection with the projection columns and the missing columns,
   // annotating each column as a key column appropriately.
@@ -2395,8 +2391,6 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   projection = projection_builder.BuildWithoutIds();
   VLOG(3) << "Scan projection: " << projection.ToString(Schema::BASE_INFO);
 
-  unique_ptr<RowwiseIterator> iter;
-
   // It's important to keep the reference to the tablet for the case when the
   // tablet replica's shutdown is run concurrently with the code below.
   shared_ptr<Tablet> tablet;
@@ -2413,6 +2407,7 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
     return s;
   }
 
+  unique_ptr<RowwiseIterator> iter;
   boost::optional<Timestamp> snap_start_timestamp;
 
   {
@@ -2448,11 +2443,11 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   // This copy will be given to the Scanner so it can report its predicates to
   // /scans. The copy is necessary because the original spec will be modified
   // as its predicates are pushed into lower-level iterators.
-  gscoped_ptr<ScanSpec> orig_spec(new ScanSpec(*spec));
+  unique_ptr<ScanSpec> orig_spec(new ScanSpec(spec));
 
   if (PREDICT_TRUE(s.ok())) {
     TRACE_EVENT0("tserver", "iter->Init");
-    s = iter->Init(spec.get());
+    s = iter->Init(&spec);
     if (PREDICT_FALSE(s.IsInvalidArgument())) {
       // Tablet::Iterator::Init() returns InvalidArgument when an invalid
       // projection is specified.


[kudu] 03/03: [build] Fix print function usage in Python 2

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 5818a839e6219347ea28385dd7b3bbf2ab475e27
Author: Grant Henke <gr...@apache.org>
AuthorDate: Thu Jan 16 16:17:30 2020 -0600

    [build] Fix print function usage in Python 2
    
    This fixes the scripts that use the Python 3 print function when
    running in Python 2.
    
    Change-Id: Iee528e66fc017180e50b796f8cc1d2ee31be8a5e
    Reviewed-on: http://gerrit.cloudera.org:8080/15054
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 build-support/clang_tidy_gerrit.py | 3 +++
 build-support/iwyu.py              | 2 ++
 src/kudu/scripts/graph-metrics.py  | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/build-support/clang_tidy_gerrit.py b/build-support/clang_tidy_gerrit.py
index d7d1644..35f32f6 100755
--- a/build-support/clang_tidy_gerrit.py
+++ b/build-support/clang_tidy_gerrit.py
@@ -17,6 +17,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# Support the print function in Python 2.
+from __future__ import print_function
+
 import argparse
 import collections
 import json
diff --git a/build-support/iwyu.py b/build-support/iwyu.py
index 9af043b..bef4296 100755
--- a/build-support/iwyu.py
+++ b/build-support/iwyu.py
@@ -17,7 +17,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# Support the print function in Python 2.
 from __future__ import print_function
+
 from io import StringIO
 import glob
 import json
diff --git a/src/kudu/scripts/graph-metrics.py b/src/kudu/scripts/graph-metrics.py
index 853e2e4..35d606f 100755
--- a/src/kudu/scripts/graph-metrics.py
+++ b/src/kudu/scripts/graph-metrics.py
@@ -20,6 +20,9 @@
 # Script which parses a test log for 'metrics: ' lines emited by
 # TimeSeriesCollector, and constructs a graph from them
 
+# Support the print function in Python 2.
+from __future__ import print_function
+
 import os
 import re
 import simplejson


[kudu] 02/03: tools: avoid extra 100ms sleep at end of table scan

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 6325c4edcc389aa2ee1d8f028f367978047b8be6
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Wed Jan 15 15:23:00 2020 -0800

    tools: avoid extra 100ms sleep at end of table scan
    
    Prior to this patch, the table scan tool printed a status output every
    5 seconds by starting a separate monitor thread. This thread's main loop
    would only check for the scan completion every 100ms, so after the scans
    completed, the process could hang for up to an extra 100ms before
    exiting.
    
    This changes the printing to happen from the main thread instead, with
    the waiting based on the actual scan completion. Now the tool exits
    immediately when the scan completes.
    
    Example output:
    
    Before:
    todd@turbo:~/kudu$ time ./build/latest/bin/kudu table scan localhost sequences  | wc -l
      79
    
      real  0m0.180s
      user  0m0.041s
      sys   0m0.021s
    
    After:
      todd@turbo:~/kudu$ time ./build/latest/bin/kudu table scan localhost sequences  | wc -l
      79
    
      real  0m0.078s
      user  0m0.055s
      sys   0m0.005s
    
    Change-Id: Ic7dcd8408b1dbd5322a2f5e8f4889f5f5660ece0
    Reviewed-on: http://gerrit.cloudera.org:8080/15040
    Reviewed-by: Bankim Bhavsar <ba...@cloudera.com>
    Reviewed-by: Grant Henke <gr...@apache.org>
    Tested-by: Grant Henke <gr...@apache.org>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/tools/table_scanner.cc | 20 ++++++--------------
 src/kudu/tools/table_scanner.h  |  1 -
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 4924f30..f50b93a 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -502,8 +502,9 @@ void TableScanner::ScanTask(const vector<KuduScanToken *>& tokens, Status* threa
     if (out_ && FLAGS_show_values) {
       MutexLock l(output_lock_);
       for (const auto& row : batch) {
-        *out_ << row.ToString() << endl;
+        *out_ << row.ToString() << "\n";
       }
+      out_->flush();
     }
   });
 }
@@ -530,16 +531,6 @@ void TableScanner::CopyTask(const vector<KuduScanToken*>& tokens, Status* thread
   });
 }
 
-void TableScanner::MonitorTask() {
-  MonoTime last_log_time = MonoTime::Now();
-  while (thread_pool_->num_threads() > 1) {    // Some other table scan thread is running.
-    if (MonoTime::Now() - last_log_time >= MonoDelta::FromSeconds(5)) {
-      LOG(INFO) << "Scanned count: " << total_count_.Load();
-      last_log_time = MonoTime::Now();
-    }
-    SleepFor(MonoDelta::FromMilliseconds(100));
-  }
-}
 
 void TableScanner::SetOutput(ostream* out) {
   out_ = out;
@@ -600,7 +591,7 @@ Status TableScanner::StartWork(WorkType type) {
   vector<Status> thread_statuses(FLAGS_num_threads);
 
   RETURN_NOT_OK(ThreadPoolBuilder("table_scan_pool")
-                  .set_max_threads(FLAGS_num_threads + 1)  // add extra 1 thread for MonitorTask
+                  .set_max_threads(FLAGS_num_threads)
                   .set_idle_timeout(MonoDelta::FromMilliseconds(1))
                   .Build(&thread_pool_));
 
@@ -617,8 +608,9 @@ Status TableScanner::StartWork(WorkType type) {
         boost::bind(&TableScanner::CopyTask, this, thread_tokens[i], &thread_statuses[i])));
     }
   }
-  RETURN_NOT_OK(thread_pool_->SubmitFunc(boost::bind(&TableScanner::MonitorTask, this)));
-  thread_pool_->Wait();
+  while (!thread_pool_->WaitFor(MonoDelta::FromSeconds(5))) {
+    LOG(INFO) << "Scanned count: " << total_count_.Load();
+  }
   thread_pool_->Shutdown();
 
   sw.stop();
diff --git a/src/kudu/tools/table_scanner.h b/src/kudu/tools/table_scanner.h
index b784835..ff6b81b 100644
--- a/src/kudu/tools/table_scanner.h
+++ b/src/kudu/tools/table_scanner.h
@@ -83,7 +83,6 @@ class TableScanner {
                   const std::function<void(const kudu::client::KuduScanBatch& batch)>& cb);
   void ScanTask(const std::vector<kudu::client::KuduScanToken*>& tokens, Status* thread_status);
   void CopyTask(const std::vector<kudu::client::KuduScanToken*>& tokens, Status* thread_status);
-  void MonitorTask();
 
   Status AddRow(const client::sp::shared_ptr<kudu::client::KuduTable>& table,
                 const kudu::client::KuduSchema& table_schema,