You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/01/16 22:46:50 UTC
[kudu] branch master updated (b924d18 -> 5818a83)
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.
from b924d18 [build] Fix IWYU script
new c8384a0 [tserver] replace gscoped_ptr with unique_ptr in Scanner
new 6325c4e tools: avoid extra 100ms sleep at end of table scan
new 5818a83 [build] Fix print function usage in Python 2
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
build-support/clang_tidy_gerrit.py | 3 +++
build-support/iwyu.py | 2 ++
src/kudu/scripts/graph-metrics.py | 3 +++
src/kudu/tools/table_scanner.cc | 20 +++++-----------
src/kudu/tools/table_scanner.h | 1 -
src/kudu/tserver/scanners.cc | 4 ++--
src/kudu/tserver/scanners.h | 32 +++++++++++++-------------
src/kudu/tserver/tablet_service.cc | 47 +++++++++++++++++---------------------
8 files changed, 53 insertions(+), 59 deletions(-)
[kudu] 01/03: [tserver] replace gscoped_ptr with unique_ptr in
Scanner
Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit c8384a04029c3e1b4ce23888355a850721d0f8e7
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Wed Jan 15 19:32:42 2020 -0800
[tserver] replace gscoped_ptr with unique_ptr in Scanner
This is a small clean up to replace gscoped_ptr with std::unique_ptr
and avoid extra call to operator new. I also moved the
Scanner::IsInitialized() method into the private section.
Change-Id: Ieac533565f96773cc450de521703c21534020596
Reviewed-on: http://gerrit.cloudera.org:8080/15048
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/tserver/scanners.cc | 4 ++--
src/kudu/tserver/scanners.h | 32 +++++++++++++-------------
src/kudu/tserver/tablet_service.cc | 47 +++++++++++++++++---------------------
3 files changed, 39 insertions(+), 44 deletions(-)
diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc
index 77b851b..418bbdc 100644
--- a/src/kudu/tserver/scanners.cc
+++ b/src/kudu/tserver/scanners.cc
@@ -359,11 +359,11 @@ void Scanner::AddTimings(const CpuTimes& elapsed) {
}
void Scanner::Init(unique_ptr<RowwiseIterator> iter,
- gscoped_ptr<ScanSpec> spec) {
+ unique_ptr<ScanSpec> spec) {
std::lock_guard<simple_spinlock> l(lock_);
CHECK(!iter_) << "Already initialized";
iter_ = std::move(iter);
- spec_.reset(spec.release());
+ spec_ = std::move(spec);
}
const ScanSpec& Scanner::spec() const {
diff --git a/src/kudu/tserver/scanners.h b/src/kudu/tserver/scanners.h
index f03e8ff..15eb86c 100644
--- a/src/kudu/tserver/scanners.h
+++ b/src/kudu/tserver/scanners.h
@@ -30,6 +30,7 @@
#include "kudu/common/iterator_stats.h"
#include "kudu/common/scan_spec.h"
+#include "kudu/common/schema.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/macros.h"
#include "kudu/gutil/ref_counted.h"
@@ -50,7 +51,6 @@
namespace kudu {
class RowwiseIterator;
-class Schema;
class Status;
class Thread;
@@ -203,15 +203,7 @@ class Scanner {
// Attach an actual iterator and a ScanSpec to this Scanner.
// Takes ownership of 'iter' and 'spec'.
void Init(std::unique_ptr<RowwiseIterator> iter,
- gscoped_ptr<ScanSpec> spec);
-
- // Return true if the scanner has been initialized (i.e has an iterator).
- // Once a Scanner is initialized, it is safe to assume that iter() and spec()
- // return non-NULL for the lifetime of the Scanner object.
- bool IsInitialized() const {
- std::lock_guard<simple_spinlock> l(lock_);
- return iter_ != NULL;
- }
+ std::unique_ptr<ScanSpec> spec);
RowwiseIterator* iter() {
return DCHECK_NOTNULL(iter_.get());
@@ -282,8 +274,8 @@ class Scanner {
// projection is a subset of the iterator's schema -- the iterator's
// schema needs to include all columns that have predicates, whereas
// the client may not want to project all of them.
- void set_client_projection_schema(gscoped_ptr<Schema> client_projection_schema) {
- client_projection_schema_.swap(client_projection_schema);
+ void set_client_projection_schema(std::unique_ptr<Schema> client_projection_schema) {
+ client_projection_schema_ = std::move(client_projection_schema);
}
// Returns request's projection schema if it differs from the schema
@@ -332,6 +324,14 @@ class Scanner {
static const std::string kNullTabletId;
+ // Return true if the scanner has been initialized (i.e has an iterator).
+ // Once a Scanner is initialized, it is safe to assume that iter() and spec()
+ // return non-NULL for the lifetime of the Scanner object.
+ bool IsInitialized() const {
+ std::lock_guard<simple_spinlock> l(lock_);
+ return iter_ != nullptr;
+ }
+
// The unique ID of this scanner.
const std::string id_;
@@ -364,13 +364,13 @@ class Scanner {
IteratorStats already_reported_stats_;
// The spec used by 'iter_'
- gscoped_ptr<ScanSpec> spec_;
+ std::unique_ptr<ScanSpec> spec_;
+
+ std::unique_ptr<RowwiseIterator> iter_;
// Stores the request's projection schema, if it differs from the
// schema used by the iterator.
- gscoped_ptr<Schema> client_projection_schema_;
-
- std::unique_ptr<RowwiseIterator> iter_;
+ std::unique_ptr<Schema> client_projection_schema_;
AutoReleasePool autorelease_pool_;
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index f642a7e..bde51f5 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -25,9 +25,7 @@
#include <numeric>
#include <ostream>
#include <string>
-#include <type_traits>
#include <unordered_set>
-#include <utility>
#include <vector>
#include <boost/optional/optional.hpp>
@@ -91,7 +89,6 @@
#include "kudu/tserver/tserver_admin.pb.h"
#include "kudu/tserver/tserver_service.pb.h"
#include "kudu/util/auto_release_pool.h"
-#include "kudu/util/bitset.h"
#include "kudu/util/crc.h"
#include "kudu/util/debug/trace_event.h"
#include "kudu/util/faststring.h"
@@ -2160,16 +2157,15 @@ static Status DecodeEncodedKeyRange(const NewScanRequestPB& scan_pb,
static Status SetupScanSpec(const NewScanRequestPB& scan_pb,
const Schema& tablet_schema,
- gscoped_ptr<ScanSpec>* spec,
- const SharedScanner& scanner) {
- gscoped_ptr<ScanSpec> ret(new ScanSpec);
- ret->set_cache_blocks(scan_pb.cache_blocks());
+ const SharedScanner& scanner,
+ ScanSpec* spec) {
+ spec->set_cache_blocks(scan_pb.cache_blocks());
// First the column predicates.
for (const ColumnPredicatePB& pred_pb : scan_pb.column_predicates()) {
boost::optional<ColumnPredicate> predicate;
RETURN_NOT_OK(ColumnPredicateFromPB(tablet_schema, scanner->arena(), pred_pb, &predicate));
- ret->AddPredicate(std::move(*predicate));
+ spec->AddPredicate(std::move(*predicate));
}
// Then the column range predicates.
@@ -2205,19 +2201,18 @@ static Status SetupScanSpec(const NewScanRequestPB& scan_pb,
if (pred) {
VLOG(3) << Substitute("Parsed predicate $0 from $1",
pred->ToString(), SecureShortDebugString(scan_pb));
- ret->AddPredicate(*pred);
+ spec->AddPredicate(*pred);
}
}
// Then any encoded key range predicates.
- RETURN_NOT_OK(DecodeEncodedKeyRange(scan_pb, tablet_schema, scanner, ret.get()));
+ RETURN_NOT_OK(DecodeEncodedKeyRange(scan_pb, tablet_schema, scanner, spec));
// If the scanner has a limit, set it now.
if (scan_pb.has_limit()) {
- ret->set_limit(scan_pb.limit());
+ spec->set_limit(scan_pb.limit());
}
- spec->swap(ret);
return Status::OK();
}
@@ -2325,17 +2320,16 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
}
}
- gscoped_ptr<ScanSpec> spec(new ScanSpec);
-
- s = SetupScanSpec(scan_pb, tablet_schema, &spec, scanner);
+ ScanSpec spec;
+ s = SetupScanSpec(scan_pb, tablet_schema, scanner, &spec);
if (PREDICT_FALSE(!s.ok())) {
*error_code = TabletServerErrorPB::INVALID_SCAN_SPEC;
return s;
}
- VLOG(3) << "Before optimizing scan spec: " << spec->ToString(tablet_schema);
- spec->OptimizeScan(tablet_schema, scanner->arena(), scanner->autorelease_pool(), true);
- VLOG(3) << "After optimizing scan spec: " << spec->ToString(tablet_schema);
+ VLOG(3) << "Before optimizing scan spec: " << spec.ToString(tablet_schema);
+ spec.OptimizeScan(tablet_schema, scanner->arena(), scanner->autorelease_pool(), true);
+ VLOG(3) << "After optimizing scan spec: " << spec.ToString(tablet_schema);
// Missing columns will contain the columns that are not mentioned in the
// client projection but are actually needed for the scan, such as columns
@@ -2343,16 +2337,18 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
//
// NOTE: We should build the missing column after optimizing scan which will
// remove unnecessary predicates.
- vector<ColumnSchema> missing_cols = spec->GetMissingColumns(projection);
- if (spec->CanShortCircuit()) {
+ vector<ColumnSchema> missing_cols = spec.GetMissingColumns(projection);
+ if (spec.CanShortCircuit()) {
VLOG(1) << "short-circuiting without creating a server-side scanner.";
*has_more_results = false;
return Status::OK();
}
// Store the original projection.
- gscoped_ptr<Schema> orig_projection(new Schema(projection));
- scanner->set_client_projection_schema(std::move(orig_projection));
+ {
+ unique_ptr<Schema> orig_projection(new Schema(projection));
+ scanner->set_client_projection_schema(std::move(orig_projection));
+ }
// Build a new projection with the projection columns and the missing columns,
// annotating each column as a key column appropriately.
@@ -2395,8 +2391,6 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
projection = projection_builder.BuildWithoutIds();
VLOG(3) << "Scan projection: " << projection.ToString(Schema::BASE_INFO);
- unique_ptr<RowwiseIterator> iter;
-
// It's important to keep the reference to the tablet for the case when the
// tablet replica's shutdown is run concurrently with the code below.
shared_ptr<Tablet> tablet;
@@ -2413,6 +2407,7 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
return s;
}
+ unique_ptr<RowwiseIterator> iter;
boost::optional<Timestamp> snap_start_timestamp;
{
@@ -2448,11 +2443,11 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
// This copy will be given to the Scanner so it can report its predicates to
// /scans. The copy is necessary because the original spec will be modified
// as its predicates are pushed into lower-level iterators.
- gscoped_ptr<ScanSpec> orig_spec(new ScanSpec(*spec));
+ unique_ptr<ScanSpec> orig_spec(new ScanSpec(spec));
if (PREDICT_TRUE(s.ok())) {
TRACE_EVENT0("tserver", "iter->Init");
- s = iter->Init(spec.get());
+ s = iter->Init(&spec);
if (PREDICT_FALSE(s.IsInvalidArgument())) {
// Tablet::Iterator::Init() returns InvalidArgument when an invalid
// projection is specified.
[kudu] 03/03: [build] Fix print function usage in Python 2
Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 5818a839e6219347ea28385dd7b3bbf2ab475e27
Author: Grant Henke <gr...@apache.org>
AuthorDate: Thu Jan 16 16:17:30 2020 -0600
[build] Fix print function usage in Python 2
This fixes the scripts that use the Python 3 print function when
running in Python 2.
Change-Id: Iee528e66fc017180e50b796f8cc1d2ee31be8a5e
Reviewed-on: http://gerrit.cloudera.org:8080/15054
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins
---
build-support/clang_tidy_gerrit.py | 3 +++
build-support/iwyu.py | 2 ++
src/kudu/scripts/graph-metrics.py | 3 +++
3 files changed, 8 insertions(+)
diff --git a/build-support/clang_tidy_gerrit.py b/build-support/clang_tidy_gerrit.py
index d7d1644..35f32f6 100755
--- a/build-support/clang_tidy_gerrit.py
+++ b/build-support/clang_tidy_gerrit.py
@@ -17,6 +17,9 @@
# specific language governing permissions and limitations
# under the License.
+# Support the print function in Python 2.
+from __future__ import print_function
+
import argparse
import collections
import json
diff --git a/build-support/iwyu.py b/build-support/iwyu.py
index 9af043b..bef4296 100755
--- a/build-support/iwyu.py
+++ b/build-support/iwyu.py
@@ -17,7 +17,9 @@
# specific language governing permissions and limitations
# under the License.
+# Support the print function in Python 2.
from __future__ import print_function
+
from io import StringIO
import glob
import json
diff --git a/src/kudu/scripts/graph-metrics.py b/src/kudu/scripts/graph-metrics.py
index 853e2e4..35d606f 100755
--- a/src/kudu/scripts/graph-metrics.py
+++ b/src/kudu/scripts/graph-metrics.py
@@ -20,6 +20,9 @@
# Script which parses a test log for 'metrics: ' lines emited by
# TimeSeriesCollector, and constructs a graph from them
+# Support the print function in Python 2.
+from __future__ import print_function
+
import os
import re
import simplejson
[kudu] 02/03: tools: avoid extra 100ms sleep at end of table scan
Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 6325c4edcc389aa2ee1d8f028f367978047b8be6
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Wed Jan 15 15:23:00 2020 -0800
tools: avoid extra 100ms sleep at end of table scan
Prior to this patch, the table scan tool printed a status output every
5 seconds by starting a separate monitor thread. This thread's main loop
would only check for the scan completion every 100ms, so after the scans
completed, the process could hang for up to an extra 100ms before
exiting.
This changes the printing to happen from the main thread instead, with
the waiting based on the actual scan completion. Now the tool exits
immediately when the scan completes.
Example output:
Before:
todd@turbo:~/kudu$ time ./build/latest/bin/kudu table scan localhost sequences | wc -l
79
real 0m0.180s
user 0m0.041s
sys 0m0.021s
After:
todd@turbo:~/kudu$ time ./build/latest/bin/kudu table scan localhost sequences | wc -l
79
real 0m0.078s
user 0m0.055s
sys 0m0.005s
Change-Id: Ic7dcd8408b1dbd5322a2f5e8f4889f5f5660ece0
Reviewed-on: http://gerrit.cloudera.org:8080/15040
Reviewed-by: Bankim Bhavsar <ba...@cloudera.com>
Reviewed-by: Grant Henke <gr...@apache.org>
Tested-by: Grant Henke <gr...@apache.org>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/tools/table_scanner.cc | 20 ++++++--------------
src/kudu/tools/table_scanner.h | 1 -
2 files changed, 6 insertions(+), 15 deletions(-)
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 4924f30..f50b93a 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -502,8 +502,9 @@ void TableScanner::ScanTask(const vector<KuduScanToken *>& tokens, Status* threa
if (out_ && FLAGS_show_values) {
MutexLock l(output_lock_);
for (const auto& row : batch) {
- *out_ << row.ToString() << endl;
+ *out_ << row.ToString() << "\n";
}
+ out_->flush();
}
});
}
@@ -530,16 +531,6 @@ void TableScanner::CopyTask(const vector<KuduScanToken*>& tokens, Status* thread
});
}
-void TableScanner::MonitorTask() {
- MonoTime last_log_time = MonoTime::Now();
- while (thread_pool_->num_threads() > 1) { // Some other table scan thread is running.
- if (MonoTime::Now() - last_log_time >= MonoDelta::FromSeconds(5)) {
- LOG(INFO) << "Scanned count: " << total_count_.Load();
- last_log_time = MonoTime::Now();
- }
- SleepFor(MonoDelta::FromMilliseconds(100));
- }
-}
void TableScanner::SetOutput(ostream* out) {
out_ = out;
@@ -600,7 +591,7 @@ Status TableScanner::StartWork(WorkType type) {
vector<Status> thread_statuses(FLAGS_num_threads);
RETURN_NOT_OK(ThreadPoolBuilder("table_scan_pool")
- .set_max_threads(FLAGS_num_threads + 1) // add extra 1 thread for MonitorTask
+ .set_max_threads(FLAGS_num_threads)
.set_idle_timeout(MonoDelta::FromMilliseconds(1))
.Build(&thread_pool_));
@@ -617,8 +608,9 @@ Status TableScanner::StartWork(WorkType type) {
boost::bind(&TableScanner::CopyTask, this, thread_tokens[i], &thread_statuses[i])));
}
}
- RETURN_NOT_OK(thread_pool_->SubmitFunc(boost::bind(&TableScanner::MonitorTask, this)));
- thread_pool_->Wait();
+ while (!thread_pool_->WaitFor(MonoDelta::FromSeconds(5))) {
+ LOG(INFO) << "Scanned count: " << total_count_.Load();
+ }
thread_pool_->Shutdown();
sw.stop();
diff --git a/src/kudu/tools/table_scanner.h b/src/kudu/tools/table_scanner.h
index b784835..ff6b81b 100644
--- a/src/kudu/tools/table_scanner.h
+++ b/src/kudu/tools/table_scanner.h
@@ -83,7 +83,6 @@ class TableScanner {
const std::function<void(const kudu::client::KuduScanBatch& batch)>& cb);
void ScanTask(const std::vector<kudu::client::KuduScanToken*>& tokens, Status* thread_status);
void CopyTask(const std::vector<kudu::client::KuduScanToken*>& tokens, Status* thread_status);
- void MonitorTask();
Status AddRow(const client::sp::shared_ptr<kudu::client::KuduTable>& table,
const kudu::client::KuduSchema& table_schema,