You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/01/17 20:16:44 UTC
[kudu] branch master updated (0bd417b -> 32d1ac0)
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.
from 0bd417b KUDU-3042: Fix invalid DCHECK when rpc is cancelled then times out
new 2e50335 client: use dense_hash_set in Batcher
new 32d1ac0 [benchmark] Fix printing in get-job-stats-from-mysql.py
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
src/kudu/client/batcher.cc | 2 ++
src/kudu/client/batcher.h | 9 +++++----
src/kudu/scripts/get-job-stats-from-mysql.py | 5 ++---
3 files changed, 9 insertions(+), 7 deletions(-)
[kudu] 02/02: [benchmark] Fix printing in
get-job-stats-from-mysql.py
Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 32d1ac04bbd4b1bb8eecb35e05bde3bfa384a3ed
Author: Grant Henke <gr...@apache.org>
AuthorDate: Fri Jan 17 13:50:15 2020 -0600
[benchmark] Fix printing in get-job-stats-from-mysql.py
This is a follow-up to 8463663 that fixes the print function output
in get-job-stats-from-mysql.py.
Change-Id: I10dfcb87647ea73b9089cd5a7a035665adb50494
Reviewed-on: http://gerrit.cloudera.org:8080/15067
Tested-by: Grant Henke <gr...@apache.org>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
src/kudu/scripts/get-job-stats-from-mysql.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/kudu/scripts/get-job-stats-from-mysql.py b/src/kudu/scripts/get-job-stats-from-mysql.py
index 2fd062f..a7b52e2 100644
--- a/src/kudu/scripts/get-job-stats-from-mysql.py
+++ b/src/kudu/scripts/get-job-stats-from-mysql.py
@@ -52,7 +52,6 @@ with con:
days = sys.argv[2]
cur.execute("select workload, runtime, build_number from kudu_perf_tpch where workload like %s AND curr_date >= DATE_SUB(NOW(), INTERVAL %s DAY) and runtime != 0 ORDER BY workload, build_number, curr_date", (workload, days))
rows = cur.fetchall()
- print('workload', '\t', 'runtime', '\t', 'build_number')
+ print('workload \truntime \tbuild_number')
for row in rows:
- print(row[0], '\t', row[1], '\t', row[2])
-
+ print("{} \t{} \t{}".format(row[0], row[1], row[2]))
[kudu] 01/02: client: use dense_hash_set in Batcher
Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 2e50335112aab5ffc706414c5873400b8177bd5b
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Fri Dec 6 15:14:02 2019 -0800
client: use dense_hash_set in Batcher
The Batcher implementation keeps an unordered set of in-flight
operations. The built-in unordered_set is not very fast. Google's
dense_hash_set is noticeably faster.
This sped up a client-reactor-bound benchmark by around 27%.
Before:
Generator report
time total : 37293.8 ms
time per row: 0.000466172 ms
Dropping auto-created table 'default.loadgen_auto_ece2f41beef94a9fa032c77899f7e61c'
Performance counter stats for './build/thinlto/bin/kudu perf loadgen localhost -num_rows_per_thread=10000000 -num_threads=8':
189,125.49 msec task-clock # 5.060 CPUs utilized
29,363 context-switches # 0.155 K/sec
2,043 cpu-migrations # 0.011 K/sec
48,405 page-faults # 0.256 K/sec
772,496,448,279 cycles # 4.085 GHz (83.33%)
129,999,474,226 stalled-cycles-frontend # 16.83% frontend cycles idle (83.36%)
300,049,388,250 stalled-cycles-backend # 38.84% backend cycles idle (83.30%)
414,415,517,571 instructions # 0.54 insn per cycle
# 0.72 stalled cycles per insn (83.32%)
76,829,647,882 branches # 406.236 M/sec (83.34%)
352,749,453 branch-misses # 0.46% of all branches (83.35%)
37.376785122 seconds time elapsed
186.834651000 seconds user
2.143945000 seconds sys
After:
Generator report
time total : 29191.6 ms
time per row: 0.000364895 ms
Dropping auto-created table 'default.loadgen_auto_86e36746a9ce4bf19ef2beee143c09f7'
Performance counter stats for './build/thinlto/bin/kudu perf loadgen localhost -num_rows_per_thread=10000000 -num_threads=8':
175,386.48 msec task-clock # 5.993 CPUs utilized
107,552 context-switches # 0.613 K/sec
3,056 cpu-migrations # 0.017 K/sec
49,802 page-faults # 0.284 K/sec
718,173,598,221 cycles # 4.095 GHz (83.34%)
154,338,830,503 stalled-cycles-frontend # 21.49% frontend cycles idle (83.35%)
162,605,327,354 stalled-cycles-backend # 22.64% backend cycles idle (83.33%)
407,408,456,605 instructions # 0.57 insn per cycle
# 0.40 stalled cycles per insn (83.33%)
76,468,794,077 branches # 436.002 M/sec (83.31%)
462,854,805 branch-misses # 0.61% of all branches (83.35%)
29.266327666 seconds time elapsed
171.921196000 seconds user
3.312785000 seconds sys
Change-Id: Ieb2d190b9445b49ed752c5991146477fd793099a
Reviewed-on: http://gerrit.cloudera.org:8080/14869
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
---
src/kudu/client/batcher.cc | 2 ++
src/kudu/client/batcher.h | 9 +++++----
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/kudu/client/batcher.cc b/src/kudu/client/batcher.cc
index c68e77d..20ff371 100644
--- a/src/kudu/client/batcher.cc
+++ b/src/kudu/client/batcher.cc
@@ -598,6 +598,8 @@ Batcher::Batcher(KuduClient* client,
timeout_(client->default_rpc_timeout()),
outstanding_lookups_(0),
buffer_bytes_used_(0) {
+ ops_.set_empty_key(nullptr);
+ ops_.set_deleted_key(reinterpret_cast<InFlightOp*>(-1));
}
void Batcher::Abort() {
diff --git a/src/kudu/client/batcher.h b/src/kudu/client/batcher.h
index b244b28..7128d71 100644
--- a/src/kudu/client/batcher.h
+++ b/src/kudu/client/batcher.h
@@ -20,9 +20,11 @@
#include <cstdint>
#include <mutex>
#include <unordered_map>
-#include <unordered_set>
+#include <utility>
#include <vector>
+#include <sparsehash/dense_hash_set>
+
#include "kudu/client/client.h"
#include "kudu/client/shared_ptr.h"
#include "kudu/client/write_op.h"
@@ -42,11 +44,10 @@ class KuduStatusCallback;
namespace internal {
-struct InFlightOp;
-
class ErrorCollector;
class RemoteTablet;
class WriteRpc;
+struct InFlightOp;
// A Batcher is the class responsible for collecting row operations, routing them to the
// correct tablet server, and possibly batching them together for better efficiency.
@@ -202,7 +203,7 @@ class Batcher : public RefCountedThreadSafe<Batcher> {
KuduStatusCallback* flush_callback_;
// All buffered or in-flight ops.
- std::unordered_set<InFlightOp*> ops_;
+ google::dense_hash_set<InFlightOp*> ops_;
// Each tablet's buffered ops.
typedef std::unordered_map<RemoteTablet*, std::vector<InFlightOp*> > OpsMap;
OpsMap per_tablet_ops_;