You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/01/17 20:16:44 UTC

[kudu] branch master updated (0bd417b -> 32d1ac0)

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 0bd417b  KUDU-3042: Fix invalid DCHECK when rpc is cancelled then times out
     new 2e50335  client: use dense_hash_set in Batcher
     new 32d1ac0  [benchmark] Fix printing in get-job-stats-from-mysql.py

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/batcher.cc                   | 2 ++
 src/kudu/client/batcher.h                    | 9 +++++----
 src/kudu/scripts/get-job-stats-from-mysql.py | 5 ++---
 3 files changed, 9 insertions(+), 7 deletions(-)


[kudu] 02/02: [benchmark] Fix printing in get-job-stats-from-mysql.py

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 32d1ac04bbd4b1bb8eecb35e05bde3bfa384a3ed
Author: Grant Henke <gr...@apache.org>
AuthorDate: Fri Jan 17 13:50:15 2020 -0600

    [benchmark] Fix printing in get-job-stats-from-mysql.py
    
    This is a follow-up to 8463663 that fixes the output of the print
    function calls in get-job-stats-from-mysql.py.
    
    Change-Id: I10dfcb87647ea73b9089cd5a7a035665adb50494
    Reviewed-on: http://gerrit.cloudera.org:8080/15067
    Tested-by: Grant Henke <gr...@apache.org>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/scripts/get-job-stats-from-mysql.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/kudu/scripts/get-job-stats-from-mysql.py b/src/kudu/scripts/get-job-stats-from-mysql.py
index 2fd062f..a7b52e2 100644
--- a/src/kudu/scripts/get-job-stats-from-mysql.py
+++ b/src/kudu/scripts/get-job-stats-from-mysql.py
@@ -52,7 +52,6 @@ with con:
   days = sys.argv[2]
   cur.execute("select workload, runtime, build_number from kudu_perf_tpch where workload like %s AND curr_date >= DATE_SUB(NOW(), INTERVAL %s DAY) and runtime != 0 ORDER BY workload, build_number, curr_date", (workload, days))
   rows = cur.fetchall()
-  print('workload', '\t', 'runtime', '\t', 'build_number')
+  print('workload \truntime \tbuild_number')
   for row in rows:
-    print(row[0], '\t', row[1], '\t', row[2])
-
+    print("{} \t{} \t{}".format(row[0], row[1], row[2]))


[kudu] 01/02: client: use dense_hash_set in Batcher

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 2e50335112aab5ffc706414c5873400b8177bd5b
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Fri Dec 6 15:14:02 2019 -0800

    client: use dense_hash_set in Batcher
    
    The Batcher implementation keeps an unordered set of in-flight
    operations. The built-in unordered_set is not very fast. Google's
    dense_hash_set is noticeably faster.
    
    This sped up a client-reactor-bound benchmark by around 27%.
    
    Before:
    
    Generator report
        time total  : 37293.8 ms
        time per row: 0.000466172 ms
      Dropping auto-created table 'default.loadgen_auto_ece2f41beef94a9fa032c77899f7e61c'
    
       Performance counter stats for './build/thinlto/bin/kudu perf loadgen localhost -num_rows_per_thread=10000000 -num_threads=8':
    
              189,125.49 msec task-clock                #    5.060 CPUs utilized
                  29,363      context-switches          #    0.155 K/sec
                   2,043      cpu-migrations            #    0.011 K/sec
                  48,405      page-faults               #    0.256 K/sec
         772,496,448,279      cycles                    #    4.085 GHz                      (83.33%)
         129,999,474,226      stalled-cycles-frontend   #   16.83% frontend cycles idle     (83.36%)
         300,049,388,250      stalled-cycles-backend    #   38.84% backend cycles idle      (83.30%)
         414,415,517,571      instructions              #    0.54  insn per cycle
                                                        #    0.72  stalled cycles per insn  (83.32%)
          76,829,647,882      branches                  #  406.236 M/sec                    (83.34%)
             352,749,453      branch-misses             #    0.46% of all branches          (83.35%)
    
            37.376785122 seconds time elapsed
    
           186.834651000 seconds user
             2.143945000 seconds sys
    
    After:
      Generator report
        time total  : 29191.6 ms
        time per row: 0.000364895 ms
      Dropping auto-created table 'default.loadgen_auto_86e36746a9ce4bf19ef2beee143c09f7'
    
       Performance counter stats for './build/thinlto/bin/kudu perf loadgen localhost -num_rows_per_thread=10000000 -num_threads=8':
    
              175,386.48 msec task-clock                #    5.993 CPUs utilized
                 107,552      context-switches          #    0.613 K/sec
                   3,056      cpu-migrations            #    0.017 K/sec
                  49,802      page-faults               #    0.284 K/sec
         718,173,598,221      cycles                    #    4.095 GHz                      (83.34%)
         154,338,830,503      stalled-cycles-frontend   #   21.49% frontend cycles idle     (83.35%)
         162,605,327,354      stalled-cycles-backend    #   22.64% backend cycles idle      (83.33%)
         407,408,456,605      instructions              #    0.57  insn per cycle
                                                        #    0.40  stalled cycles per insn  (83.33%)
          76,468,794,077      branches                  #  436.002 M/sec                    (83.31%)
             462,854,805      branch-misses             #    0.61% of all branches          (83.35%)
    
            29.266327666 seconds time elapsed
    
           171.921196000 seconds user
             3.312785000 seconds sys
    
    Change-Id: Ieb2d190b9445b49ed752c5991146477fd793099a
    Reviewed-on: http://gerrit.cloudera.org:8080/14869
    Tested-by: Kudu Jenkins
    Reviewed-by: Todd Lipcon <to...@apache.org>
---
 src/kudu/client/batcher.cc | 2 ++
 src/kudu/client/batcher.h  | 9 +++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/kudu/client/batcher.cc b/src/kudu/client/batcher.cc
index c68e77d..20ff371 100644
--- a/src/kudu/client/batcher.cc
+++ b/src/kudu/client/batcher.cc
@@ -598,6 +598,8 @@ Batcher::Batcher(KuduClient* client,
     timeout_(client->default_rpc_timeout()),
     outstanding_lookups_(0),
     buffer_bytes_used_(0) {
+  ops_.set_empty_key(nullptr);
+  ops_.set_deleted_key(reinterpret_cast<InFlightOp*>(-1));
 }
 
 void Batcher::Abort() {
diff --git a/src/kudu/client/batcher.h b/src/kudu/client/batcher.h
index b244b28..7128d71 100644
--- a/src/kudu/client/batcher.h
+++ b/src/kudu/client/batcher.h
@@ -20,9 +20,11 @@
 #include <cstdint>
 #include <mutex>
 #include <unordered_map>
-#include <unordered_set>
+#include <utility>
 #include <vector>
 
+#include <sparsehash/dense_hash_set>
+
 #include "kudu/client/client.h"
 #include "kudu/client/shared_ptr.h"
 #include "kudu/client/write_op.h"
@@ -42,11 +44,10 @@ class KuduStatusCallback;
 
 namespace internal {
 
-struct InFlightOp;
-
 class ErrorCollector;
 class RemoteTablet;
 class WriteRpc;
+struct InFlightOp;
 
 // A Batcher is the class responsible for collecting row operations, routing them to the
 // correct tablet server, and possibly batching them together for better efficiency.
@@ -202,7 +203,7 @@ class Batcher : public RefCountedThreadSafe<Batcher> {
   KuduStatusCallback* flush_callback_;
 
   // All buffered or in-flight ops.
-  std::unordered_set<InFlightOp*> ops_;
+  google::dense_hash_set<InFlightOp*> ops_;
   // Each tablet's buffered ops.
   typedef std::unordered_map<RemoteTablet*, std::vector<InFlightOp*> > OpsMap;
   OpsMap per_tablet_ops_;