You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2017/10/26 20:44:38 UTC

[2/2] kudu git commit: [tools] limit per-session error buffer space

[tools] limit per-session error buffer space

Introduced the --error_buffer_size_bytes flag to control the size of the
per-session error buffer.  By default, the limit is set to 16MB per
session (i.e. per worker thread).

Prior to this patch, the per-session error buffer had no size limit
and could grow very large because of a huge number of errors during the
run (e.g., because of duplicate value errors).

I also updated the in-line documentation to add an example of using
the tool for subsequent runs against the same table using the
--seq_start flag.

Change-Id: I2b7663c52e57b2d3a1e11b22301b804da784fa74
Reviewed-on: http://gerrit.cloudera.org:8080/8396
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6097e2d1
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6097e2d1
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6097e2d1

Branch: refs/heads/master
Commit: 6097e2d1a4287563ef22a4f6e32e6f4ee2604031
Parents: 49a7d97
Author: Alexey Serbin <as...@cloudera.com>
Authored: Wed Oct 25 17:47:29 2017 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Thu Oct 26 20:43:51 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/tool_action_perf.cc | 52 ++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/6097e2d1/src/kudu/tools/tool_action_perf.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_perf.cc b/src/kudu/tools/tool_action_perf.cc
index 568b0a0..1e4e30c 100644
--- a/src/kudu/tools/tool_action_perf.cc
+++ b/src/kudu/tools/tool_action_perf.cc
@@ -30,7 +30,7 @@
 // of length 64 for binary and string fields
 // with Kudu master server listening on the default port at localhost:
 //
-//   kudu test loadgen \
+//   kudu perf loadgen \
 //     --num_threads=1 \
 //     --num_rows_per_thread=8000000 \
 //     --string_len=64 \
@@ -46,7 +46,7 @@
 // using the specified pre-set string for binary and string fields
 // with Kudu master server listening on the default port at localhost:
 //
-//   kudu test loadgen \
+//   kudu perf loadgen \
 //     --num_threads=2 \
 //     --num_rows_per_thread=4000000 \
 //     --string_fixed=012345678901234567890123456789012 \
@@ -62,7 +62,7 @@
 // using the specified pre-set string for binary and string fields
 // with Kudu master server listening at 127.0.0.1:8765
 //
-//   kudu test loadgen \
+//   kudu perf loadgen \
 //     --num_threads=4 \
 //     --num_rows_per_thread=2000000 \
 //     --string_fixed=0123456789 \
@@ -79,10 +79,31 @@
 // plus run post-insertion row scan to verify
 // that the count of the inserted rows matches the expected number:
 //
-//   kudu test loadgen \
+//   kudu perf loadgen \
 //     --run_scan=true \
 //     127.0.0.1
 //
+//
+// If running the tool against an already existing table multiple times,
+// use the '--seq_start' flag to avoid errors on duplicate values in subsequent
+// runs. For example: an already existing table 't3' has 5 columns.
+// The sequence below contains 3 runs which insert 6000 rows in total
+// (3 runs * 1000 rows per thread * 2 threads)
+// with no duplicate values across all columns:
+//
+//   kudu perf loadgen 127.0.0.1 --table_name=t3 --num_threads=2 \
+//     --num_rows_per_thread=1000 --seq_start=0
+//
+//   kudu perf loadgen 127.0.0.1 --table_name=t3 --num_threads=2 \
+//     --num_rows_per_thread=1000 --seq_start=10000
+//
+//   kudu perf loadgen 127.0.0.1 --table_name=t3 --num_threads=2 \
+//     --num_rows_per_thread=1000 --seq_start=20000
+//
+// The single sequence number is used to generate values for all table columns,
+// so for the example above each run increments the sequence number by 10000:
+// 1000 rows per thread * 2 threads * 5 columns
+//
 
 #include "kudu/tools/tool_action.h"
 
@@ -103,7 +124,6 @@
 #include <gflags/gflags.h>
 
 #include "kudu/client/client.h"
-#include "kudu/client/row_result.h"
 #include "kudu/client/scan_batch.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h"
@@ -131,11 +151,10 @@ using kudu::client::KuduClientBuilder;
 using kudu::client::KuduColumnSchema;
 using kudu::client::KuduError;
 using kudu::client::KuduInsert;
-using kudu::client::KuduRowResult;
-using kudu::client::KuduSchema;
-using kudu::client::KuduSchemaBuilder;
 using kudu::client::KuduScanBatch;
 using kudu::client::KuduScanner;
+using kudu::client::KuduSchema;
+using kudu::client::KuduSchemaBuilder;
 using kudu::client::KuduSession;
 using kudu::client::KuduTable;
 using kudu::client::KuduTableCreator;
@@ -150,8 +169,8 @@ using std::numeric_limits;
 using std::ostringstream;
 using std::string;
 using std::thread;
-using std::vector;
 using std::unique_ptr;
+using std::vector;
 using strings::Substitute;
 using strings::SubstituteAndAppend;
 
@@ -161,6 +180,11 @@ DEFINE_int32(buffer_size_bytes, 4 * 1024 * 1024,
              "Size of the mutation buffer, per session (bytes).");
 DEFINE_int32(buffers_num, 2,
              "Number of mutation buffers per session.");
+DEFINE_int32(error_buffer_size_bytes, 16 * 1024 * 1024,
+             "Size of the error buffer, per session (bytes). 0 means 'unlimited'. "
+             "This setting may impose an additional upper limit for the "
+             "effective number of errors controlled by the "
+             "'--show_first_n_errors' flag.");
 DEFINE_int32(flush_per_n_rows, 0,
              "Perform async flush per given number of rows added. "
              "Setting to non-zero implicitly turns on manual flush mode.");
@@ -168,7 +192,7 @@ DEFINE_bool(keep_auto_table, false,
             "If using the auto-generated table, enabling this option "
             "retains the table populated with the data after the test "
             "finishes. By default, the auto-generated table is dropped "
-            "after sucessfully finishing the test. NOTE: this parameter "
+            "after successfully finishing the test. NOTE: this parameter "
             "has no effect if using already existing table "
             "(see the '--table_name' flag): the existing tables nor their data "
             "are never dropped/deleted.");
@@ -191,7 +215,11 @@ DEFINE_uint64(seq_start, 0,
               "(num_threads * num_rows_per_thread * column_num + seq_start).");
 DEFINE_int32(show_first_n_errors, 0,
              "Output detailed information on the specified number of "
-             "first n errors (if any).");
+             "first n errors (if any). The limit on the per-session error "
+             "buffer space may impose an additional upper limit for the "
+             "effective number of errors in the output. If so, consider "
+             "increasing the size of the error buffer using the "
+             "'--error_buffer_size_bytes' flag.");
 DEFINE_string(string_fixed, "",
               "Pre-defined string to write into binary and string columns. "
               "Client generates more data per second using pre-defined string "
@@ -363,6 +391,7 @@ void GeneratorThread(
     RETURN_NOT_OK(session->SetMutationBufferSpace(
                      FLAGS_buffer_size_bytes));
     RETURN_NOT_OK(session->SetMutationBufferMaxNum(FLAGS_buffers_num));
+    RETURN_NOT_OK(session->SetErrorBufferSpace(FLAGS_error_buffer_size_bytes));
     RETURN_NOT_OK(session->SetFlushMode(
         flush_per_n_rows == 0 ? KuduSession::AUTO_FLUSH_BACKGROUND
                               : KuduSession::MANUAL_FLUSH));
@@ -618,6 +647,7 @@ unique_ptr<Mode> BuildPerfMode() {
       .AddOptionalParameter("buffer_flush_watermark_pct")
       .AddOptionalParameter("buffer_size_bytes")
       .AddOptionalParameter("buffers_num")
+      .AddOptionalParameter("error_buffer_size_bytes")
       .AddOptionalParameter("flush_per_n_rows")
       .AddOptionalParameter("keep_auto_table")
       .AddOptionalParameter("num_rows_per_thread")