You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/09/08 02:12:24 UTC

[4/4] kudu git commit: tablet_server-stress-test: limit runtime of this test

tablet_server-stress-test: limit runtime of this test

This stress test is typically the long pole in the dist-test runtime.
For example, in a recent ASAN run[1] the test took around 730 seconds.

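This changes the test to run for at most a prescribed amount of time
(60 seconds in slow mode, 10 seconds in fast mode) instead of always
performing a fixed number of inserts. This should retain relatively good
coverage while avoiding such long test runs. The benchmark script now
passes -runtime_secs=0 to disable the timeout, so it still performs its
fixed number of inserts per thread.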

[1] http://dist-test.cloudera.org/trace?job_id=jenkins-slave.1473295331.9755

Change-Id: I7441f50bcd4788e3e54a90bd5f782201a7d4c6af
Reviewed-on: http://gerrit.cloudera.org:8080/4329
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/46d9ed7a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/46d9ed7a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/46d9ed7a

Branch: refs/heads/master
Commit: 46d9ed7aa86e7bcd9649ec37af1fbd8369d5c0fe
Parents: 2876683
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Sep 7 18:23:52 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Sep 8 02:06:54 2016 +0000

----------------------------------------------------------------------
 src/kudu/scripts/benchmarks.sh                |  2 +-
 src/kudu/tserver/tablet_server-stress-test.cc | 39 +++++++++++++++++-----
 2 files changed, 32 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/46d9ed7a/src/kudu/scripts/benchmarks.sh
----------------------------------------------------------------------
diff --git a/src/kudu/scripts/benchmarks.sh b/src/kudu/scripts/benchmarks.sh
index 799d7be..be110f1 100755
--- a/src/kudu/scripts/benchmarks.sh
+++ b/src/kudu/scripts/benchmarks.sh
@@ -240,7 +240,7 @@ run_benchmarks() {
   # Run multi-threaded TS insert benchmark
   for i in $(seq 1 $NUM_SAMPLES) ; do
     KUDU_ALLOW_SLOW_TESTS=1 build/latest/bin/tablet_server-stress-test \
-      --num_inserts_per_thread=30000 &> $LOGDIR/${TS_8THREAD_BENCH}$i.log
+      --num_inserts_per_thread=30000 -runtime_secs=0 &> $LOGDIR/${TS_8THREAD_BENCH}$i.log
   done
 
   # Run full stack scan/insert test using MRS only, ~26s each

http://git-wip-us.apache.org/repos/asf/kudu/blob/46d9ed7a/src/kudu/tserver/tablet_server-stress-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_server-stress-test.cc b/src/kudu/tserver/tablet_server-stress-test.cc
index cfe24b1..87d24fd 100644
--- a/src/kudu/tserver/tablet_server-stress-test.cc
+++ b/src/kudu/tserver/tablet_server-stress-test.cc
@@ -16,12 +16,22 @@
 // under the License.
 #include "kudu/tserver/tablet_server-test-base.h"
 
+#include <thread>
+
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/stopwatch.h"
 
+DEFINE_int32(runtime_secs, 10,
+             "Maximum number of seconds to run. If the threads have not completed "
+             "inserting by this time, they will stop regardless. Set to 0 to disable "
+             "the timeout.");
 DEFINE_int32(num_inserter_threads, 8, "Number of inserter threads to run");
-DEFINE_int32(num_inserts_per_thread, 0, "Number of inserts from each thread");
+DEFINE_int32(num_inserts_per_thread, 100000000,
+             "Number of inserts from each thread. If 'runtime_secs' is non-zero, threads will "
+             "exit after that time out even if they have not inserted the desired number. The "
+             "default is set high so that, typically, the 'runtime_secs' parameter determines "
+             "how long this test will run.");
 DECLARE_bool(enable_maintenance_manager);
 
 METRIC_DEFINE_histogram(test, insert_latency,
@@ -37,11 +47,10 @@ namespace tserver {
 class TSStressTest : public TabletServerTestBase {
  public:
   TSStressTest()
-    : start_latch_(FLAGS_num_inserter_threads) {
+      : start_latch_(FLAGS_num_inserter_threads),
+        stop_latch_(1) {
 
-    if (FLAGS_num_inserts_per_thread == 0) {
-      FLAGS_num_inserts_per_thread = AllowSlowTests() ? 100000 : 1000;
-    }
+    OverrideFlagForSlowTests("runtime_secs", "60");
 
     // Re-enable the maintenance manager which is disabled by default
     // in TS tests. We want to stress the whole system including
@@ -76,6 +85,7 @@ class TSStressTest : public TabletServerTestBase {
  protected:
   scoped_refptr<Histogram> histogram_;
   CountDownLatch start_latch_;
+  CountDownLatch stop_latch_;
   std::vector<scoped_refptr<kudu::Thread> > threads_;
 };
 
@@ -87,7 +97,7 @@ void TSStressTest::InserterThread(int thread_idx) {
 
   uint64_t max_rows = FLAGS_num_inserts_per_thread;
   int start_row = thread_idx * max_rows;
-  for (int i = start_row; i < start_row + max_rows ; i++) {
+  for (int i = start_row; i < start_row + max_rows && stop_latch_.count() > 0; i++) {
     MonoTime before = MonoTime::Now();
     InsertTestRowsRemote(thread_idx, i, 1);
     MonoTime after = MonoTime::Now();
@@ -98,23 +108,36 @@ void TSStressTest::InserterThread(int thread_idx) {
 }
 
 TEST_F(TSStressTest, TestMTInserts) {
+  std::thread timeout_thread;
   StartThreads();
   Stopwatch s(Stopwatch::ALL_THREADS);
   s.start();
+
+  // Start a thread to fire 'stop_latch_' after the prescribed number of seconds.
+  if (FLAGS_runtime_secs > 0) {
+    timeout_thread = std::thread([&]() {
+      stop_latch_.WaitFor(MonoDelta::FromSeconds(FLAGS_runtime_secs));
+      stop_latch_.CountDown();
+    });
+  }
   JoinThreads();
   s.stop();
-  int num_rows = (FLAGS_num_inserter_threads * FLAGS_num_inserts_per_thread);
+
+  int num_rows = histogram_->TotalCount();
   LOG(INFO) << "Inserted " << num_rows << " rows in " << s.elapsed().wall_millis() << " ms";
   LOG(INFO) << "Throughput: " << (num_rows * 1000 / s.elapsed().wall_millis()) << " rows/sec";
   LOG(INFO) << "CPU efficiency: " << (num_rows / s.elapsed().user_cpu_seconds()) << " rows/cpusec";
 
-
   // Generate the JSON.
   std::stringstream out;
   JsonWriter writer(&out, JsonWriter::PRETTY);
   ASSERT_OK(histogram_->WriteAsJson(&writer, MetricJsonOptions()));
 
   LOG(INFO) << out.str();
+
+  // Ensure the timeout thread is stopped before exiting.
+  stop_latch_.CountDown();
+  if (timeout_thread.joinable()) timeout_thread.join();
 }
 
 } // namespace tserver