You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/07/31 16:12:37 UTC
[2/6] incubator-impala git commit: IMPALA-4086: Add benchmark for simple scheduler

IMPALA-4086: Add benchmark for simple scheduler

Change-Id: I89ec1c6c1828bb0b86d1e13ce4dfc5a8df865c2e
Reviewed-on: http://gerrit.cloudera.org:8080/4554
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2ee16067
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2ee16067
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2ee16067

Branch: refs/heads/master
Commit: 2ee160679ca693777ad6764706a251755ad6ece3
Parents: e90afcb
Author: Lars Volker <lv...@cloudera.com>
Authored: Fri Sep 9 13:28:05 2016 +0200
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 29 05:43:58 2017 +0000

----------------------------------------------------------------------
 be/src/benchmarks/CMakeLists.txt         |   1 +
 be/src/benchmarks/scheduler-benchmark.cc | 171 ++++++++++++++++++++++++++
 be/src/scheduling/scheduler.h            |   8 +-
 be/src/util/debug-util.cc                |   2 +
 be/src/util/debug-util.h                 |   1 +
 5 files changed, 178 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/benchmarks/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt
index e954583..1d67d45 100644
--- a/be/src/benchmarks/CMakeLists.txt
+++ b/be/src/benchmarks/CMakeLists.txt
@@ -50,6 +50,7 @@ ADD_BE_BENCHMARK(overflow-benchmark)
 ADD_BE_BENCHMARK(parse-timestamp-benchmark)
 ADD_BE_BENCHMARK(rle-benchmark)
 ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
+ADD_BE_BENCHMARK(scheduler-benchmark)
 ADD_BE_BENCHMARK(status-benchmark)
 ADD_BE_BENCHMARK(string-benchmark)
 ADD_BE_BENCHMARK(string-compare-benchmark)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/benchmarks/scheduler-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/scheduler-benchmark.cc b/be/src/benchmarks/scheduler-benchmark.cc
new file mode 100644
index 0000000..df9e981
--- /dev/null
+++ b/be/src/benchmarks/scheduler-benchmark.cc
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "gutil/strings/substitute.h"
+#include "scheduling/scheduler.h"
+#include "scheduling/scheduler-test-util.h"
+#include "util/benchmark.h"
+#include "util/cpu-info.h"
+#include "util/debug-util.h"
+#include "util/thread.h"
+
+#include "common/names.h"
+
+using namespace impala;
+using namespace impala::test;
+
+
+// This benchmark exercises the core scheduling method 'ComputeScanRangeAssignment()' of
+// the Scheduler class for various cluster and table sizes. It makes the following
+// assumptions:
+// - All nodes run local impalads. The benchmark includes suites for both DISK_LOCAL and
+//   REMOTE scheduling preferences. Having datanodes without a local impalad would result
+//   in performance somewhere between these two.
+// - The plan only scans one table. All logic in the scheduler is built around assigning
+//   blocks to impalad backends, which scan them. Having multiple tables will merely
+//   repeat this process. The interesting metric is varying the number of blocks per
+//   table.
+// - Blocks and files are treated as the same thing from the scheduler's perspective.
+//   Scheduling happens on scan ranges, which are issued based on file blocks by the
+//   frontend.
+//
+// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
+// Cluster Size, DISK_LOCAL:  Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                             3 Hosts               13.2     13.5     13.5         1X         1X         1X
+//                            10 Hosts               12.6     12.6     12.8     0.951X     0.937X     0.951X
+//                            50 Hosts               9.34     9.52     9.53     0.705X     0.706X     0.706X
+//                           100 Hosts               8.68     8.68     8.68     0.655X     0.643X     0.643X
+//                           500 Hosts               5.57     5.67      5.7      0.42X     0.421X     0.423X
+//                          1000 Hosts               4.02      4.1      4.1     0.304X     0.304X     0.304X
+//                          3000 Hosts               1.85     1.85     1.86      0.14X     0.137X     0.138X
+//                         10000 Hosts              0.577    0.588    0.588    0.0436X    0.0436X    0.0436X
+//
+// Cluster Size, REMOTE:      Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                             3 Hosts               23.8     23.9     24.1         1X         1X         1X
+//                            10 Hosts               20.4     20.9     21.1     0.854X     0.877X     0.878X
+//                            50 Hosts                 15       15     15.2     0.628X     0.628X     0.632X
+//                           100 Hosts               12.6     12.7     12.7      0.53X     0.532X     0.528X
+//                           500 Hosts               7.38     7.55      7.6      0.31X     0.316X     0.316X
+//                          1000 Hosts                4.9     4.93     4.93     0.206X     0.207X     0.205X
+//                          3000 Hosts                1.9     1.96     1.96    0.0797X    0.0823X    0.0817X
+//                         10000 Hosts              0.577    0.588    0.588    0.0242X    0.0246X    0.0245X
+//
+// Number of Blocks:          Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                            1 Blocks               74.5     75.2     76.1         1X         1X         1X
+//                           10 Blocks               44.4     44.6     45.1     0.596X     0.593X     0.593X
+//                          100 Blocks               8.46     8.46     8.49     0.114X     0.113X     0.112X
+//                         1000 Blocks              0.981        1        1    0.0132X    0.0133X    0.0131X
+//                        10000 Blocks                0.1    0.102    0.103   0.00134X   0.00136X   0.00136X
+
+static const vector<int> CLUSTER_SIZES = {3, 10, 50, 100, 500, 1000, 3000, 10000};
+static const int DEFAULT_CLUSTER_SIZE = 100;  // Hosts used when varying block count.
+static const vector<int> NUM_BLOCKS_PER_TABLE = {1, 10, 100, 1000, 10000};
+static const int DEFAULT_NUM_BLOCKS_PER_TABLE = 100;  // Blocks used when varying hosts.
+
+/// Members of this struct are needed to build the test fixtures and depend on each other.
+/// Their constructors take const references to the previously built members, so they
+/// must be constructed in declaration order, which is why we keep pointers to them here.
+struct TestCtx {
+  std::unique_ptr<Cluster> cluster;
+  std::unique_ptr<Schema> schema;  // Built from 'cluster'.
+  std::unique_ptr<Plan> plan;  // Built from 'schema'.
+  std::unique_ptr<Result> result;  // Built from 'plan'.
+  std::unique_ptr<SchedulerWrapper> scheduler_wrapper;  // Built from 'plan'.
+};
+
+/// Populates 'test_ctx' with a cluster, schema, plan, result and scheduler wrapper.
+void InitializeTestCtx(int num_hosts, int num_blocks,
+    TReplicaPreference::type replica_preference, TestCtx* test_ctx) {
+  TestCtx& ctx = *test_ctx;
+  ctx.cluster.reset(new Cluster());
+  ctx.cluster->AddHosts(num_hosts, true, true);
+  ctx.schema.reset(new Schema(*ctx.cluster));
+  ctx.schema->AddMultiBlockTable("T0", num_blocks, ReplicaPlacement::LOCAL_ONLY, 3);
+
+  ctx.plan.reset(new Plan(*ctx.schema));
+  ctx.plan->SetReplicaPreference(replica_preference);
+  ctx.plan->SetRandomReplica(true);
+  ctx.plan->AddTableScan("T0");
+
+  // Both the result and the scheduler wrapper are built from the finished plan.
+  ctx.result.reset(new Result(*ctx.plan));
+  ctx.scheduler_wrapper.reset(new SchedulerWrapper(*ctx.plan));
+}
+
+/// Benchmark callback: 'data' points to a TestCtx. Every one of the 'num_iterations'
+/// rounds resets the result and then runs the scheduling computation again.
+void BenchmarkFunction(int num_iterations, void* data) {
+  TestCtx& ctx = *static_cast<TestCtx*>(data);
+  for (int remaining = num_iterations; remaining > 0; --remaining) {
+    ctx.result->Reset();
+    ctx.scheduler_wrapper->Compute(ctx.result.get());
+  }
+}
+
+/// Build and run a benchmark suite that varies the cluster size while using the default
+/// number of blocks. Scheduling is done according to 'replica_preference'.
+void RunClusterSizeBenchmark(TReplicaPreference::type replica_preference) {
+  const string suite_name = strings::Substitute(
+      "Cluster Size, $0", PrintTReplicaPreference(replica_preference));
+  Benchmark suite(suite_name, false /* micro_heuristics */);
+  // Sized up front so that the element addresses given to AddBenchmark() stay valid.
+  vector<TestCtx> test_ctx(CLUSTER_SIZES.size());
+  for (size_t i = 0; i < CLUSTER_SIZES.size(); ++i) {
+    const int cluster_size = CLUSTER_SIZES[i];
+    InitializeTestCtx(
+        cluster_size, DEFAULT_NUM_BLOCKS_PER_TABLE, replica_preference, &test_ctx[i]);
+    const string benchmark_name = strings::Substitute("$0 Hosts", cluster_size);
+    suite.AddBenchmark(benchmark_name, BenchmarkFunction, &test_ctx[i]);
+  }
+  cout << suite.Measure() << endl;
+}
+
+/// Build and run a benchmark suite that varies the number of blocks per table while
+/// using the default cluster size. Scheduling is done according to 'replica_preference'.
+void RunNumBlocksBenchmark(TReplicaPreference::type replica_preference) {
+  Benchmark suite("Number of Blocks", false /* micro_heuristics */);
+  // Sized up front so that the element addresses given to AddBenchmark() stay valid.
+  vector<TestCtx> test_ctx(NUM_BLOCKS_PER_TABLE.size());
+  for (size_t i = 0; i < NUM_BLOCKS_PER_TABLE.size(); ++i) {
+    const int num_blocks = NUM_BLOCKS_PER_TABLE[i];
+    InitializeTestCtx(DEFAULT_CLUSTER_SIZE, num_blocks, replica_preference, &test_ctx[i]);
+    const string benchmark_name = strings::Substitute("$0 Blocks", num_blocks);
+    suite.AddBenchmark(benchmark_name, BenchmarkFunction, &test_ctx[i]);
+  }
+  cout << suite.Measure() << endl;
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  CpuInfo::Init();
+  impala::InitThreading();
+
+  cout << Benchmark::GetMachineInfo() << endl;  // Printed once, ahead of all suites.
+  RunClusterSizeBenchmark(TReplicaPreference::DISK_LOCAL);
+  RunClusterSizeBenchmark(TReplicaPreference::REMOTE);
+  RunNumBlocksBenchmark(TReplicaPreference::DISK_LOCAL);  // DISK_LOCAL only.
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/scheduling/scheduler.h
----------------------------------------------------------------------
diff --git a/be/src/scheduling/scheduler.h b/be/src/scheduling/scheduler.h
index 8d57c32..043deac 100644
--- a/be/src/scheduling/scheduler.h
+++ b/be/src/scheduling/scheduler.h
@@ -65,11 +65,8 @@ class SchedulerWrapper;
 /// TODO: Inject global dependencies into the class (for example ExecEnv::GetInstance(),
 ///       RNG used during scheduling, FLAGS_*)
 ///       to make it testable.
-/// TODO: Benchmark the performance of the scheduler. The tests need to include setups
-///       with:
-///         - Small and large number of executors.
-///         - Small and large query plans.
-///         - Scheduling query plans with concurrent updates to the internal executor
+/// TODO: Extend the benchmarks of the scheduler. The tests need to include setups with:
+///         - Scheduling query plans with concurrent updates to the internal backend
 ///           configuration.
 class Scheduler {
  public:
@@ -334,6 +331,7 @@ class Scheduler {
   /// the schedule's TQueryExecRequest.plan_exec_info.
   /// Unpartitioned fragments are assigned to the coordinator. Populate the schedule's
   /// fragment_exec_params_ with the resulting scan range assignment.
+  /// We have a benchmark for this method in be/src/benchmarks/scheduler-benchmark.cc.
   Status ComputeScanRangeAssignment(QuerySchedule* schedule);
 
   /// Process the list of scan ranges of a single plan node and compute scan range

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/debug-util.cc b/be/src/util/debug-util.cc
index 6a94496..48df592 100644
--- a/be/src/util/debug-util.cc
+++ b/be/src/util/debug-util.cc
@@ -78,6 +78,7 @@ THRIFT_ENUM_OUTPUT_FN(TDdlType);
 THRIFT_ENUM_OUTPUT_FN(TCatalogOpType);
 THRIFT_ENUM_OUTPUT_FN(THdfsFileFormat);
 THRIFT_ENUM_OUTPUT_FN(THdfsCompression);
+THRIFT_ENUM_OUTPUT_FN(TReplicaPreference);
 THRIFT_ENUM_OUTPUT_FN(TSessionType);
 THRIFT_ENUM_OUTPUT_FN(TStmtType);
 THRIFT_ENUM_OUTPUT_FN(QueryState);
@@ -91,6 +92,7 @@ THRIFT_ENUM_OUTPUT_FN(TImpalaQueryOptions);
 THRIFT_ENUM_PRINT_FN(TCatalogObjectType);
 THRIFT_ENUM_PRINT_FN(TDdlType);
 THRIFT_ENUM_PRINT_FN(TCatalogOpType);
+THRIFT_ENUM_PRINT_FN(TReplicaPreference);
 THRIFT_ENUM_PRINT_FN(TSessionType);
 THRIFT_ENUM_PRINT_FN(TStmtType);
 THRIFT_ENUM_PRINT_FN(QueryState);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/util/debug-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/debug-util.h b/be/src/util/debug-util.h
index fcfb9d6..29fd6bb 100644
--- a/be/src/util/debug-util.h
+++ b/be/src/util/debug-util.h
@@ -64,6 +64,7 @@ std::string PrintPlanNodeType(const TPlanNodeType::type& type);
 std::string PrintTCatalogObjectType(const TCatalogObjectType::type& type);
 std::string PrintTDdlType(const TDdlType::type& type);
 std::string PrintTCatalogOpType(const TCatalogOpType::type& type);
+std::string PrintTReplicaPreference(const TReplicaPreference::type& type);
 std::string PrintTSessionType(const TSessionType::type& type);
 std::string PrintTStmtType(const TStmtType::type& type);
 std::string PrintQueryState(const beeswax::QueryState::type& type);