You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/20 17:41:17 UTC
[arrow] branch master updated: ARROW-3609: [Gandiva] Convert Gandiva benchmark tests as gbenchmark t…
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ee62840 ARROW-3609: [Gandiva] Convert Gandiva benchmark tests as gbenchmark t…
ee62840 is described below
commit ee6284086baeaa685808301f29a2d87c98198789
Author: shyam <sh...@dremio.com>
AuthorDate: Tue Nov 20 12:41:09 2018 -0500
ARROW-3609: [Gandiva] Convert Gandiva benchmark tests as gbenchmark tests
Gandiva benchmark tests are moved out of the Gandiva integration test suite and made part of the Arrow benchmark suite.
```
$ ./release/gandiva-micro_benchmarks
2018-11-19 15:50:45
Running ./release/gandiva-micro_benchmarks
Run on (8 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x4)
L1 Instruction 32K (x4)
L2 Unified 256K (x4)
L3 Unified 8192K (x1)
---------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------
TimedTestAdd3/min_time:1.000 863 us 862 us 1650
TimedTestBigNested/min_time:1.000 14881 us 14872 us 97
TimedTestBigNested/min_time:1.000 15055 us 15047 us 91
TimedTestExtractYear/min_time:1.000 10610 us 10604 us 137
TimedTestFilterAdd2/min_time:1.000 1368 us 1367 us 1018
TimedTestFilterLike/min_time:1.000 616936 us 616606 us 2
TimedTestAllocs/min_time:1.000 31298 us 31256 us 44
TimedTestMultiOr/min_time:1.000 25368 us 25364 us 54
TimedTestInExpr/min_time:1.000 13443 us 13441 us 107
```
Author: shyam <sh...@dremio.com>
Closes #2991 from shyambits2004/master and squashes the following commits:
8b9d353c7 <shyam> ARROW-3610: Convert Gandiva benchmark tests as gbenchmark tests.
---
cpp/src/gandiva/CMakeLists.txt | 2 +-
cpp/src/gandiva/tests/CMakeLists.txt | 9 +-
cpp/src/gandiva/tests/micro_benchmarks.cc | 141 ++++++++++++------------------
cpp/src/gandiva/tests/timed_evaluate.h | 55 ++++++------
4 files changed, 93 insertions(+), 114 deletions(-)
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 40e17a8..515802a 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -77,7 +77,7 @@ set(GANDIVA_STATIC_LINK_LIBS
ADD_ARROW_LIB(gandiva
SOURCES ${SRC_FILES}
OUTPUTS GANDIVA_LIBRARIES
- DEPENDENCIES arrow_dependencies
+ DEPENDENCIES arrow_dependencies precompiled
EXTRA_INCLUDES
$<TARGET_PROPERTY:LLVM::LLVM_INTERFACE,INTERFACE_INCLUDE_DIRECTORIES>
SHARED_LINK_LIBS arrow_shared
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt
index 3bef5cf..ae60063 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -27,9 +27,16 @@ foreach(lib_type "shared" "static")
add_gandiva_integ_test(utf8_test.cc gandiva_${lib_type})
add_gandiva_integ_test(binary_test.cc gandiva_${lib_type})
add_gandiva_integ_test(date_time_test.cc gandiva_${lib_type})
- add_gandiva_integ_test(micro_benchmarks.cc gandiva_${lib_type})
add_gandiva_integ_test(to_string_test.cc gandiva_${lib_type})
add_gandiva_integ_test(hash_test.cc gandiva_${lib_type})
add_gandiva_integ_test(in_expr_test.cc gandiva_${lib_type})
add_gandiva_integ_test(null_validity_test.cc gandiva_${lib_type})
endforeach(lib_type)
+
+set(GANDIVA_BENCHMARK_LINK_LIBRARIES
+ gandiva_static
+)
+
+ADD_ARROW_BENCHMARK(micro_benchmarks
+ PREFIX "gandiva"
+ EXTRA_LINK_LIBS ${GANDIVA_BENCHMARK_LINK_LIBRARIES})
diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc
index 7494d09..7d844eb 100644
--- a/cpp/src/gandiva/tests/micro_benchmarks.cc
+++ b/cpp/src/gandiva/tests/micro_benchmarks.cc
@@ -14,11 +14,11 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-#include <stdlib.h>
-#include <gtest/gtest.h>
+#include <stdlib.h>
#include "arrow/memory_pool.h"
#include "arrow/status.h"
+#include "benchmark/benchmark.h"
#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tests/timed_evaluate.h"
@@ -35,20 +35,13 @@ using arrow::utf8;
// for the hardware used by travis.
float tolerance_ratio = 6.0;
-class TestBenchmarks : public ::testing::Test {
- public:
- void SetUp() { pool_ = arrow::default_memory_pool(); }
-
- protected:
- arrow::MemoryPool* pool_;
-};
-
-TEST_F(TestBenchmarks, TimedTestAdd3) {
+static void TimedTestAdd3(benchmark::State& state) {
// schema for input fields
auto field0 = field("f0", int64());
auto field1 = field("f1", int64());
auto field2 = field("f2", int64());
auto schema = arrow::schema({field0, field1, field2});
+ auto pool_ = arrow::default_memory_pool();
// output field
auto field_sum = field("add", int64());
@@ -63,25 +56,21 @@ TEST_F(TestBenchmarks, TimedTestAdd3) {
auto sum_expr = TreeExprBuilder::MakeExpression(sum, field_sum);
std::shared_ptr<Projector> projector;
- Status status = Projector::Make(schema, {sum_expr}, &projector);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Projector::Make(schema, {sum_expr}, &projector));
- int64_t elapsed_millis;
Int64DataGenerator data_generator;
ProjectEvaluator evaluator(projector);
- status = TimedEvaluate<arrow::Int64Type, int64_t>(schema, evaluator, data_generator,
- pool_, 1 * MILLION, 16 * THOUSAND,
- elapsed_millis);
- ASSERT_TRUE(status.ok());
- std::cout << "Time taken for Add3 " << elapsed_millis << " ms\n";
- EXPECT_LE(elapsed_millis, 2 * tolerance_ratio);
+ Status status = TimedEvaluate<arrow::Int64Type, int64_t>(
+ schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND, state);
+ ASSERT_OK(status);
}
-TEST_F(TestBenchmarks, TimedTestBigNested) {
+static void TimedTestBigNested(benchmark::State& state) {
// schema for input fields
auto fielda = field("a", int32());
auto schema = arrow::schema({fielda});
+ auto pool_ = arrow::default_memory_pool();
// output fields
auto field_result = field("res", int32());
@@ -110,26 +99,21 @@ TEST_F(TestBenchmarks, TimedTestBigNested) {
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
- Status status = Projector::Make(schema, {expr}, &projector);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Projector::Make(schema, {expr}, &projector));
- int64_t elapsed_millis;
BoundedInt32DataGenerator data_generator(250);
ProjectEvaluator evaluator(projector);
- status = TimedEvaluate<arrow::Int32Type, int32_t>(schema, evaluator, data_generator,
- pool_, 1 * MILLION, 16 * THOUSAND,
- elapsed_millis);
+ Status status = TimedEvaluate<arrow::Int32Type, int32_t>(
+ schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND, state);
ASSERT_TRUE(status.ok());
- std::cout << "Time taken for BigNestedIf " << elapsed_millis << " ms\n";
-
- EXPECT_LE(elapsed_millis, 12 * tolerance_ratio);
}
-TEST_F(TestBenchmarks, TimedTestExtractYear) {
+static void TimedTestExtractYear(benchmark::State& state) {
// schema for input fields
auto field0 = field("f0", arrow::date64());
auto schema = arrow::schema({field0});
+ auto pool_ = arrow::default_memory_pool();
// output field
auto field_res = field("res", int64());
@@ -138,28 +122,23 @@ TEST_F(TestBenchmarks, TimedTestExtractYear) {
auto expr = TreeExprBuilder::MakeExpression("extractYear", {field0}, field_res);
std::shared_ptr<Projector> projector;
- Status status = Projector::Make(schema, {expr}, &projector);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Projector::Make(schema, {expr}, &projector));
- int64_t elapsed_millis;
Int64DataGenerator data_generator;
ProjectEvaluator evaluator(projector);
- status = TimedEvaluate<arrow::Date64Type, int64_t>(schema, evaluator, data_generator,
- pool_, 1 * MILLION, 16 * THOUSAND,
- elapsed_millis);
+ Status status = TimedEvaluate<arrow::Date64Type, int64_t>(
+ schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND, state);
ASSERT_TRUE(status.ok());
- std::cout << "Time taken for extractYear " << elapsed_millis << " ms\n";
-
- EXPECT_LE(elapsed_millis, 11 * tolerance_ratio);
}
-TEST_F(TestBenchmarks, TimedTestFilterAdd2) {
+static void TimedTestFilterAdd2(benchmark::State& state) {
// schema for input fields
auto field0 = field("f0", int64());
auto field1 = field("f1", int64());
auto field2 = field("f2", int64());
auto schema = arrow::schema({field0, field1, field2});
+ auto pool_ = arrow::default_memory_pool();
// Build expression
auto sum = TreeExprBuilder::MakeFunction(
@@ -170,25 +149,21 @@ TEST_F(TestBenchmarks, TimedTestFilterAdd2) {
auto condition = TreeExprBuilder::MakeCondition(less_than);
std::shared_ptr<Filter> filter;
- Status status = Filter::Make(schema, condition, &filter);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Filter::Make(schema, condition, &filter));
- int64_t elapsed_millis;
Int64DataGenerator data_generator;
FilterEvaluator evaluator(filter);
- status = TimedEvaluate<arrow::Int64Type, int64_t>(
- schema, evaluator, data_generator, pool_, MILLION, 16 * THOUSAND, elapsed_millis);
+ Status status = TimedEvaluate<arrow::Int64Type, int64_t>(
+ schema, evaluator, data_generator, pool_, MILLION, 16 * THOUSAND, state);
ASSERT_TRUE(status.ok());
- std::cout << "Time taken for Filter with Add2 " << elapsed_millis << " ms\n";
-
- EXPECT_LE(elapsed_millis, 2.5 * tolerance_ratio);
}
-TEST_F(TestBenchmarks, TimedTestFilterLike) {
+static void TimedTestFilterLike(benchmark::State& state) {
// schema for input fields
auto fielda = field("a", utf8());
auto schema = arrow::schema({fielda});
+ auto pool_ = arrow::default_memory_pool();
// build expression.
auto node_a = TreeExprBuilder::MakeField(fielda);
@@ -198,26 +173,21 @@ TEST_F(TestBenchmarks, TimedTestFilterLike) {
auto condition = TreeExprBuilder::MakeCondition(like_yellow);
std::shared_ptr<Filter> filter;
- Status status = Filter::Make(schema, condition, &filter);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Filter::Make(schema, condition, &filter));
- int64_t elapsed_millis;
FastUtf8DataGenerator data_generator(32);
FilterEvaluator evaluator(filter);
- status = TimedEvaluate<arrow::StringType, std::string>(
- schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND,
- elapsed_millis);
+ Status status = TimedEvaluate<arrow::StringType, std::string>(
+ schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND, state);
ASSERT_TRUE(status.ok());
- std::cout << "Time taken for Filter with like " << elapsed_millis << " ms\n";
-
- EXPECT_LE(elapsed_millis, 600 * tolerance_ratio);
}
-TEST_F(TestBenchmarks, TimedTestAllocs) {
+static void TimedTestAllocs(benchmark::State& state) {
// schema for input fields
auto field_a = field("a", arrow::utf8());
auto schema = arrow::schema({field_a});
+ auto pool_ = arrow::default_memory_pool();
// output field
auto field_res = field("res", int32());
@@ -229,27 +199,23 @@ TEST_F(TestBenchmarks, TimedTestAllocs) {
auto expr = TreeExprBuilder::MakeExpression(length, field_res);
std::shared_ptr<Projector> projector;
- Status status = Projector::Make(schema, {expr}, &projector);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Projector::Make(schema, {expr}, &projector));
- int64_t elapsed_millis;
FastUtf8DataGenerator data_generator(64);
ProjectEvaluator evaluator(projector);
- status = TimedEvaluate<arrow::StringType, std::string>(
- schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND,
- elapsed_millis);
+ Status status = TimedEvaluate<arrow::StringType, std::string>(
+ schema, evaluator, data_generator, pool_, 1 * MILLION, 16 * THOUSAND, state);
ASSERT_TRUE(status.ok());
- std::cout << "Time taken for length(upper(utf8)) " << elapsed_millis << " ms\n";
}
-
// following two tests are for benchmark optimization of
// in expr. will be used in follow-up PRs to optimize in expr.
-TEST_F(TestBenchmarks, TimedTestMultiOr) {
+static void TimedTestMultiOr(benchmark::State& state) {
// schema for input fields
auto fielda = field("a", utf8());
auto schema = arrow::schema({fielda});
+ auto pool_ = arrow::default_memory_pool();
// output fields
auto field_result = field("res", boolean());
@@ -271,23 +237,20 @@ TEST_F(TestBenchmarks, TimedTestMultiOr) {
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
- Status status = Projector::Make(schema, {expr}, &projector);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Projector::Make(schema, {expr}, &projector));
- int64_t elapsed_millis;
FastUtf8DataGenerator data_generator(250);
ProjectEvaluator evaluator(projector);
- status = TimedEvaluate<arrow::StringType, std::string>(
- schema, evaluator, data_generator, pool_, 100 * THOUSAND, 16 * THOUSAND,
- elapsed_millis);
- ASSERT_TRUE(status.ok());
- std::cout << "Time taken for BooleanOr (100K) " << elapsed_millis << " ms\n";
+ Status status = TimedEvaluate<arrow::StringType, std::string>(
+ schema, evaluator, data_generator, pool_, 100 * THOUSAND, 16 * THOUSAND, state);
+ ASSERT_OK(status);
}
-TEST_F(TestBenchmarks, TimedTestInExpr) {
+static void TimedTestInExpr(benchmark::State& state) {
// schema for input fields
auto fielda = field("a", utf8());
auto schema = arrow::schema({fielda});
+ auto pool_ = arrow::default_memory_pool();
// output fields
auto field_result = field("res", boolean());
@@ -306,19 +269,25 @@ TEST_F(TestBenchmarks, TimedTestInExpr) {
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
- Status status = Projector::Make(schema, {expr}, &projector);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(Projector::Make(schema, {expr}, &projector));
- int64_t elapsed_millis;
FastUtf8DataGenerator data_generator(250);
ProjectEvaluator evaluator(projector);
- status = TimedEvaluate<arrow::StringType, std::string>(
- schema, evaluator, data_generator, pool_, 100 * THOUSAND, 16 * THOUSAND,
- elapsed_millis);
+ Status status = TimedEvaluate<arrow::StringType, std::string>(
+ schema, evaluator, data_generator, pool_, 100 * THOUSAND, 16 * THOUSAND, state);
- ASSERT_TRUE(status.ok());
- std::cout << "Time taken for BooleanIn (100K) " << elapsed_millis << " ms\n";
+ ASSERT_OK(status);
}
+BENCHMARK(TimedTestAdd3)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestExtractYear)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestFilterAdd2)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestFilterLike)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestAllocs)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestMultiOr)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(TimedTestInExpr)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/timed_evaluate.h b/cpp/src/gandiva/tests/timed_evaluate.h
index e5ee571..dab47c2 100644
--- a/cpp/src/gandiva/tests/timed_evaluate.h
+++ b/cpp/src/gandiva/tests/timed_evaluate.h
@@ -17,6 +17,7 @@
#include <memory>
#include <vector>
+#include "benchmark/benchmark.h"
#include "gandiva/arrow.h"
#include "gandiva/filter.h"
#include "gandiva/projector.h"
@@ -27,6 +28,7 @@
#define THOUSAND (1024)
#define MILLION (1024 * 1024)
+#define NUM_BATCHES 16
namespace gandiva {
@@ -84,48 +86,49 @@ class FilterEvaluator : public BaseEvaluator {
template <typename TYPE, typename C_TYPE>
Status TimedEvaluate(SchemaPtr schema, BaseEvaluator& evaluator,
DataGenerator<C_TYPE>& data_generator, arrow::MemoryPool* pool,
- int num_records, int batch_size, int64_t& num_millis) {
+ int num_records, int batch_size, benchmark::State& state) {
int num_remaining = num_records;
int num_fields = schema->num_fields();
int num_calls = 0;
Status status;
- std::chrono::duration<int64_t, std::micro> micros(0);
- std::chrono::time_point<std::chrono::high_resolution_clock> start;
- std::chrono::time_point<std::chrono::high_resolution_clock> finish;
-
- while (num_remaining > 0) {
- int num_in_batch = batch_size;
- if (batch_size > num_remaining) {
- num_in_batch = num_remaining;
- }
+ // Generate batches of data
+ std::shared_ptr<arrow::RecordBatch> batches[NUM_BATCHES];
+ for (int i = 0; i < NUM_BATCHES; i++) {
// generate data for all columns in the schema
std::vector<ArrayPtr> columns;
for (int col = 0; col < num_fields; col++) {
- std::vector<C_TYPE> data = GenerateData<C_TYPE>(num_in_batch, data_generator);
- std::vector<bool> validity(num_in_batch, true);
+ std::vector<C_TYPE> data = GenerateData<C_TYPE>(batch_size, data_generator);
+ std::vector<bool> validity(batch_size, true);
ArrayPtr col_data = MakeArrowArray<TYPE, C_TYPE>(data, validity);
-
columns.push_back(col_data);
}
// make the record batch
- auto in_batch = arrow::RecordBatch::Make(schema, num_in_batch, columns);
-
- // evaluate
- start = std::chrono::high_resolution_clock::now();
- status = evaluator.Evaluate(*in_batch, pool);
- finish = std::chrono::high_resolution_clock::now();
- if (!status.ok()) {
- return status;
- }
+ std::shared_ptr<arrow::RecordBatch> batch =
+ arrow::RecordBatch::Make(schema, batch_size, columns);
+ batches[i] = batch;
+ }
+
+ for (auto _ : state) {
+ int num_in_batch = batch_size;
+ num_remaining = num_records;
+ while (num_remaining > 0) {
+ if (batch_size > num_remaining) {
+ num_in_batch = num_remaining;
+ }
- micros += std::chrono::duration_cast<std::chrono::microseconds>(finish - start);
- num_calls++;
- num_remaining -= num_in_batch;
+ status = evaluator.Evaluate(*(batches[num_calls % NUM_BATCHES]), pool);
+ if (!status.ok()) {
+ state.SkipWithError("Evaluation of the batch failed");
+ return status;
+ }
+
+ num_calls++;
+ num_remaining -= num_in_batch;
+ }
}
- num_millis = micros.count() / 1000;
return Status::OK();
}