You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/06/24 06:23:30 UTC
[arrow] branch master updated: ARROW-5701: [C++][Gandiva] Build expr with specific sv
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c66b695 ARROW-5701: [C++][Gandiva] Build expr with specific sv
c66b695 is described below
commit c66b69522d0901187abb9b6e720665b2f5b78812
Author: Pindikura Ravindra <ra...@dremio.com>
AuthorDate: Mon Jun 24 11:53:05 2019 +0530
ARROW-5701: [C++][Gandiva] Build expr with specific sv
- for long expressions, this reduces build time by 4x.
Author: Pindikura Ravindra <ra...@dremio.com>
Closes #4667 from pravindra/arrow-5701 and squashes the following commits:
cc46d92b8 <Pindikura Ravindra> ARROW-5701: Build expr with specific sv
---
cpp/src/gandiva/filter.cc | 2 +-
cpp/src/gandiva/jni/jni_common.cc | 16 ++++++++--
cpp/src/gandiva/llvm_generator.cc | 35 ++++++++++++----------
cpp/src/gandiva/llvm_generator.h | 10 ++++++-
cpp/src/gandiva/projector.cc | 14 +++++++--
cpp/src/gandiva/projector.h | 13 ++++++++
cpp/src/gandiva/projector_cache_key.h | 10 +++++--
cpp/src/gandiva/tests/CMakeLists.txt | 1 +
cpp/src/gandiva/tests/filter_project_test.cc | 18 ++++++-----
.../apache/arrow/gandiva/evaluator/JniWrapper.java | 4 ++-
.../apache/arrow/gandiva/evaluator/Projector.java | 28 ++++++++++++++---
.../arrow/gandiva/evaluator/FilterProjectTest.java | 3 +-
12 files changed, 117 insertions(+), 37 deletions(-)
diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc
index 3bba190..b229fc3 100644
--- a/cpp/src/gandiva/filter.cc
+++ b/cpp/src/gandiva/filter.cc
@@ -63,7 +63,7 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
// Return if the expression is invalid since we will not be able to process further.
ExprValidator expr_validator(llvm_gen->types(), schema);
ARROW_RETURN_NOT_OK(expr_validator.Validate(condition));
- ARROW_RETURN_NOT_OK(llvm_gen->Build({condition}));
+ ARROW_RETURN_NOT_OK(llvm_gen->Build({condition}, SelectionVector::Mode::MODE_NONE));
// Instantiate the filter with the completely built llvm generator
*filter = std::make_shared<Filter>(std::move(llvm_gen), schema, configuration);
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index b4b9ffe..09d2739 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -538,7 +538,7 @@ void releaseProjectorInput(jbyteArray schema_arr, jbyte* schema_bytes,
JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_buildProjector(
JNIEnv* env, jobject obj, jbyteArray schema_arr, jbyteArray exprs_arr,
- jlong configuration_id) {
+ jint selection_vector_type, jlong configuration_id) {
jlong module_id = 0LL;
std::shared_ptr<Projector> projector;
std::shared_ptr<ProjectorHolder> holder;
@@ -555,6 +555,7 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_build
SchemaPtr schema_ptr;
FieldVector ret_types;
gandiva::Status status;
+ auto mode = gandiva::SelectionVector::MODE_NONE;
std::shared_ptr<Configuration> config = ConfigHolder::MapLookup(configuration_id);
std::stringstream ss;
@@ -599,8 +600,19 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_build
ret_types.push_back(root->result());
}
+ switch (selection_vector_type) {
+ case types::SV_NONE:
+ mode = gandiva::SelectionVector::MODE_NONE;
+ break;
+ case types::SV_INT16:
+ mode = gandiva::SelectionVector::MODE_UINT16;
+ break;
+ case types::SV_INT32:
+ mode = gandiva::SelectionVector::MODE_UINT32;
+ break;
+ }
// good to invoke the evaluator now
- status = Projector::Make(schema_ptr, expr_vector, config, &projector);
+ status = Projector::Make(schema_ptr, expr_vector, mode, config, &projector);
if (!status.ok()) {
ss << "Failed to make LLVM module due to " << status.message() << "\n";
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 867f07b..1d5946d 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -59,33 +59,31 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out
ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity));
// Generate the IR function for the decomposed expression.
std::unique_ptr<CompiledExpr> compiled_expr(new CompiledExpr(value_validity, output));
- for (auto mode : SelectionVector::kAllModes) {
- llvm::Function* ir_function = nullptr;
- ARROW_RETURN_NOT_OK(
- CodeGenExprValue(value_validity->value_expr(), output, idx, &ir_function, mode));
- compiled_expr->SetIRFunction(mode, ir_function);
- }
+ llvm::Function* ir_function = nullptr;
+ ARROW_RETURN_NOT_OK(CodeGenExprValue(value_validity->value_expr(), output, idx,
+ &ir_function, selection_vector_mode_));
+ compiled_expr->SetIRFunction(selection_vector_mode_, ir_function);
compiled_exprs_.push_back(std::move(compiled_expr));
return Status::OK();
}
/// Build and optimise module for projection expression.
-Status LLVMGenerator::Build(const ExpressionVector& exprs) {
+Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode mode) {
+ selection_vector_mode_ = mode;
for (auto& expr : exprs) {
auto output = annotator_.AddOutputFieldDescriptor(expr->result());
ARROW_RETURN_NOT_OK(Add(expr, output));
}
// optimise, compile and finalize the module
ARROW_RETURN_NOT_OK(engine_->FinalizeModule(optimise_ir_, dump_ir_));
+
// setup the jit functions for each expression.
for (auto& compiled_expr : compiled_exprs_) {
- for (auto mode : SelectionVector::kAllModes) {
- auto ir_function = compiled_expr->GetIRFunction(mode);
- auto jit_function =
- reinterpret_cast<EvalFunc>(engine_->CompiledFunction(ir_function));
- compiled_expr->SetJITFunction(mode, jit_function);
- }
+ auto ir_function = compiled_expr->GetIRFunction(mode);
+ auto jit_function =
+ reinterpret_cast<EvalFunc>(engine_->CompiledFunction(ir_function));
+ compiled_expr->SetJITFunction(selection_vector_mode_, jit_function);
}
return Status::OK();
}
@@ -106,15 +104,22 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch,
auto eval_batch = annotator_.PrepareEvalBatch(record_batch, output_vector);
DCHECK_GT(eval_batch->GetNumBuffers(), 0);
+ auto mode = SelectionVector::MODE_NONE;
+ if (selection_vector != nullptr) {
+ mode = selection_vector->GetMode();
+ }
+ if (mode != selection_vector_mode_) {
+ return Status::Invalid("llvm expression built for selection vector mode ",
+ selection_vector_mode_, " received vector with mode ", mode);
+ }
+
for (auto& compiled_expr : compiled_exprs_) {
// generate data/offset vectors.
const uint8_t* selection_buffer = nullptr;
auto num_output_rows = record_batch.num_rows();
- auto mode = SelectionVector::MODE_NONE;
if (selection_vector != nullptr) {
selection_buffer = selection_vector->GetBuffer().data();
num_output_rows = selection_vector->GetNumSlots();
- mode = selection_vector->GetMode();
}
EvalFunc jit_function = compiled_expr->GetJITFunction(mode);
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index 61038fc..a68f0d5 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -52,7 +52,13 @@ class GANDIVA_EXPORT LLVMGenerator {
/// \brief Build the code for the expression trees for default mode. Each
/// element in the vector represents an expression tree
- Status Build(const ExpressionVector& exprs);
+ Status Build(const ExpressionVector& exprs, SelectionVector::Mode mode);
+
+ /// \brief Build the code for the expression trees for default mode. Each
+ /// element in the vector represents an expression tree
+ Status Build(const ExpressionVector& exprs) {
+ return Build(exprs, SelectionVector::Mode::MODE_NONE);
+ }
/// \brief Execute the built expression against the provided arguments for
/// default mode.
@@ -65,6 +71,7 @@ class GANDIVA_EXPORT LLVMGenerator {
const SelectionVector* selection_vector,
const ArrayDataVector& output_vector);
+ SelectionVector::Mode selection_vector_mode() { return selection_vector_mode_; }
LLVMTypes* types() { return engine_->types(); }
llvm::Module* module() { return engine_->module(); }
@@ -226,6 +233,7 @@ class GANDIVA_EXPORT LLVMGenerator {
std::vector<std::unique_ptr<CompiledExpr>> compiled_exprs_;
FunctionRegistry function_registry_;
Annotator annotator_;
+ SelectionVector::Mode selection_vector_mode_;
// used for debug
bool dump_ir_;
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index e7b2954..6493fd4 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -40,11 +40,19 @@ Projector::~Projector() {}
Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
std::shared_ptr<Projector>* projector) {
- return Projector::Make(schema, exprs, ConfigurationBuilder::DefaultConfiguration(),
+ return Projector::Make(schema, exprs, SelectionVector::Mode::MODE_NONE,
+ ConfigurationBuilder::DefaultConfiguration(), projector);
+}
+
+Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
+ std::shared_ptr<Configuration> configuration,
+ std::shared_ptr<Projector>* projector) {
+ return Projector::Make(schema, exprs, SelectionVector::Mode::MODE_NONE, configuration,
projector);
}
Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
+ SelectionVector::Mode selection_vector_mode,
std::shared_ptr<Configuration> configuration,
std::shared_ptr<Projector>* projector) {
ARROW_RETURN_IF(schema == nullptr, Status::Invalid("Schema cannot be null"));
@@ -54,7 +62,7 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
// see if equivalent projector was already built
static Cache<ProjectorCacheKey, std::shared_ptr<Projector>> cache;
- ProjectorCacheKey cache_key(schema, configuration, exprs);
+ ProjectorCacheKey cache_key(schema, configuration, exprs, selection_vector_mode);
std::shared_ptr<Projector> cached_projector = cache.GetModule(cache_key);
if (cached_projector != nullptr) {
*projector = cached_projector;
@@ -73,7 +81,7 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
ARROW_RETURN_NOT_OK(expr_validator.Validate(expr));
}
- ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs));
+ ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs, selection_vector_mode));
// save the output field types. Used for validation at Evaluate() time.
std::vector<FieldPtr> output_fields;
diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h
index 2249854..0aa09df 100644
--- a/cpp/src/gandiva/projector.h
+++ b/cpp/src/gandiva/projector.h
@@ -64,6 +64,19 @@ class GANDIVA_EXPORT Projector {
std::shared_ptr<Configuration> configuration,
std::shared_ptr<Projector>* projector);
+ /// Build a projector for the given schema to evaluate the vector of expressions.
+ /// Customize the projector with runtime configuration.
+ ///
+ /// \param[in] schema schema for the record batches, and the expressions.
+ /// \param[in] exprs vector of expressions.
+ /// \param[in] selection_vector_mode mode of selection vector
+ /// \param[in] configuration run time configuration.
+ /// \param[out] projector the returned projector object
+ static Status Make(SchemaPtr schema, const ExpressionVector& exprs,
+ SelectionVector::Mode selection_vector_mode,
+ std::shared_ptr<Configuration> configuration,
+ std::shared_ptr<Projector>* projector);
+
/// Evaluate the specified record batch, and return the allocated and populated output
/// arrays. The output arrays will be allocated from the memory pool 'pool', and added
/// to the vector 'output'.
diff --git a/cpp/src/gandiva/projector_cache_key.h b/cpp/src/gandiva/projector_cache_key.h
index 26da528..e6440c1 100644
--- a/cpp/src/gandiva/projector_cache_key.h
+++ b/cpp/src/gandiva/projector_cache_key.h
@@ -31,8 +31,8 @@ namespace gandiva {
class ProjectorCacheKey {
public:
ProjectorCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration,
- ExpressionVector expression_vector)
- : schema_(schema), configuration_(configuration), uniqifier_(0) {
+ ExpressionVector expression_vector, SelectionVector::Mode mode)
+ : schema_(schema), configuration_(configuration), mode_(mode), uniqifier_(0) {
static const int kSeedValue = 4;
size_t result = kSeedValue;
for (auto& expr : expression_vector) {
@@ -41,6 +41,7 @@ class ProjectorCacheKey {
boost::hash_combine(result, expr_as_string);
UpdateUniqifier(expr_as_string);
}
+ boost::hash_combine(result, mode);
boost::hash_combine(result, configuration->Hash());
boost::hash_combine(result, schema_->ToString());
boost::hash_combine(result, uniqifier_);
@@ -63,6 +64,10 @@ class ProjectorCacheKey {
return false;
}
+ if (mode_ != other.mode_) {
+ return false;
+ }
+
if (uniqifier_ != other.uniqifier_) {
return false;
}
@@ -107,6 +112,7 @@ class ProjectorCacheKey {
const SchemaPtr schema_;
const std::shared_ptr<Configuration> configuration_;
+ SelectionVector::Mode mode_;
std::vector<std::string> expressions_as_strings_;
size_t hash_code_;
uint32_t uniqifier_;
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt
index e51325f..2235269 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -30,6 +30,7 @@ add_gandiva_test(in_expr_test)
add_gandiva_test(null_validity_test)
add_gandiva_test(decimal_test)
add_gandiva_test(decimal_single_test)
+add_gandiva_test(filter_project_test)
add_gandiva_test(projector_test_static SOURCES projector_test.cc USE_STATIC_LINKING)
diff --git a/cpp/src/gandiva/tests/filter_project_test.cc b/cpp/src/gandiva/tests/filter_project_test.cc
index e290029..0607fea 100644
--- a/cpp/src/gandiva/tests/filter_project_test.cc
+++ b/cpp/src/gandiva/tests/filter_project_test.cc
@@ -62,7 +62,8 @@ TEST_F(TestFilterProject, TestSimple16) {
auto status = Filter::Make(schema, condition, configuration, &filter);
EXPECT_TRUE(status.ok());
- status = Projector::Make(schema, {sum_expr}, configuration, &projector);
+ status = Projector::Make(schema, {sum_expr}, SelectionVector::MODE_UINT16,
+ configuration, &projector);
EXPECT_TRUE(status.ok());
// Create a row-batch with some sample data
@@ -117,7 +118,8 @@ TEST_F(TestFilterProject, TestSimple32) {
auto status = Filter::Make(schema, condition, configuration, &filter);
EXPECT_TRUE(status.ok());
- status = Projector::Make(schema, {sum_expr}, configuration, &projector);
+ status = Projector::Make(schema, {sum_expr}, SelectionVector::MODE_UINT32,
+ configuration, &projector);
EXPECT_TRUE(status.ok());
// Create a row-batch with some sample data
@@ -141,7 +143,7 @@ TEST_F(TestFilterProject, TestSimple32) {
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, selection_vector.get(), pool_, &outputs);
- EXPECT_TRUE(status.ok());
+ ASSERT_OK(status);
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(result, outputs.at(0));
@@ -172,8 +174,9 @@ TEST_F(TestFilterProject, TestSimple64) {
auto status = Filter::Make(schema, condition, configuration, &filter);
EXPECT_TRUE(status.ok());
- status = Projector::Make(schema, {sum_expr}, configuration, &projector);
- EXPECT_TRUE(status.ok());
+ status = Projector::Make(schema, {sum_expr}, SelectionVector::MODE_UINT64,
+ configuration, &projector);
+ ASSERT_OK(status);
// Create a row-batch with some sample data
int num_records = 5;
@@ -234,8 +237,9 @@ TEST_F(TestFilterProject, TestSimpleIf) {
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
- status = Projector::Make(schema, {expr}, configuration, &projector);
- EXPECT_TRUE(status.ok());
+ status = Projector::Make(schema, {expr}, SelectionVector::MODE_UINT32, configuration,
+ &projector);
+ ASSERT_OK(status);
// Create a row-batch with some sample data
int num_records = 6;
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
index adbcdf3..ef1d63a 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
@@ -34,12 +34,14 @@ public class JniWrapper {
* @param schemaBuf The schema serialized as a protobuf. See Types.proto
* to see the protobuf specification
* @param exprListBuf The serialized protobuf of the expression vector. Each
- * expression is created using TreeBuilder::MakeExpression
+ * expression is created using TreeBuilder::MakeExpression.
+ * @param selectionVectorType type of selection vector
* @param configId Configuration to gandiva.
* @return A moduleId that is passed to the evaluateProjector() and closeProjector() methods
*
*/
native long buildProjector(byte[] schemaBuf, byte[] exprListBuf,
+ int selectionVectorType,
long configId) throws GandivaException;
/**
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
index 96f8896..ae93fba 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
@@ -73,8 +73,26 @@ public class Projector {
* @return A native evaluator object that can be used to invoke these projections on a RecordBatch
*/
public static Projector make(Schema schema, List<ExpressionTree> exprs)
+ throws GandivaException {
+ return make(schema, exprs, SelectionVectorType.SV_NONE, JniLoader.getDefaultConfiguration());
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evalute() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param selectionVectorType type of selection vector
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Projector make(Schema schema, List<ExpressionTree> exprs,
+ SelectionVectorType selectionVectorType)
throws GandivaException {
- return make(schema, exprs, JniLoader.getDefaultConfiguration());
+ return make(schema, exprs, selectionVectorType, JniLoader.getDefaultConfiguration());
}
/**
@@ -85,12 +103,14 @@ public class Projector {
* @param schema Table schema. The field names in the schema should match the fields used
* to create the TreeNodes
* @param exprs List of expressions to be evaluated against data
+ * @param selectionVectorType type of selection vector
* @param configurationId Custom configuration created through config builder.
*
* @return A native evaluator object that can be used to invoke these projections on a RecordBatch
*/
- public static Projector make(Schema schema, List<ExpressionTree> exprs, long
- configurationId) throws GandivaException {
+ public static Projector make(Schema schema, List<ExpressionTree> exprs,
+ SelectionVectorType selectionVectorType,
+ long configurationId) throws GandivaException {
// serialize the schema and the list of expressions as a protobuf
GandivaTypes.ExpressionList.Builder builder = GandivaTypes.ExpressionList.newBuilder();
for (ExpressionTree expr : exprs) {
@@ -101,7 +121,7 @@ public class Projector {
GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema);
JniWrapper wrapper = JniLoader.getInstance().getWrapper();
long moduleId = wrapper.buildProjector(schemaBuf.toByteArray(),
- builder.build().toByteArray(), configurationId);
+ builder.build().toByteArray(), selectionVectorType.getNumber(), configurationId);
logger.debug("Created module for the projector with id {}", moduleId);
return new Projector(wrapper, moduleId, schema, exprs.size());
}
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java
index 5dc788c..d32a8f5 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java
@@ -27,6 +27,7 @@ import org.apache.arrow.gandiva.exceptions.GandivaException;
import org.apache.arrow.gandiva.expression.Condition;
import org.apache.arrow.gandiva.expression.ExpressionTree;
import org.apache.arrow.gandiva.expression.TreeBuilder;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
@@ -54,7 +55,7 @@ public class FilterProjectTest extends BaseEvaluatorTest {
Filter filter = Filter.make(schema, condition);
ExpressionTree expression = TreeBuilder.makeExpression("add", Lists.newArrayList(a, b), c);
- Projector projector = Projector.make(schema, Lists.newArrayList(expression));
+ Projector projector = Projector.make(schema, Lists.newArrayList(expression), SelectionVectorType.SV_INT16);
int numRows = 16;
byte[] validity = new byte[]{(byte) 255, 0};