You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by np...@apache.org on 2020/04/13 15:25:46 UTC
[arrow] branch master updated: ARROW-8393: [C++][Gandiva] Make gandiva function registry case-insensitive
This is an automated email from the ASF dual-hosted git repository.
npr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a33c294 ARROW-8393: [C++][Gandiva] Make gandiva function registry case-insensitive
a33c294 is described below
commit a33c2945f9ea6211f0b27591cee62f4c83decf95
Author: Projjal Chanda <ia...@pchanda.com>
AuthorDate: Mon Apr 13 08:25:23 2020 -0700
ARROW-8393: [C++][Gandiva] Make gandiva function registry case-insensitive
Closes #6891 from projjal/case_insensitive
Authored-by: Projjal Chanda <ia...@pchanda.com>
Signed-off-by: Neal Richardson <ne...@gmail.com>
---
cpp/src/gandiva/function_signature.cc | 6 ++-
cpp/src/gandiva/function_signature_test.cc | 11 ++++
.../arrow/gandiva/evaluator/FunctionSignature.java | 4 +-
.../gandiva/evaluator/ExpressionRegistryTest.java | 9 ++++
.../arrow/gandiva/evaluator/ProjectorTest.java | 63 ++++++++++++++++++++++
5 files changed, 89 insertions(+), 4 deletions(-)
diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc
index 2a4117f..9e4dac7 100644
--- a/cpp/src/gandiva/function_signature.cc
+++ b/cpp/src/gandiva/function_signature.cc
@@ -17,13 +17,15 @@
#include <gandiva/function_signature.h>
+#include <boost/algorithm/string.hpp>
#include <boost/functional/hash.hpp>
namespace gandiva {
bool FunctionSignature::operator==(const FunctionSignature& other) const {
if (param_types_.size() != other.param_types_.size() ||
- !DataTypeEquals(ret_type_, other.ret_type_) || base_name_ != other.base_name_) {
+ !DataTypeEquals(ret_type_, other.ret_type_) ||
+ !boost::iequals(base_name_, other.base_name_)) {
return false;
}
@@ -40,7 +42,7 @@ bool FunctionSignature::operator==(const FunctionSignature& other) const {
std::size_t FunctionSignature::Hash() const {
static const size_t kSeedValue = 17;
size_t result = kSeedValue;
- boost::hash_combine(result, base_name_);
+ boost::hash_combine(result, boost::algorithm::to_lower_copy(base_name_));
boost::hash_combine(result, ret_type_->id());
// not using hash_range since we only want to include the id from the data type
for (auto& param_type : param_types_) {
diff --git a/cpp/src/gandiva/function_signature_test.cc b/cpp/src/gandiva/function_signature_test.cc
index 82d3212..79bb84a 100644
--- a/cpp/src/gandiva/function_signature_test.cc
+++ b/cpp/src/gandiva/function_signature_test.cc
@@ -59,6 +59,13 @@ TEST_F(TestFunctionSignature, TestEqualsName) {
EXPECT_FALSE(FunctionSignature("add", {arrow::int32()}, arrow::int32()) ==
FunctionSignature("sub", {arrow::int32()}, arrow::int32()));
+
+ EXPECT_EQ(FunctionSignature("extractDay", {arrow::int64()}, arrow::int64()),
+ FunctionSignature("extractday", {arrow::int64()}, arrow::int64()));
+
+ EXPECT_EQ(
+ FunctionSignature("castVARCHAR", {arrow::utf8(), arrow::int64()}, arrow::utf8()),
+ FunctionSignature("castvarchar", {arrow::utf8(), arrow::int64()}, arrow::utf8()));
}
TEST_F(TestFunctionSignature, TestEqualsParamCount) {
@@ -95,6 +102,10 @@ TEST_F(TestFunctionSignature, TestHash) {
FunctionSignature f1("add", {arrow::int32(), arrow::int32()}, arrow::int64());
FunctionSignature f2("add", {local_i32_type_, local_i32_type_}, local_i64_type_);
EXPECT_EQ(f1.Hash(), f2.Hash());
+
+ FunctionSignature f3("extractDay", {arrow::int64()}, arrow::int64());
+ FunctionSignature f4("extractday", {arrow::int64()}, arrow::int64());
+ EXPECT_EQ(f3.Hash(), f4.Hash());
}
} // namespace gandiva
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
index 479445d..d018818 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
@@ -69,14 +69,14 @@ public class FunctionSignature {
return false;
}
final FunctionSignature other = (FunctionSignature) signature;
- return Objects.equal(this.name, other.name) &&
+ return this.name.equalsIgnoreCase(other.name) &&
Objects.equal(this.returnType, other.returnType) &&
Objects.equal(this.paramTypes, other.paramTypes);
}
@Override
public int hashCode() {
- return Objects.hashCode(this.name, this.returnType, this.paramTypes);
+ return Objects.hashCode(this.name.toLowerCase(), this.returnType, this.paramTypes);
}
@Override
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
index 99ae5e4..a51ac09 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
@@ -53,4 +53,13 @@ public class ExpressionRegistryTest {
Assert.assertTrue(functions.contains(signature));
}
+ @Test
+ public void testCaseInsensitiveFunctionName() throws GandivaException {
+ ArrowType.Utf8 utf8 = new ArrowType.Utf8();
+ ArrowType.Int int64 = new ArrowType.Int(64, true);
+ FunctionSignature signature =
+ new FunctionSignature("castvarchar", utf8, Lists.newArrayList(utf8, int64));
+ Set<FunctionSignature> functions = ExpressionRegistry.getInstance().getSupportedFunctions();
+ Assert.assertTrue(functions.contains(signature));
+ }
}
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 45c5bb1..15d7a55 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -1597,4 +1597,67 @@ public class ProjectorTest extends BaseEvaluatorTest {
releaseValueVectors(output);
}
+ @Test
+ public void testCaseInsensitiveFunctions() throws Exception {
+ ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ");
+
+ Field tsField = Field.nullable("timestamp", timeStamp);
+
+ TreeNode tsNode = TreeBuilder.makeField(tsField);
+
+ TreeNode extractday = TreeBuilder.makeFunction("extractday", Lists.newArrayList(tsNode),
+ int64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(extractday, Field.nullable("result", int64));
+ Schema schema = new Schema(Lists.newArrayList(tsField));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "0007-01-01T01:00:00Z",
+ "2007-03-05T03:40:00Z",
+ "2008-05-31T13:55:00Z",
+ "2000-06-30T23:20:00Z",
+ "2000-07-10T20:30:00Z",
+ };
+
+ long[] expValues =
+ new long[] {
+ 1, 5, 31, 30, 10
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf millisData = stringToMillis(values);
+
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, millisData));
+
+ List<ValueVector> output = new ArrayList<>();
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+ output.add(bigIntVector);
+
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (ValueVector valueVector : output) {
+ BigIntVector vector = (BigIntVector) valueVector;
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(vector.isNull(j));
+ assertEquals(expValues[j], vector.get(j));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
}