You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by np...@apache.org on 2020/04/13 15:25:46 UTC

[arrow] branch master updated: ARROW-8393: [C++][Gandiva] Make gandiva function registry case-insensitive

This is an automated email from the ASF dual-hosted git repository.

npr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a33c294  ARROW-8393: [C++][Gandiva] Make gandiva function registry case-insensitive
a33c294 is described below

commit a33c2945f9ea6211f0b27591cee62f4c83decf95
Author: Projjal Chanda <ia...@pchanda.com>
AuthorDate: Mon Apr 13 08:25:23 2020 -0700

    ARROW-8393: [C++][Gandiva] Make gandiva function registry case-insensitive
    
    Closes #6891 from projjal/case_insensitive
    
    Authored-by: Projjal Chanda <ia...@pchanda.com>
    Signed-off-by: Neal Richardson <ne...@gmail.com>
---
 cpp/src/gandiva/function_signature.cc              |  6 ++-
 cpp/src/gandiva/function_signature_test.cc         | 11 ++++
 .../arrow/gandiva/evaluator/FunctionSignature.java |  4 +-
 .../gandiva/evaluator/ExpressionRegistryTest.java  |  9 ++++
 .../arrow/gandiva/evaluator/ProjectorTest.java     | 63 ++++++++++++++++++++++
 5 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc
index 2a4117f..9e4dac7 100644
--- a/cpp/src/gandiva/function_signature.cc
+++ b/cpp/src/gandiva/function_signature.cc
@@ -17,13 +17,15 @@
 
 #include <gandiva/function_signature.h>
 
+#include <boost/algorithm/string.hpp>
 #include <boost/functional/hash.hpp>
 
 namespace gandiva {
 
 bool FunctionSignature::operator==(const FunctionSignature& other) const {
   if (param_types_.size() != other.param_types_.size() ||
-      !DataTypeEquals(ret_type_, other.ret_type_) || base_name_ != other.base_name_) {
+      !DataTypeEquals(ret_type_, other.ret_type_) ||
+      !boost::iequals(base_name_, other.base_name_)) {
     return false;
   }
 
@@ -40,7 +42,7 @@ bool FunctionSignature::operator==(const FunctionSignature& other) const {
 std::size_t FunctionSignature::Hash() const {
   static const size_t kSeedValue = 17;
   size_t result = kSeedValue;
-  boost::hash_combine(result, base_name_);
+  boost::hash_combine(result, boost::algorithm::to_lower_copy(base_name_));
   boost::hash_combine(result, ret_type_->id());
   // not using hash_range since we only want to include the id from the data type
   for (auto& param_type : param_types_) {
diff --git a/cpp/src/gandiva/function_signature_test.cc b/cpp/src/gandiva/function_signature_test.cc
index 82d3212..79bb84a 100644
--- a/cpp/src/gandiva/function_signature_test.cc
+++ b/cpp/src/gandiva/function_signature_test.cc
@@ -59,6 +59,13 @@ TEST_F(TestFunctionSignature, TestEqualsName) {
 
   EXPECT_FALSE(FunctionSignature("add", {arrow::int32()}, arrow::int32()) ==
                FunctionSignature("sub", {arrow::int32()}, arrow::int32()));
+
+  EXPECT_EQ(FunctionSignature("extractDay", {arrow::int64()}, arrow::int64()),
+            FunctionSignature("extractday", {arrow::int64()}, arrow::int64()));
+
+  EXPECT_EQ(
+      FunctionSignature("castVARCHAR", {arrow::utf8(), arrow::int64()}, arrow::utf8()),
+      FunctionSignature("castvarchar", {arrow::utf8(), arrow::int64()}, arrow::utf8()));
 }
 
 TEST_F(TestFunctionSignature, TestEqualsParamCount) {
@@ -95,6 +102,10 @@ TEST_F(TestFunctionSignature, TestHash) {
   FunctionSignature f1("add", {arrow::int32(), arrow::int32()}, arrow::int64());
   FunctionSignature f2("add", {local_i32_type_, local_i32_type_}, local_i64_type_);
   EXPECT_EQ(f1.Hash(), f2.Hash());
+
+  FunctionSignature f3("extractDay", {arrow::int64()}, arrow::int64());
+  FunctionSignature f4("extractday", {arrow::int64()}, arrow::int64());
+  EXPECT_EQ(f3.Hash(), f4.Hash());
 }
 
 }  // namespace gandiva
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
index 479445d..d018818 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
@@ -69,14 +69,14 @@ public class FunctionSignature {
       return false;
     }
     final FunctionSignature other = (FunctionSignature) signature;
-    return Objects.equal(this.name, other.name) &&
+    return this.name.equalsIgnoreCase(other.name) &&
         Objects.equal(this.returnType, other.returnType) &&
         Objects.equal(this.paramTypes, other.paramTypes);
   }
 
   @Override
   public int hashCode() {
-    return Objects.hashCode(this.name, this.returnType, this.paramTypes);
+    return Objects.hashCode(this.name.toLowerCase(), this.returnType, this.paramTypes);
   }
 
   @Override
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
index 99ae5e4..a51ac09 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
@@ -53,4 +53,13 @@ public class ExpressionRegistryTest {
     Assert.assertTrue(functions.contains(signature));
   }
 
+  @Test
+  public void testCaseInsensitiveFunctionName() throws GandivaException {
+    ArrowType.Utf8 utf8 = new ArrowType.Utf8();
+    ArrowType.Int int64 = new ArrowType.Int(64, true);
+    FunctionSignature signature =
+        new FunctionSignature("castvarchar", utf8, Lists.newArrayList(utf8, int64));
+    Set<FunctionSignature> functions = ExpressionRegistry.getInstance().getSupportedFunctions();
+    Assert.assertTrue(functions.contains(signature));
+  }
 }
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 45c5bb1..15d7a55 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -1597,4 +1597,67 @@ public class ProjectorTest extends BaseEvaluatorTest {
     releaseValueVectors(output);
   }
 
+  @Test
+  public void testCaseInsensitiveFunctions() throws Exception {
+    ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ");
+
+    Field tsField = Field.nullable("timestamp", timeStamp);
+
+    TreeNode tsNode = TreeBuilder.makeField(tsField);
+
+    TreeNode extractday = TreeBuilder.makeFunction("extractday", Lists.newArrayList(tsNode),
+        int64);
+
+    ExpressionTree expr = TreeBuilder.makeExpression(extractday, Field.nullable("result", int64));
+    Schema schema = new Schema(Lists.newArrayList(tsField));
+    Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+    int numRows = 5;
+    byte[] validity = new byte[] {(byte) 255};
+    String[] values =
+        new String[] {
+            "0007-01-01T01:00:00Z",
+            "2007-03-05T03:40:00Z",
+            "2008-05-31T13:55:00Z",
+            "2000-06-30T23:20:00Z",
+            "2000-07-10T20:30:00Z",
+        };
+
+    long[] expValues =
+        new long[] {
+            1, 5, 31, 30, 10
+        };
+
+    ArrowBuf bufValidity = buf(validity);
+    ArrowBuf millisData = stringToMillis(values);
+
+
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+    ArrowRecordBatch batch =
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode),
+            Lists.newArrayList(bufValidity, millisData));
+
+    List<ValueVector> output = new ArrayList<>();
+    BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+    bigIntVector.allocateNew(numRows);
+    output.add(bigIntVector);
+
+    eval.evaluate(batch, output);
+    eval.close();
+
+    for (ValueVector valueVector : output) {
+      BigIntVector vector = (BigIntVector) valueVector;
+
+      for (int j = 0; j < numRows; j++) {
+        assertFalse(vector.isNull(j));
+        assertEquals(expValues[j], vector.get(j));
+      }
+    }
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+  }
+
 }