You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/23 21:07:35 UTC
[arrow] branch master updated: ARROW-5637: [Java][C++][Gandiva]
Complete In Expression Support
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5d3668f ARROW-5637: [Java][C++][Gandiva] Complete In Expression Support
5d3668f is described below
commit 5d3668f6700fcc15e4f6e4428bd0dc7ef6ef91d2
Author: Praveen <pr...@dremio.com>
AuthorDate: Sun Jun 23 16:07:26 2019 -0500
ARROW-5637: [Java][C++][Gandiva] Complete In Expression Support
Complete the Java and JNI bindings for the In expression.
Author: Praveen <pr...@dremio.com>
Closes #4662 from praveenbingo/in-expr and squashes the following commits:
82e1d3456 <Praveen> Fix lint issues.
ce51188a6 <Praveen> Fix review comments.
d204f0d57 <Praveen> Address Review comments.
7722fa59a <Praveen> Fixed lint issues.
d2c448cc0 <Praveen> Complete In Expression Support.
---
cpp/src/gandiva/jni/jni_common.cc | 43 +++++++++
cpp/src/gandiva/proto/Types.proto | 27 ++++++
.../apache/arrow/gandiva/expression/InNode.java | 102 +++++++++++++++++++++
.../arrow/gandiva/expression/TreeBuilder.java | 21 +++++
.../arrow/gandiva/evaluator/ProjectorTest.java | 91 ++++++++++++++++++
5 files changed, 284 insertions(+)
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index 72061c0..b4b9ffe 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -313,6 +313,45 @@ NodePtr ProtoTypeToOrNode(const types::OrNode& node) {
return TreeExprBuilder::MakeOr(children);
}
+// Builds a Gandiva IN-expression node from its protobuf form.
+//
+// The constants come from the first list that is present on `node`, checked in the
+// order int32, int64, string, binary. Returns nullptr (after logging to stderr) when
+// the field node is missing or no supported constant list is present.
+NodePtr ProtoTypeToInNode(const types::InNode& node) {
+  NodePtr field = ProtoTypeToFieldNode(node.field());
+  if (field == nullptr) {
+    // NOTE(review): assumes ProtoTypeToFieldNode reports failure as nullptr, the way
+    // this function and the data-type check in ProtoTypeToNullNode do - confirm.
+    std::cerr << "Unable to create field node for in expression.\n";
+    return nullptr;
+  }
+
+  if (node.has_intvalues()) {
+    const auto& constants = node.intvalues();
+    std::unordered_set<int32_t> int_values;
+    for (int i = 0; i < constants.intvalues_size(); i++) {
+      int_values.insert(constants.intvalues(i).value());
+    }
+    return TreeExprBuilder::MakeInExpressionInt32(field, int_values);
+  }
+
+  if (node.has_longvalues()) {
+    const auto& constants = node.longvalues();
+    std::unordered_set<int64_t> long_values;
+    for (int i = 0; i < constants.longvalues_size(); i++) {
+      long_values.insert(constants.longvalues(i).value());
+    }
+    return TreeExprBuilder::MakeInExpressionInt64(field, long_values);
+  }
+
+  if (node.has_stringvalues()) {
+    const auto& constants = node.stringvalues();
+    std::unordered_set<std::string> string_values;
+    for (int i = 0; i < constants.stringvalues_size(); i++) {
+      string_values.insert(constants.stringvalues(i).value());
+    }
+    return TreeExprBuilder::MakeInExpressionString(field, string_values);
+  }
+
+  if (node.has_binaryvalues()) {
+    // Binary constants are carried in std::string as well; only the builder differs.
+    const auto& constants = node.binaryvalues();
+    std::unordered_set<std::string> binary_values;
+    for (int i = 0; i < constants.binaryvalues_size(); i++) {
+      binary_values.insert(constants.binaryvalues(i).value());
+    }
+    return TreeExprBuilder::MakeInExpressionBinary(field, binary_values);
+  }
+
+  // not supported yet.
+  std::cerr << "Unknown constant type for in expression.\n";
+  return nullptr;
+}
+
NodePtr ProtoTypeToNullNode(const types::NullNode& node) {
DataTypePtr data_type = ProtoTypeToDataType(node.type());
if (data_type == nullptr) {
@@ -344,6 +383,10 @@ NodePtr ProtoTypeToNode(const types::TreeNode& node) {
return ProtoTypeToOrNode(node.ornode());
}
+ if (node.has_innode()) {
+ return ProtoTypeToInNode(node.innode());
+ }
+
if (node.has_nullnode()) {
return ProtoTypeToNullNode(node.nullnode());
}
diff --git a/cpp/src/gandiva/proto/Types.proto b/cpp/src/gandiva/proto/Types.proto
index 9efa80f..d264450 100644
--- a/cpp/src/gandiva/proto/Types.proto
+++ b/cpp/src/gandiva/proto/Types.proto
@@ -173,6 +173,9 @@ message TreeNode {
optional StringNode stringNode = 17;
optional BinaryNode binaryNode = 18;
optional DecimalNode decimalNode = 19;
+
+ // in expr
+ optional InNode inNode = 21;
}
message ExpressionRoot {
@@ -205,3 +208,27 @@ message FunctionSignature {
optional ExtGandivaType returnType = 2;
repeated ExtGandivaType paramTypes = 3;
}
+
+message InNode {  // IN expression: a field plus one typed list of constants to match against
+ optional FieldNode field = 1;  // field whose value is tested for membership
+ optional IntConstants intValues = 2;  // int32 constants; the JNI reader checks this list first
+ optional LongConstants longValues = 3;  // int64 constants; checked second
+ optional StringConstants stringValues = 4;  // string constants; checked third
+ optional BinaryConstants binaryValues = 5;  // binary constants; checked last
+}
+
+message IntConstants {  // list of IntNode constants referenced by InNode.intValues
+ repeated IntNode intValues = 1;
+}
+
+message LongConstants {  // list of LongNode constants referenced by InNode.longValues
+ repeated LongNode longValues = 1;
+}
+
+message StringConstants {  // list of StringNode constants referenced by InNode.stringValues
+ repeated StringNode stringValues = 1;
+}
+
+message BinaryConstants {  // list of BinaryNode constants referenced by InNode.binaryValues
+ repeated BinaryNode binaryValues = 1;
+}
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
new file mode 100644
index 0000000..0420ffb
--- /dev/null
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Set;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.pojo.Field;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Tree node for an IN expression: a field together with a fixed set of constants to match
+ * against.
+ *
+ * <p>Instances are created through the static factories, each of which populates exactly one of
+ * the typed constant sets and leaves the others {@code null}. {@link #toProtobuf()} serializes
+ * the first non-null set, checked in the order int, long, string, binary.
+ */
+public class InNode implements TreeNode {
+  /** Charset used to encode string constants before they are placed in the protobuf message. */
+  private static final Charset UTF_8 = StandardCharsets.UTF_8;
+
+  private final Set<Integer> intValues;
+  private final Set<Long> longValues;
+  private final Set<String> stringValues;
+  private final Set<byte[]> binaryValues;
+  private final Field field;
+
+  private InNode(
+      Set<Integer> values,
+      Set<Long> longValues,
+      Set<String> stringValues,
+      Set<byte[]> binaryValues,
+      Field field) {
+    this.intValues = values;
+    this.longValues = longValues;
+    this.stringValues = stringValues;
+    this.binaryValues = binaryValues;
+    this.field = field;
+  }
+
+  /**
+   * Creates an IN node over int constants.
+   *
+   * @param field field whose value is matched against the constants
+   * @param intValues constants to match against
+   * @return the new node
+   */
+  public static InNode makeIntInExpr(Field field, Set<Integer> intValues) {
+    return new InNode(intValues, null, null, null, field);
+  }
+
+  /**
+   * Creates an IN node over long constants.
+   *
+   * @param field field whose value is matched against the constants
+   * @param longValues constants to match against
+   * @return the new node
+   */
+  public static InNode makeLongInExpr(Field field, Set<Long> longValues) {
+    return new InNode(null, longValues, null, null, field);
+  }
+
+  /**
+   * Creates an IN node over string constants. The strings are encoded as UTF-8 when the node is
+   * serialized.
+   *
+   * @param field field whose value is matched against the constants
+   * @param stringValues constants to match against
+   * @return the new node
+   */
+  public static InNode makeStringInExpr(Field field, Set<String> stringValues) {
+    return new InNode(null, null, stringValues, null, field);
+  }
+
+  /**
+   * Creates an IN node over binary constants.
+   *
+   * <p>Java arrays use identity-based {@code equals}/{@code hashCode}, so the set does not
+   * collapse arrays with equal contents; every element is copied into the message as-is.
+   *
+   * @param field field whose value is matched against the constants
+   * @param binaryValues constants to match against
+   * @return the new node
+   */
+  public static InNode makeBinaryInExpr(Field field, Set<byte[]> binaryValues) {
+    return new InNode(null, null, null, binaryValues, field);
+  }
+
+  /**
+   * Serializes this node as a protobuf {@code TreeNode} wrapping an {@code InNode} message.
+   *
+   * <p>If every constant set is {@code null}, the message carries only the field.
+   *
+   * @return the protobuf representation of this node
+   * @throws GandivaException declared by the {@link TreeNode} contract
+   */
+  @Override
+  public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+    GandivaTypes.InNode.Builder inNode = GandivaTypes.InNode.newBuilder();
+
+    GandivaTypes.FieldNode.Builder fieldNode = GandivaTypes.FieldNode.newBuilder();
+    fieldNode.setField(ArrowTypeHelper.arrowFieldToProtobuf(field));
+    inNode.setField(fieldNode);
+
+    if (intValues != null) {
+      GandivaTypes.IntConstants.Builder intConstants = GandivaTypes.IntConstants.newBuilder();
+      for (Integer val : intValues) {
+        intConstants.addIntValues(GandivaTypes.IntNode.newBuilder().setValue(val).build());
+      }
+      inNode.setIntValues(intConstants.build());
+    } else if (longValues != null) {
+      GandivaTypes.LongConstants.Builder longConstants = GandivaTypes.LongConstants.newBuilder();
+      for (Long val : longValues) {
+        longConstants.addLongValues(GandivaTypes.LongNode.newBuilder().setValue(val).build());
+      }
+      inNode.setLongValues(longConstants.build());
+    } else if (stringValues != null) {
+      GandivaTypes.StringConstants.Builder stringConstants =
+          GandivaTypes.StringConstants.newBuilder();
+      for (String val : stringValues) {
+        ByteString encoded = ByteString.copyFrom(val.getBytes(UTF_8));
+        stringConstants.addStringValues(
+            GandivaTypes.StringNode.newBuilder().setValue(encoded).build());
+      }
+      inNode.setStringValues(stringConstants.build());
+    } else if (binaryValues != null) {
+      GandivaTypes.BinaryConstants.Builder binaryConstants =
+          GandivaTypes.BinaryConstants.newBuilder();
+      for (byte[] val : binaryValues) {
+        binaryConstants.addBinaryValues(
+            GandivaTypes.BinaryNode.newBuilder().setValue(ByteString.copyFrom(val)).build());
+      }
+      inNode.setBinaryValues(binaryConstants.build());
+    }
+
+    GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+    builder.setInNode(inNode.build());
+    return builder.build();
+  }
+}
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
index 5d8f21f..c20795f 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
@@ -19,6 +19,7 @@ package org.apache.arrow.gandiva.expression;
import java.util.ArrayList;
import java.util.List;
+import java.util.Set;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
@@ -190,4 +191,24 @@ public class TreeBuilder {
TreeNode root = makeFunction(function, children, new ArrowType.Bool());
return makeCondition(root);
}
+
+ public static TreeNode makeInExpressionInt32(Field resultField,
+ Set<Integer> intValues) {
+ return InNode.makeIntInExpr(resultField, intValues);
+ }
+
+ public static TreeNode makeInExpressionBigInt(Field resultField,
+ Set<Long> longValues) {
+ return InNode.makeLongInExpr(resultField, longValues);
+ }
+
+ public static TreeNode makeInExpressionString(Field resultField,
+ Set<String> stringValues) {
+ return InNode.makeStringInExpr(resultField, stringValues);
+ }
+
+ public static TreeNode makeInExpressionBinary(Field resultField,
+ Set<byte[]> binaryValues) {
+ return InNode.makeBinaryInExpr(resultField, binaryValues);
+ }
}
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index e5f9fe6..62a1271 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -51,6 +51,7 @@ import org.junit.Ignore;
import org.junit.Test;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import io.netty.buffer.ArrowBuf;
@@ -1048,6 +1049,96 @@ public class ProjectorTest extends BaseEvaluatorTest {
}
@Test
+  public void testInExpr() throws GandivaException, Exception {
+    // Projects "c1 IN (1, 2, 3, 4, 5, 15, 16)" over a nullable int32 column.
+    Field c1 = Field.nullable("c1", int32);
+    TreeNode inNode =
+        TreeBuilder.makeInExpressionInt32(c1, Sets.newHashSet(1, 2, 3, 4, 5, 15, 16));
+    ExpressionTree tree = TreeBuilder.makeExpression(inNode, Field.nullable("result", boolType));
+    Schema schema = new Schema(Lists.newArrayList(c1));
+    Projector projector = Projector.make(schema, Lists.newArrayList(tree));
+
+    int numRows = 16;
+    // Presumably marks rows 0-7 valid and rows 8-15 null, which would explain why the
+    // assertions below expect false for the values 15 and 16 even though they are in the set.
+    byte[] validity = new byte[]{(byte) 255, 0};
+    int[] c1Values = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+    ArrowBuf c1Validity = buf(validity);
+    ArrowBuf c1Data = intBuf(c1Values);
+    ArrowBuf c2Validity = buf(validity);
+
+    ArrowFieldNode node = new ArrowFieldNode(numRows, 0);
+    List<ArrowFieldNode> nodes = Lists.newArrayList(node, node);
+    List<ArrowBuf> buffers = Lists.newArrayList(c1Validity, c1Data, c2Validity);
+    ArrowRecordBatch batch = new ArrowRecordBatch(numRows, nodes, buffers);
+
+    BitVector result = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+    result.allocateNew(numRows);
+
+    List<ValueVector> output = new ArrayList<ValueVector>();
+    output.add(result);
+    projector.evaluate(batch, output);
+
+    // Only the first five rows are expected to match.
+    for (int row = 0; row < 16; row++) {
+      boolean actual = result.getObject(row).booleanValue();
+      if (row < 5) {
+        assertTrue(actual);
+      } else {
+        assertFalse(actual);
+      }
+    }
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+    projector.close();
+  }
+
+  @Test
+  public void testInExprStrings() throws GandivaException, Exception {
+    // Projects c1 IN ("one", "two", "three", "four") over a nullable utf8 column.
+    Field c1 = Field.nullable("c1", new ArrowType.Utf8());
+
+    TreeNode inExpr =
+        TreeBuilder.makeInExpressionString(c1, Sets.newHashSet("one", "two", "three", "four"));
+    ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
+    Schema schema = new Schema(Lists.newArrayList(c1));
+    Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+    int numRows = 16;
+    byte[] validity = new byte[]{(byte) 255, 0};
+    String[] c1Values = new String[]{"one", "two", "three", "four", "five", "six", "seven",
+        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+        "sixteen"};
+
+    ArrowBuf c1Validity = buf(validity);
+    List<ArrowBuf> dataBufsX = stringBufs(c1Values);
+    ArrowBuf c2Validity = buf(validity);
+
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+    ArrowRecordBatch batch =
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity));
+
+    BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+    bitVector.allocateNew(numRows);
+
+    List<ValueVector> output = new ArrayList<ValueVector>();
+    output.add(bitVector);
+    eval.evaluate(batch, output);
+
+    // Rows 0-3 hold the four values that are in the set.
+    for (int i = 0; i < 4; i++) {
+      assertTrue(bitVector.getObject(i).booleanValue());
+    }
+    // Every remaining row must be false. The loop starts at row 4 ("five"); starting at 5
+    // would leave that row unchecked.
+    for (int i = 4; i < numRows; i++) {
+      assertFalse(bitVector.getObject(i).booleanValue());
+    }
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+    eval.close();
+  }
+
+ @Test
public void testSmallOutputVectors() throws GandivaException, Exception {
Field a = Field.nullable("a", int32);
Field b = Field.nullable("b", int32);