You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2016/05/27 15:37:27 UTC
[06/48] hive git commit: HIVE-13267: Vectorization: Add
SelectLikeStringColScalar for non-filter operations (Gopal V,
reviewed by Matt McCline)
HIVE-13267: Vectorization: Add SelectLikeStringColScalar for non-filter operations (Gopal V, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/51609a0f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/51609a0f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/51609a0f
Branch: refs/heads/java8
Commit: 51609a0f242ca96de2d6c92b57d4343f89e0d9cc
Parents: d2dac26
Author: Gopal V <go...@apache.org>
Authored: Wed May 25 16:50:33 2016 -0700
Committer: Gopal V <go...@apache.org>
Committed: Wed May 25 16:50:33 2016 -0700
----------------------------------------------------------------------
...AbstractFilterStringColLikeStringScalar.java | 2 +-
.../SelectStringColLikeStringScalar.java | 179 +++++++++++++++++++
.../org/apache/hadoop/hive/ql/udf/UDFLike.java | 3 +-
.../test/queries/clientpositive/vector_udf2.q | 29 +++
.../results/clientpositive/vector_udf2.q.out | 110 ++++++++++++
5 files changed, 321 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/51609a0f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
index b70beef..c50af8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
@@ -63,7 +63,7 @@ public abstract class AbstractFilterStringColLikeStringScalar extends VectorExpr
* @param pattern
* @return
*/
- private Checker createChecker(String pattern) {
+ Checker createChecker(String pattern) {
for (CheckerFactory checkerFactory : getCheckerFactories()) {
Checker checker = checkerFactory.tryCreate(pattern);
if (checker != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/51609a0f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
new file mode 100644
index 0000000..b914196
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.nio.charset.StandardCharsets;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.AbstractFilterStringColLikeStringScalar.Checker;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Projection (non-filter) form of {@code <string column> LIKE <string scalar>}.
+ *
+ * Reads the input column as a {@link BytesColumnVector} and writes, for every
+ * active row, 1 when the value matches the pattern and 0 when it does not into
+ * a {@link LongColumnVector} output column. A null input row yields a null
+ * output row. The matching itself is delegated to a {@link Checker} created by
+ * {@link FilterStringColLikeStringScalar} from the stored pattern bytes.
+ */
+public class SelectStringColLikeStringScalar extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  /** Index into {@code batch.cols} of the string input column. */
+  private int colNum;
+  /** Index into {@code batch.cols} of the long output column. */
+  private int outputColumn;
+  /** LIKE pattern bytes; decoded as UTF-8 when the checker is built. */
+  private byte[] pattern;
+  /** Transient matcher for {@link #pattern}; created lazily on the first evaluate() call. */
+  transient Checker checker = null;
+
+  public SelectStringColLikeStringScalar() {
+    super();
+  }
+
+  /**
+   * @param colNum index of the string input column in the batch
+   * @param pattern LIKE pattern bytes
+   * @param outputColumn index of the long output column in the batch
+   */
+  public SelectStringColLikeStringScalar(int colNum, byte[] pattern, int outputColumn) {
+    super();
+    this.colNum = colNum;
+    this.pattern = pattern;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates the LIKE predicate for every active row of the batch and stores
+   * the result (1 = match, 0 = no match, null for null input) in the output
+   * column. The output column copies the input column's {@code noNulls} and
+   * {@code isRepeating} flags.
+   *
+   * @param batch the row batch to read from and write to
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    // The checker field is transient, so build it from the pattern on first use.
+    if (checker == null) {
+      checker = borrowChecker();
+    }
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+    byte[][] vector = inputColVector.vector;
+    int[] length = inputColVector.length;
+    int[] start = inputColVector.start;
+
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    long[] outputVector = outV.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outV.noNulls = inputColVector.noNulls;
+    outV.isRepeating = inputColVector.isRepeating;
+
+    if (inputColVector.noNulls) {
+      if (inputColVector.isRepeating) {
+        // Only slot 0 is meaningful for a repeating vector.
+        outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0);
+        outV.isNull[0] = false;
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
+          outV.isNull[i] = false;
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
+          outV.isNull[i] = false;
+        }
+      }
+    } else {
+      if (inputColVector.isRepeating) {
+        // All must be selected otherwise size would be zero. Repeating property will not change.
+        if (!nullPos[0]) {
+          outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0);
+          outV.isNull[0] = false;
+        } else {
+          outputVector[0] = LongColumnVector.NULL_VALUE;
+          outV.isNull[0] = true;
+        }
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (!nullPos[i]) {
+            outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
+            outV.isNull[i] = false;
+          } else {
+            // Write the placeholder into this row's own slot; writing to slot 0
+            // would clobber the result already computed for row 0.
+            outputVector[i] = LongColumnVector.NULL_VALUE;
+            outV.isNull[i] = true;
+          }
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          if (!nullPos[i]) {
+            outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
+            outV.isNull[i] = false;
+          } else {
+            // Same as above: the placeholder belongs to row i, not row 0.
+            outputVector[i] = LongColumnVector.NULL_VALUE;
+            outV.isNull[i] = true;
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds a checker for the stored pattern by reusing the checker creation
+   * logic of the filter variant of this expression.
+   *
+   * @return a checker for {@link #pattern} decoded as UTF-8
+   */
+  private Checker borrowChecker() {
+    FilterStringColLikeStringScalar fil = new FilterStringColLikeStringScalar();
+    return fil.createChecker(new String(pattern, StandardCharsets.UTF_8));
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public byte[] getPattern() {
+    return pattern;
+  }
+
+  public void setPattern(byte[] pattern) {
+    this.pattern = pattern;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  // NOTE(review): evaluate() writes into a LongColumnVector, so "String_Family"
+  // looks inconsistent with the column actually produced. Confirm how callers
+  // use this value before changing it.
+  @Override
+  public String getOutputType() {
+    return "String_Family";
+  }
+
+  /**
+   * Describes this expression as a two-argument projection taking a
+   * string-family column and a string scalar.
+   */
+  @Override
+  public Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+            VectorExpressionDescriptor.ArgumentType.STRING)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/51609a0f/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
index 85d0363..7bcd36e 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectStringColLikeStringScalar;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.Text;
@@ -37,7 +38,7 @@ import org.apache.hadoop.io.Text;
extended = "Example:\n"
+ " > SELECT a.* FROM srcpart a WHERE a.hr _FUNC_ '%2' LIMIT 1;\n"
+ " 27 val_27 2008-04-08 12")
-@VectorizedExpressions({FilterStringColLikeStringScalar.class})
+@VectorizedExpressions({FilterStringColLikeStringScalar.class, SelectStringColLikeStringScalar.class})
public class UDFLike extends UDF {
private final Text lastLikePattern = new Text();
private Pattern p = null;
http://git-wip-us.apache.org/repos/asf/hive/blob/51609a0f/ql/src/test/queries/clientpositive/vector_udf2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_udf2.q b/ql/src/test/queries/clientpositive/vector_udf2.q
new file mode 100644
index 0000000..e349d14
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_udf2.q
@@ -0,0 +1,29 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+drop table varchar_udf_2;
+
+create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC;
+insert overwrite table varchar_udf_2
+ select key, value, key, value from src where key = '238' limit 1;
+
+explain
+select
+ c1 LIKE '%38%',
+ c2 LIKE 'val_%',
+ c3 LIKE '%38',
+ c1 LIKE '%3x8%',
+ c2 LIKE 'xval_%',
+ c3 LIKE '%x38'
+from varchar_udf_2 limit 1;
+
+select
+ c1 LIKE '%38%',
+ c2 LIKE 'val_%',
+ c3 LIKE '%38',
+ c1 LIKE '%3x8%',
+ c2 LIKE 'xval_%',
+ c3 LIKE '%x38'
+from varchar_udf_2 limit 1;
+
+drop table varchar_udf_2;
http://git-wip-us.apache.org/repos/asf/hive/blob/51609a0f/ql/src/test/results/clientpositive/vector_udf2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_udf2.q.out b/ql/src/test/results/clientpositive/vector_udf2.q.out
new file mode 100644
index 0000000..42e7041
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_udf2.q.out
@@ -0,0 +1,110 @@
+PREHOOK: query: drop table varchar_udf_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table varchar_udf_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_udf_2
+POSTHOOK: query: create table varchar_udf_2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_udf_2
+PREHOOK: query: insert overwrite table varchar_udf_2
+ select key, value, key, value from src where key = '238' limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@varchar_udf_2
+POSTHOOK: query: insert overwrite table varchar_udf_2
+ select key, value, key, value from src where key = '238' limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@varchar_udf_2
+POSTHOOK: Lineage: varchar_udf_2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_2.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_udf_2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select
+ c1 LIKE '%38%',
+ c2 LIKE 'val_%',
+ c3 LIKE '%38',
+ c1 LIKE '%3x8%',
+ c2 LIKE 'xval_%',
+ c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ c1 LIKE '%38%',
+ c2 LIKE 'val_%',
+ c3 LIKE '%38',
+ c1 LIKE '%3x8%',
+ c2 LIKE 'xval_%',
+ c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: varchar_udf_2
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (c1 like '%38%') (type: boolean), (c2 like 'val_%') (type: boolean), (c3 like '%38') (type: boolean), (c1 like '%3x8%') (type: boolean), (c2 like 'xval_%') (type: boolean), (c3 like '%x38') (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select
+ c1 LIKE '%38%',
+ c2 LIKE 'val_%',
+ c3 LIKE '%38',
+ c1 LIKE '%3x8%',
+ c2 LIKE 'xval_%',
+ c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_udf_2
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ c1 LIKE '%38%',
+ c2 LIKE 'val_%',
+ c3 LIKE '%38',
+ c1 LIKE '%3x8%',
+ c2 LIKE 'xval_%',
+ c3 LIKE '%x38'
+from varchar_udf_2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_udf_2
+#### A masked pattern was here ####
+true true true false false false
+PREHOOK: query: drop table varchar_udf_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_udf_2
+PREHOOK: Output: default@varchar_udf_2
+POSTHOOK: query: drop table varchar_udf_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_udf_2
+POSTHOOK: Output: default@varchar_udf_2