You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/19 16:40:59 UTC
hive git commit: HIVE-13562: Enable vector bridge for all
non-vectorized udfs (Matt McCline, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 64454faa3 -> ffa69a22d
HIVE-13562: Enable vector bridge for all non-vectorized udfs (Matt McCline, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ffa69a22
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ffa69a22
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ffa69a22
Branch: refs/heads/master
Commit: ffa69a22d27e5c8e42e6321561964a1b244c8a7f
Parents: 64454fa
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu May 19 09:36:46 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu May 19 09:36:46 2016 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 157 ++++++-------------
.../hive/ql/optimizer/physical/Vectorizer.java | 22 ++-
.../hive/ql/plan/ExprNodeGenericFuncDesc.java | 6 +
.../clientpositive/vector_between_columns.q | 13 +-
.../tez/vector_between_columns.q.out | 40 ++---
.../clientpositive/tez/vector_decimal_udf.q.out | 1 +
.../tez/vector_decimal_udf2.q.out | 1 +
.../clientpositive/vector_between_columns.q.out | 40 ++---
.../clientpositive/vector_decimal_udf.q.out | 1 +
.../clientpositive/vector_decimal_udf2.q.out | 1 +
.../results/clientpositive/vector_udf1.q.out | 1 +
11 files changed, 121 insertions(+), 162 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index d213731..886e222 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -477,8 +477,8 @@ public class VectorizationContext {
ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
} else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
- if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr, mode)) {
- ve = getCustomUDFExpression(expr);
+ if (isCustomUDF(expr)) {
+ ve = getCustomUDFExpression(expr, mode);
} else {
// Add cast expression if needed. Child expressions of a udf may return different data types
@@ -489,13 +489,20 @@ public class VectorizationContext {
exprDesc.getChildren(), exprDesc.getTypeInfo());
ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
childExpressions, mode, exprDesc.getTypeInfo());
+ if (ve == null) {
+ /*
+ * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor.
+ */
+ ve = getCustomUDFExpression(expr, mode);
+ }
}
} else if (exprDesc instanceof ExprNodeConstantDesc) {
ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(),
mode);
}
if (ve == null) {
- throw new HiveException("Could not vectorize expression: "+exprDesc.getName());
+ throw new HiveException(
+ "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString());
}
if (LOG.isDebugEnabled()) {
LOG.debug("Input Expression = " + exprDesc.toString()
@@ -758,64 +765,6 @@ public class VectorizationContext {
return genericUdf;
}
-
- /* Return true if this is one of a small set of functions for which
- * it is significantly easier to use the old code path in vectorized
- * mode instead of implementing a new, optimized VectorExpression.
- *
- * Depending on performance requirements and frequency of use, these
- * may be implemented in the future with an optimized VectorExpression.
- */
- public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, Mode mode) {
- GenericUDF gudf = expr.getGenericUDF();
- if (gudf instanceof GenericUDFBridge) {
- GenericUDFBridge bridge = (GenericUDFBridge) gudf;
- Class<? extends UDF> udfClass = bridge.getUdfClass();
- if (udfClass.equals(UDFHex.class)
- || udfClass.equals(UDFRegExpExtract.class)
- || udfClass.equals(UDFRegExpReplace.class)
- || udfClass.equals(UDFConv.class)
- || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr))
- || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr))
- || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))
- || udfClass.equals(UDFToString.class) &&
- (arg0Type(expr).equals("timestamp")
- || arg0Type(expr).equals("double")
- || arg0Type(expr).equals("float"))) {
- return true;
- }
- } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr)))
-
- /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because
- * of their complexity and generality. In the future, variations of these
- * can be optimized to run faster for the vectorized code path. For example,
- * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END
- * is an example of a GenericUDFCase that has all constant arguments
- * except for the first argument. This is probably a common case and a
- * good candidate for a fast, special-purpose VectorExpression. Then
- * the UDF Adaptor code path could be used as a catch-all for
- * non-optimized general cases.
- */
- || gudf instanceof GenericUDFCase
- || gudf instanceof GenericUDFWhen) {
- return true;
- } else if (gudf instanceof GenericUDFToChar &&
- (arg0Type(expr).equals("timestamp")
- || arg0Type(expr).equals("double")
- || arg0Type(expr).equals("float"))) {
- return true;
- } else if (gudf instanceof GenericUDFToVarchar &&
- (arg0Type(expr).equals("timestamp")
- || arg0Type(expr).equals("double")
- || arg0Type(expr).equals("float"))) {
- return true;
- } else if (gudf instanceof GenericUDFBetween && (mode == Mode.PROJECTION)) {
- // between has 4 args here, but can be vectorized like this
- return true;
- }
- return false;
- }
-
public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) {
return udfClass.equals(UDFToByte.class)
|| udfClass.equals(UDFToShort.class)
@@ -1213,36 +1162,38 @@ public class VectorizationContext {
List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
childExpr = castedChildren;
- //First handle special cases
+ //First handle special cases. If one of the special case methods cannot handle it,
+ // it returns null.
+ VectorExpression ve = null;
if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) {
- return getBetweenFilterExpression(childExpr, mode, returnType);
+ ve = getBetweenFilterExpression(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFIn) {
- return getInExpression(childExpr, mode, returnType);
+ ve = getInExpression(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFOPPositive) {
- return getIdentityExpression(childExpr);
+ ve = getIdentityExpression(childExpr);
} else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) {
// Coalesce is a special case because it can take variable number of arguments.
// Nvl is a specialization of the Coalesce.
- return getCoalesceExpression(childExpr, returnType);
+ ve = getCoalesceExpression(childExpr, returnType);
} else if (udf instanceof GenericUDFElt) {
// Elt is a special case because it can take variable number of arguments.
- return getEltExpression(childExpr, returnType);
+ ve = getEltExpression(childExpr, returnType);
} else if (udf instanceof GenericUDFBridge) {
- VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
+ ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
returnType);
- if (v != null) {
- return v;
- }
} else if (udf instanceof GenericUDFToDecimal) {
- return getCastToDecimal(childExpr, returnType);
+ ve = getCastToDecimal(childExpr, returnType);
} else if (udf instanceof GenericUDFToChar) {
- return getCastToChar(childExpr, returnType);
+ ve = getCastToChar(childExpr, returnType);
} else if (udf instanceof GenericUDFToVarchar) {
- return getCastToVarChar(childExpr, returnType);
+ ve = getCastToVarChar(childExpr, returnType);
} else if (udf instanceof GenericUDFTimestamp) {
- return getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
+ ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
+ }
+ if (ve != null) {
+ return ve;
}
// Now do a general lookup
Class<?> udfClass = udf.getClass();
@@ -1252,13 +1203,9 @@ public class VectorizationContext {
isSubstituted = true;
}
- VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
+ ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
udfClass, castedChildren, mode, returnType);
- if (ve == null) {
- throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
- }
-
return ve;
}
@@ -1623,16 +1570,20 @@ public class VectorizationContext {
private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf,
List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
Class<? extends UDF> cl = udf.getUdfClass();
+ VectorExpression ve = null;
if (isCastToIntFamily(cl)) {
- return getCastToLongExpression(childExpr);
+ ve = getCastToLongExpression(childExpr);
} else if (cl.equals(UDFToBoolean.class)) {
- return getCastToBoolean(childExpr);
+ ve = getCastToBoolean(childExpr);
} else if (isCastToFloatFamily(cl)) {
- return getCastToDoubleExpression(cl, childExpr, returnType);
+ ve = getCastToDoubleExpression(cl, childExpr, returnType);
} else if (cl.equals(UDFToString.class)) {
- return getCastToString(childExpr, returnType);
+ ve = getCastToString(childExpr, returnType);
}
- return null;
+ if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) {
+ ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode);
+ }
+ return ve;
}
private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException {
@@ -1762,10 +1713,10 @@ public class VectorizationContext {
returnType);
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType);
- } else if (isDatetimeFamily(inputType)) {
+ } else if (inputType.equals("timestamp")) {
return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType);
}
- throw new HiveException("Unhandled cast input type: " + inputType);
+ return null;
}
private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
@@ -1790,11 +1741,7 @@ public class VectorizationContext {
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToString.class, childExpr, Mode.PROJECTION, returnType);
}
- /* The string type is deliberately omitted -- the planner removes string to string casts.
- * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
- */
-
- throw new HiveException("Unhandled cast input type: " + inputType);
+ return null;
}
private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo returnType)
@@ -1818,12 +1765,7 @@ public class VectorizationContext {
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType);
}
-
- /*
- * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
- */
-
- throw new HiveException("Unhandled cast input type: " + inputType);
+ return null;
}
private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo returnType)
@@ -1847,12 +1789,7 @@ public class VectorizationContext {
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType);
}
-
- /*
- * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
- */
-
- throw new HiveException("Unhandled cast input type: " + inputType);
+ return null;
}
private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr,
@@ -1875,8 +1812,6 @@ public class VectorizationContext {
// float types require no conversion, so use a no-op
return getIdentityExpression(childExpr);
}
- // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
-
return null;
}
@@ -1905,8 +1840,6 @@ public class VectorizationContext {
ocm.freeOutputColumn(lenExpr.getOutputColumn());
return lenToBoolExpr;
}
- // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op
-
return null;
}
@@ -1926,8 +1859,6 @@ public class VectorizationContext {
// integer and boolean types require no conversion, so use a no-op
return getIdentityExpression(childExpr);
}
- // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
-
return null;
}
@@ -2031,9 +1962,13 @@ public class VectorizationContext {
/*
* Return vector expression for a custom (i.e. not built-in) UDF.
*/
- private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
+ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Mode mode)
throws HiveException {
+ if (mode != Mode.PROJECTION) {
+ return null;
+ }
+
//GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
List<ExprNodeDesc> childExprList = expr.getChildren();
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 4a156a2..51e7a17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1681,14 +1681,14 @@ public class Vectorizer implements PhysicalPlanResolver {
if (desc.getChildren() != null) {
if (isInExpression
&& desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
- // Don't restrict child expressions for projection.
+ // Don't restrict child expressions for projection.
// Always use loose FILTER mode.
if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) {
return false;
}
} else {
for (ExprNodeDesc d : desc.getChildren()) {
- // Don't restrict child expressions for projection.
+ // Don't restrict child expressions for projection.
// Always use loose FILTER mode.
if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) {
return false;
@@ -1754,10 +1754,16 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
} catch (Exception e) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed to vectorize", e);
+ if (e instanceof HiveException) {
+ LOG.info(e.getMessage());
+ } else {
+ if (LOG.isDebugEnabled()) {
+ // Show stack trace.
+ LOG.debug("Failed to vectorize", e);
+ } else {
+ LOG.info("Failed to vectorize: {}", e.getMessage());
+ }
}
-
return false;
}
return true;
@@ -2219,7 +2225,7 @@ public class Vectorizer implements PhysicalPlanResolver {
if (keySerializerClass != org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class) {
return false;
}
-
+
TableDesc valueTableDesc = desc.getValueSerializeInfo();
Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
if (valueDeserializerClass != org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class) {
@@ -2278,7 +2284,7 @@ public class Vectorizer implements PhysicalPlanResolver {
} else {
reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
}
-
+
vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
@@ -2333,7 +2339,7 @@ public class Vectorizer implements PhysicalPlanResolver {
}
}
break;
-
+
case REDUCESINK:
{
VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
index 9e0159c..aef46da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
@@ -135,6 +135,12 @@ public class ExprNodeGenericFuncDesc extends ExprNodeDesc implements
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(genericUDF.getClass().getSimpleName());
+ if (genericUDF instanceof GenericUDFBridge) {
+ GenericUDFBridge genericUDFBridge = (GenericUDFBridge) genericUDF;
+ sb.append(" ==> ");
+ sb.append(genericUDFBridge.getUdfName());
+ sb.append(" ");
+ }
sb.append("(");
if (chidren != null) {
for (int i = 0; i < chidren.size(); i++) {
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/queries/clientpositive/vector_between_columns.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q
index 4c83d0a..ba38445 100644
--- a/ql/src/test/queries/clientpositive/vector_between_columns.q
+++ b/ql/src/test/queries/clientpositive/vector_between_columns.q
@@ -6,7 +6,10 @@ set hive.fetch.task.conversion=none;
set hive.mapred.mode=nonstrict;
-- SORT_QUERY_RESULTS
-
+--
+-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
+-- because the mode = FILTER is not supported yet.
+--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
@@ -21,9 +24,9 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt;
create table TINT stored as orc AS SELECT * FROM TINT_txt;
--- We DO NOT expect the following to vectorized because the BETWEEN range expressions
--- are not constants. We currently do not support the range expressions being columns.
+
+
explain
-select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint;
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;
-select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint;
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
index d8f9c8b..d548364 100644
--- a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
@@ -1,12 +1,18 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-
+--
+-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
+-- because the mode = FILTER is not supported yet.
+--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@TSINT_txt
POSTHOOK: query: -- SORT_QUERY_RESULTS
-
+--
+-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
+-- because the mode = FILTER is not supported yet.
+--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
POSTHOOK: type: CREATETABLE
@@ -65,15 +71,11 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t
POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
tint_txt.rnum tint_txt.cint
Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
--- are not constants. We currently do not support the range expressions being columns.
-explain
-select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
PREHOOK: type: QUERY
-POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
--- are not constants. We currently do not support the range expressions being columns.
-explain
-select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
POSTHOOK: type: QUERY
Explain
STAGE DEPENDENCIES:
@@ -111,8 +113,8 @@ STAGE PLANS:
predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -143,18 +145,18 @@ STAGE PLANS:
ListSink
Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
PREHOOK: type: QUERY
PREHOOK: Input: default@tint
PREHOOK: Input: default@tsint
#### A masked pattern was here ####
-POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tint
POSTHOOK: Input: default@tsint
#### A masked pattern was here ####
-tint.rnum tsint.rnum
-1 1
-2 2
-3 3
-4 4
+tint.rnum tsint.rnum tint.cint tsint.csint
+1 1 -1 -1
+2 2 0 0
+3 3 1 1
+4 4 10 10
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
index 54bad12..bcf1ab6 100644
--- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
@@ -2156,6 +2156,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
index b7ddf73..de8ce7f 100644
--- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
@@ -145,6 +145,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out
index a4e8d64..5faa79b 100644
--- a/ql/src/test/results/clientpositive/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -1,12 +1,18 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-
+--
+-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
+-- because the mode = FILTER is not supported yet.
+--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@TSINT_txt
POSTHOOK: query: -- SORT_QUERY_RESULTS
-
+--
+-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
+-- because the mode = FILTER is not supported yet.
+--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
POSTHOOK: type: CREATETABLE
@@ -65,15 +71,11 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t
POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
tint_txt.rnum tint_txt.cint
Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
-PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
--- are not constants. We currently do not support the range expressions being columns.
-explain
-select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
PREHOOK: type: QUERY
-POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
--- are not constants. We currently do not support the range expressions being columns.
-explain
-select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
POSTHOOK: type: QUERY
Explain
STAGE DEPENDENCIES:
@@ -124,8 +126,8 @@ STAGE PLANS:
predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -144,18 +146,18 @@ STAGE PLANS:
ListSink
Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
-PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
PREHOOK: type: QUERY
PREHOOK: Input: default@tint
PREHOOK: Input: default@tsint
#### A masked pattern was here ####
-POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tint
POSTHOOK: Input: default@tsint
#### A masked pattern was here ####
-tint.rnum tsint.rnum
-1 1
-2 2
-3 3
-4 4
+tint.rnum tsint.rnum tint.cint tsint.csint
+1 1 -1 -1
+2 2 0 0
+3 3 1 1
+4 4 10 10
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/vector_decimal_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out
index 9dea502..b99fd10 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out
@@ -2085,6 +2085,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
index 805584a..4e24fa6 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
@@ -139,6 +139,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ffa69a22/ql/src/test/results/clientpositive/vector_udf1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_udf1.q.out b/ql/src/test/results/clientpositive/vector_udf1.q.out
index bb02ea7..232d78e 100644
--- a/ql/src/test/results/clientpositive/vector_udf1.q.out
+++ b/ql/src/test/results/clientpositive/vector_udf1.q.out
@@ -766,6 +766,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator