You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/25 16:21:42 UTC
[5/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL /
Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline,
reviewed by Teddy Choi)
HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fa36381f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fa36381f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fa36381f
Branch: refs/heads/master
Commit: fa36381faad40576f62e2ac925ef2976efecd8b6
Parents: e2142b2
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat Aug 25 09:21:25 2018 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat Aug 25 09:21:25 2018 -0700
----------------------------------------------------------------------
.../exec/vector/VectorSMBMapJoinOperator.java | 2 +-
.../ql/exec/vector/VectorizationContext.java | 46 +-
.../exec/vector/VectorizationContext.java.orig | 3771 ------------------
.../expressions/CastStringGroupToString.java | 40 -
.../ql/exec/vector/expressions/VectorElt.java | 168 +-
.../VectorExpressionWriterFactory.java | 26 +
.../ql/exec/vector/TestVectorRowObject.java | 3 +-
.../hive/ql/exec/vector/TestVectorSerDeRow.java | 137 +-
.../ql/exec/vector/VectorRandomRowSource.java | 67 +-
.../hive/ql/exec/vector/VectorVerifyFast.java | 6 +-
.../aggregation/TestVectorAggregation.java | 9 +-
.../expressions/TestVectorArithmetic.java | 14 +-
.../vector/expressions/TestVectorBetweenIn.java | 38 +-
.../expressions/TestVectorCastStatement.java | 11 +-
.../expressions/TestVectorCoalesceElt.java | 87 +-
.../expressions/TestVectorDateAddSub.java | 10 +-
.../vector/expressions/TestVectorDateDiff.java | 9 +-
.../expressions/TestVectorFilterCompare.java | 12 +-
.../expressions/TestVectorIfStatement.java | 3 +-
.../vector/expressions/TestVectorIndex.java | 5 +-
.../vector/expressions/TestVectorNegative.java | 21 +-
.../exec/vector/expressions/TestVectorNull.java | 14 +-
.../expressions/TestVectorStringConcat.java | 3 +-
.../expressions/TestVectorStringUnary.java | 3 +-
.../expressions/TestVectorStructField.java | 370 ++
.../vector/expressions/TestVectorSubStr.java | 3 +-
.../expressions/TestVectorTimestampExtract.java | 3 +-
.../fast/TestVectorMapJoinFastRowHashMap.java | 101 +-
.../clientpositive/query_result_fileformat.q | 4 +-
.../llap/vector_case_when_1.q.out | 8 +-
.../llap/vector_char_mapjoin1.q.out | 1 -
.../clientpositive/llap/vector_udf1.q.out | 18 +-
.../clientpositive/llap/vectorized_casts.q.out | 6 +-
.../query_result_fileformat.q.out | 76 +-
.../clientpositive/vector_case_when_1.q.out | 8 +-
.../clientpositive/vector_char_mapjoin1.q.out | 2 +-
.../clientpositive/vectorized_casts.q.out | 6 +-
.../hadoop/hive/serde2/RandomTypeUtil.java | 29 +
38 files changed, 1059 insertions(+), 4081 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index c13510e..07a6e9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -131,7 +131,7 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator
List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
keyExpressions = vContext.getVectorExpressions(keyDesc);
- keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
+ keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyExpressions);
Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index b7feb1c..57f7c01 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1806,6 +1806,25 @@ public class VectorizationContext {
return vectorExpression;
}
+ public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs)
+ throws HiveException{
+ if (vecExprs == null) {
+ return;
+ }
+ final int size = vecExprs.length;
+ for (int i = 0; i < size; i++) {
+ VectorExpression vecExpr = vecExprs[i];
+ if (vecExpr.getOutputTypeInfo() instanceof DecimalTypeInfo) {
+ DataTypePhysicalVariation outputDataTypePhysicalVariation =
+ vecExpr.getOutputDataTypePhysicalVariation();
+ if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+ vecExprs[i] =
+ wrapWithDecimal64ToDecimalConversion(vecExpr);
+ }
+ }
+ }
+ }
+
public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression)
throws HiveException {
@@ -2903,7 +2922,11 @@ public class VectorizationContext {
} else if (isTimestampFamily(inputType)) {
return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isStringFamily(inputType)) {
- return createVectorExpression(CastStringGroupToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+
+ // STRING and VARCHAR types require no conversion, so use a no-op.
+ // Also, CHAR is stored in BytesColumnVector with trimmed blank padding, so it also
+ // requires no conversion.
+ return getIdentityExpression(childExpr);
}
return null;
}
@@ -3123,8 +3146,27 @@ public class VectorizationContext {
List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();
boolean wereCastUdfs = false;
+ Category commonTypeCategory = commonType.getCategory();
for (ExprNodeDesc desc: childExpr.subList(1, 4)) {
- if (commonType.equals(desc.getTypeInfo())) {
+ TypeInfo childTypeInfo = desc.getTypeInfo();
+ Category childCategory = childTypeInfo.getCategory();
+
+ if (childCategory != commonTypeCategory) {
+ return null;
+ }
+ final boolean isNeedsCast;
+ if (commonTypeCategory == Category.PRIMITIVE) {
+
+ // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the category.
+ // Otherwise, we generate unnecessary casts.
+ isNeedsCast =
+ ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() !=
+ ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
+ } else {
+ isNeedsCast = !commonType.equals(desc.getTypeInfo());
+ }
+
+ if (!isNeedsCast) {
castChildren.add(desc);
} else {
GenericUDF castUdf = getGenericUDFForCast(commonType);