You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/25 16:21:42 UTC

[5/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)

HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fa36381f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fa36381f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fa36381f

Branch: refs/heads/master
Commit: fa36381faad40576f62e2ac925ef2976efecd8b6
Parents: e2142b2
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat Aug 25 09:21:25 2018 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat Aug 25 09:21:25 2018 -0700

----------------------------------------------------------------------
 .../exec/vector/VectorSMBMapJoinOperator.java   |    2 +-
 .../ql/exec/vector/VectorizationContext.java    |   46 +-
 .../exec/vector/VectorizationContext.java.orig  | 3771 ------------------
 .../expressions/CastStringGroupToString.java    |   40 -
 .../ql/exec/vector/expressions/VectorElt.java   |  168 +-
 .../VectorExpressionWriterFactory.java          |   26 +
 .../ql/exec/vector/TestVectorRowObject.java     |    3 +-
 .../hive/ql/exec/vector/TestVectorSerDeRow.java |  137 +-
 .../ql/exec/vector/VectorRandomRowSource.java   |   67 +-
 .../hive/ql/exec/vector/VectorVerifyFast.java   |    6 +-
 .../aggregation/TestVectorAggregation.java      |    9 +-
 .../expressions/TestVectorArithmetic.java       |   14 +-
 .../vector/expressions/TestVectorBetweenIn.java |   38 +-
 .../expressions/TestVectorCastStatement.java    |   11 +-
 .../expressions/TestVectorCoalesceElt.java      |   87 +-
 .../expressions/TestVectorDateAddSub.java       |   10 +-
 .../vector/expressions/TestVectorDateDiff.java  |    9 +-
 .../expressions/TestVectorFilterCompare.java    |   12 +-
 .../expressions/TestVectorIfStatement.java      |    3 +-
 .../vector/expressions/TestVectorIndex.java     |    5 +-
 .../vector/expressions/TestVectorNegative.java  |   21 +-
 .../exec/vector/expressions/TestVectorNull.java |   14 +-
 .../expressions/TestVectorStringConcat.java     |    3 +-
 .../expressions/TestVectorStringUnary.java      |    3 +-
 .../expressions/TestVectorStructField.java      |  370 ++
 .../vector/expressions/TestVectorSubStr.java    |    3 +-
 .../expressions/TestVectorTimestampExtract.java |    3 +-
 .../fast/TestVectorMapJoinFastRowHashMap.java   |  101 +-
 .../clientpositive/query_result_fileformat.q    |    4 +-
 .../llap/vector_case_when_1.q.out               |    8 +-
 .../llap/vector_char_mapjoin1.q.out             |    1 -
 .../clientpositive/llap/vector_udf1.q.out       |   18 +-
 .../clientpositive/llap/vectorized_casts.q.out  |    6 +-
 .../query_result_fileformat.q.out               |   76 +-
 .../clientpositive/vector_case_when_1.q.out     |    8 +-
 .../clientpositive/vector_char_mapjoin1.q.out   |    2 +-
 .../clientpositive/vectorized_casts.q.out       |    6 +-
 .../hadoop/hive/serde2/RandomTypeUtil.java      |   29 +
 38 files changed, 1059 insertions(+), 4081 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index c13510e..07a6e9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -131,7 +131,7 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator
 
     List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
     keyExpressions = vContext.getVectorExpressions(keyDesc);
-    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
+    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyExpressions);
 
     Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
     bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));

http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index b7feb1c..57f7c01 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1806,6 +1806,25 @@ public class VectorizationContext {
     return vectorExpression;
   }
 
+  public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs)
+      throws HiveException{
+    if (vecExprs == null) {
+      return;
+    }
+    final int size = vecExprs.length;
+    for (int i = 0; i < size; i++) {
+      VectorExpression vecExpr = vecExprs[i];
+      if (vecExpr.getOutputTypeInfo() instanceof DecimalTypeInfo) {
+        DataTypePhysicalVariation outputDataTypePhysicalVariation =
+            vecExpr.getOutputDataTypePhysicalVariation();
+        if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+          vecExprs[i] =
+              wrapWithDecimal64ToDecimalConversion(vecExpr);
+        }
+      }
+    }
+  }
+
   public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression)
       throws HiveException {
 
@@ -2903,7 +2922,11 @@ public class VectorizationContext {
     } else if (isTimestampFamily(inputType)) {
       return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
     } else if (isStringFamily(inputType)) {
-      return createVectorExpression(CastStringGroupToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+
+      // STRING and VARCHAR types require no conversion, so use a no-op.
+      // Also, CHAR is stored in BytesColumnVector with trimmed blank padding, so it also
+      // requires no conversion;
+      return getIdentityExpression(childExpr);
     }
     return null;
   }
@@ -3123,8 +3146,27 @@ public class VectorizationContext {
 
     List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();
     boolean wereCastUdfs = false;
+    Category commonTypeCategory = commonType.getCategory();
     for (ExprNodeDesc desc: childExpr.subList(1, 4)) {
-      if (commonType.equals(desc.getTypeInfo())) {
+      TypeInfo childTypeInfo = desc.getTypeInfo();
+      Category childCategory = childTypeInfo.getCategory();
+
+      if (childCategory != commonTypeCategory) {
+        return null;
+      }
+      final boolean isNeedsCast;
+      if (commonTypeCategory == Category.PRIMITIVE) {
+
+        // Do not to strict TypeInfo comparisons for DECIMAL -- just compare the category.
+        // Otherwise, we generate unnecessary casts.
+        isNeedsCast =
+            ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() !=
+            ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
+      } else {
+        isNeedsCast = !commonType.equals(desc.getTypeInfo());
+      }
+
+      if (!isNeedsCast) {
         castChildren.add(desc);
       } else {
         GenericUDF castUdf = getGenericUDFForCast(commonType);