You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2019/02/06 10:08:00 UTC
[hive] branch master updated: HIVE-20295: Remove !isNumber check after failed constant interpretation (Ivan Suller via Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 269dc5d HIVE-20295: Remove !isNumber check after failed constant interpretation (Ivan Suller via Zoltan Haindrich)
269dc5d is described below
commit 269dc5dde1e8290da93f204f2bb951bd4af40098
Author: Ivan Suller <is...@cloudera.com>
AuthorDate: Wed Feb 6 10:48:21 2019 +0100
HIVE-20295: Remove !isNumber check after failed constant interpretation (Ivan Suller via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
.../hadoop/hive/ql/parse/TypeCheckProcFactory.java | 31 +++--
.../hive/ql/parse/TestTypeCheckProcFactory.java | 146 +++++++++++++++++++++
.../results/clientpositive/infer_const_type.q.out | 13 +-
.../clientpositive/llap/orc_llap_counters.q.out | 11 +-
.../clientpositive/llap/orc_ppd_basic.q.out | 11 +-
.../clientpositive/llap/vectorization_0.q.out | 16 +--
.../clientpositive/parquet_vectorization_0.q.out | 14 +-
.../spark/parquet_vectorization_0.q.out | 14 +-
.../clientpositive/spark/vectorization_0.q.out | 16 +--
9 files changed, 214 insertions(+), 58 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index b49bb36..a2dd554 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -106,6 +106,7 @@ import org.apache.hive.common.util.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
@@ -1345,7 +1346,8 @@ public class TypeCheckProcFactory {
return valueDesc;
}
- private static ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) {
+ @VisibleForTesting
+ protected static ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) {
if (constChild instanceof ExprNodeConstantDesc) {
// Try to narrow type of constant
Object constVal = ((ExprNodeConstantDesc) constChild).getValue();
@@ -1373,32 +1375,36 @@ public class TypeCheckProcFactory {
return colTypeInfo;
}
+ private static BigDecimal toBigDecimal(String val) {
+ if (!NumberUtils.isNumber(val)) {
+ throw new NumberFormatException("The given string is not a valid number: " + val);
+ }
+ return new BigDecimal(val.replaceAll("[dDfFlL]$", ""));
+ }
+
private static Object interpretConstantAsPrimitive(PrimitiveTypeInfo colTypeInfo, Object constVal,
TypeInfo constTypeInfo) {
- String constTypeInfoName = constTypeInfo.getTypeName();
if (constVal instanceof Number || constVal instanceof String) {
try {
PrimitiveTypeEntry primitiveTypeEntry = colTypeInfo.getPrimitiveTypeEntry();
if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) {
- return (new Integer(constVal.toString()));
+ return toBigDecimal(constVal.toString()).intValueExact();
} else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) {
- return (new Long(constVal.toString()));
+ return toBigDecimal(constVal.toString()).longValueExact();
} else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) {
- return (new Double(constVal.toString()));
+ return Double.valueOf(constVal.toString());
} else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) {
- return (new Float(constVal.toString()));
+ return Float.valueOf(constVal.toString());
} else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) {
- return (new Byte(constVal.toString()));
+ return toBigDecimal(constVal.toString()).byteValueExact();
} else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) {
- return (new Short(constVal.toString()));
+ return toBigDecimal(constVal.toString()).shortValueExact();
} else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) {
return HiveDecimal.create(constVal.toString());
}
- } catch (NumberFormatException nfe) {
+ } catch (NumberFormatException | ArithmeticException nfe) {
LOG.trace("Failed to narrow type of constant", nfe);
- if (!NumberUtils.isNumber(constVal.toString())) {
- return null;
- }
+ return null;
}
}
@@ -1419,6 +1425,7 @@ public class TypeCheckProcFactory {
// if column type is char and constant type is string, then convert the constant to char
// type with padded spaces.
+ String constTypeInfoName = constTypeInfo.getTypeName();
if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && colTypeInfo instanceof CharTypeInfo) {
final String constValue = constVal.toString();
final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java
new file mode 100644
index 0000000..66d024a
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.when;
+
+import java.math.BigDecimal;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+/**
+ * Parametrized test for the TypeCheckProcFactory.
+ *
+ */
+@RunWith(Parameterized.class)
+public class TestTypeCheckProcFactory {
+ @Mock
+ private PrimitiveTypeInfo typeInfo;
+ @Mock
+ private ExprNodeConstantDesc nodeDesc;
+
+ private DefaultExprProcessor testSubject;
+
+ @Parameters(name = "{1}")
+ public static Collection<Object[]> data() {
+ return Arrays.asList(new Object[][] {{"127", PrimitiveObjectInspectorUtils.byteTypeEntry, (byte) 127, true},
+ {"32767", PrimitiveObjectInspectorUtils.shortTypeEntry, (short) 32767, true},
+ {"2147483647", PrimitiveObjectInspectorUtils.intTypeEntry, 2147483647, true},
+ {"9223372036854775807", PrimitiveObjectInspectorUtils.longTypeEntry, 9223372036854775807L, true},
+ {"111.1", PrimitiveObjectInspectorUtils.floatTypeEntry, 111.1f, false},
+ {"111.1", PrimitiveObjectInspectorUtils.doubleTypeEntry, 111.1d, false}});
+ }
+
+ private final BigDecimal maxValue;
+ private final PrimitiveTypeEntry constType;
+ private final Object expectedValue;
+ private final boolean intType;
+
+ public TestTypeCheckProcFactory(String maxValue, PrimitiveTypeEntry constType, Object expectedValue,
+ boolean intType) {
+ this.maxValue = new BigDecimal(maxValue);
+ this.constType = constType;
+ this.expectedValue = expectedValue;
+ this.intType = intType;
+ }
+
+ @Before
+ public void init() {
+ MockitoAnnotations.initMocks(this);
+ testSubject = new DefaultExprProcessor();
+ }
+
+ public void testOneCase(Object constValue) {
+ when(nodeDesc.getValue()).thenReturn(constValue);
+ when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType);
+
+ ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc);
+
+ assertNotNull(result);
+ assertEquals(expectedValue, result.getValue());
+ }
+
+ public void testNullCase(Object constValue) {
+ when(nodeDesc.getValue()).thenReturn(constValue);
+ when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType);
+
+ ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc);
+
+ assertNull(result);
+ }
+
+ @Test
+ public void testWithSring() {
+ testOneCase(maxValue.toString());
+ }
+
+ @Test
+ public void testWithLSuffix() {
+ if (intType) {
+ testOneCase(maxValue.toString() + "L");
+ }
+ }
+
+ @Test
+ public void testWithZeroFraction() {
+ if (intType) {
+ testOneCase(maxValue.toString() + ".0");
+ }
+ }
+
+ @Test
+ public void testWithFSuffix() {
+ testOneCase(maxValue.toString() + "f");
+ }
+
+ @Test
+ public void testWithDSuffix() {
+ testOneCase(maxValue.toString() + "D");
+ }
+
+ @Test
+ public void testOverflow() {
+ if (intType) {
+ testNullCase(maxValue.add(BigDecimal.valueOf(1L)).toString());
+ }
+ }
+
+ @Test
+ public void testWithNonZeroFraction() {
+ if (intType) {
+ testNullCase("100.1");
+ }
+ }
+
+}
diff --git a/ql/src/test/results/clientpositive/infer_const_type.q.out b/ql/src/test/results/clientpositive/infer_const_type.q.out
index b736f4b..bbdb5be 100644
--- a/ql/src/test/results/clientpositive/infer_const_type.q.out
+++ b/ql/src/test/results/clientpositive/infer_const_type.q.out
@@ -108,7 +108,6 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@infertypes
#### A masked pattern was here ####
127 32767 12345 -12345 906.0 -307.0 1234
-WARNING: Comparing a bigint and a string may result in a loss of precision.
PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE
ti = '128' OR
si = 32768 OR
@@ -139,10 +138,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: infertypes
- filterExpr: ((UDFToDouble(ti) = 128.0D) or (UDFToInteger(si) = 32768) or (UDFToDouble(i) = 2.147483648E9D) or (UDFToDouble(bi) = 9.223372036854776E18D) or null) (type: boolean)
Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((UDFToDouble(bi) = 9.223372036854776E18D) or (UDFToDouble(i) = 2.147483648E9D) or (UDFToDouble(ti) = 128.0D) or (UDFToInteger(si) = 32768) or null) (type: boolean)
+ predicate: false (type: boolean)
Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string)
@@ -163,7 +161,6 @@ STAGE PLANS:
Processor Tree:
ListSink
-WARNING: Comparing a bigint and a string may result in a loss of precision.
PREHOOK: query: SELECT * FROM infertypes WHERE
ti = '128' OR
si = 32768 OR
@@ -208,10 +205,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: infertypes
- filterExpr: ((UDFToDouble(ti) = 127.0D) or (CAST( si AS decimal(5,0)) = 327) or (UDFToDouble(i) = -100.0D)) (type: boolean)
+ filterExpr: ((ti = 127Y) or (CAST( si AS decimal(5,0)) = 327) or (i = -100)) (type: boolean)
Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((CAST( si AS decimal(5,0)) = 327) or (UDFToDouble(i) = -100.0D) or (UDFToDouble(ti) = 127.0D)) (type: boolean)
+ predicate: ((CAST( si AS decimal(5,0)) = 327) or (i = -100) or (ti = 127Y)) (type: boolean)
Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string)
@@ -271,10 +268,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: infertypes
- filterExpr: ((UDFToDouble(ti) < 127.0D) and (UDFToDouble(i) > 100.0D) and (UDFToDouble(str) = 1.57D)) (type: boolean)
+ filterExpr: ((ti < 127Y) and (i > 100) and (UDFToDouble(str) = 1.57D)) (type: boolean)
Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((UDFToDouble(i) > 100.0D) and (UDFToDouble(str) = 1.57D) and (UDFToDouble(ti) < 127.0D)) (type: boolean)
+ predicate: ((UDFToDouble(str) = 1.57D) and (i > 100) and (ti < 127Y)) (type: boolean)
Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string)
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
index 5627490..dad4b1c 100644
--- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
@@ -898,7 +898,7 @@ Stage-1 FILE SYSTEM COUNTERS:
Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
- RECORDS_IN_Map_1: 0
+ RECORDS_IN_Map_1: 2100
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 0
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
@@ -908,12 +908,15 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 0
RECORDS_OUT_OPERATOR_SEL_9: 0
- RECORDS_OUT_OPERATOR_TS_0: 0
+ RECORDS_OUT_OPERATOR_TS_0: 3
Stage-1 LLAP IO COUNTERS:
- CACHE_HIT_BYTES: 823
+ CACHE_HIT_BYTES: 354
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
- SELECTED_ROWGROUPS: 0
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
Stage-1 INPUT COUNTERS:
GROUPED_INPUT_SPLITS_Map_1: 1
INPUT_DIRECTORIES_Map_1: 1
diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
index 42c2f5b..53c6cfd 100644
--- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
@@ -850,7 +850,7 @@ Stage-1 FILE SYSTEM COUNTERS:
Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
- RECORDS_IN_Map_1: 0
+ RECORDS_IN_Map_1: 2100
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 0
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
@@ -860,12 +860,15 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_3: 0
RECORDS_OUT_OPERATOR_SEL_2: 0
- RECORDS_OUT_OPERATOR_TS_0: 0
+ RECORDS_OUT_OPERATOR_TS_0: 2100
Stage-1 LLAP IO COUNTERS:
- CACHE_HIT_BYTES: 823
+ CACHE_HIT_BYTES: 354
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
- SELECTED_ROWGROUPS: 0
+ NUM_DECODED_BATCHES: 3
+ NUM_VECTOR_BATCHES: 3
+ ROWS_EMITTED: 2100
+ SELECTED_ROWGROUPS: 3
Stage-1 INPUT COUNTERS:
GROUPED_INPUT_SPLITS_Map_1: 1
INPUT_DIRECTORIES_Map_1: 1
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index ec0712b..4c7fe06 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -1640,7 +1640,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean)
+ filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean)
Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -1649,8 +1649,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col [...]
- predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
+ predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboole [...]
+ predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
@@ -1658,13 +1658,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3, 4, 0, 15, 18]
- selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double
+ projectedOutputColumnNums: [3, 4, 0, 18, 21]
+ selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double
Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+ aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -1691,14 +1691,14 @@ STAGE PLANS:
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
rowBatchContext:
dataColumnCount: 12
includeColumns: [0, 1, 2, 3, 4, 5, 7, 11]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(13,3), double, double, double, double, double]
+ scratchColumnTypeNames: [decimal(13,3), double, bigint, bigint, bigint, double, double, double, double]
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
index c83b6e6..e292490 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
@@ -1487,7 +1487,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean)
+ filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean)
Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
@@ -1495,8 +1495,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tiny [...]
- predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
+ predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = [...]
+ predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
@@ -1504,13 +1504,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3, 4, 0, 15, 18]
- selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double
+ projectedOutputColumnNums: [3, 4, 0, 18, 21]
+ selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double
Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+ aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -1536,7 +1536,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reduce Vectorization:
enabled: false
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index 1232957..738f19a 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -1480,7 +1480,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean)
+ filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean)
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1488,8 +1488,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col [...]
- predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
+ predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboole [...]
+ predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
@@ -1497,13 +1497,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3, 4, 0, 15, 18]
- selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double
+ projectedOutputColumnNums: [3, 4, 0, 18, 21]
+ selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+ aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -1528,7 +1528,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index ac8675c..c782c13 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -1621,7 +1621,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean)
+ filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1630,8 +1630,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col [...]
- predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
+ predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboole [...]
+ predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
@@ -1639,13 +1639,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3, 4, 0, 15, 18]
- selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double
+ projectedOutputColumnNums: [3, 4, 0, 18, 21]
+ selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double
Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+ aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -1671,14 +1671,14 @@ STAGE PLANS:
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
rowBatchContext:
dataColumnCount: 12
includeColumns: [0, 1, 2, 3, 4, 5, 7, 11]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(13,3), double, double, double, double, double]
+ scratchColumnTypeNames: [decimal(13,3), double, bigint, bigint, bigint, double, double, double, double]
Reducer 2
Execution mode: vectorized
Reduce Vectorization: