You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/07/31 02:19:12 UTC
[5/5] hive git commit: HIVE-20245: Vectorization: Fix NULL / Wrong
Results issues in BETWEEN / IN (Matt McCline, reviewed by Teddy Choi)
HIVE-20245: Vectorization: Fix NULL / Wrong Results issues in BETWEEN / IN (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/edc53cc0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/edc53cc0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/edc53cc0
Branch: refs/heads/master
Commit: edc53cc0d95e983c371a224943dd866210f0c65c
Parents: 65f02d2
Author: Matt McCline <mm...@hortonworks.com>
Authored: Mon Jul 30 21:18:44 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Mon Jul 30 21:18:44 2018 -0500
----------------------------------------------------------------------
.../ExpressionTemplates/ColumnBetween.txt | 162 +++
.../Decimal64ColumnBetween.txt | 50 +
.../DecimalColumnBetween.txt | 188 ++++
.../ExpressionTemplates/FilterColumnBetween.txt | 6 +-
.../ExpressionTemplates/StringColumnBetween.txt | 191 ++++
.../TimestampColumnBetween.txt | 177 +++
.../TruncStringColumnBetween.txt | 31 +
.../hive/ql/exec/vector/VectorExtractRow.java | 13 +-
.../ql/exec/vector/VectorizationContext.java | 358 +++++--
.../exec/vector/expressions/CastLongToDate.java | 93 --
.../expressions/ConstantVectorExpression.java | 223 +++-
.../expressions/Decimal64ColumnInList.java | 65 ++
.../FilterDecimal64ColumnBetween.java | 68 ++
.../FilterDecimal64ColumnInList.java | 68 ++
.../FilterDecimal64ColumnNotBetween.java | 68 ++
.../expressions/FilterLongColumnInList.java | 5 +-
.../vector/expressions/LongColumnInList.java | 4 +-
.../vector/expressions/StringColumnInList.java | 7 +-
.../vector/expressions/VectorUDFDateLong.java | 35 -
.../ql/exec/vector/udf/VectorUDFArgDesc.java | 68 +-
.../hive/ql/udf/generic/GenericUDFDate.java | 3 +-
.../hive/ql/udf/generic/GenericUDFToDate.java | 3 +-
.../ql/exec/vector/VectorRandomRowSource.java | 171 ++-
.../expressions/TestVectorArithmetic.java | 4 +-
.../vector/expressions/TestVectorBetweenIn.java | 1014 ++++++++++++++++++
.../expressions/TestVectorCastStatement.java | 21 +-
.../expressions/TestVectorDateAddSub.java | 6 +-
.../vector/expressions/TestVectorDateDiff.java | 2 +-
.../expressions/TestVectorFilterCompare.java | 2 +-
.../TestVectorGenericDateExpressions.java | 7 +-
.../expressions/TestVectorIfStatement.java | 6 +-
.../vector/expressions/TestVectorNegative.java | 2 +-
.../exec/vector/expressions/TestVectorNull.java | 513 +++++++++
.../expressions/TestVectorStringConcat.java | 2 +-
.../expressions/TestVectorStringUnary.java | 2 +-
.../vector/expressions/TestVectorSubStr.java | 2 +-
.../expressions/TestVectorTimestampExtract.java | 2 +-
.../llap/vector_annotate_stats_select.q.out | 19 +-
.../clientpositive/llap/vector_between_in.q.out | 8 +-
.../llap/vector_case_when_2.q.out | 4 +-
.../clientpositive/llap/vector_udf_inline.q.out | 2 +-
.../clientpositive/llap/vectorization_10.q.out | 2 +-
.../clientpositive/llap/vectorization_7.q.out | 4 +-
.../clientpositive/llap/vectorization_8.q.out | 4 +-
.../llap/vectorization_short_regress.q.out | 4 +-
.../clientpositive/llap/vectorized_casts.q.out | 2 +-
.../llap/vectorized_date_funcs.q.out | 12 +-
.../llap/vectorized_timestamp_funcs.q.out | 4 +-
.../parquet_vectorization_10.q.out | 2 +-
.../parquet_vectorization_7.q.out | 4 +-
.../parquet_vectorization_8.q.out | 4 +-
.../spark/parquet_vectorization_10.q.out | 2 +-
.../spark/parquet_vectorization_7.q.out | 4 +-
.../spark/parquet_vectorization_8.q.out | 4 +-
...k_vectorized_dynamic_partition_pruning.q.out | 2 +-
.../spark/vector_between_in.q.out | 8 +-
.../clientpositive/spark/vectorization_10.q.out | 2 +-
.../spark/vectorization_short_regress.q.out | 4 +-
.../spark/vectorized_timestamp_funcs.q.out | 4 +-
.../clientpositive/vector_case_when_2.q.out | 4 +-
.../clientpositive/vectorization_10.q.out | 2 +-
.../clientpositive/vectorization_7.q.out | 4 +-
.../clientpositive/vectorization_8.q.out | 4 +-
.../clientpositive/vectorized_casts.q.out | 2 +-
.../clientpositive/vectorized_date_funcs.q.out | 12 +-
.../apache/hadoop/hive/tools/GenVectorCode.java | 89 ++
66 files changed, 3475 insertions(+), 389 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt
new file mode 100644
index 0000000..1aa398a
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Output a boolean value (1 or 0, in a LongColumnVector) indicating if a column is [NOT] BETWEEN two constants; a NULL input entry gives a NULL output entry.
+ */
+public class <ClassName> extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ protected int colNum;
+ // Lower and upper bounds of the range; both are inclusive in the comparisons below.
+ protected final <OperandType> leftValue;
+ protected final <OperandType> rightValue;
+
+ public <ClassName>(int colNum, <OperandType> leftValue, <OperandType> rightValue, int outputColumnNum) {
+ super(outputColumnNum);
+ this.colNum = colNum;
+ this.leftValue = leftValue;
+ this.rightValue = rightValue;
+ }
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ colNum = -1;
+ leftValue = 0;
+ rightValue = 0;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ int n = batch.size;
+ <OperandType>[] vector = inputColVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+ // Repeating input: only entry 0 is evaluated and the output is marked as repeating.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[0] = false;
+ final <OperandType> repeatValue = vector[0];
+ outputVector[0] = <OptionalNot>(repeatValue < leftValue || repeatValue > rightValue) ? 0 : 1;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[i] = false;
+ final <OperandType> value = vector[i];
+ outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final <OperandType> value = vector[i];
+ outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ final <OperandType> value = vector[i];
+ outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1;
+ }
+ }
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs: copy each input isNull flag to the output. The comparison is evaluated for NULL entries too.
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputIsNull[i] = inputIsNull[i];
+ final <OperandType> value = vector[i];
+ outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1;
+ }
+ } else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+ for(int i = 0; i != n; i++) {
+ final <OperandType> value = vector[i];
+ outputVector[i] = <OptionalNot>(value < leftValue || value > rightValue) ? 0 : 1;
+ }
+ }
+ }
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, colNum) + ", left " + leftValue + ", right " + rightValue;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // return null since this will be handled as a special case in VectorizationContext
+ return null;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt
new file mode 100644
index 0000000..1763cbd
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+
+public class <ClassName> extends <BaseClassName> {
+ private static final long serialVersionUID = 1L;
+
+ public <ClassName>() {
+ super();
+ }
+ // The bounds arrive as long values and are passed unchanged to the base class constructor.
+ public <ClassName>(int colNum, long leftValue, long rightValue, int outputColumnNum) {
+ super(colNum, leftValue, rightValue, outputColumnNum);
+ }
+ // Display string shows each bound twice: as the raw long and as the decimal text decoded from it.
+ @Override
+ public String vectorExpressionParameters() {
+ DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1];
+ HiveDecimalWritable writable1 = new HiveDecimalWritable();
+ writable1.deserialize64(leftValue, decimalTypeInfo1.scale());
+ // The right bound is decoded the same way, using the scale of inputTypeInfos[2].
+ DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2];
+ HiveDecimalWritable writable2 = new HiveDecimalWritable();
+ writable2.deserialize64(rightValue, decimalTypeInfo2.scale());
+ return
+ getColumnParamString(0, colNum) +
+ ", decimal64Left " + leftValue + ", decimalLeft " + writable1.toString() +
+ ", decimal64Right " + rightValue + ", decimalRight " + writable2.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt
new file mode 100644
index 0000000..6fd1301
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+/**
+ * Output a boolean value (1 or 0, in a LongColumnVector) indicating if a decimal column is [NOT] BETWEEN two constants; a NULL input entry gives a NULL output entry.
+ */
+public class <ClassName> extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+
+ private final int inputColumn;
+ // Lower and upper bounds of the range; both are inclusive in the comparisons below.
+ protected final HiveDecimal leftValue;
+ protected final HiveDecimal rightValue;
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ inputColumn = -1;
+ leftValue = null;
+ rightValue = null;
+ }
+
+ /**
+ * Creates the expression for input column colNum; both bounds are supplied here and are stored in final fields.
+ */
+ public <ClassName>(int colNum, HiveDecimal leftValue, HiveDecimal rightValue,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.inputColumn = colNum;
+ this.leftValue = leftValue;
+ this.rightValue = rightValue;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColumnVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ int n = batch.size;
+ HiveDecimalWritable[] vector = inputColumnVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+ // Repeating input: only entry 0 is evaluated and the output is marked as repeating.
+ if (inputColumnVector.isRepeating) {
+ if (inputColumnVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ final HiveDecimalWritable repeatValue = vector[0];
+ outputVector[0] =
+ <OptionalNot>(DecimalUtil.compare(repeatValue, leftValue) < 0 ||
+ DecimalUtil.compare(repeatValue, rightValue) > 0) ? 0 : 1;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[i] = false;
+ final HiveDecimalWritable value = vector[i];
+ outputVector[i] =
+ <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 ||
+ DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final HiveDecimalWritable value = vector[i];
+ outputVector[i] =
+ <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 ||
+ DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ final HiveDecimalWritable value = vector[i];
+ outputVector[i] =
+ <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 ||
+ DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs: copy each input isNull flag to the output and compare only the non-NULL entries.
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ final HiveDecimalWritable value = vector[i];
+ outputVector[i] =
+ <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 ||
+ DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ } else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!inputIsNull[i]) {
+ final HiveDecimalWritable value = vector[i];
+ outputVector[i] =
+ <OptionalNot>(DecimalUtil.compare(value, leftValue) < 0 ||
+ DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (BETWEEN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return
+ getColumnParamString(0, inputColumn) +
+ ", left " + leftValue.toString() + ", right " + rightValue.toString();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
index 0664cbf..47dd42f 100644
--- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
+++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
@@ -34,12 +34,12 @@ public class <ClassName> extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
+ protected final int colNum;
// The comparison is of the form "column BETWEEN leftValue AND rightValue".
// NOTE: These can be set later by FilterColumnBetweenDynamicValue.txt so they are not final.
- private <OperandType> leftValue;
- private <OperandType> rightValue;
+ protected <OperandType> leftValue;
+ protected <OperandType> rightValue;
public <ClassName>(int colNum, <OperandType> leftValue, <OperandType> rightValue) {
super();
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt
new file mode 100644
index 0000000..798cb95
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+import java.util.Arrays;
+
+public class <ClassName> extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ // Outputs 1 or 0 (in a LongColumnVector) for "column [NOT] BETWEEN left AND right" on byte-string values; inputCol is the input column number.
+ protected final int inputCol;
+ // Lower and upper bounds as raw bytes; both are inclusive in the StringExpr.compare tests below.
+ private final byte[] left;
+ private final byte[] right;
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ inputCol = -1;
+ left = null;
+ right = null;
+ }
+
+ public <ClassName>(int colNum, byte[] left, byte[] right, int outputColumnNum) {
+ super(outputColumnNum);
+ this.inputCol = colNum;
+ this.left = left;
+ this.right = right;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColVector.isNull;
+ int n = batch.size;
+ byte[][] vector = inputColVector.vector;
+ int[] start = inputColVector.start;
+ int[] length = inputColVector.length;
+ long[] outputVector = outputColVector.vector;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+ // Repeating input: only entry 0 is evaluated and the output is marked as repeating.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[0] = false;
+ final byte[] repeatBytes = vector[0];
+ final int repeatStart = start[0];
+ final int repeatLength = length[0];
+ outputVector[0] =
+ <OptionalNot>(StringExpr.compare(repeatBytes, repeatStart, repeatLength, left, 0, left.length) < 0 ||
+ StringExpr.compare(right, 0, right.length, repeatBytes, repeatStart, repeatLength) < 0) ? 0 : 1;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[i] = false;
+ final byte[] valueBytes = vector[i];
+ final int valueStart = start[i];
+ final int valueLength = length[i];
+ outputVector[i] =
+ <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+ StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final byte[] valueBytes = vector[i];
+ final int valueStart = start[i];
+ final int valueLength = length[i];
+ outputVector[i] =
+ <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+ StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ final byte[] valueBytes = vector[i];
+ final int valueStart = start[i];
+ final int valueLength = length[i];
+ outputVector[i] =
+ <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+ StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+ }
+ }
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs: copy each input isNull flag to the output and compare only the non-NULL entries.
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputColVector.isNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ final byte[] valueBytes = vector[i];
+ final int valueStart = start[i];
+ final int valueLength = length[i];
+ outputVector[i] =
+ <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+ StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+ }
+ }
+ } else {
+ System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!inputIsNull[i]) {
+ final byte[] valueBytes = vector[i];
+ final int valueStart = start[i];
+ final int valueLength = length[i];
+ outputVector[i] =
+ <OptionalNot>(StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+ StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (BETWEEN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return
+ getColumnParamString(0, inputCol) +
+ ", left " + displayUtf8Bytes(left) + ", right " + displayUtf8Bytes(right);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt
new file mode 100644
index 0000000..db42577
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.HashSet;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Output a boolean value (1 or 0, in a LongColumnVector) indicating if a timestamp column is [NOT] BETWEEN two constants; a NULL input entry gives a NULL output entry.
+ */
+public class <ClassName> extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+
+ private final int inputCol;
+ // Lower and upper bounds of the range; both are inclusive in the comparisons below.
+ private final Timestamp leftValue;
+ private final Timestamp rightValue;
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ inputCol = -1;
+ leftValue = null;
+ rightValue = null;
+ }
+
+ /**
+ * Creates the expression for input column colNum; both bounds are supplied here and are stored in final fields.
+ */
+ public <ClassName>(int colNum, Timestamp leftValue, Timestamp rightValue, int outputColumnNum) {
+ super(outputColumnNum);
+ this.inputCol = colNum;
+ this.leftValue = leftValue;
+ this.rightValue = rightValue;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ int n = batch.size;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+ // Repeating input: only entry 0 is evaluated and the output is marked as repeating.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[0] = false;
+ outputVector[0] =
+ <OptionalNot>(inputColVector.compareTo(0, leftValue) < 0 ||
+ inputColVector.compareTo(0, rightValue) > 0) ? 0 : 1;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Mark the entry as non-NULL before storing the comparison result.
+ outputIsNull[i] = false;
+ outputVector[i] =
+ <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 ||
+ inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] =
+ <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 ||
+ inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputVector[i] =
+ <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 ||
+ inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs: copy each input isNull flag to the output and compare only the non-NULL entries.
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ outputVector[i] =
+ <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 ||
+ inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ } else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!inputIsNull[i]) {
+ outputVector[i] =
+ <OptionalNot>(inputColVector.compareTo(i, leftValue) < 0 ||
+ inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (BETWEEN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return
+ getColumnParamString(0, inputCol) +
+ ", left " + leftValue.toString() + ", right " + rightValue.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt
new file mode 100644
index 0000000..4ab8440
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+public class <ClassName> extends <BaseClassName> { // placeholders; presumably substituted when the template is expanded
+ private static final long serialVersionUID = 1L;
+
+ public <ClassName>() { // no-argument constructor; only calls the base-class constructor
+ super();
+ }
+
+ public <ClassName>(int colNum, byte[] left, byte[] right, int outputColumnNum) { // forwards all four arguments unchanged
+ super(colNum, left, right, outputColumnNum);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index f5f4d72..e1482e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -183,12 +183,19 @@ public class VectorExtractRow {
* @param logicalColumnIndex
* @return
*/
- private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) {
+ private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex,
+ int logicalColumnIndex) {
final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
final ColumnVector colVector = batch.cols[projectionColumnNum];
- return extractRowColumn(
- colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex], batchIndex);
+ final TypeInfo typeInfo = typeInfos[logicalColumnIndex];
+ // try {
+ return extractRowColumn(
+ colVector, typeInfo, objectInspectors[logicalColumnIndex], batchIndex);
+ // } catch (Exception e){
+ // throw new RuntimeException("Error evaluating column number " + projectionColumnNum +
+ // ", typeInfo " + typeInfo.toString() + ", batchIndex " + batchIndex);
+ // }
}
public Object extractRowColumn(
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index e541217..97e4059 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1381,32 +1381,7 @@ public class VectorizationContext {
}
}
- switch (vectorArgType) {
- case INT_FAMILY:
- return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue(), typeInfo);
- case DATE:
- return new ConstantVectorExpression(outCol, DateWritableV2.dateToDays((Date) constantValue), typeInfo);
- case TIMESTAMP:
- return new ConstantVectorExpression(outCol,
- ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(), typeInfo);
- case INTERVAL_YEAR_MONTH:
- return new ConstantVectorExpression(outCol,
- ((HiveIntervalYearMonth) constantValue).getTotalMonths(), typeInfo);
- case INTERVAL_DAY_TIME:
- return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue, typeInfo);
- case FLOAT_FAMILY:
- return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue(), typeInfo);
- case DECIMAL:
- return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeInfo);
- case STRING:
- return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes(), typeInfo);
- case CHAR:
- return new ConstantVectorExpression(outCol, ((HiveChar) constantValue), typeInfo);
- case VARCHAR:
- return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue), typeInfo);
- default:
- throw new HiveException("Unsupported constant type: " + typeName + ", object class " + constantValue.getClass().getSimpleName());
- }
+ return ConstantVectorExpression.create(outCol, constantValue, typeInfo);
}
private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr,
@@ -1431,33 +1406,30 @@ public class VectorizationContext {
*/
private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
throws HiveException {
+
+ if (childExprList.size() != 1) {
+ return null;
+ }
ExprNodeDesc childExpr = childExprList.get(0);
+ if (!(childExpr instanceof ExprNodeColumnDesc)) {
+
+ // Some vector operators like VectorSelectOperator optimize IdentityExpression out of
+ // their vector expression list and don't evaluate the children, so just return the
+ // child expression here instead of IdentityExpression.
+ return getVectorExpression(childExpr);
+ }
+
int identityCol;
TypeInfo identityTypeInfo;
DataTypePhysicalVariation identityDataTypePhysicalVariation;
- VectorExpression v1 = null;
- if (childExpr instanceof ExprNodeGenericFuncDesc) {
- v1 = getVectorExpression(childExpr);
- identityCol = v1.getOutputColumnNum();
- identityTypeInfo = v1.getOutputTypeInfo();
- identityDataTypePhysicalVariation = v1.getOutputDataTypePhysicalVariation();
- } else if (childExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
- identityCol = getInputColumnIndex(colDesc.getColumn());
- identityTypeInfo = colDesc.getTypeInfo();
-
- // CONSIDER: Validation of type information
-
- identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol);
- } else {
- throw new HiveException("Expression not supported: "+childExpr);
- }
- VectorExpression ve = new IdentityExpression(identityCol);
+ ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
+ identityCol = getInputColumnIndex(colDesc.getColumn());
+ identityTypeInfo = colDesc.getTypeInfo();
- if (v1 != null) {
- ve.setChildExpressions(new VectorExpression [] {v1});
- }
+ identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol);
+
+ VectorExpression ve = new IdentityExpression(identityCol);
ve.setInputTypeInfos(identityTypeInfo);
ve.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
@@ -1468,7 +1440,6 @@ public class VectorizationContext {
return ve;
}
-
private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException {
if (exprNodeDesc instanceof ExprNodeColumnDesc) {
int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc);
@@ -1626,6 +1597,20 @@ public class VectorizationContext {
VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
+ return createDecimal64VectorExpression(
+ vectorClass, childExprs, childrenMode,
+ isDecimal64ScaleEstablished, decimal64ColumnScale,
+ returnTypeInfo, returnDataTypePhysicalVariation);
+ }
+
+ private VectorExpression createDecimal64VectorExpression(Class<?> vectorClass,
+ List<ExprNodeDesc> childExprs, VectorExpressionDescriptor.Mode childrenMode,
+ boolean isDecimal64ScaleEstablished, int decimal64ColumnScale,
+ TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation)
+ throws HiveException {
+
+ final int numChildren = childExprs.size();
+
/*
* Custom build arguments.
*/
@@ -1659,8 +1644,7 @@ public class VectorizationContext {
children.add(filterExpr);
}
arguments[i] = colIndex;
- } else {
- Preconditions.checkState(childExpr instanceof ExprNodeConstantDesc);
+ } else if (childExpr instanceof ExprNodeConstantDesc) {
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childExpr;
if (typeInfo instanceof DecimalTypeInfo) {
if (!isDecimal64ScaleEstablished) {
@@ -1681,6 +1665,8 @@ public class VectorizationContext {
(scalarValue == null) ?
getConstantVectorExpression(null, typeInfo, childrenMode) : scalarValue;
}
+ } else {
+ return null;
}
}
@@ -2040,8 +2026,28 @@ public class VectorizationContext {
return ve;
}
+ // Handle the odd case of TO_DATE(date) or CAST(date AS DATE); returns null when it does not apply.
+ private VectorExpression getIdentityForDateToDate(List<ExprNodeDesc> childExprs,
+ TypeInfo returnTypeInfo)
+ throws HiveException {
+ if (childExprs.size() != 1) {
+ return null; // not a single-argument call
+ }
+ TypeInfo childTypeInfo = childExprs.get(0).getTypeInfo();
+ if (childTypeInfo.getCategory() != Category.PRIMITIVE ||
+ ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory() != PrimitiveCategory.DATE) {
+ return null; // the argument is not a DATE
+ }
+ if (returnTypeInfo.getCategory() != Category.PRIMITIVE ||
+ ((PrimitiveTypeInfo) returnTypeInfo).getPrimitiveCategory() != PrimitiveCategory.DATE) {
+ return null; // the result is not a DATE
+ }
+ return getIdentityExpression(childExprs); // DATE in, DATE out: delegate to the identity handling
+ }
+
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
- List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
+ List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
+ throws HiveException {
List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
childExpr = castedChildren;
@@ -2049,8 +2055,8 @@ public class VectorizationContext {
//First handle special cases. If one of the special case methods cannot handle it,
// it returns null.
VectorExpression ve = null;
- if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) {
- ve = getBetweenFilterExpression(childExpr, mode, returnType);
+ if (udf instanceof GenericUDFBetween) {
+ ve = getBetweenExpression(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFIn) {
ve = getInExpression(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFIf) {
@@ -2083,6 +2089,8 @@ public class VectorizationContext {
ve = getCastToBinary(childExpr, returnType);
} else if (udf instanceof GenericUDFTimestamp) {
ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
+ } else if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) {
+ ve = getIdentityForDateToDate(childExpr, returnType);
}
if (ve != null) {
return ve;
@@ -2444,14 +2452,42 @@ public class VectorizationContext {
expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
((IDoubleInExpr) expr).setInListValues(inValsD);
} else if (isDecimalFamily(colType)) {
- cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class);
- HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
- for (int i = 0; i != inValsD.length; i++) {
- inValsD[i] = (HiveDecimal) getVectorTypeScalarValue(
- (ExprNodeConstantDesc) childrenForInList.get(i));
+
+ final boolean tryDecimal64 =
+ checkExprNodeDescForDecimal64(colExpr);
+ if (tryDecimal64) {
+ cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
+ FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class);
+ final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale();
+ expr = createDecimal64VectorExpression(
+ cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION,
+ /* isDecimal64ScaleEstablished */ true,
+ /* decimal64ColumnScale */ scale,
+ returnType, DataTypePhysicalVariation.NONE);
+ if (expr != null) {
+ long[] inVals = new long[childrenForInList.size()];
+ for (int i = 0; i != inVals.length; i++) {
+ ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childrenForInList.get(i);
+ HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
+ final long decimal64Scalar =
+ new HiveDecimalWritable(hiveDecimal).serialize64(scale);
+ inVals[i] = decimal64Scalar;
+ }
+ ((ILongInExpr) expr).setInListValues(inVals);
+ }
+ }
+ if (expr == null) {
+ cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
+ FilterDecimalColumnInList.class : DecimalColumnInList.class);
+ expr = createVectorExpression(
+ cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
+ for (int i = 0; i != inValsD.length; i++) {
+ inValsD[i] = (HiveDecimal) getVectorTypeScalarValue(
+ (ExprNodeConstantDesc) childrenForInList.get(i));
+ }
+ ((IDecimalInExpr) expr).setInListValues(inValsD);
}
- expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
- ((IDecimalInExpr) expr).setInListValues(inValsD);
} else if (isDateFamily(colType)) {
cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
long[] inVals = new long[childrenForInList.size()];
@@ -2973,21 +3009,32 @@ public class VectorizationContext {
return null;
}
- /* Get a [NOT] BETWEEN filter expression. This is treated as a special case
+ private VectorExpression tryDecimal64Between(VectorExpressionDescriptor.Mode mode, boolean isNot,
+ ExprNodeDesc colExpr, List<ExprNodeDesc> childrenAfterNot, TypeInfo returnTypeInfo)
+ throws HiveException {
+ final Class<?> cl; // Decimal64 [NOT] BETWEEN class, chosen by mode (projection vs. filter) and isNot
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = (isNot ? Decimal64ColumnNotBetween.class : Decimal64ColumnBetween.class);
+ } else {
+ cl = (isNot ? FilterDecimal64ColumnNotBetween.class : FilterDecimal64ColumnBetween.class);
+ }
+ return // may be null; callers check for null and fall back to the non-Decimal64 classes
+ createDecimal64VectorExpression(
+ cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION,
+ /* isDecimal64ScaleEstablished */ true,
+ /* decimal64ColumnScale */ ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale(),
+ returnTypeInfo, DataTypePhysicalVariation.NONE);
+ }
+
+ /* Get a [NOT] BETWEEN filter or projection expression. This is treated as a special case
* because the NOT is actually specified in the expression tree as the first argument,
* and we don't want any runtime cost for that. So creating the VectorExpression
* needs to be done differently than the standard way where all arguments are
* passed to the VectorExpression constructor.
*/
- private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
- throws HiveException {
-
- if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
-
- // Projection mode is not yet supported for [NOT] BETWEEN. Return null so Vectorizer
- // knows to revert to row-at-a-time execution.
- return null;
- }
+ private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
+ VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
+ throws HiveException {
boolean hasDynamicValues = false;
@@ -2995,6 +3042,11 @@ public class VectorizationContext {
if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) &&
(childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
hasDynamicValues = true;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+
+ // Projection mode is not applicable.
+ return null;
+ }
} else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) ||
!(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
return null;
@@ -3021,7 +3073,7 @@ public class VectorizationContext {
}
List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();
-
+ boolean wereCastUdfs = false;
for (ExprNodeDesc desc: childExpr.subList(1, 4)) {
if (commonType.equals(desc.getTypeInfo())) {
castChildren.add(desc);
@@ -3030,6 +3082,7 @@ public class VectorizationContext {
ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf,
Arrays.asList(new ExprNodeDesc[] { desc }));
castChildren.add(engfd);
+ wereCastUdfs = true;
}
}
String colType = commonType.getTypeName();
@@ -3040,55 +3093,141 @@ public class VectorizationContext {
// determine class
Class<?> cl = null;
if (isIntFamily(colType) && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterLongColumnBetweenDynamicValue.class :
- FilterLongColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = LongColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterLongColumnBetweenDynamicValue.class :
+ FilterLongColumnBetween.class);
+ }
} else if (isIntFamily(colType) && notKeywordPresent) {
- cl = FilterLongColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = LongColumnNotBetween.class;
+ } else {
+ cl = FilterLongColumnNotBetween.class;
+ }
} else if (isFloatFamily(colType) && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterDoubleColumnBetweenDynamicValue.class :
- FilterDoubleColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = DoubleColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterDoubleColumnBetweenDynamicValue.class :
+ FilterDoubleColumnBetween.class);
+ }
} else if (isFloatFamily(colType) && notKeywordPresent) {
- cl = FilterDoubleColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = DoubleColumnNotBetween.class;
+ } else {
+ cl = FilterDoubleColumnNotBetween.class;
+ }
} else if (colType.equals("string") && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterStringColumnBetweenDynamicValue.class :
- FilterStringColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = StringColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterStringColumnBetweenDynamicValue.class :
+ FilterStringColumnBetween.class);
+ }
} else if (colType.equals("string") && notKeywordPresent) {
- cl = FilterStringColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = StringColumnNotBetween.class;
+ } else {
+ cl = FilterStringColumnNotBetween.class;
+ }
} else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterVarCharColumnBetweenDynamicValue.class :
- FilterVarCharColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = VarCharColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterVarCharColumnBetweenDynamicValue.class :
+ FilterVarCharColumnBetween.class);
+ }
} else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
- cl = FilterVarCharColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = VarCharColumnNotBetween.class;
+ } else {
+ cl = FilterVarCharColumnNotBetween.class;
+ }
} else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterCharColumnBetweenDynamicValue.class :
- FilterCharColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = CharColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterCharColumnBetweenDynamicValue.class :
+ FilterCharColumnBetween.class);
+ }
} else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
- cl = FilterCharColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = CharColumnNotBetween.class;
+ } else {
+ cl = FilterCharColumnNotBetween.class;
+ }
} else if (colType.equals("timestamp") && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterTimestampColumnBetweenDynamicValue.class :
- FilterTimestampColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = TimestampColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterTimestampColumnBetweenDynamicValue.class :
+ FilterTimestampColumnBetween.class);
+ }
} else if (colType.equals("timestamp") && notKeywordPresent) {
- cl = FilterTimestampColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = TimestampColumnNotBetween.class;
+ } else {
+ cl = FilterTimestampColumnNotBetween.class;
+ }
} else if (isDecimalFamily(colType) && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterDecimalColumnBetweenDynamicValue.class :
- FilterDecimalColumnBetween.class);
+ final boolean tryDecimal64 =
+ checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
+ if (tryDecimal64) {
+ VectorExpression decimal64VecExpr =
+ tryDecimal64Between(
+ mode, /* isNot */ false, colExpr, childrenAfterNot,
+ returnType);
+ if (decimal64VecExpr != null) {
+ return decimal64VecExpr;
+ }
+ }
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = DecimalColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterDecimalColumnBetweenDynamicValue.class :
+ FilterDecimalColumnBetween.class);
+ }
} else if (isDecimalFamily(colType) && notKeywordPresent) {
- cl = FilterDecimalColumnNotBetween.class;
+ final boolean tryDecimal64 =
+ checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
+ if (tryDecimal64) {
+ VectorExpression decimal64VecExpr =
+ tryDecimal64Between(
+ mode, /* isNot */ true, colExpr, childrenAfterNot, returnType);
+ if (decimal64VecExpr != null) {
+ return decimal64VecExpr;
+ }
+ }
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = DecimalColumnNotBetween.class;
+ } else {
+ cl = FilterDecimalColumnNotBetween.class;
+ }
} else if (isDateFamily(colType) && !notKeywordPresent) {
- cl = (hasDynamicValues ?
- FilterDateColumnBetweenDynamicValue.class :
- FilterLongColumnBetween.class);
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = LongColumnBetween.class;
+ } else {
+ cl = (hasDynamicValues ?
+ FilterDateColumnBetweenDynamicValue.class :
+ FilterLongColumnBetween.class);
+ }
} else if (isDateFamily(colType) && notKeywordPresent) {
- cl = FilterLongColumnNotBetween.class;
+ if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+ cl = LongColumnNotBetween.class;
+ } else {
+ cl = FilterLongColumnNotBetween.class;
+ }
}
- return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ return createVectorExpression(
+ cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
private boolean isCondExpr(ExprNodeDesc exprNodeDesc) {
@@ -3379,11 +3518,12 @@ public class VectorizationContext {
argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
} else if (child instanceof ExprNodeConstantDesc) {
// this is a constant (or null)
- if (child.getTypeInfo().getCategory() != Category.PRIMITIVE) {
+ if (child.getTypeInfo().getCategory() != Category.PRIMITIVE &&
+ child.getTypeInfo().getCategory() != Category.STRUCT) {
// Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
throw new HiveException(
- "Unable to vectorize custom UDF. Complex type constants not supported: " + child);
+ "Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + child);
}
argDescs[i].setConstant((ExprNodeConstantDesc) child);
} else if (child instanceof ExprNodeDynamicValueDesc) {
@@ -3523,7 +3663,11 @@ public class VectorizationContext {
private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc)
throws HiveException {
Object o = getScalarValue(constDesc);
- if (o instanceof Integer) {
+ if (o instanceof Byte) {
+ return (Byte) o;
+ } if (o instanceof Short) {
+ return (Short) o;
+ } else if (o instanceof Integer) {
return (Integer) o;
} else if (o instanceof Long) {
return (Long) o;
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
deleted file mode 100644
index f99bd69..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-
-import java.sql.Date;
-
-/**
- * Casts a timestamp and date vector to a date vector.
- */
-public class CastLongToDate extends VectorExpression {
- private static final long serialVersionUID = 1L;
-
- private int inputColumn;
- private transient Date date = new Date(0);
-
- public CastLongToDate() {
- super();
- }
-
- public CastLongToDate(int inputColumn, int outputColumnNum) {
- super(outputColumnNum);
- this.inputColumn = inputColumn;
- }
-
- @Override
- public void evaluate(VectorizedRowBatch batch) throws HiveException {
-
- if (childExpressions != null) {
- super.evaluateChildren(batch);
- }
-
- LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn];
- int[] sel = batch.selected;
- int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
-
- if (n == 0) {
-
- // Nothing to do
- return;
- }
-
- PrimitiveCategory primitiveCategory =
- ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
- switch (primitiveCategory) {
- case DATE:
- inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV);
- break;
- default:
- throw new Error("Unsupported input type " + primitiveCategory.name());
- }
- }
-
- @Override
- public String vectorExpressionParameters() {
- return getColumnParamString(0, inputColumn);
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
- b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
- .setNumArguments(1)
- .setArgumentTypes(
- VectorExpressionDescriptor.ArgumentType.DATE)
- .setInputExpressionTypes(
- VectorExpressionDescriptor.InputExpressionType.COLUMN);
- return b.build();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
index 8ae8a54..0a16e08 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
@@ -20,14 +20,23 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -45,6 +54,7 @@ public class ConstantVectorExpression extends VectorExpression {
private HiveDecimal decimalValue = null;
private Timestamp timestampValue = null;
private HiveIntervalDayTime intervalDayTimeValue = null;
+ private ConstantVectorExpression[] structValue;
private boolean isNullValue = false;
private final ColumnVector.Type type;
@@ -122,15 +132,135 @@ public class ConstantVectorExpression extends VectorExpression {
}
/*
+ public static VectorExpression createList(int outputColumnNum, Object value, TypeInfo outputTypeInfo)
+ throws HiveException {
+ ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+ result.setListValue(value);
+ return result;
+ }
+
+ public static VectorExpression createMap(int outputColumnNum, Object value, TypeInfo outputTypeInfo)
+ throws HiveException {
+ ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+ result.setMapValue(value);
+ return result;
+ }
+ */
+
+ public static ConstantVectorExpression createStruct(int outputColumnNum, Object value,
+ TypeInfo outputTypeInfo)
+ throws HiveException { // factory for a STRUCT constant; used by create() for Category.STRUCT
+ ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+ result.setStructValue(value); // setStructValue is not shown in this hunk; the HiveException presumably originates there — confirm
+ return result;
+ }
+
+ /*
+ public static VectorExpression createUnion(int outputColumnNum, Object value, TypeInfo outputTypeInfo)
+ throws HiveException {
+ ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+ result.setUnionValue(value);
+ return result;
+ }
+ */
+
+ public static ConstantVectorExpression create(int outputColumnNum, Object constantValue, TypeInfo outputTypeInfo)
+ throws HiveException {
+
+ if (constantValue == null) {
+ return new ConstantVectorExpression(outputColumnNum, outputTypeInfo, true);
+ }
+
+ Category category = outputTypeInfo.getCategory();
+ switch (category) {
+ case PRIMITIVE:
+ {
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) outputTypeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ if (((Boolean) constantValue).booleanValue()) {
+ return new ConstantVectorExpression(outputColumnNum, 1, outputTypeInfo);
+ } else {
+ return new ConstantVectorExpression(outputColumnNum, 0, outputTypeInfo);
+ }
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new ConstantVectorExpression(
+ outputColumnNum, ((Number) constantValue).longValue(), outputTypeInfo);
+ case FLOAT:
+ case DOUBLE:
+ return new ConstantVectorExpression(
+ outputColumnNum, ((Number) constantValue).doubleValue(), outputTypeInfo);
+ case DATE:
+ return new ConstantVectorExpression(
+ outputColumnNum, DateWritableV2.dateToDays((Date) constantValue), outputTypeInfo);
+ case TIMESTAMP:
+ return new ConstantVectorExpression(
+ outputColumnNum,
+ ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(),
+ outputTypeInfo);
+ case DECIMAL:
+ return new ConstantVectorExpression(
+ outputColumnNum, (HiveDecimal) constantValue, outputTypeInfo);
+ case STRING:
+ return new ConstantVectorExpression(
+ outputColumnNum, ((String) constantValue).getBytes(), outputTypeInfo);
+ case VARCHAR:
+ return new ConstantVectorExpression(
+ outputColumnNum, ((HiveVarchar) constantValue), outputTypeInfo);
+ case CHAR:
+ return new ConstantVectorExpression(
+ outputColumnNum, ((HiveChar) constantValue), outputTypeInfo);
+ case BINARY:
+ return new ConstantVectorExpression(
+ outputColumnNum, ((byte[]) constantValue), outputTypeInfo);
+ case INTERVAL_YEAR_MONTH:
+ return new ConstantVectorExpression(
+ outputColumnNum,
+ ((HiveIntervalYearMonth) constantValue).getTotalMonths(),
+ outputTypeInfo);
+ case INTERVAL_DAY_TIME:
+ return new ConstantVectorExpression(
+ outputColumnNum,
+ (HiveIntervalDayTime) constantValue,
+ outputTypeInfo);
+ case VOID:
+ case TIMESTAMPLOCALTZ:
+ case UNKNOWN:
+ default:
+ throw new RuntimeException("Unexpected primitive category " + primitiveCategory);
+ }
+ }
+ // case LIST:
+ // return ConstantVectorExpression.createList(
+ // outputColumnNum, constantValue, outputTypeInfo);
+ // case MAP:
+ // return ConstantVectorExpression.createMap(
+ // outputColumnNum, constantValue, outputTypeInfo);
+ case STRUCT:
+ return ConstantVectorExpression.createStruct(
+ outputColumnNum, constantValue, outputTypeInfo);
+ // case UNION:
+ // return ConstantVectorExpression.createUnion(
+ // outputColumnNum, constantValue, outputTypeInfo);
+ default:
+ throw new RuntimeException("Unexpected category " + category);
+ }
+ }
+
+ /*
* In the following evaluate* methods, since we are supporting scratch column reuse, we must
* assume the column may have noNulls of false and some isNull entries true.
*
* So, do a proper assignments.
*/
- private void evaluateLong(VectorizedRowBatch vrg) {
+ private void evaluateLong(ColumnVector colVector) {
- LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum];
+ LongColumnVector cv = (LongColumnVector) colVector;
cv.isRepeating = true;
if (!isNullValue) {
cv.isNull[0] = false;
@@ -141,8 +271,8 @@ public class ConstantVectorExpression extends VectorExpression {
}
}
- private void evaluateDouble(VectorizedRowBatch vrg) {
- DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum];
+ private void evaluateDouble(ColumnVector colVector) {
+ DoubleColumnVector cv = (DoubleColumnVector) colVector;
cv.isRepeating = true;
if (!isNullValue) {
cv.isNull[0] = false;
@@ -153,8 +283,8 @@ public class ConstantVectorExpression extends VectorExpression {
}
}
- private void evaluateBytes(VectorizedRowBatch vrg) {
- BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum];
+ private void evaluateBytes(ColumnVector colVector) {
+ BytesColumnVector cv = (BytesColumnVector) colVector;
cv.isRepeating = true;
cv.initBuffer();
if (!isNullValue) {
@@ -166,8 +296,8 @@ public class ConstantVectorExpression extends VectorExpression {
}
}
- private void evaluateDecimal(VectorizedRowBatch vrg) {
- DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum];
+ private void evaluateDecimal(ColumnVector colVector) {
+ DecimalColumnVector dcv = (DecimalColumnVector) colVector;
dcv.isRepeating = true;
if (!isNullValue) {
dcv.isNull[0] = false;
@@ -178,8 +308,8 @@ public class ConstantVectorExpression extends VectorExpression {
}
}
- private void evaluateTimestamp(VectorizedRowBatch vrg) {
- TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum];
+ private void evaluateTimestamp(ColumnVector colVector) {
+ TimestampColumnVector tcv = (TimestampColumnVector) colVector;
tcv.isRepeating = true;
if (!isNullValue) {
tcv.isNull[0] = false;
@@ -190,8 +320,8 @@ public class ConstantVectorExpression extends VectorExpression {
}
}
- private void evaluateIntervalDayTime(VectorizedRowBatch vrg) {
- IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum];
+ private void evaluateIntervalDayTime(ColumnVector colVector) {
+ IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) colVector;
dcv.isRepeating = true;
if (!isNullValue) {
dcv.isNull[0] = false;
@@ -202,8 +332,23 @@ public class ConstantVectorExpression extends VectorExpression {
}
}
- private void evaluateVoid(VectorizedRowBatch vrg) {
- VoidColumnVector voidColVector = (VoidColumnVector) vrg.cols[outputColumnNum];
+ /*
+  * Fill a struct output vector with this constant. The vector is always marked repeating.
+  * For a NULL constant only entry 0 is flagged null; otherwise each field constant is
+  * evaluated into the child vector at the same position.
+  */
+ private void evaluateStruct(ColumnVector colVector) {
+   final StructColumnVector structColVector = (StructColumnVector) colVector;
+   structColVector.isRepeating = true;
+   if (isNullValue) {
+     structColVector.isNull[0] = true;
+     structColVector.noNulls = false;
+     return;
+   }
+   structColVector.isNull[0] = false;
+   int fieldNum = 0;
+   for (ConstantVectorExpression fieldConstant : structValue) {
+     fieldConstant.evaluateColumn(structColVector.fields[fieldNum++]);
+   }
+ }
+
+ private void evaluateVoid(ColumnVector colVector) {
+ VoidColumnVector voidColVector = (VoidColumnVector) colVector;
voidColVector.isRepeating = true;
voidColVector.isNull[0] = true;
voidColVector.noNulls = false;
@@ -211,27 +356,34 @@ public class ConstantVectorExpression extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch vrg) {
+ evaluateColumn(vrg.cols[outputColumnNum]); // column-level fill is shared with struct fields
+ }
+
+ private void evaluateColumn(ColumnVector colVector) {
switch (type) {
case LONG:
- evaluateLong(vrg);
+ evaluateLong(colVector);
break;
case DOUBLE:
- evaluateDouble(vrg);
+ evaluateDouble(colVector);
break;
case BYTES:
- evaluateBytes(vrg);
+ evaluateBytes(colVector);
break;
case DECIMAL:
- evaluateDecimal(vrg);
+ evaluateDecimal(colVector);
break;
case TIMESTAMP:
- evaluateTimestamp(vrg);
+ evaluateTimestamp(colVector);
break;
case INTERVAL_DAY_TIME:
- evaluateIntervalDayTime(vrg);
+ evaluateIntervalDayTime(colVector);
+ break;
+ case STRUCT:
+ evaluateStruct(colVector);
break;
case VOID:
- evaluateVoid(vrg);
+ evaluateVoid(colVector);
break;
default:
throw new RuntimeException("Unexpected column vector type " + type);
@@ -287,6 +439,17 @@ public class ConstantVectorExpression extends VectorExpression {
return intervalDayTimeValue;
}
+ /**
+  * Builds one constant expression per struct field from a list of field values.
+  *
+  * @param structValue the field values; cast to a {@code List<Object>} and read by index, in
+  *                    the field order reported by the output struct type info
+  * @throws HiveException presumably propagated from {@code create} when a field constant
+  *                       cannot be built (confirm against {@code create})
+  */
+ public void setStructValue(Object structValue) throws HiveException {
+   final ArrayList<TypeInfo> fieldTypeInfos =
+       ((StructTypeInfo) outputTypeInfo).getAllStructFieldTypeInfos();
+   final int fieldCount = fieldTypeInfos.size();
+   this.structValue = new ConstantVectorExpression[fieldCount];
+   final List<Object> fieldValues = (List<Object>) structValue;
+   int fieldNum = 0;
+   while (fieldNum < fieldCount) {
+     // The field index is what gets passed to create() as its first argument.
+     this.structValue[fieldNum] =
+         create(fieldNum, fieldValues.get(fieldNum), fieldTypeInfos.get(fieldNum));
+     fieldNum++;
+   }
+ }
+
@Override
public String vectorExpressionParameters() {
String value;
@@ -313,6 +476,24 @@ public class ConstantVectorExpression extends VectorExpression {
case INTERVAL_DAY_TIME:
value = intervalDayTimeValue.toString();
break;
+ case STRUCT:
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.append("STRUCT {");
+ boolean isFirst = true;
+ final int size = structValue.length;
+ for (int i = 0; i < size; i++) {
+ if (isFirst) {
+ isFirst = false;
+ } else {
+ sb.append(", ");
+ }
+ sb.append(structValue[i].toString());
+ }
+ sb.append("}");
+ value = sb.toString();
+ }
+ break;
default:
throw new RuntimeException("Unknown vector column type " + type);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java
new file mode 100644
index 0000000..5632cfb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+
+/**
+ * Output a boolean value indicating if a decimal64 column is IN a list of constants.
+ *
+ * The class adds no evaluation logic of its own on top of {@link LongColumnInList}; it only
+ * overrides the parameter string and the descriptor.
+ */
+public class Decimal64ColumnInList extends LongColumnInList {
+
+  private static final long serialVersionUID = 1L;
+
+  public Decimal64ColumnInList(int colNum, int outputColumnNum) {
+    super(colNum, outputColumnNum);
+  }
+
+  public Decimal64ColumnInList() {
+    super();
+  }
+
+  /**
+   * Describes the input column followed by every IN-list constant. Each constant is shown both
+   * as its raw decimal64 long and as the decimal obtained by deserializing that long with the
+   * scale of the first input type.
+   */
+  @Override
+  public String vectorExpressionParameters() {
+    DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) inputTypeInfos[0];
+    final int scale = decimalTypeInfo.scale();
+    HiveDecimalWritable writable = new HiveDecimalWritable();
+    StringBuilder sb = new StringBuilder();
+    sb.append(getColumnParamString(0, colNum));
+    sb.append(", values [");
+    // Put ", " between entries only; a separator before the first entry would render the
+    // list as "[, decimal64Val ...".
+    boolean isFirst = true;
+    for (long value : inListValues) {
+      writable.deserialize64(value, scale);
+      if (isFirst) {
+        isFirst = false;
+      } else {
+        sb.append(", ");
+      }
+      sb.append("decimal64Val ");
+      sb.append(value);
+      sb.append(", decimalVal ");
+      sb.append(writable.toString());
+    }
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+    // return null since this will be handled as a special case in VectorizationContext
+    return null;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/edc53cc0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java
new file mode 100644
index 0000000..c26a93a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+
+/**
+ * Filter-mode BETWEEN for a decimal64 column against two decimal64 constants.
+ *
+ * The class adds no evaluation logic of its own on top of {@link FilterLongColumnBetween}; it
+ * only overrides the parameter string and the descriptor.
+ */
+public class FilterDecimal64ColumnBetween extends FilterLongColumnBetween {
+
+  private static final long serialVersionUID = 1L;
+
+  public FilterDecimal64ColumnBetween(int colNum, long leftValue, long rightValue) {
+    super(colNum, leftValue, rightValue);
+  }
+
+  public FilterDecimal64ColumnBetween() {
+    super();
+  }
+
+  /**
+   * Describes the input column and both bounds; each bound is shown as its raw decimal64 long
+   * and as the decimal decoded with the scale of the matching input type (index 1 for the left
+   * bound, index 2 for the right bound).
+   */
+  @Override
+  public String vectorExpressionParameters() {
+    final HiveDecimalWritable leftDecimal = decodeDecimal64Bound(leftValue, 1);
+    final HiveDecimalWritable rightDecimal = decodeDecimal64Bound(rightValue, 2);
+
+    final StringBuilder sb = new StringBuilder();
+    sb.append(getColumnParamString(0, colNum));
+    sb.append(", decimal64LeftVal ").append(leftValue);
+    sb.append(", decimalLeftVal ").append(leftDecimal.toString());
+    sb.append(", decimal64RightVal ").append(rightValue);
+    sb.append(", decimalRightVal ").append(rightDecimal.toString());
+    return sb.toString();
+  }
+
+  // Decodes one decimal64 bound using the scale of the input type at the given index.
+  private HiveDecimalWritable decodeDecimal64Bound(long decimal64Value, int inputTypeIndex) {
+    final DecimalTypeInfo boundTypeInfo = (DecimalTypeInfo) inputTypeInfos[inputTypeIndex];
+    final HiveDecimalWritable decoded = new HiveDecimalWritable();
+    decoded.deserialize64(decimal64Value, boundTypeInfo.scale());
+    return decoded;
+  }
+
+  /**
+   * Declares a FILTER-mode expression with three DECIMAL_64 arguments: one column followed by
+   * two scalars.
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    final VectorExpressionDescriptor.ArgumentType decimal64 =
+        VectorExpressionDescriptor.ArgumentType.DECIMAL_64;
+    final VectorExpressionDescriptor.InputExpressionType column =
+        VectorExpressionDescriptor.InputExpressionType.COLUMN;
+    final VectorExpressionDescriptor.InputExpressionType scalar =
+        VectorExpressionDescriptor.InputExpressionType.SCALAR;
+
+    return new VectorExpressionDescriptor.Builder()
+        .setMode(VectorExpressionDescriptor.Mode.FILTER)
+        .setNumArguments(3)
+        .setArgumentTypes(decimal64, decimal64, decimal64)
+        .setInputExpressionTypes(column, scalar, scalar)
+        .build();
+  }
+}