Posted to commits@hive.apache.org by vg...@apache.org on 2018/05/15 22:40:51 UTC
[31/50] [abbrv] hive git commit: HIVE-19384: Vectorization: IfExprTimestamp* do not handle NULLs correctly (Matt McCline, reviewed by Teddy Choi)
HIVE-19384: Vectorization: IfExprTimestamp* do not handle NULLs correctly (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2d4f347
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2d4f347
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2d4f347
Branch: refs/heads/branch-3.0.0
Commit: e2d4f3476f67331e3690903ad12a30aeb47126d0
Parents: e941bea
Author: Matt McCline <mm...@hortonworks.com>
Authored: Mon May 14 01:57:12 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Mon May 14 01:58:43 2018 -0500
----------------------------------------------------------------------
.../IfExprObjectColumnColumn.txt | 217 +++++++++
.../IfExprObjectColumnScalar.txt | 194 ++++++++
.../IfExprObjectScalarColumn.txt | 196 ++++++++
.../IfExprObjectScalarScalar.txt | 166 +++++++
.../ql/exec/vector/VectorizationContext.java | 5 -
.../expressions/IfExprLongColumnLongColumn.java | 7 +-
.../hive/ql/udf/generic/GenericUDFIf.java | 16 +-
.../exec/vector/TestVectorizationContext.java | 8 +-
.../ql/exec/vector/VectorRandomBatchSource.java | 311 +++++++++++++
.../ql/exec/vector/VectorRandomRowSource.java | 312 +++++++++----
.../expressions/TestVectorIfStatement.java | 444 +++++++++++++++++++
.../clientpositive/vectorized_timestamp_funcs.q | 48 +-
.../llap/vectorized_timestamp_funcs.q.out | 384 ++++++++++++----
.../spark/vectorized_timestamp_funcs.q.out | 384 ++++++++++++----
.../vectorized_timestamp_funcs.q.out | 382 ++++++++++++----
.../hive/ql/exec/vector/VectorizedRowBatch.java | 91 ++--
.../ql/exec/vector/TestStructColumnVector.java | 4 +-
.../apache/hadoop/hive/tools/GenVectorCode.java | 55 +++
18 files changed, 2813 insertions(+), 411 deletions(-)
----------------------------------------------------------------------
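The heart of the fix: when the boolean condition column is repeating, the old code trusted vector[0] without consulting the null flags, so a repeating NULL condition could be routed to the THEN branch. A minimal sketch of the corrected check, outside any Hive class and with illustrative names only:

    public class RepeatingNullCheckSketch {
      // Decide whether the THEN branch applies when the condition column is repeating.
      // The old (buggy) form was just "vector[0] == 1"; vector[0] may hold stale data
      // when isNull[0] is set, so IF(NULL, a, b) could wrongly return a instead of b.
      static boolean takeThenBranch(boolean noNulls, boolean[] isNull, long[] vector) {
        return (noNulls || !isNull[0]) && vector[0] == 1;
      }
    }
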
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt
new file mode 100644
index 0000000..e8ef279
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are columns or non-constant expression results.
+ */
+public class <ClassName> extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private final int arg1Column;
+ private final int arg2Column;
+ private final int arg3Column;
+
+ public <ClassName>(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
+ }
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Column = -1;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ <ColumnVectorType> arg2ColVector = (<ColumnVectorType>) batch.cols[arg2Column];
+ boolean[] arg2IsNull = arg2ColVector.isNull;
+ <ColumnVectorType> arg3ColVector = (<ColumnVectorType>) batch.cols[arg3Column];
+ boolean[] arg3IsNull = arg3ColVector.isNull;
+ <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ /* All the code paths below propagate nulls even if neither arg2 nor arg3
+ * have nulls. This is to reduce the number of code paths and shorten the
+ * code, at the expense of maybe doing unnecessary work if neither input
+ * has nulls. This could be improved in the future by expanding the number
+ * of code paths.
+ */
+ if (arg1ColVector.isRepeating) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ } else {
+ arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ }
+ return;
+ }
+
+ // extend any repeating values and noNulls indicator in the inputs
+ arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ }
+ } else /* there are nulls */ {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg2ColVector.unFlatten();
+ arg3ColVector.unFlatten();
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) +
+ ", " + getColumnParamString(2, arg3Column);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+}
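GenVectorCode expands the template above into concrete classes such as IfExprTimestampColumnColumn in the ...expressions.gen package (see the GenericUDFIf import changes below). A rough usage sketch of the generated expression (not part of this commit), showing that a repeating NULL condition now selects the ELSE column:

    import java.sql.Timestamp;

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    public class IfExprTimestampSketch {
      public static void main(String[] args) throws HiveException {
        // Columns: 0 = boolean condition, 1 = THEN timestamps, 2 = ELSE timestamps, 3 = output.
        VectorizedRowBatch batch = new VectorizedRowBatch(4);
        batch.cols[0] = new LongColumnVector();
        batch.cols[1] = new TimestampColumnVector();
        batch.cols[2] = new TimestampColumnVector();
        batch.cols[3] = new TimestampColumnVector();
        batch.size = 1;

        LongColumnVector cond = (LongColumnVector) batch.cols[0];
        cond.isRepeating = true;
        cond.noNulls = false;
        cond.isNull[0] = true;   // repeating NULL condition: IF(NULL, a, b) must yield b
        cond.vector[0] = 1;      // stale value the pre-fix code would have trusted

        ((TimestampColumnVector) batch.cols[2]).set(0, Timestamp.valueOf("2018-05-14 01:57:12"));

        new IfExprTimestampColumnColumn(0, 1, 2, 3).evaluate(batch);

        // With the null check in the isRepeating branch, row 0 comes from column 2.
        TimestampColumnVector out = (TimestampColumnVector) batch.cols[3];
        System.out.println(out.asScratchTimestamp(0));
      }
    }
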
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt
new file mode 100644
index 0000000..56ae2ca
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import <ScalarImport>;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a column or non-constant expression result.
+ * The third is a constant value.
+ */
+public class <ClassName> extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private final int arg1Column;
+ private final int arg2Column;
+ private final <ScalarType> arg3Scalar;
+
+ public <ClassName>(int arg1Column, int arg2Column, <ScalarType> arg3Scalar,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Scalar = arg3Scalar;
+ }
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Scalar = null;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ <ColumnVectorType> arg2ColVector = (<ColumnVectorType>) batch.cols[arg2Column];
+ boolean[] arg2IsNull = arg2ColVector.isNull;
+ <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ if (arg1ColVector.isRepeating) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ } else {
+ outputColVector.fill(arg3Scalar);
+ }
+ return;
+ }
+
+ // Extend any repeating values and noNulls indicator in the inputs to
+ // reduce the number of code paths needed below.
+ arg2ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+
+ // FUTURE: We could check arg2ColVector.noNulls and optimize these loops.
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3Scalar);
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3Scalar);
+ }
+ }
+ }
+ } else /* there are nulls */ {
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3Scalar);
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+ if (!arg2IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3Scalar);
+ }
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg2ColVector.unFlatten();
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) +
+ ", val "+ arg3Scalar;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt
new file mode 100644
index 0000000..271b589
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import <ScalarImport>;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a constant value.
+ * The third is a column or non-constant expression result.
+ */
+public class <ClassName> extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private final int arg1Column;
+ private final <ScalarType> arg2Scalar;
+ private final int arg3Column;
+
+ public <ClassName>(int arg1Column, <ScalarType> arg2Scalar, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Scalar = arg2Scalar;
+ this.arg3Column = arg3Column;
+ }
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Scalar = null;
+ arg3Column = -1;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ <ColumnVectorType> arg3ColVector = (<ColumnVectorType>) batch.cols[arg3Column];
+ boolean[] arg3IsNull = arg3ColVector.isNull;
+ <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ if (arg1ColVector.isRepeating) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+ outputColVector.fill(arg2Scalar);
+ } else {
+ arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+ }
+ return;
+ }
+
+ // Extend any repeating values and noNulls indicator in the inputs to
+ // reduce the number of code paths needed below.
+ // This could be optimized in the future by having separate paths
+ // for when arg3ColVector is repeating or has no nulls.
+ arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+ if (arg1ColVector.noNulls) {
+
+ // FUTURE: We could check arg3ColVector.noNulls and optimize these loops.
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2Scalar);
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (vector1[i] == 1) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2Scalar);
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ }
+ } else /* there are nulls */ {
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2Scalar);
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg2Scalar);
+ } else {
+ if (!arg3IsNull[i]) {
+ outputIsNull[i] = false;
+ outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ }
+ }
+
+ // restore repeating and no nulls indicators
+ arg3ColVector.unFlatten();
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", " +
+ getColumnParamString(2, arg3Column);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt
new file mode 100644
index 0000000..10f97df
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import <ScalarImport>;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a constant value.
+ * The third is a constant value.
+ */
+public class <ClassName> extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private final int arg1Column;
+ private final <ScalarType> arg2Scalar;
+ private final <ScalarType> arg3Scalar;
+
+ public <ClassName>(int arg1Column, <ScalarType> arg2Scalar, <ScalarType> arg3Scalar,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Scalar = arg2Scalar;
+ this.arg3Scalar = arg3Scalar;
+ }
+
+ public <ClassName>() {
+ super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Scalar = null;
+ arg3Scalar = null;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+ int[] sel = batch.selected;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ int n = batch.size;
+ long[] vector1 = arg1ColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ if (arg1ColVector.isRepeating) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+ outputColVector.fill(arg2Scalar);
+ } else {
+ outputColVector.fill(arg3Scalar);
+ }
+ return;
+ }
+
+ /*
+ * Since we always set a value, make sure all isNull entries are set to false.
+ */
+
+ if (arg1ColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
+ }
+ } else /* there are nulls */ {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputIsNull[i] = false;
+ outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2Scalar : arg3Scalar);
+ }
+ } else {
+ Arrays.fill(outputIsNull, 0, n, false);
+ for(int i = 0; i != n; i++) {
+ outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2Scalar : arg3Scalar);
+ }
+ }
+ }
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", val "+ arg3Scalar;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(3)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+ VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+ }
+}
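One detail of the ScalarScalar variant above: since a value is written for every selected row, the template can clear the output null flags wholesale instead of per element. A minimal sketch of that bookkeeping idiom on a LongColumnVector (class and method names here are illustrative, not part of the diff):

    import java.util.Arrays;

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class NoNullsResetSketch {
      // Write 'value' into the first n entries and reset the null bookkeeping,
      // mirroring the "fill all of isNull so we can safely reset noNulls" comment above.
      static void fillAllNonNull(LongColumnVector out, int n, long value) {
        if (!out.noNulls) {
          Arrays.fill(out.isNull, false);  // cheaper than clearing entries one by one
          out.noNulls = true;              // safe only because every row below gets a value
        }
        for (int i = 0; i < n; i++) {
          out.vector[i] = value;
        }
      }
    }
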
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 2cad04b..491a6b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -390,9 +390,6 @@ public class VectorizationContext {
if (initialDataTypePhysicalVariations == null) {
return null;
}
- if (columnNum < 0) {
- fake++;
- }
if (columnNum < initialDataTypePhysicalVariations.size()) {
return initialDataTypePhysicalVariations.get(columnNum);
}
@@ -1682,8 +1679,6 @@ public class VectorizationContext {
return vectorExpression;
}
- static int fake = 0;
-
private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf,
Class<?> udfClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode,
TypeInfo returnType) throws HiveException {
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 75de7a0..2a10e29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -60,6 +60,7 @@ public class IfExprLongColumnLongColumn extends VectorExpression {
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+ boolean[] arg1IsNull = arg1ColVector.isNull;
LongColumnVector arg2ColVector = (LongColumnVector) batch.cols[arg2Column];
LongColumnVector arg3ColVector = (LongColumnVector) batch.cols[arg3Column];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
@@ -87,7 +88,7 @@ public class IfExprLongColumnLongColumn extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1IsNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -121,14 +122,14 @@ public class IfExprLongColumnLongColumn extends VectorExpression {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ outputVector[i] = (!arg1IsNull[i] && vector1[i] == 1 ?
vector2[i] : vector3[i]);
outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
}
} else {
for(int i = 0; i != n; i++) {
- outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ outputVector[i] = (!arg1IsNull[i] && vector1[i] == 1 ?
vector2[i] : vector3[i]);
outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
index 90ff765..142dd1b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
@@ -41,10 +41,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarD
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeColumnColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeColumnScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeScalarColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeScalarScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
@@ -52,10 +52,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumn
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarCharScalar;
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 551bb9e..791ac82 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -47,10 +47,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumn
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull;
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java
new file mode 100644
index 0000000..8de247c
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+
+/**
+ * Generate random batch source from a random Object[] row source (VectorRandomRowSource).
+ */
+public class VectorRandomBatchSource {
+
+ // Divide up rows array into different sized batches.
+ // Modify the rows array for isRepeating / NULL patterns.
+ // Provide iterator that will fill up a VRB with the divided up rows.
+
+ private final VectorRandomRowSource vectorRandomRowSource;
+
+ private final Object[][] randomRows;
+
+ private final int rowCount;
+ private final int columnCount;
+
+ private final VectorBatchPatterns vectorBatchPatterns;
+
+ private VectorAssignRow vectorAssignRow;
+
+ private int nextRowIndex;
+ private int batchCount;
+
+ private VectorRandomBatchSource(
+ VectorRandomRowSource vectorRandomRowSource,
+ Object[][] randomRows,
+ VectorBatchPatterns vectorBatchPatterns,
+ VectorAssignRow vectorAssignRow) {
+ this.vectorRandomRowSource = vectorRandomRowSource;
+ this.randomRows = randomRows;
+ rowCount = randomRows.length;
+ Object[] firstRow = randomRows[0];
+ columnCount = firstRow.length;
+ this.vectorBatchPatterns = vectorBatchPatterns;
+ this.vectorAssignRow = vectorAssignRow;
+ }
+
+ public static class VectorRandomBatchParameters {
+ }
+
+ private static class VectorBatchPatterns {
+
+ private final List<VectorBatchPattern> vectorBatchPatternList;
+
+ VectorBatchPatterns(List<VectorBatchPattern> vectorBatchPatternList) {
+ this.vectorBatchPatternList = vectorBatchPatternList;
+ }
+
+ List<VectorBatchPattern> getVectorBatchPatternList() {
+ return vectorBatchPatternList;
+ }
+ }
+
+ private static class VectorBatchPattern {
+
+ final int batchSize;
+ final BitSet bitSet;
+
+ private VectorBatchPattern(int batchSize, BitSet bitSet) {
+ this.batchSize = batchSize;
+ this.bitSet = bitSet;
+ }
+
+ public static VectorBatchPattern createRegularBatch(int batchSize) {
+ return new VectorBatchPattern(batchSize, null);
+ }
+
+ public static VectorBatchPattern createRepeatedBatch(int batchSize, BitSet bitSet) {
+ return new VectorBatchPattern(batchSize, bitSet);
+ }
+
+ public int getBatchSize() {
+ return batchSize;
+ }
+
+ public BitSet getBitSet() {
+ return bitSet;
+ }
+
+ public String toString() {
+ String batchSizeString = "batchSize " + Integer.toString(batchSize);
+ if (bitSet == null) {
+ return batchSizeString;
+ }
+ long bitMask = bitSet.toLongArray()[0];
+ return batchSizeString + " repeating 0x" + Long.toHexString(bitMask);
+ }
+ }
+
+ private static VectorBatchPatterns chooseBatchPatterns(
+ Random random,
+ VectorRandomRowSource vectorRandomRowSource,
+ Object[][] randomRows) {
+
+ List<VectorBatchPattern> vectorBatchPatternList = new ArrayList<VectorBatchPattern>();
+ final int rowCount = randomRows.length;
+ int rowIndex = 0;
+
+ if (rowCount > 0) {
+
+ final int columnCount = randomRows[0].length;
+
+ // Choose first up to a full batch.
+ final int regularBatchSize = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE);
+ vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(regularBatchSize));
+ rowIndex += regularBatchSize;
+
+ // Have a non-NULL value on hand.
+ Object[] nonNullRow = new Object[columnCount];
+ for (int c = 0; c < columnCount; c++) {
+ for (int r = 0; r < rowCount; r++) {
+ Object object = randomRows[r][c];
+ if (object != null) {
+ nonNullRow[c] = object;
+ break;
+ }
+ }
+ }
+
+ int columnPermutationLimit = Math.min(columnCount, Long.SIZE);
+
+ // Repeated NULL permutations.
+ long columnPermutation = 1;
+ while (true) {
+ if (columnPermutation > columnPermutationLimit) {
+ break;
+ }
+ final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE);
+ if (maximumRowCount == 0) {
+ break;
+ }
+ int randomRowCount = 1 + random.nextInt(maximumRowCount);
+ final int rowLimit = rowIndex + randomRowCount;
+
+ BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation});
+
+ for (int columnNum = bitSet.nextSetBit(0);
+ columnNum >= 0;
+ columnNum = bitSet.nextSetBit(columnNum + 1)) {
+
+ // Repeated NULL fill down column.
+ for (int r = rowIndex; r < rowLimit; r++) {
+ randomRows[r][columnNum] = null;
+ }
+ }
+ vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet));
+ columnPermutation++;
+ rowIndex = rowLimit;
+ }
+
+ // Repeated non-NULL permutations.
+ columnPermutation = 1;
+ while (true) {
+ if (columnPermutation > columnPermutationLimit) {
+ break;
+ }
+ final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE);
+ if (maximumRowCount == 0) {
+ break;
+ }
+ int randomRowCount = 1 + random.nextInt(maximumRowCount);
+ final int rowLimit = rowIndex + randomRowCount;
+
+ BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation});
+
+ for (int columnNum = bitSet.nextSetBit(0);
+ columnNum >= 0;
+ columnNum = bitSet.nextSetBit(columnNum + 1)) {
+
+ // Repeated non-NULL fill down column.
+ Object repeatedObject = randomRows[rowIndex][columnNum];
+ if (repeatedObject == null) {
+ repeatedObject = nonNullRow[columnNum];
+ }
+ for (int r = rowIndex; r < rowLimit; r++) {
+ randomRows[r][columnNum] = repeatedObject;
+ }
+ }
+ vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet));
+ columnPermutation++;
+ rowIndex = rowLimit;
+ }
+
+ // Remaining batches.
+ while (true) {
+ final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE);
+ if (maximumRowCount == 0) {
+ break;
+ }
+ int randomRowCount = 1 + random.nextInt(maximumRowCount);
+ vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(randomRowCount));
+ rowIndex += randomRowCount;
+ }
+ }
+
+ // System.out.println("*DEBUG* vectorBatchPatternList" + vectorBatchPatternList.toString());
+
+ return new VectorBatchPatterns(vectorBatchPatternList);
+ }
+
+ public static VectorRandomBatchSource createInterestingBatches(
+ Random random,
+ VectorRandomRowSource vectorRandomRowSource,
+ Object[][] randomRows,
+ VectorRandomBatchParameters vectorRandomBatchParameters)
+ throws HiveException {
+
+ VectorAssignRow vectorAssignRow = new VectorAssignRow();
+ vectorAssignRow.init(vectorRandomRowSource.typeNames());
+
+ VectorBatchPatterns vectorBatchPatterns =
+ chooseBatchPatterns(random, vectorRandomRowSource, randomRows);
+
+ return new VectorRandomBatchSource(
+ vectorRandomRowSource, randomRows, vectorBatchPatterns, vectorAssignRow);
+ }
+
+ public VectorRandomRowSource getRowSource() {
+ return vectorRandomRowSource;
+ }
+
+ public Object[][] getRandomRows() {
+ return randomRows;
+ }
+
+ public void resetBatchIteration() {
+ nextRowIndex = 0;
+ batchCount = 0;
+ }
+
+ public int getBatchCount() {
+ return batchCount;
+ }
+
+ public int getRowCount() {
+ return rowCount;
+ }
+
+ /*
+ * Patterns of isRepeating columns
+ * For boolean: tri-state: null, 0, 1
+ * For others: null, some-value
+ * noNulls: sometimes false and there are no NULLs.
+ * Random selectedInUse, too.
+ */
+ public boolean fillNextBatch(VectorizedRowBatch batch) {
+ if (nextRowIndex >= rowCount) {
+ return false;
+ }
+
+ VectorBatchPattern vectorBatchPattern =
+ vectorBatchPatterns.getVectorBatchPatternList().get(batchCount);
+ final int batchSize = vectorBatchPattern.getBatchSize();
+
+ for (int c = 0; c < columnCount; c++) {
+ batch.cols[c].reset();
+ }
+
+ BitSet bitSet = vectorBatchPattern.getBitSet();
+ if (bitSet != null) {
+ for (int columnNum = bitSet.nextSetBit(0);
+ columnNum >= 0;
+ columnNum = bitSet.nextSetBit(columnNum + 1)) {
+ batch.cols[columnNum].isRepeating = true;
+ }
+ }
+
+ int rowIndex = nextRowIndex;
+ for (int batchIndex = 0; batchIndex < batchSize; batchIndex++) {
+ for (int c = 0; c < columnCount; c++) {
+ if (batch.cols[c].isRepeating && batchIndex > 0) {
+ continue;
+ }
+ vectorAssignRow.assignRowColumn(batch, batchIndex, c, randomRows[rowIndex][c]);
+ }
+ rowIndex++;
+ }
+ batch.size = batchSize;
+ batchCount++;
+ nextRowIndex += batchSize;
+ return true;
+ }
+}
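Taken together with VectorRandomRowSource (next file) and TestVectorIfStatement, the intended flow is: build random rows, let createInterestingBatches carve them into regular batches plus repeating-NULL and repeating-non-NULL pattern batches, then iterate. A rough sketch of the driving loop, assuming the caller has already built the row source, its random rows, a VectorizedRowBatch matching that schema, and the expression under test (those pieces are set up in the test, not here):

    import java.util.Random;

    import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
    import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    public class BatchSourceSketch {
      static int evaluateAllBatches(Random random, VectorRandomRowSource rowSource,
          Object[][] randomRows, VectorizedRowBatch batch, VectorExpression expr)
          throws HiveException {
        VectorRandomBatchSource batchSource =
            VectorRandomBatchSource.createInterestingBatches(
                random, rowSource, randomRows,
                new VectorRandomBatchSource.VectorRandomBatchParameters());
        batchSource.resetBatchIteration();
        int batches = 0;
        while (batchSource.fillNextBatch(batch)) {
          expr.evaluate(batch);   // the test would then verify the output column row by row
          batches++;
        }
        return batches;
      }
    }
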
http://git-wip-us.apache.org/repos/asf/hive/blob/e2d4f347/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index 3f99328..fa5c775 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -24,7 +24,9 @@ import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
+import java.util.Set;
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -32,6 +34,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.common.type.RandomTypeUtil;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -71,6 +74,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
@@ -96,6 +100,8 @@ public class VectorRandomRowSource {
private TypeInfo[] typeInfos;
+ private DataTypePhysicalVariation[] dataTypePhysicalVariations;
+
private List<ObjectInspector> objectInspectorList;
// Primitive.
@@ -127,6 +133,10 @@ public class VectorRandomRowSource {
return typeInfos;
}
+ public DataTypePhysicalVariation[] dataTypePhysicalVariations() {
+ return dataTypePhysicalVariations;
+ }
+
public PrimitiveCategory[] primitiveCategories() {
return primitiveCategories;
}
@@ -163,7 +173,22 @@ public class VectorRandomRowSource {
public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, boolean allowNull) {
this.r = r;
this.allowNull = allowNull;
- chooseSchema(supportedTypes, maxComplexDepth);
+ chooseSchema(supportedTypes, null, null, null, maxComplexDepth);
+ }
+
+ public void init(Random r, Set<String> allowedTypeNameSet, int maxComplexDepth, boolean allowNull) {
+ this.r = r;
+ this.allowNull = allowNull;
+ chooseSchema(SupportedTypes.ALL, allowedTypeNameSet, null, null, maxComplexDepth);
+ }
+
+ public void initExplicitSchema(Random r, List<String> explicitTypeNameList, int maxComplexDepth,
+ boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) {
+ this.r = r;
+ this.allowNull = allowNull;
+ chooseSchema(
+ SupportedTypes.ALL, null, explicitTypeNameList, explicitDataTypePhysicalVariationList,
+ maxComplexDepth);
}
/*
@@ -180,7 +205,7 @@ public class VectorRandomRowSource {
"float",
"double",
"string",
-// "char",
+ "char",
"varchar",
"binary",
"date",
@@ -197,27 +222,30 @@ public class VectorRandomRowSource {
"map"
};
- private String getRandomTypeName(SupportedTypes supportedTypes) {
+ private String getRandomTypeName(SupportedTypes supportedTypes, Set<String> allowedTypeNameSet) {
String typeName = null;
- if (r.nextInt(10 ) != 0) {
- typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)];
- } else {
- switch (supportedTypes) {
- case PRIMITIVES:
+ do {
+ if (r.nextInt(10 ) != 0) {
typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)];
- break;
- case ALL_EXCEPT_MAP:
- typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length - 1)];
- break;
- case ALL:
- typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)];
- break;
+ } else {
+ switch (supportedTypes) {
+ case PRIMITIVES:
+ typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)];
+ break;
+ case ALL_EXCEPT_MAP:
+ typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length - 1)];
+ break;
+ case ALL:
+ typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)];
+ break;
+ }
}
- }
+ } while (allowedTypeNameSet != null && !allowedTypeNameSet.contains(typeName));
return typeName;
}
- private String getDecoratedTypeName(String typeName, SupportedTypes supportedTypes, int depth, int maxDepth) {
+ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTypes,
+ Set<String> allowedTypeNameSet, int depth, int maxDepth) {
depth++;
if (depth < maxDepth) {
supportedTypes = SupportedTypes.PRIMITIVES;
@@ -229,23 +257,32 @@ public class VectorRandomRowSource {
final int maxLength = 1 + r.nextInt(100);
typeName = String.format("varchar(%d)", maxLength);
} else if (typeName.equals("decimal")) {
- typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
+ typeName =
+ String.format(
+ "decimal(%d,%d)",
+ HiveDecimal.SYSTEM_DEFAULT_PRECISION,
+ HiveDecimal.SYSTEM_DEFAULT_SCALE);
} else if (typeName.equals("array")) {
- String elementTypeName = getRandomTypeName(supportedTypes);
- elementTypeName = getDecoratedTypeName(elementTypeName, supportedTypes, depth, maxDepth);
+ String elementTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet);
+ elementTypeName =
+ getDecoratedTypeName(elementTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth);
typeName = String.format("array<%s>", elementTypeName);
} else if (typeName.equals("map")) {
- String keyTypeName = getRandomTypeName(SupportedTypes.PRIMITIVES);
- keyTypeName = getDecoratedTypeName(keyTypeName, supportedTypes, depth, maxDepth);
- String valueTypeName = getRandomTypeName(supportedTypes);
- valueTypeName = getDecoratedTypeName(valueTypeName, supportedTypes, depth, maxDepth);
+ String keyTypeName = getRandomTypeName(SupportedTypes.PRIMITIVES, allowedTypeNameSet);
+ keyTypeName =
+ getDecoratedTypeName(keyTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth);
+ String valueTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet);
+ valueTypeName =
+ getDecoratedTypeName(valueTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth);
typeName = String.format("map<%s,%s>", keyTypeName, valueTypeName);
} else if (typeName.equals("struct")) {
final int fieldCount = 1 + r.nextInt(10);
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < fieldCount; i++) {
- String fieldTypeName = getRandomTypeName(supportedTypes);
- fieldTypeName = getDecoratedTypeName(fieldTypeName, supportedTypes, depth, maxDepth);
+ String fieldTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet);
+ fieldTypeName =
+ getDecoratedTypeName(
+ fieldTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth);
if (i > 0) {
sb.append(",");
}
@@ -260,8 +297,10 @@ public class VectorRandomRowSource {
final int fieldCount = 1 + r.nextInt(10);
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < fieldCount; i++) {
- String fieldTypeName = getRandomTypeName(supportedTypes);
- fieldTypeName = getDecoratedTypeName(fieldTypeName, supportedTypes, depth, maxDepth);
+ String fieldTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet);
+ fieldTypeName =
+ getDecoratedTypeName(
+ fieldTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth);
if (i > 0) {
sb.append(",");
}
@@ -273,14 +312,29 @@ public class VectorRandomRowSource {
}
private ObjectInspector getObjectInspector(TypeInfo typeInfo) {
+ return getObjectInspector(typeInfo, DataTypePhysicalVariation.NONE);
+ }
+
+ private ObjectInspector getObjectInspector(TypeInfo typeInfo,
+ DataTypePhysicalVariation dataTypePhysicalVariation) {
+
final ObjectInspector objectInspector;
switch (typeInfo.getCategory()) {
case PRIMITIVE:
{
- final PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo;
- objectInspector =
- PrimitiveObjectInspectorFactory.
- getPrimitiveWritableObjectInspector(primitiveType);
+ final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+ if (primitiveTypeInfo instanceof DecimalTypeInfo &&
+ dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+ objectInspector =
+ PrimitiveObjectInspectorFactory.
+ getPrimitiveWritableObjectInspector(
+ TypeInfoFactory.longTypeInfo);
+ } else {
+ objectInspector =
+ PrimitiveObjectInspectorFactory.
+ getPrimitiveWritableObjectInspector(
+ primitiveTypeInfo);
+ }
}
break;
case MAP:
@@ -341,35 +395,50 @@ public class VectorRandomRowSource {
return objectInspector;
}
- private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) {
- HashSet hashSet = null;
+ private void chooseSchema(SupportedTypes supportedTypes, Set<String> allowedTypeNameSet,
+ List<String> explicitTypeNameList,
+ List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList,
+ int maxComplexDepth) {
+ HashSet<Integer> hashSet = null;
final boolean allTypes;
- final boolean onlyOne = (r.nextInt(100) == 7);
- if (onlyOne) {
- columnCount = 1;
+ final boolean onlyOne;
+ if (explicitTypeNameList != null) {
+ columnCount = explicitTypeNameList.size();
+ allTypes = false;
+ onlyOne = false;
+ } else if (allowedTypeNameSet != null) {
+ columnCount = 1 + r.nextInt(20);
allTypes = false;
+ onlyOne = false;
} else {
- allTypes = r.nextBoolean();
- if (allTypes) {
- switch (supportedTypes) {
- case ALL:
- columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
- break;
- case ALL_EXCEPT_MAP:
- columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
- break;
- case PRIMITIVES:
- columnCount = possibleHivePrimitiveTypeNames.length;
- break;
- }
- hashSet = new HashSet<Integer>();
+ onlyOne = (r.nextInt(100) == 7);
+ if (onlyOne) {
+ columnCount = 1;
+ allTypes = false;
} else {
- columnCount = 1 + r.nextInt(20);
+ allTypes = r.nextBoolean();
+ if (allTypes) {
+ switch (supportedTypes) {
+ case ALL:
+ columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
+ break;
+ case ALL_EXCEPT_MAP:
+ columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
+ break;
+ case PRIMITIVES:
+ columnCount = possibleHivePrimitiveTypeNames.length;
+ break;
+ }
+ hashSet = new HashSet<Integer>();
+ } else {
+ columnCount = 1 + r.nextInt(20);
+ }
}
}
typeNames = new ArrayList<String>(columnCount);
categories = new Category[columnCount];
typeInfos = new TypeInfo[columnCount];
+ dataTypePhysicalVariations = new DataTypePhysicalVariation[columnCount];
objectInspectorList = new ArrayList<ObjectInspector>(columnCount);
primitiveCategories = new PrimitiveCategory[columnCount];
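
chooseSchema now has three modes: an explicit schema (one type name plus one physical variation per column), a random schema restricted to allowedTypeNameSet, or the fully random schema as before. A hedged caller-side sketch of the paired lists the explicit mode consumes; the init entry point that passes them in is not shown in this hunk and is assumed:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;

    List<String> explicitTypeNameList =
        Arrays.asList("timestamp", "decimal(10,2)");
    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
        Arrays.asList(DataTypePhysicalVariation.NONE, DataTypePhysicalVariation.DECIMAL_64);
    // columnCount becomes explicitTypeNameList.size() == 2, with col1 laid out as DECIMAL_64.
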
@@ -379,9 +448,13 @@ public class VectorRandomRowSource {
for (int c = 0; c < columnCount; c++) {
columnNames.add(String.format("col%d", c));
final String typeName;
+ DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
- if (onlyOne) {
- typeName = getRandomTypeName(supportedTypes);
+ if (explicitTypeNameList != null) {
+ typeName = explicitTypeNameList.get(c);
+ dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c);
+ } else if (onlyOne || allowedTypeNameSet != null) {
+ typeName = getRandomTypeName(supportedTypes, allowedTypeNameSet);
} else {
int typeNum;
if (allTypes) {
@@ -425,7 +498,8 @@ public class VectorRandomRowSource {
}
- String decoratedTypeName = getDecoratedTypeName(typeName, supportedTypes, 0, maxComplexDepth);
+ String decoratedTypeName =
+ getDecoratedTypeName(typeName, supportedTypes, allowedTypeNameSet, 0, maxComplexDepth);
final TypeInfo typeInfo;
try {
@@ -435,15 +509,14 @@ public class VectorRandomRowSource {
}
typeInfos[c] = typeInfo;
+ dataTypePhysicalVariations[c] = dataTypePhysicalVariation;
final Category category = typeInfo.getCategory();
categories[c] = category;
- ObjectInspector objectInspector = getObjectInspector(typeInfo);
+ ObjectInspector objectInspector = getObjectInspector(typeInfo, dataTypePhysicalVariation);
switch (category) {
case PRIMITIVE:
{
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
- objectInspector = PrimitiveObjectInspectorFactory.
- getPrimitiveWritableObjectInspector(primitiveTypeInfo);
primitiveTypeInfos[c] = primitiveTypeInfo;
PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
primitiveCategories[c] = primitiveCategory;
@@ -498,27 +571,46 @@ public class VectorRandomRowSource {
}
public Object[] randomPrimitiveRow(int columnCount) {
- return randomPrimitiveRow(columnCount, r, primitiveTypeInfos);
+ return randomPrimitiveRow(columnCount, r, primitiveTypeInfos, dataTypePhysicalVariations);
}
public static Object[] randomPrimitiveRow(int columnCount, Random r,
- PrimitiveTypeInfo[] primitiveTypeInfos) {
+ PrimitiveTypeInfo[] primitiveTypeInfos,
+ DataTypePhysicalVariation[] dataTypePhysicalVariations) {
final Object row[] = new Object[columnCount];
for (int c = 0; c < columnCount; c++) {
- row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c]);
+ row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c], dataTypePhysicalVariations[c]);
}
return row;
}
public static Object[] randomWritablePrimitiveRow(int columnCount, Random r,
PrimitiveTypeInfo[] primitiveTypeInfos) {
+ return randomWritablePrimitiveRow(columnCount, r, primitiveTypeInfos, null);
+ }
+
+ public static Object[] randomWritablePrimitiveRow(int columnCount, Random r,
+ PrimitiveTypeInfo[] primitiveTypeInfos,
+ DataTypePhysicalVariation[] dataTypePhysicalVariations) {
final Object row[] = new Object[columnCount];
for (int c = 0; c < columnCount; c++) {
final PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[c];
- final ObjectInspector objectInspector =
- PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
+ final DataTypePhysicalVariation dataTypePhysicalVariation =
+ (dataTypePhysicalVariations != null ?
+ dataTypePhysicalVariations[c] : DataTypePhysicalVariation.NONE);
+ final ObjectInspector objectInspector;
+ if (primitiveTypeInfo instanceof DecimalTypeInfo &&
+ dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+ objectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ TypeInfoFactory.longTypeInfo);
+ } else {
+ objectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ primitiveTypeInfo);
+ }
final Object object = randomPrimitiveObject(r, primitiveTypeInfo);
row[c] = getWritablePrimitiveObject(primitiveTypeInfo, objectInspector, object);
}
@@ -575,6 +667,14 @@ public class VectorRandomRowSource {
public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo,
ObjectInspector objectInspector, Object object) {
+ return
+ getWritablePrimitiveObject(
+ primitiveTypeInfo, objectInspector, DataTypePhysicalVariation.NONE, object);
+ }
+
+ public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo,
+ ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation,
+ Object object) {
switch (primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
@@ -596,17 +696,17 @@ public class VectorRandomRowSource {
case STRING:
return ((WritableStringObjectInspector) objectInspector).create((String) object);
case CHAR:
- {
- WritableHiveCharObjectInspector writableCharObjectInspector =
- new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
- return writableCharObjectInspector.create((HiveChar) object);
- }
+ {
+ WritableHiveCharObjectInspector writableCharObjectInspector =
+ new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
+ return writableCharObjectInspector.create((HiveChar) object);
+ }
case VARCHAR:
- {
- WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
- new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
- return writableVarcharObjectInspector.create((HiveVarchar) object);
- }
+ {
+ WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
+ new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
+ return writableVarcharObjectInspector.create((HiveVarchar) object);
+ }
case BINARY:
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object);
case TIMESTAMP:
@@ -616,31 +716,55 @@ public class VectorRandomRowSource {
case INTERVAL_DAY_TIME:
return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object);
case DECIMAL:
- {
- WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
- new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
- return writableDecimalObjectInspector.create((HiveDecimal) object);
- }
+ {
+ if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+ final long value;
+ if (object instanceof HiveDecimal) {
+ DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+ value = new HiveDecimalWritable((HiveDecimal) object).serialize64(
+ decimalTypeInfo.getScale());
+ } else {
+ value = (long) object;
+ }
+ return ((WritableLongObjectInspector) objectInspector).create(value);
+ } else {
+ WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
+ new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
+ return writableDecimalObjectInspector.create((HiveDecimal) object);
+ }
+ }
default:
throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
}
}
public Object randomWritable(int column) {
- return randomWritable(typeInfos[column], objectInspectorList.get(column));
+ return randomWritable(
+ typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column],
+ allowNull);
}
public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector) {
- return randomWritable(typeInfo, objectInspector, allowNull);
+ return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull);
+ }
+
+ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector,
+ boolean allowNull) {
+ return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull);
}
- public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, boolean allowNull) {
+ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector,
+ DataTypePhysicalVariation dataTypePhysicalVariation, boolean allowNull) {
switch (typeInfo.getCategory()) {
case PRIMITIVE:
{
+ if (allowNull && r.nextInt(20) == 0) {
+ return null;
+ }
final Object object = randomPrimitiveObject(r, (PrimitiveTypeInfo) typeInfo);
- return getWritablePrimitiveObject((PrimitiveTypeInfo) typeInfo, objectInspector, object);
+ return getWritablePrimitiveObject(
+ (PrimitiveTypeInfo) typeInfo, objectInspector, dataTypePhysicalVariation, object);
}
case LIST:
{
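
Taken together, the DECIMAL_64 branches make the writable depend on the physical variation, not just the logical type. A hedged usage sketch of the static helper added above, using only classes visible in this patch:

    import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    DecimalTypeInfo decimal10_2 = new DecimalTypeInfo(10, 2);
    ObjectInspector longOI =
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.longTypeInfo);
    Object writable = VectorRandomRowSource.getWritablePrimitiveObject(
        decimal10_2, longOI, DataTypePhysicalVariation.DECIMAL_64,
        HiveDecimal.create("123.45"));
    // writable is a LongWritable holding 12345; with DataTypePhysicalVariation.NONE
    // the same call returns a HiveDecimalWritable holding 123.45.
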
@@ -780,6 +904,11 @@ public class VectorRandomRowSource {
}
public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo) {
+ return randomPrimitiveObject(r, primitiveTypeInfo, DataTypePhysicalVariation.NONE);
+ }
+
+ public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo,
+ DataTypePhysicalVariation dataTypePhysicalVariation) {
switch (primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
@@ -813,9 +942,14 @@ public class VectorRandomRowSource {
case INTERVAL_DAY_TIME:
return getRandIntervalDayTime(r);
case DECIMAL:
- {
- return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
- }
+ {
+ DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+ HiveDecimal hiveDecimal = getRandHiveDecimal(r, decimalTypeInfo);
+ if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+ return new HiveDecimalWritable(hiveDecimal).serialize64(decimalTypeInfo.getScale());
+ }
+ return hiveDecimal;
+ }
default:
throw new Error("Unknown primitive category " + primitiveTypeInfo.getCategory());
}
@@ -869,7 +1003,13 @@ public class VectorRandomRowSource {
sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
}
- return HiveDecimal.create(sb.toString());
+ HiveDecimal dec = HiveDecimal.create(sb.toString());
+ dec =
+ HiveDecimal.enforcePrecisionScale(
+ dec, decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale());
+ if (dec != null) {
+ return dec;
+ }
}
}
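
HiveDecimal.enforcePrecisionScale returns null when a value cannot be represented within the requested precision and scale, so the generator above simply retries until the random digits fit. A small sketch of that contract, assuming only HiveDecimal's public API:

    import org.apache.hadoop.hive.common.type.HiveDecimal;

    public class EnforceSketch {
      public static void main(String[] args) {
        // 11 integer digits cannot fit decimal(10,2), so enforcement yields null
        HiveDecimal tooWide = HiveDecimal.enforcePrecisionScale(
            HiveDecimal.create("12345678901.23"), 10, 2);
        System.out.println(tooWide);        // null
        // 8 integer digits fit decimal(10,2), so the value is kept
        HiveDecimal fits = HiveDecimal.enforcePrecisionScale(
            HiveDecimal.create("12345678.91"), 10, 2);
        System.out.println(fits);           // 12345678.91
      }
    }
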