You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:39 UTC
[25/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
index ff8593e..e605e88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java
@@ -59,7 +59,6 @@ public class FuncRoundWithNumDigitsDecimalToDecimal extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
HiveDecimalWritable[] vector = inputColVector.vector;
@@ -68,32 +67,57 @@ public class FuncRoundWithNumDigitsDecimalToDecimal extends VectorExpression {
return;
}
- if (inputColVector.isRepeating) {
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
- outputIsNull[0] = inputIsNull[0];
- round(0, vector[0], decimalPlaces, outputColVector);
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ round(0, vector[0], decimalPlaces, outputColVector);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- // Set isNull because decimal operation can yield a null.
- outputIsNull[i] = false;
- round(i, vector[i], decimalPlaces, outputColVector);
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ round(i, vector[i], decimalPlaces, outputColVector);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ round(i, vector[i], decimalPlaces, outputColVector);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
- // Set isNull because decimal operation can yield a null.
- Arrays.fill(outputIsNull, 0, n, false);
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
round(i, vector[i], decimalPlaces, outputColVector);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -106,7 +130,6 @@ public class FuncRoundWithNumDigitsDecimalToDecimal extends VectorExpression {
round(i, vector[i], decimalPlaces, outputColVector);
}
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
index d474ff0..1b5c07a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -50,60 +52,84 @@ public abstract class FuncStringToLong extends VectorExpression {
super.evaluateChildren(batch);
}
- BytesColumnVector inV = (BytesColumnVector) batch.cols[inputCol];
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
int[] sel = batch.selected;
int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputCol];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputCol];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
//Nothing to do
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for (int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
- for (int i = 0; i != n; i++) {
- func(outV, inV, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
}
- outV.isRepeating = false;
- }
- } else {
- // Handle case with nulls. Don't do function if the value is null, to save time,
- // because calling the function can be expensive.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
+ for(int i = 0; i != n; i++) {
+ func(outputColVector, inputColVector, i);
}
- } else if (batch.selectedInUse) {
+ }
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for (int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
@@ -111,7 +137,7 @@ public abstract class FuncStringToLong extends VectorExpression {
/* Evaluate result for position i (using bytes[] to avoid storage allocation costs)
* and set position i of the output vector to the result.
*/
- protected abstract void func(LongColumnVector outV, BytesColumnVector inV, int i);
+ protected abstract void func(LongColumnVector outputColVector, BytesColumnVector inputColVector, int i);
public int getOutputCol() {
return outputCol;
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
index 93cf1ec..2213b83 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ public abstract class FuncTimestampToDecimal extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i);
+ abstract protected void func(DecimalColumnVector outputColVector, TimestampColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -53,10 +55,12 @@ public abstract class FuncTimestampToDecimal extends VectorExpression {
super.evaluateChildren(batch);
}
- TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn];
+ TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+ DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -64,51 +68,72 @@ public abstract class FuncTimestampToDecimal extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
index 9eb4312..060070e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -45,7 +47,7 @@ public abstract class FuncTimestampToLong extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(LongColumnVector outV, TimestampColumnVector inV, int i);
+ abstract protected void func(LongColumnVector outputColVector, TimestampColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -54,10 +56,12 @@ public abstract class FuncTimestampToLong extends VectorExpression {
super.evaluateChildren(batch);
}
- TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn];
+ TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -65,51 +69,72 @@ public abstract class FuncTimestampToLong extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
index f9b3f76..bf2c9a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
@@ -64,39 +64,138 @@ public class IfExprColumnNull extends VectorExpression {
return;
}
- arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+ /*
+ * Repeating IF expression?
+ */
if (arg1ColVector.isRepeating) {
- if (!null1[0] && vector1[0] == 1) {
- outputColVector.setElement(0, 0, arg2ColVector);
+ if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) {
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
+ outputColVector.isRepeating = true;
outputColVector.noNulls = false;
isNull[0] = true;
}
return;
}
- if (batch.selectedInUse) {
- for (int j = 0; j < n; j++) {
- int i = sel[j];
- if (!null1[0] && vector1[i] == 1) {
- outputColVector.setElement(i, i, arg2ColVector);
+
+ if (arg1ColVector.noNulls) {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ /*
+ * Repeating THEN expression?
+ */
+ if (arg2ColVector.isRepeating) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ isNull[i] = false;
+ // Assign repeated value (index 0) over and over.
+ outputColVector.setElement(i, 0, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ if (vector1[i] == 1) {
+ isNull[i] = false;
+ // Assign repeated value (index 0) over and over.
+ outputColVector.setElement(i, 0, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
} else {
- outputColVector.noNulls = false;
- isNull[i] = true;
+ for (int i = 0; i < n; i++) {
+ if (vector1[i] == 1) {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
}
}
- } else {
- for (int i = 0; i < n; i++) {
- if (!null1[0] && vector1[i] == 1) {
- outputColVector.setElement(i, i, arg2ColVector);
+ } else /* there are nulls in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ /*
+ * Repeating THEN expression?
+ */
+ if (arg2ColVector.isRepeating) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = false;
+ outputColVector.setElement(i, 0, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
} else {
- outputColVector.noNulls = false;
- isNull[i] = true;
+ for (int i = 0; i < n; i++) {
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = false;
+ outputColVector.setElement(i, 0, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ } else {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
}
}
}
-
- arg2ColVector.unFlatten();
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
index e7d4e4d..4296692 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -65,8 +65,7 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
double[] vector2 = arg2ColVector.vector;
@@ -78,6 +77,9 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
/* All the code paths below propagate nulls even if neither arg2 nor arg3
* have nulls. This is to reduce the number of code paths and shorten the
* code, at the expense of maybe doing unnecessary work if neither input
@@ -85,7 +87,7 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -98,6 +100,15 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
arg3ColVector.flatten(batch.selectedInUse, sel, n);
if (arg1ColVector.noNulls) {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -112,7 +123,16 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
}
}
- } else /* there are nulls */ {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
index fa7b2da..099a319 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
@@ -65,8 +65,7 @@ public class IfExprIntervalDayTimeColumnColumn extends VectorExpression {
IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -75,6 +74,9 @@ public class IfExprIntervalDayTimeColumnColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
/* All the code paths below propagate nulls even if neither arg2 nor arg3
* have nulls. This is to reduce the number of code paths and shorten the
* code, at the expense of maybe doing unnecessary work if neither input
@@ -82,7 +84,7 @@ public class IfExprIntervalDayTimeColumnColumn extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -95,21 +97,39 @@ public class IfExprIntervalDayTimeColumnColumn extends VectorExpression {
arg3ColVector.flatten(batch.selectedInUse, sel, n);
if (arg1ColVector.noNulls) {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
outputIsNull[i] = (vector1[i] == 1 ?
arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
}
} else {
for(int i = 0; i != n; i++) {
- outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
outputIsNull[i] = (vector1[i] == 1 ?
arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+ outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3ColVector.asScratchIntervalDayTime(i));
}
}
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
index 487fb97..905ffba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
@@ -67,8 +69,10 @@ public class IfExprIntervalDayTimeColumnScalar extends VectorExpression {
IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -78,7 +82,7 @@ public class IfExprIntervalDayTimeColumnScalar extends VectorExpression {
}
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
outputColVector.fill(arg3Scalar);
@@ -94,14 +98,25 @@ public class IfExprIntervalDayTimeColumnScalar extends VectorExpression {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar);
}
}
} else /* there are nulls */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
index 7b18cf8..e99754b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
@@ -67,8 +69,10 @@ public class IfExprIntervalDayTimeScalarColumn extends VectorExpression {
IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -78,7 +82,7 @@ public class IfExprIntervalDayTimeScalarColumn extends VectorExpression {
}
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
outputColVector.fill(arg2Scalar);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -94,16 +98,47 @@ public class IfExprIntervalDayTimeScalarColumn extends VectorExpression {
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.set(
+ i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.set(
+ i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
+ outputColVector.set(
+ i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i));
}
}
} else /* there are nulls */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
index 0ba6722..5875d48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java
@@ -68,8 +68,10 @@ public class IfExprIntervalDayTimeScalarScalar extends VectorExpression {
IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = false; // output is a scalar which we know is non null
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -79,23 +81,54 @@ public class IfExprIntervalDayTimeScalarScalar extends VectorExpression {
}
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
outputColVector.fill(arg2Scalar);
} else {
outputColVector.fill(arg3Scalar);
}
- } else if (arg1ColVector.noNulls) {
+ return;
+ }
+
+ if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
}
}
} else /* there are nulls */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 0c8a2f6..d8ec895 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -64,8 +64,10 @@ public class IfExprLongColumnLongColumn extends VectorExpression {
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
long[] vector2 = arg2ColVector.vector;
@@ -96,6 +98,9 @@ public class IfExprLongColumnLongColumn extends VectorExpression {
arg2ColVector.flatten(batch.selectedInUse, sel, n);
arg3ColVector.flatten(batch.selectedInUse, sel, n);
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
index 85c37f9..4afdce4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
@@ -64,39 +64,132 @@ public class IfExprNullColumn extends VectorExpression {
return;
}
- arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+ /*
+ * Repeating IF expression?
+ */
if (arg1ColVector.isRepeating) {
- if (!null1[0] && vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) {
+ outputColVector.isRepeating = true;
outputColVector.noNulls = false;
isNull[0] = true;
} else {
- outputColVector.setElement(0, 0, arg2ColVector);
+ arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
}
return;
}
- if (batch.selectedInUse) {
- for (int j = 0; j < n; j++) {
- int i = sel[j];
- if (!null1[0] && vector1[i] == 1) {
- outputColVector.noNulls = false;
- isNull[i] = true;
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (arg1ColVector.noNulls) {
+
+ /*
+ * Repeating ELSE expression?
+ */
+ if (arg2ColVector.isRepeating) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, 0, arg2ColVector);
+ }
+ }
} else {
- outputColVector.setElement(i, i, arg2ColVector);
+ for (int i = 0; i < n; i++) {
+ if (vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, 0, arg2ColVector);
+ }
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ if (vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ }
+ }
}
}
} else {
- for (int i = 0; i < n; i++) {
- if (!null1[0] && vector1[i] == 1) {
- outputColVector.noNulls = false;
- isNull[i] = true;
+
+ /*
+ * Repeating ELSE expression?
+ */
+ if (arg2ColVector.isRepeating) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, 0, arg2ColVector);
+ }
+ }
} else {
- outputColVector.setElement(i, i, arg2ColVector);
+ for (int i = 0; i < n; i++) {
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, 0, arg2ColVector);
+ }
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ if (!null1[i] && vector1[i] == 1) {
+ isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+ isNull[i] = false;
+ outputColVector.setElement(i, i, arg2ColVector);
+ }
+ }
}
}
}
-
- arg2ColVector.unFlatten();
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
new file mode 100644
index 0000000..5a68cec
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * Vectorized IF expression whose THEN and ELSE results are both NULL, as the
+ * class name and the "null, null" parameter string indicate.
+ *
+ * evaluate() never inspects the condition value: after evaluating any child
+ * expressions it marks the output column as a repeating NULL.
+ */
+public class IfExprNullNull extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+ /**
+ * Creates the expression for a given output column.
+ *
+ * @param outputColumnNum forwarded to the superclass constructor; presumably
+ * stored as the inherited outputColumnNum that evaluate() uses to index
+ * batch.cols -- TODO confirm against VectorExpression
+ */
+ public IfExprNullNull(int outputColumnNum) {
+ super(outputColumnNum);
+ }
+ /*
+ * No-argument constructor; it only delegates to super().
+ * NOTE(review): presumably kept for reflective or serialization use -- confirm.
+ */
+ public IfExprNullNull() {
+ super();
+ }
+ /**
+ * Evaluates the child expressions (if any) and then turns the output column
+ * into a repeating NULL: isNull[0] = true, noNulls = false, isRepeating = true.
+ * Apart from the child evaluation, no input column is read, and batch.size and
+ * batch.selected are not consulted.
+ *
+ * @param batch the row batch whose cols[outputColumnNum] is updated in place
+ */
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ final ColumnVector outputColVector = batch.cols[outputColumnNum];
+ // Only entry 0 of isNull is written below.
+ // NOTE(review): relies on readers of a repeating vector consulting only entry 0 -- confirm.
+ // We do not need to do a column reset since we are carefully changing the output.
+
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isRepeating = true;
+ }
+ /**
+ * Returns the fixed parameter description of this expression.
+ *
+ * @return the literal string "null, null"
+ */
+ @Override
+ public String vectorExpressionParameters() {
+ return "null, null";
+ }
+ /**
+ * Not supported: this expression defines no descriptor.
+ *
+ * @return never returns normally
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ throw new UnsupportedOperationException("Undefined descriptor");
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
index 09aa9ab..bb57e4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
@@ -67,8 +67,10 @@ public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression {
BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -86,7 +88,7 @@ public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -98,6 +100,11 @@ public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression {
arg2ColVector.flatten(batch.selectedInUse, sel, n);
arg3ColVector.flatten(batch.selectedInUse, sel, n);
+ /*
+ * Do careful maintenance of NULLs.
+ */
+ outputColVector.noNulls = false;
+
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -134,6 +141,7 @@ public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression {
}
}
} else /* there are nulls */ {
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
index 9167178..998448a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java
@@ -69,8 +69,10 @@ public class IfExprStringGroupColumnStringScalar extends VectorExpression {
BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -88,7 +90,7 @@ public class IfExprStringGroupColumnStringScalar extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
outputColVector.fill(arg3Scalar);
@@ -99,7 +101,14 @@ public class IfExprStringGroupColumnStringScalar extends VectorExpression {
// extend any repeating values and noNulls indicator in the inputs
arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ /*
+ * Do careful maintenance of NULLs.
+ */
+ outputColVector.noNulls = false;
+
if (arg1ColVector.noNulls) {
+
+ // FUTURE: We could check arg2ColVector.noNulls and optimize these loops.
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
index 84d0052..c597a34 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java
@@ -70,8 +70,15 @@ public class IfExprStringScalarStringGroupColumn extends VectorExpression {
BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
+ if (!outputColVector.noNulls) {
+ // TEMPORARILY:
+ outputColVector.reset();
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -89,7 +96,7 @@ public class IfExprStringScalarStringGroupColumn extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
outputColVector.fill(arg2Scalar);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -100,6 +107,11 @@ public class IfExprStringScalarStringGroupColumn extends VectorExpression {
// extend any repeating values and noNulls indicator in the input
arg3ColVector.flatten(batch.selectedInUse, sel, n);
+ /*
+ * Do careful maintenance of NULLs.
+ */
+ outputColVector.noNulls = false;
+
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
index 5ed457b..9c0e7be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -67,8 +68,11 @@ public class IfExprStringScalarStringScalar extends VectorExpression {
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- outputColVector.noNulls = true; // output must be a scalar and neither one is null
- outputColVector.isRepeating = false; // may override later
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -80,11 +84,12 @@ public class IfExprStringScalarStringScalar extends VectorExpression {
outputColVector.initBuffer();
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
- outputColVector.fill(arg2Scalar);
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+ outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length);
} else {
- outputColVector.fill(arg3Scalar);
+ outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length);
}
+ outputColVector.isRepeating = true;
return;
}
@@ -92,6 +97,7 @@ public class IfExprStringScalarStringScalar extends VectorExpression {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
if (vector1[i] == 1) {
outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
} else {
@@ -99,6 +105,7 @@ public class IfExprStringScalarStringScalar extends VectorExpression {
}
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
if (vector1[i] == 1) {
outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
@@ -111,6 +118,7 @@ public class IfExprStringScalarStringScalar extends VectorExpression {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
} else {
@@ -118,6 +126,7 @@ public class IfExprStringScalarStringScalar extends VectorExpression {
}
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
index ee3cd19..ed21ce9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
@@ -64,8 +64,10 @@ public abstract class IfExprTimestampColumnColumnBase extends VectorExpression {
TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -81,7 +83,7 @@ public abstract class IfExprTimestampColumnColumnBase extends VectorExpression {
* of code paths.
*/
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -93,6 +95,11 @@ public abstract class IfExprTimestampColumnColumnBase extends VectorExpression {
arg2ColVector.flatten(batch.selectedInUse, sel, n);
arg3ColVector.flatten(batch.selectedInUse, sel, n);
+ /*
+ * Do careful maintenance of NULLs.
+ */
+ outputColVector.noNulls = false;
+
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
index b98ddbe..c0cb2c1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java
@@ -19,13 +19,12 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.sql.Timestamp;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
* Compute IF(expr1, expr2, expr3) for 3 input column expressions.
@@ -70,8 +69,10 @@ public abstract class IfExprTimestampColumnScalarBase extends VectorExpression {
TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -81,7 +82,7 @@ public abstract class IfExprTimestampColumnScalarBase extends VectorExpression {
}
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
outputColVector.fill(arg3Scalar);
@@ -93,13 +94,19 @@ public abstract class IfExprTimestampColumnScalarBase extends VectorExpression {
// reduce the number of code paths needed below.
arg2ColVector.flatten(batch.selectedInUse, sel, n);
+ /*
+ * Since we always set a value, make sure all isNull entries are set to false.
+ */
+
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index abd585d..0798f1f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.sql.Timestamp;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -69,8 +70,10 @@ public abstract class IfExprTimestampScalarColumnBase extends VectorExpression {
TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -80,7 +83,7 @@ public abstract class IfExprTimestampScalarColumnBase extends VectorExpression {
}
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
outputColVector.fill(arg2Scalar);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -96,16 +99,44 @@ public abstract class IfExprTimestampScalarColumnBase extends VectorExpression {
if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.set(
+ i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.set(
+ i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
+ outputColVector.set(
+ i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i));
}
}
} else /* there are nulls */ {
+
+ /*
+ * Do careful maintenance of NULLs.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 24299e9..0059c58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -68,8 +68,10 @@ public abstract class IfExprTimestampScalarScalarBase extends VectorExpression {
TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = false; // output is a scalar which we know is non null
- outputColVector.isRepeating = false; // may override later
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
@@ -79,18 +81,44 @@ public abstract class IfExprTimestampScalarScalarBase extends VectorExpression {
}
if (arg1ColVector.isRepeating) {
- if (vector1[0] == 1) {
+ if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
outputColVector.fill(arg2Scalar);
} else {
outputColVector.fill(arg3Scalar);
}
- } else if (arg1ColVector.noNulls) {
+ return;
+ }
+
+ /*
+ * Since we always set a value, make sure all isNull entries are set to false.
+ */
+
+ if (arg1ColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
}
@@ -99,16 +127,16 @@ public abstract class IfExprTimestampScalarScalarBase extends VectorExpression {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
arg2Scalar : arg3Scalar);
- outputIsNull[i] = false;
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
arg2Scalar : arg3Scalar);
}
- Arrays.fill(outputIsNull, 0, n, false);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index 6b141d1..a5cddc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -52,35 +54,45 @@ public class IsNotNull extends VectorExpression {
ColumnVector inputColVector = batch.cols[colNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
int n = batch.size;
- long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector;
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ long[] outputVector = outputColVector.vector;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n <= 0) {
// Nothing to do
return;
}
- // output never has nulls for this operator
- batch.cols[outputColumnNum].noNulls = true;
- if (inputColVector.noNulls) {
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.noNulls) {
+ outputColVector.isRepeating = true;
+ outputIsNull[0] = false;
outputVector[0] = 1;
- batch.cols[outputColumnNum].isRepeating = true;
} else if (inputColVector.isRepeating) {
- // All must be selected otherwise size would be zero
- // Selection property will not change.
- outputVector[0] = nullPos[0] ? 0 : 1;
- batch.cols[outputColumnNum].isRepeating = true;
+ outputColVector.isRepeating = true;
+ outputIsNull[0] = false;
+ outputVector[0] = inputIsNull[0] ? 0 : 1;
} else {
- batch.cols[outputColumnNum].isRepeating = false;
+
+ /*
+ * Since we have a result for all rows, we don't need to do conditional NULL maintenance or
+ * turn off noNulls.
+ */
+
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = nullPos[i] ? 0 : 1;
+ outputIsNull[i] = false;
+ outputVector[i] = inputIsNull[i] ? 0 : 1;
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
- outputVector[i] = nullPos[i] ? 0 : 1;
+ outputVector[i] = inputIsNull[i] ? 0 : 1;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 7347800..17d567f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -52,34 +54,47 @@ public class IsNull extends VectorExpression {
ColumnVector inputColVector = batch.cols[colNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
int n = batch.size;
- long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector;
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ long[] outputVector = outputColVector.vector;
+ boolean[] outputIsNull = outputColVector.isNull;
+
if (n <= 0) {
// Nothing to do, this is EOF
return;
}
- // output never has nulls for this operator
- batch.cols[outputColumnNum].noNulls = true;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.noNulls) {
+ outputColVector.isRepeating = true;
+ outputIsNull[0] = false;
outputVector[0] = 0;
- batch.cols[outputColumnNum].isRepeating = true;
} else if (inputColVector.isRepeating) {
- outputVector[0] = nullPos[0] ? 1 : 0;
- batch.cols[outputColumnNum].isRepeating = true;
+ outputColVector.isRepeating = true;
+ outputIsNull[0] = false;
+ outputVector[0] = inputIsNull[0] ? 1 : 0;
} else {
+
+ /*
+ * Since we have a result for all rows, we don't need to do conditional NULL maintenance or
+ * turn off noNulls.
+ */
+
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = nullPos[i] ? 1 : 0;
+ outputIsNull[i] = false;
+ outputVector[i] = inputIsNull[i] ? 1 : 0;
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
- outputVector[i] = nullPos[i] ? 1 : 0;
+ outputVector[i] = inputIsNull[i] ? 1 : 0;
}
}
- batch.cols[outputColumnNum].isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
index dfe3bd1..9d22a3c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
@@ -56,7 +56,9 @@ public class ListIndexColColumn extends VectorExpression {
LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum];
long[] indexV = indexColumnVector.vector;
- outV.noNulls = true;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
+
if (listV.isRepeating) {
if (listV.isNull[0]) {
outV.isNull[0] = true;
@@ -68,8 +70,8 @@ public class ListIndexColColumn extends VectorExpression {
outV.isNull[0] = true;
outV.noNulls = false;
} else {
- outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
outV.isNull[0] = false;
+ outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
}
outV.isRepeating = true;
} else {
@@ -79,11 +81,11 @@ public class ListIndexColColumn extends VectorExpression {
outV.isNull[j] = true;
outV.noNulls = false;
} else {
- outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV);
outV.isNull[j] = false;
+ outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV);
+
}
}
- outV.isRepeating = false;
}
}
} else {
@@ -93,11 +95,10 @@ public class ListIndexColColumn extends VectorExpression {
outV.isNull[j] = true;
outV.noNulls = false;
} else {
- outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV);
outV.isNull[j] = false;
+ outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV);
}
}
- outV.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
index 62860df..948652a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
@@ -53,7 +53,10 @@ public class ListIndexColScalar extends VectorExpression {
ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum];
ColumnVector childV = listV.child;
- outV.noNulls = true;
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
if (listV.isRepeating) {
if (listV.isNull[0]) {
outV.isNull[0] = true;
@@ -63,8 +66,8 @@ public class ListIndexColScalar extends VectorExpression {
outV.isNull[0] = true;
outV.noNulls = false;
} else {
- outV.setElement(0, (int) (listV.offsets[0] + index), childV);
outV.isNull[0] = false;
+ outV.setElement(0, (int) (listV.offsets[0] + index), childV);
}
}
outV.isRepeating = true;
@@ -75,8 +78,8 @@ public class ListIndexColScalar extends VectorExpression {
outV.isNull[j] = true;
outV.noNulls = false;
} else {
- outV.setElement(j, (int) (listV.offsets[j] + index), childV);
outV.isNull[j] = false;
+ outV.setElement(j, (int) (listV.offsets[j] + index), childV);
}
}
outV.isRepeating = false;
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index c2f7143..42483c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -69,9 +69,9 @@ public class LongColDivideLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);