You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:40 UTC
[26/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
index 3542a07..9208cd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -69,104 +71,120 @@ public class ColOrCol extends VectorExpression {
return;
}
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
+
long vector1Value = vector1[0];
long vector2Value = vector2[0];
if (inputColVector1.noNulls && inputColVector2.noNulls) {
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+
// All must be selected otherwise size would be zero
// Repeating property will not change.
outV.isRepeating = true;
+ outputIsNull[0] = false;
outputVector[0] = vector1[0] | vector2[0];
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputVector[i] = vector1Value | vector2[i];
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value | vector2[i];
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputVector[i] = vector1[i] | vector2Value;
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2Value;
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputVector[i] = vector1[i] | vector2[i];
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2[i];
}
}
- outV.isRepeating = false;
}
- outV.noNulls = true;
- } else if (inputColVector1.noNulls && !inputColVector2.noNulls) {
+ return;
+ }
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outV.noNulls = false;
+
+ if (inputColVector1.noNulls && !inputColVector2.noNulls) {
// only input 2 side has nulls
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
// All must be selected otherwise size would be zero
// Repeating property will not change.
outV.isRepeating = true;
outputVector[0] = vector1[0] | vector2[0];
- outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0];
+ outputIsNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0];
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1Value | vector2[i];
- outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i];
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value | vector2[i];
- outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i];
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] | vector2Value;
- outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0];
+ outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0];
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2Value;
- outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0];
+ outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0];
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] | vector2[i];
- outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i];
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2[i];
- outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i];
}
}
- outV.isRepeating = false;
}
- outV.noNulls = false;
} else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
// only input 1 side has nulls
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
@@ -174,51 +192,47 @@ public class ColOrCol extends VectorExpression {
// Repeating property will not change.
outV.isRepeating = true;
outputVector[0] = vector1[0] | vector2[0];
- outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0);
+ outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0);
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1Value | vector2[i];
- outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0);
+ outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value | vector2[i];
- outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0);
+ outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0);
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] | vector2Value;
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2Value;
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0);
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] | vector2[i];
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2[i];
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0);
}
}
- outV.isRepeating = false;
}
- outV.noNulls = false;
} else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{
// either input 1 or input 2 may have nulls
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
@@ -226,7 +240,7 @@ public class ColOrCol extends VectorExpression {
// Repeating property will not change.
outV.isRepeating = true;
outputVector[0] = vector1[0] | vector2[0];
- outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0])
+ outputIsNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0])
|| (inputColVector1.isNull[0] && (vector2[0] == 0))
|| (inputColVector1.isNull[0] && inputColVector2.isNull[0]);
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
@@ -234,57 +248,53 @@ public class ColOrCol extends VectorExpression {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1Value | vector2[i];
- outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[0] && (vector2[i] == 0))
|| (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value | vector2[i];
- outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[0] && (vector2[i] == 0))
|| (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] | vector2Value;
- outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0])
+ outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0])
|| (inputColVector1.isNull[i] && (vector2[0] == 0))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2Value;
- outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0])
+ outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0])
|| (inputColVector1.isNull[i] && (vector2[0] == 0))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] | vector2[i];
- outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[i] && (vector2[i] == 0))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] | vector2[i];
- outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[i] && (vector2[i] == 0))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
}
}
- outV.isRepeating = false;
}
- outV.noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
index c7cab2a..5b89131 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
@@ -121,70 +121,84 @@ public class ConstantVectorExpression extends VectorExpression {
isNullValue = isNull;
}
+ /*
+ * In the following evaluate* methods, since we are supporting scratch column reuse, we must
+ * assume the column may have noNulls of false and some isNull entries true.
+ *
+ * So, do proper assignments.
+ */
+
private void evaluateLong(VectorizedRowBatch vrg) {
+
LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum];
cv.isRepeating = true;
- cv.noNulls = !isNullValue;
if (!isNullValue) {
+ cv.isNull[0] = false;
cv.vector[0] = longValue;
} else {
cv.isNull[0] = true;
+ cv.noNulls = false;
}
}
private void evaluateDouble(VectorizedRowBatch vrg) {
DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum];
cv.isRepeating = true;
- cv.noNulls = !isNullValue;
if (!isNullValue) {
+ cv.isNull[0] = false;
cv.vector[0] = doubleValue;
} else {
cv.isNull[0] = true;
+ cv.noNulls = false;
}
}
private void evaluateBytes(VectorizedRowBatch vrg) {
BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum];
cv.isRepeating = true;
- cv.noNulls = !isNullValue;
cv.initBuffer();
if (!isNullValue) {
+ cv.isNull[0] = false;
cv.setVal(0, bytesValue, 0, bytesValueLength);
} else {
cv.isNull[0] = true;
+ cv.noNulls = false;
}
}
private void evaluateDecimal(VectorizedRowBatch vrg) {
DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum];
dcv.isRepeating = true;
- dcv.noNulls = !isNullValue;
if (!isNullValue) {
- dcv.vector[0].set(decimalValue);
+ dcv.isNull[0] = false;
+ dcv.set(0, decimalValue);
} else {
dcv.isNull[0] = true;
+ dcv.noNulls = false;
}
}
private void evaluateTimestamp(VectorizedRowBatch vrg) {
- TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum];
- dcv.isRepeating = true;
- dcv.noNulls = !isNullValue;
+ TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum];
+ tcv.isRepeating = true;
if (!isNullValue) {
- dcv.set(0, timestampValue);
+ tcv.isNull[0] = false;
+ tcv.set(0, timestampValue);
} else {
- dcv.isNull[0] = true;
+ tcv.isNull[0] = true;
+ tcv.noNulls = false;
}
}
private void evaluateIntervalDayTime(VectorizedRowBatch vrg) {
IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum];
dcv.isRepeating = true;
- dcv.noNulls = !isNullValue;
if (!isNullValue) {
+ dcv.isNull[0] = false;
dcv.set(0, intervalDayTimeValue);
} else {
dcv.isNull[0] = true;
+ dcv.noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
index d91b09c..7342d9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java
@@ -82,12 +82,9 @@ public class DateColSubtractDateColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
index 2699681..3ea189a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.sql.Timestamp;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -73,8 +74,6 @@ public class DateColSubtractDateScalar extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector1.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector1.noNulls;
- outputColVector.isRepeating = inputColVector1.isRepeating;
int n = batch.size;
long[] vector1 = inputColVector1.vector;
@@ -83,43 +82,81 @@ public class DateColSubtractDateScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector1.isRepeating) {
- scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0]));
- dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
- outputColVector.setFromScratchIntervalDayTime(0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
- } else if (inputColVector1.noNulls) {
+ if (inputColVector1.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0]));
+ dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
+ outputColVector.setFromScratchIntervalDayTime(0);
+
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+ return;
+ }
+
+ if (inputColVector1.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
- dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
- outputColVector.setFromScratchIntervalDayTime(i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+ dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
+ outputColVector.setFromScratchIntervalDayTime(i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
+ dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
+ outputColVector.setFromScratchIntervalDayTime(i);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
}
}
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = inputIsNull[i];
scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
- outputIsNull[i] = inputIsNull[i];
}
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i]));
dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
index 946b738..a87ae39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.sql.Timestamp;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.*;
@@ -75,8 +76,6 @@ public class DateScalarSubtractDateColumn extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector2.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector2.noNulls;
- outputColVector.isRepeating = inputColVector2.isRepeating;
int n = batch.size;
long[] vector2 = inputColVector2.vector;
@@ -86,43 +85,61 @@ public class DateScalarSubtractDateColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector2.isRepeating) {
- scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0]));
- dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime());
- outputColVector.setFromScratchIntervalDayTime(0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
- } else if (inputColVector2.noNulls) {
+ if (inputColVector2.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0]));
+ dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime());
+ outputColVector.setFromScratchIntervalDayTime(0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+ return;
+ }
+
+ if (inputColVector2.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
}
}
- } else { /* there are nulls */
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = inputIsNull[i];
scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
- outputIsNull[i] = inputIsNull[i];
}
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i]));
dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime());
outputColVector.setFromScratchIntervalDayTime(i);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
index 9a8177c..a677f90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java
@@ -24,10 +24,9 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descript
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.util.DateTimeMath;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import java.sql.Timestamp;
import java.util.Arrays;
import java.util.HashSet;
@@ -80,8 +79,8 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE
DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColumnVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColumnVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
HiveDecimalWritable[] vector = inputColumnVector.vector;
long[] outputVector = outputColVector.vector;
@@ -91,49 +90,68 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColumnVector.noNulls;
- if (inputColumnVector.noNulls) {
- if (inputColumnVector.isRepeating) {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
+ if (inputColumnVector.isRepeating) {
+ if (inputColumnVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = inSet.contains(vector[0]) ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
}
}
- } else {
- if (inputColumnVector.isRepeating) {
-
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = inSet.contains(vector[0]) ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outNulls[i] = nullPos[i];
- if (!nullPos[i]) {
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
}
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!nullPos[i]) {
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.contains(vector[i]) ? 1 : 0;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
index 791d8f2..452bd5e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ abstract public class DecimalToStringUnaryUDF extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i);
+ abstract protected void func(BytesColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -52,62 +54,86 @@ abstract public class DecimalToStringUnaryUDF extends VectorExpression {
super.evaluateChildren(batch);
}
- DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+ DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ outputColVector.initBuffer();
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
//Nothing to do
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
index ba83b6a..89e58f1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
@@ -65,8 +65,8 @@ public class DoubleColumnInList extends VectorExpression implements IDoubleInExp
DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
double[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -76,49 +76,69 @@ public class DoubleColumnInList extends VectorExpression implements IDoubleInExp
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
}
}
- } else {
- if (inputColVector.isRepeating) {
-
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outNulls[i] = nullPos[i];
- if (!nullPos[i]) {
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
}
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!nullPos[i]) {
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
index c8b1dad..b33046e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ abstract public class DoubleToStringUnaryUDF extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(BytesColumnVector outV, double[] vector, int i);
+ abstract protected void func(BytesColumnVector outputColVector, double[] vector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -57,59 +59,83 @@ abstract public class DoubleToStringUnaryUDF extends VectorExpression {
int[] sel = batch.selected;
int n = batch.size;
double[] vector = inputColVector.vector;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.initBuffer();
+ boolean[] inputIsNull = inputColVector.isNull;
if (n == 0) {
//Nothing to do
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, vector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- func(outV, vector, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, vector, i);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, vector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, vector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, vector, i);
+ func(outputColVector, vector, i);
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are NULLs in the inputColVector */ {
// Handle case with nulls. Don't do function if the value is null,
// because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- func(outV, vector, 0);
- }
- } else if (batch.selectedInUse) {
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inputColVector.isNull[i];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- func(outV, vector, i);
+ func(outputColVector, vector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
if (!inputColVector.isNull[i]) {
- func(outV, vector, i);
+ func(outputColVector, vector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
index 1c1bc0b..252a816 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java
@@ -77,73 +77,73 @@ public class DynamicValueVectorExpression extends VectorExpression {
private void evaluateLong(VectorizedRowBatch vrg) {
LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum];
cv.isRepeating = true;
- cv.noNulls = !isNullValue;
if (!isNullValue) {
- cv.vector[0] = longValue;
cv.isNull[0] = false;
+ cv.vector[0] = longValue;
} else {
cv.isNull[0] = true;
+ cv.noNulls = false;
}
}
private void evaluateDouble(VectorizedRowBatch vrg) {
DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum];
cv.isRepeating = true;
- cv.noNulls = !isNullValue;
if (!isNullValue) {
- cv.vector[0] = doubleValue;
cv.isNull[0] = false;
+ cv.vector[0] = doubleValue;
} else {
cv.isNull[0] = true;
+ cv.noNulls = false;
}
}
private void evaluateBytes(VectorizedRowBatch vrg) {
BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum];
cv.isRepeating = true;
- cv.noNulls = !isNullValue;
cv.initBuffer();
if (!isNullValue) {
- cv.setVal(0, bytesValue, 0, bytesValueLength);
cv.isNull[0] = false;
+ cv.setVal(0, bytesValue, 0, bytesValueLength);
} else {
cv.isNull[0] = true;
+ cv.noNulls = false;
}
}
private void evaluateDecimal(VectorizedRowBatch vrg) {
DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum];
dcv.isRepeating = true;
- dcv.noNulls = !isNullValue;
if (!isNullValue) {
- dcv.vector[0].set(decimalValue);
dcv.isNull[0] = false;
+ dcv.set(0, decimalValue);
} else {
dcv.isNull[0] = true;
+ dcv.noNulls = false;
}
}
private void evaluateTimestamp(VectorizedRowBatch vrg) {
TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum];
dcv.isRepeating = true;
- dcv.noNulls = !isNullValue;
if (!isNullValue) {
- dcv.set(0, timestampValue);
dcv.isNull[0] = false;
+ dcv.set(0, timestampValue);
} else {
dcv.isNull[0] = true;
+ dcv.noNulls = false;
}
}
private void evaluateIntervalDayTime(VectorizedRowBatch vrg) {
IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum];
dcv.isRepeating = true;
- dcv.noNulls = !isNullValue;
if (!isNullValue) {
- dcv.set(0, intervalDayTimeValue);
dcv.isNull[0] = false;
+ dcv.set(0, intervalDayTimeValue);
} else {
dcv.isNull[0] = true;
+ dcv.noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
index 28d800e..2d8becf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public abstract class FuncDecimalToDouble extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(DoubleColumnVector outV, DecimalColumnVector inV, int i);
+ abstract protected void func(DoubleColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -52,10 +54,13 @@ public abstract class FuncDecimalToDouble extends VectorExpression {
super.evaluateChildren(batch);
}
- DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+ DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum];
+ DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -63,51 +68,72 @@ public abstract class FuncDecimalToDouble extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
index 5fb9778..0ef3da0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -51,7 +53,7 @@ public abstract class FuncDecimalToLong extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(LongColumnVector outV, DecimalColumnVector inV, int i);
+ abstract protected void func(LongColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -65,10 +67,13 @@ public abstract class FuncDecimalToLong extends VectorExpression {
integerPrimitiveCategoryKnown = true;
}
- DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+ DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -76,51 +81,72 @@ public abstract class FuncDecimalToLong extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
index f518f39..8324506 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ public abstract class FuncDecimalToTimestamp extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i);
+ abstract protected void func(TimestampColumnVector outputColVector, DecimalColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -53,10 +55,13 @@ public abstract class FuncDecimalToTimestamp extends VectorExpression {
super.evaluateChildren(batch);
}
- DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+ DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumnNum];
+ TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -64,51 +69,72 @@ public abstract class FuncDecimalToTimestamp extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
index e632ff9..b67632a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public abstract class FuncDoubleToDecimal extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i);
+ abstract protected void func(DecimalColumnVector outputColVector, DoubleColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -52,10 +54,13 @@ public abstract class FuncDoubleToDecimal extends VectorExpression {
super.evaluateChildren(batch);
}
- DoubleColumnVector inV = (DoubleColumnVector) batch.cols[inputColumn];
+ DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+ DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -63,51 +68,72 @@ public abstract class FuncDoubleToDecimal extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
index d500612..1b8707e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -43,7 +45,7 @@ public abstract class FuncLongToDecimal extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(DecimalColumnVector outV, LongColumnVector inV, int i);
+ abstract protected void func(DecimalColumnVector outputColVector, LongColumnVector inputColVector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -52,10 +54,13 @@ public abstract class FuncLongToDecimal extends VectorExpression {
super.evaluateChildren(batch);
}
- LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn];
+ LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+ DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -63,51 +68,72 @@ public abstract class FuncLongToDecimal extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
index f93dbfc..733444e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.sql.Timestamp;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -70,58 +71,83 @@ public abstract class FuncLongToString extends VectorExpression {
int[] sel = batch.selected;
int n = batch.size;
long[] vector = inputColVector.vector;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ outputColVector.initBuffer();
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
//Nothing to do
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[0] = false;
+ prepareResult(0, vector, outputColVector);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- prepareResult(0, vector, outV);
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- prepareResult(i, vector, outV);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[i] = false;
+ prepareResult(i, vector, outputColVector);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ prepareResult(i, vector, outputColVector);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- prepareResult(i, vector, outV);
+ prepareResult(i, vector, outputColVector);
}
- outV.isRepeating = false;
}
- } else {
- // Handle case with nulls. Don't do function if the value is null, to save time,
- // because calling the function can be expensive.
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- prepareResult(0, vector, outV);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- prepareResult(i, vector, outV);
+ prepareResult(i, vector, outputColVector);
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
} else {
for(int i = 0; i != n; i++) {
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- prepareResult(i, vector, outV);
+ prepareResult(i, vector, outputColVector);
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
}
}
}
@@ -129,7 +155,7 @@ public abstract class FuncLongToString extends VectorExpression {
/* Evaluate result for position i (using bytes[] to avoid storage allocation costs)
* and set position i of the output vector to the result.
*/
- abstract void prepareResult(int i, long[] vector, BytesColumnVector outV);
+ abstract void prepareResult(int i, long[] vector, BytesColumnVector outputColVector);
@Override
public String vectorExpressionParameters() {
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
index 1a94408..aebfa25 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -55,8 +56,12 @@ public class FuncRand extends VectorExpression {
int[] sel = batch.selected;
int n = batch.size;
double[] outputVector = outputColVector.vector;
- outputColVector.noNulls = true;
outputColVector.isRepeating = false;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
// return immediately if batch is empty
if (n == 0) {
@@ -64,11 +69,30 @@ public class FuncRand extends VectorExpression {
}
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = random.nextDouble();
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = random.nextDouble();
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = random.nextDouble();
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = random.nextDouble();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
index d289dff..f0d7c60 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -55,8 +56,12 @@ public class FuncRandNoSeed extends VectorExpression {
int[] sel = batch.selected;
int n = batch.size;
double[] outputVector = outputColVector.vector;
- outputColVector.noNulls = true;
outputColVector.isRepeating = false;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
// return immediately if batch is empty
if (n == 0) {
@@ -64,11 +69,30 @@ public class FuncRandNoSeed extends VectorExpression {
}
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = random.nextDouble();
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = random.nextDouble();
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = random.nextDouble();
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = random.nextDouble();
}