You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:36 UTC
[22/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
index 544b700..9b7005d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
@@ -71,8 +71,10 @@ public class StringUnaryUDF extends VectorExpression {
byte[][] vector = inputColVector.vector;
int [] start = inputColVector.start;
int [] length = inputColVector.length;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.initBuffer();
Text t;
if (n == 0) {
@@ -86,72 +88,86 @@ public class StringUnaryUDF extends VectorExpression {
// It's implemented in the simplest way now, just calling the
// existing built-in function.
- if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
s.set(vector[0], start[0], length[0]);
t = func.evaluate(s);
- setString(outV, 0, t);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
+ setString(outputColVector, 0, t);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
- /* Fill output isNull with false for selected elements since there is a chance we'll
- * convert to noNulls == false in setString();
- */
- outV.isNull[i] = false;
- s.set(vector[i], start[i], length[i]);
- t = func.evaluate(s);
- setString(outV, i, t);
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ s.set(vector[i], start[i], length[i]);
+ t = func.evaluate(s);
+ setString(outputColVector, i, t);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ s.set(vector[i], start[i], length[i]);
+ t = func.evaluate(s);
+ setString(outputColVector, i, t);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
- // Set all elements to not null. The setString call can override this.
- Arrays.fill(outV.isNull, 0, n, false);
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
s.set(vector[i], start[i], length[i]);
t = func.evaluate(s);
- setString(outV, i, t);
+ setString(outputColVector, i, t);
}
- outV.isRepeating = false;
}
- } else {
- // Handle case with nulls. Don't do function if the value is null, to save time,
- // because calling the function can be expensive.
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0]; // setString can override this
- if (!inputColVector.isNull[0]) {
- s.set(vector[0], start[0], length[0]);
- t = func.evaluate(s);
- setString(outV, 0, t);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inputColVector.isNull[i]; // setString can override this
+ outputColVector.isNull[i] = inputColVector.isNull[i]; // setString can override this
if (!inputColVector.isNull[i]) {
s.set(vector[i], start[i], length[i]);
t = func.evaluate(s);
- setString(outV, i, t);
+ setString(outputColVector, i, t);
}
}
- outV.isRepeating = false;
} else {
// setString can override this null propagation
- System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
if (!inputColVector.isNull[i]) {
s.set(vector[i], start[i], length[i]);
t = func.evaluate(s);
- setString(outV, i, t);
+ setString(outputColVector, i, t);
}
}
- outV.isRepeating = false;
}
}
}
@@ -159,13 +175,13 @@ public class StringUnaryUDF extends VectorExpression {
/* Set the output string entry i to the contents of Text object t.
* If t is a null object reference, record that the value is a SQL NULL.
*/
- private static void setString(BytesColumnVector outV, int i, Text t) {
+ private static void setString(BytesColumnVector outputColVector, int i, Text t) {
if (t == null) {
- outV.noNulls = false;
- outV.isNull[i] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
return;
}
- outV.setVal(i, t.getBytes(), 0, t.getLength());
+ outputColVector.setVal(i, t.getBytes(), 0, t.getLength());
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
index 2f8b627..9462347 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -44,7 +46,7 @@ abstract public class StringUnaryUDFDirect extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i);
+ abstract protected void func(BytesColumnVector outputColVector, byte[][] vector, int[] start, int[] length, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -59,59 +61,82 @@ abstract public class StringUnaryUDFDirect extends VectorExpression {
byte[][] vector = inputColVector.vector;
int start[] = inputColVector.start;
int length[] = inputColVector.length;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.initBuffer();
if (n == 0) {
//Nothing to do
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, vector, start, length, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- func(outV, vector, start, length, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, vector, start, length, i);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, vector, start, length, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, vector, start, length, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, vector, start, length, i);
+ func(outputColVector, vector, start, length, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- func(outV, vector, start, length, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inputColVector.isNull[i];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- func(outV, vector, start, length, i);
+ func(outputColVector, vector, start, length, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
if (!inputColVector.isNull[i]) {
- func(outV, vector, start, length, i);
+ func(outputColVector, vector, start, length, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
index 7fb95f5..31a0ad1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
@@ -73,8 +73,8 @@ public class TimestampColumnInList extends VectorExpression implements ITimestam
TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] outputVector = outputColVector.vector;
@@ -83,49 +83,69 @@ public class TimestampColumnInList extends VectorExpression implements ITimestam
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
}
}
- } else {
- if (inputColVector.isRepeating) {
-
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outNulls[i] = nullPos[i];
- if (!nullPos[i]) {
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
}
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!nullPos[i]) {
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
index 5eb2090..13abfd3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ abstract public class TimestampToStringUnaryUDF extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i);
+ abstract protected void func(BytesColumnVector outputColVector, TimestampColumnVector inV, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -56,59 +58,82 @@ abstract public class TimestampToStringUnaryUDF extends VectorExpression {
TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.initBuffer();
if (n == 0) {
//Nothing to do
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- func(outV, inputColVector, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inputColVector, i);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inputColVector, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- func(outV, inputColVector, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inputColVector.isNull[i];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- func(outV, inputColVector, i);
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
if (!inputColVector.isNull[i]) {
- func(outV, inputColVector, i);
+ func(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
index ea78a2e..3a560ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
@@ -35,6 +35,10 @@ public class VectorCoalesce extends VectorExpression {
private final int[] inputColumns;
+ // The batch indices of the rows that have not received a non-NULL value yet.
+ // A temporary work array.
+ private transient int[] unassignedBatchIndices;
+
public VectorCoalesce(int [] inputColumns, int outputColumnNum) {
super(outputColumnNum);
this.inputColumns = inputColumns;
@@ -57,66 +61,174 @@ public class VectorCoalesce extends VectorExpression {
int[] sel = batch.selected;
int n = batch.size;
- ColumnVector outputVector = batch.cols[outputColumnNum];
+ ColumnVector outputColVector = batch.cols[outputColumnNum];
+ boolean[] outputIsNull = outputColVector.isNull;
if (n <= 0) {
// Nothing to do
return;
}
- outputVector.init();
+ if (unassignedBatchIndices == null || n > unassignedBatchIndices.length) {
+
+ // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE).
+ final int roundUpSize =
+ ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE)
+ * VectorizedRowBatch.DEFAULT_SIZE;
+ unassignedBatchIndices = new int[roundUpSize];
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- boolean noNulls = false;
+ // CONSIDER: Should we do this for all vector expressions that can
+ // work on BytesColumnVector output columns???
+ outputColVector.init();
+
+ final int columnCount = inputColumns.length;
+
+ /*
+ * Process the input columns to find a non-NULL value for each row.
+ *
+ * We track the unassigned batchIndex of the rows that have not received
+ * a non-NULL value yet. Similar to a selected array.
+ */
+ boolean isAllUnassigned = true;
+ int unassignedColumnCount = 0;
for (int k = 0; k < inputColumns.length; k++) {
ColumnVector cv = batch.cols[inputColumns[k]];
- // non-nulls in any column qualifies coalesce having no nulls
- // common case: last column is a constant & non-null
- noNulls = noNulls || cv.noNulls;
- }
-
- outputVector.noNulls = noNulls;
- outputVector.isRepeating = false;
-
- ColumnVector first = batch.cols[inputColumns[0]];
-
- if (first.noNulls && first.isRepeating) {
- outputVector.isRepeating = true;
- outputVector.isNull[0] = false;
- outputVector.setElement(0, 0, first);
- } else if (batch.selectedInUse) {
- for (int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector.isNull[i] = true;
- for (int k = 0; k < inputColumns.length; k++) {
- ColumnVector cv = batch.cols[inputColumns[k]];
- if ( (cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
- outputVector.isNull[i] = false;
- outputVector.setElement(i, 0, cv);
- break;
- } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
- outputVector.isNull[i] = false;
- outputVector.setElement(i, i, cv);
- break;
+ if (cv.isRepeating) {
+
+ if (cv.noNulls || !cv.isNull[0]) {
+
+ /*
+ * With a repeating value we can finish all remaining rows.
+ */
+ if (isAllUnassigned) {
+
+ // No other columns provided non-NULL values. We can return repeated output.
+ outputIsNull[0] = false;
+ outputColVector.setElement(0, 0, cv);
+ outputColVector.isRepeating = true;
+ return;
+ } else {
+
+ // Some rows have already been assigned values. Assign the remaining.
+ // We cannot use copySelected method here.
+ for (int i = 0; i < unassignedColumnCount; i++) {
+ final int batchIndex = unassignedBatchIndices[i];
+ outputIsNull[batchIndex] = false;
+
+ // Our input is repeating, so always read input element 0.
+ outputColVector.setElement(batchIndex, 0, cv);
+ }
+ return;
}
+ } else {
+
+ // Repeated NULLs -- skip this input column.
}
- }
- } else {
- for (int i = 0; i != n; i++) {
- outputVector.isNull[i] = true;
- for (int k = 0; k < inputColumns.length; k++) {
- ColumnVector cv = batch.cols[inputColumns[k]];
- if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
- outputVector.isNull[i] = false;
- outputVector.setElement(i, 0, cv);
- break;
- } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
- outputVector.isNull[i] = false;
- outputVector.setElement(i, i, cv);
- break;
+ } else {
+
+ /*
+ * Non-repeating input column. Use any non-NULL values for unassigned rows.
+ */
+ if (isAllUnassigned) {
+
+ /*
+ * No other columns provided non-NULL values. We *may* be able to finish all rows
+ * with this input column...
+ */
+ if (cv.noNulls){
+
+ // Since no NULLs, we can provide values for all rows.
+ if (batch.selectedInUse) {
+ for (int i = 0; i < n; i++) {
+ final int batchIndex = sel[i];
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, cv);
+ }
+ } else {
+ Arrays.fill(outputIsNull, 0, n, false);
+ for (int batchIndex = 0; batchIndex < n; batchIndex++) {
+ outputColVector.setElement(batchIndex, batchIndex, cv);
+ }
+ }
+ return;
+ } else {
+
+ // We might not be able to assign all rows because of input NULLs. Start tracking any
+ // unassigned rows.
+ boolean[] inputIsNull = cv.isNull;
+ if (batch.selectedInUse) {
+ for (int i = 0; i < n; i++) {
+ final int batchIndex = sel[i];
+ if (!inputIsNull[batchIndex]) {
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, cv);
+ } else {
+ unassignedBatchIndices[unassignedColumnCount++] = batchIndex;
+ }
+ }
+ } else {
+ for (int batchIndex = 0; batchIndex < n; batchIndex++) {
+ if (!inputIsNull[batchIndex]) {
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, cv);
+ } else {
+ unassignedBatchIndices[unassignedColumnCount++] = batchIndex;
+ }
+ }
+ }
+ if (unassignedColumnCount == 0) {
+ return;
+ }
+ isAllUnassigned = false;
+ }
+ } else {
+
+ /*
+ * We previously assigned *some* rows with non-NULL values. The batch indices of
+ * the unassigned rows were tracked.
+ */
+ if (cv.noNulls) {
+
+ // Assign all remaining rows.
+ for (int i = 0; i < unassignedColumnCount; i++) {
+ final int batchIndex = unassignedBatchIndices[i];
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, cv);
+ }
+ return;
+ } else {
+
+ // Use any non-NULL values found; remember the remaining unassigned.
+ boolean[] inputIsNull = cv.isNull;
+ int newUnassignedColumnCount = 0;
+ for (int i = 0; i < unassignedColumnCount; i++) {
+ final int batchIndex = unassignedBatchIndices[i];
+ if (!inputIsNull[batchIndex]) {
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, cv);
+ } else {
+ unassignedBatchIndices[newUnassignedColumnCount++] = batchIndex;
+ }
+ }
+ if (newUnassignedColumnCount == 0) {
+ return;
+ }
+ unassignedColumnCount = newUnassignedColumnCount;
}
}
}
}
+
+ // NULL out the remaining unassigned rows.
+ outputColVector.noNulls = false;
+ for (int i = 0; i < unassignedColumnCount; i++) {
+ final int batchIndex = unassignedBatchIndices[i];
+ outputIsNull[batchIndex] = true;
+ }
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
index 0dde5bd..a30a7df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
@@ -24,6 +24,14 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/*
+ * ELT(index, string, ....) returns the string column/expression value at the specified
+ * index expression.
+ *
+ * The first argument expression indicates the index of the string to be retrieved from
+ * remaining arguments. We return NULL when the index number is less than 1 or
+ * index number is greater than the number of the string arguments.
+ */
public class VectorElt extends VectorExpression {
private static final long serialVersionUID = 1L;
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index f7fdb57..bd594e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -387,8 +387,35 @@ public final class VectorExpressionWriterFactory {
* if the wrong vector column is used.
*/
private static abstract class VectorExpressionWriterDecimal extends VectorExpressionWriterBase {
+
@Override
public Object writeValue(ColumnVector column, int row) throws HiveException {
+ if (column instanceof Decimal64ColumnVector) {
+ Decimal64ColumnVector d64cv = (Decimal64ColumnVector) column;
+ final long decimal64Long;
+ if (d64cv.noNulls && !d64cv.isRepeating) {
+ decimal64Long = d64cv.vector[row];
+ } else if (d64cv.noNulls && d64cv.isRepeating) {
+ decimal64Long = d64cv.vector[0];
+ } else if (!d64cv.noNulls && !d64cv.isRepeating && !d64cv.isNull[row]) {
+ decimal64Long = d64cv.vector[row];
+ } else if (!d64cv.noNulls && !d64cv.isRepeating && d64cv.isNull[row]) {
+ return null;
+ } else if (!d64cv.noNulls && d64cv.isRepeating && !d64cv.isNull[0]) {
+ decimal64Long = d64cv.vector[0];
+ } else if (!d64cv.noNulls && d64cv.isRepeating && d64cv.isNull[0]) {
+ return null;
+ } else {
+ throw new HiveException(
+ String.format(
+ "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
+ row, d64cv.noNulls, d64cv.isRepeating, d64cv.isNull[row], d64cv.isNull[0]));
+ }
+
+ HiveDecimalWritable scratchHiveDecimalWritable = d64cv.getScratchWritable();
+ scratchHiveDecimalWritable.deserialize64(decimal64Long, d64cv.scale);
+ return writeValue(scratchHiveDecimalWritable);
+ }
DecimalColumnVector dcv = (DecimalColumnVector) column;
if (dcv.noNulls && !dcv.isRepeating) {
return writeValue(dcv.vector[row]);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
index b2891a8..f6e9c8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
@@ -91,7 +91,9 @@ public class VectorUDFDateAddColCol extends VectorExpression {
return;
}
- // Handle null
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
switch (primitiveCategory) {
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
index e232555..7bb5c54 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hive.common.util.DateParser;
import java.sql.Date;
+import java.util.Arrays;
public class VectorUDFDateAddColScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -77,52 +78,84 @@ public class VectorUDFDateAddColScalar extends VectorExpression {
super.evaluateChildren(batch);
}
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
ColumnVector inputCol = batch.cols[this.colNum];
/* every line below this is identical for evaluateLong & evaluateString */
final int n = inputCol.isRepeating ? 1 : batch.size;
int[] sel = batch.selected;
final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+ boolean[] outputIsNull = outputColVector.isNull;
if(batch.size == 0) {
/* n != batch.size when isRepeating */
return;
}
- /* true for all algebraic UDFs with no state */
- outV.isRepeating = inputCol.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
switch (primitiveCategory) {
case DATE:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = evaluateDate(inputCol, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = evaluateDate(inputCol, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
}
@@ -130,35 +163,66 @@ public class VectorUDFDateAddColScalar extends VectorExpression {
break;
case TIMESTAMP:
- if (inputCol.noNulls) {
- outV.noNulls = true;
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
}
@@ -168,35 +232,66 @@ public class VectorUDFDateAddColScalar extends VectorExpression {
case STRING:
case CHAR:
case VARCHAR:
- if (inputCol.noNulls) {
- outV.noNulls = true;
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ evaluateString(inputCol, outputColVector, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- evaluateString(inputCol, outV, i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluateString(inputCol, outputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluateString(inputCol, outputColVector, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- evaluateString(inputCol, outV, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ evaluateString(inputCol, outputColVector, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluateString(inputCol, outV, i);
+ evaluateString(inputCol, outputColVector, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluateString(inputCol, outV, i);
+ evaluateString(inputCol, outputColVector, i);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
index 0aaba26..ecde39b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
@@ -30,6 +30,7 @@ import org.apache.hive.common.util.DateParser;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Timestamp;
+import java.util.Arrays;
public class VectorUDFDateAddScalarCol extends VectorExpression {
@@ -91,7 +92,8 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
final int n = inputCol.isRepeating ? 1 : batch.size;
int[] sel = batch.selected;
final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ boolean[] outputIsNull = outputColVector.isNull;
switch (primitiveCategory) {
case DATE:
@@ -107,15 +109,15 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
case VARCHAR:
boolean parsed = dateParser.parseDate(new String(stringValue, StandardCharsets.UTF_8), baseDate);
if (!parsed) {
- outV.noNulls = false;
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j=0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = true;
+ outputColVector.isNull[i] = true;
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = true;
+ outputColVector.isNull[i] = true;
}
}
return;
@@ -130,39 +132,73 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
return;
}
- /* true for all algebraic UDFs with no state */
- outV.isRepeating = inputCol.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
long baseDateDays = DateWritable.millisToDays(baseDate.getTime());
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ evaluate(baseDateDays, inputCol.vector[0], outputColVector, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- evaluate(baseDateDays, inputCol.vector[i], outV, i);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- evaluate(baseDateDays, inputCol.vector[i], outV, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluate(baseDateDays, inputCol.vector[i], outV, i);
+ evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluate(baseDateDays, inputCol.vector[i], outV, i);
+ evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
index 982467e..0d794fe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
@@ -89,6 +89,9 @@ public class VectorUDFDateDiffColCol extends VectorExpression {
return;
}
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
LongColumnVector convertedVector1 = toDateArray(batch, inputTypeInfos[0], inputColVector1, dateVector1);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
index 97e3669..08c91e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
@@ -35,6 +35,7 @@ import java.sql.Date;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
public class VectorUDFDateDiffColScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -80,20 +81,21 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
super.evaluateChildren(batch);
}
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
ColumnVector inputCol = batch.cols[this.colNum];
/* every line below this is identical for evaluateLong & evaluateString */
final int n = inputCol.isRepeating ? 1 : batch.size;
int[] sel = batch.selected;
final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+ boolean[] outputIsNull = outputColVector.isNull;
if(batch.size == 0) {
/* n != batch.size when isRepeating */
return;
}
- /* true for all algebraic UDFs with no state */
- outV.isRepeating = inputCol.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
switch (primitiveCategory1) {
@@ -114,15 +116,15 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
baseDate = DateWritable.dateToDays(date);
break;
} catch (Exception e) {
- outV.noNulls = false;
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j=0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = true;
+ outputColVector.isNull[i] = true;
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = true;
+ outputColVector.isNull[i] = true;
}
}
return;
@@ -134,35 +136,66 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
switch (primitiveCategory0) {
case DATE:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = evaluateDate(inputCol, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = evaluateDate(inputCol, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
}
@@ -170,35 +203,66 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
break;
case TIMESTAMP:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
}
@@ -208,35 +272,66 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
case STRING:
case CHAR:
case VARCHAR:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- evaluateString(inputCol, outV, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ evaluateString(inputCol, outputColVector, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluateString(inputCol, outputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluateString(inputCol, outputColVector, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- evaluateString(inputCol, outV, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ evaluateString(inputCol, outputColVector, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluateString(inputCol, outV, i);
+ evaluateString(inputCol, outputColVector, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluateString(inputCol, outV, i);
+ evaluateString(inputCol, outputColVector, i);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
index c575c05..c436c96 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
@@ -33,6 +33,7 @@ import java.sql.Date;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
public class VectorUDFDateDiffScalarCol extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -78,20 +79,21 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
super.evaluateChildren(batch);
}
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
ColumnVector inputCol = batch.cols[this.colNum];
/* every line below this is identical for evaluateLong & evaluateString */
final int n = inputCol.isRepeating ? 1 : batch.size;
int[] sel = batch.selected;
final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+ boolean[] outputIsNull = outputColVector.isNull;
if(batch.size == 0) {
/* n != batch.size when isRepeating */
return;
}
- /* true for all algebraic UDFs with no state */
- outV.isRepeating = inputCol.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
PrimitiveCategory primitiveCategory0 =
((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
@@ -113,15 +115,15 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
baseDate = DateWritable.dateToDays(date);
break;
} catch (Exception e) {
- outV.noNulls = false;
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j=0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = true;
+ outputColVector.isNull[i] = true;
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = true;
+ outputColVector.isNull[i] = true;
}
}
return;
@@ -134,35 +136,66 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
switch (primitiveCategory1) {
case DATE:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = evaluateDate(inputCol, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = evaluateDate(inputCol, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
- } else {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateDate(inputCol, i);
+ outputColVector.vector[i] = evaluateDate(inputCol, i);
}
}
}
@@ -170,35 +203,66 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
break;
case TIMESTAMP:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
}
}
}
@@ -208,35 +272,66 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
case STRING:
case CHAR:
case VARCHAR:
- if (inputCol.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- evaluateString(inputCol, outV, i);
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ evaluateString(inputCol, outputColVector, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputCol.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluateString(inputCol, outputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluateString(inputCol, outputColVector, i);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- evaluateString(inputCol, outV, i);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ evaluateString(inputCol, outputColVector, i);
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs..
+
// Handle case with nulls. Don't do function if the value is null, to save time,
// because calling the function can be expensive.
- outV.noNulls = false;
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j = 0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluateString(inputCol, outV, i);
+ evaluateString(inputCol, outputColVector, i);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputCol.isNull[i];
+ outputColVector.isNull[i] = inputCol.isNull[i];
if (!inputCol.isNull[i]) {
- evaluateString(inputCol, outV, i);
+ evaluateString(inputCol, outputColVector, i);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
index 9d72bdf..1f2d5cb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
@@ -55,7 +55,10 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
// indexColumnVector includes the keys of Map
indexColumnVector = batch.cols[indexColumnNum];
- outV.noNulls = true;
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
int[] mapValueIndex;
if (mapV.isRepeating) {
if (mapV.isNull[0]) {
@@ -71,9 +74,8 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
outV.noNulls = false;
} else {
// the key is found in MapColumnVector, set the value
- outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
outV.isNull[0] = false;
- outV.noNulls = true;
+ outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
}
outV.isRepeating = true;
} else {
@@ -97,8 +99,8 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
outV.isNull[j] = true;
outV.noNulls = false;
} else {
- outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
outV.isNull[j] = false;
+ outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
}
}
outV.isRepeating = false;
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
index e6a86ae..a7d730b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
@@ -50,7 +50,10 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
ColumnVector outV = batch.cols[outputColumnNum];
MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
- outV.noNulls = true;
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
int[] mapValueIndex;
if (mapV.isRepeating) {
if (mapV.isNull[0]) {
@@ -65,7 +68,6 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
} else {
// the key is found in MapColumnVector, set the value
outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
- outV.noNulls = true;
}
}
outV.isRepeating = true;
@@ -77,8 +79,8 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
outV.isNull[j] = true;
outV.noNulls = false;
} else {
- outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
outV.isNull[j] = false;
+ outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
}
}
outV.isRepeating = false;
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
index 519a4e4..eb6d6dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
import java.util.Calendar;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -82,53 +83,85 @@ public abstract class VectorUDFTimestampFieldDate extends VectorExpression {
super.evaluateChildren(batch);
}
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
ColumnVector inputColVec = batch.cols[this.colNum];
/* every line below this is identical for evaluateLong & evaluateString */
final int n = inputColVec.isRepeating ? 1 : batch.size;
int[] sel = batch.selected;
final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse;
+ boolean[] outputIsNull = outputColVector.isNull;
if(batch.size == 0) {
/* n != batch.size when isRepeating */
return;
}
- /* true for all algebraic UDFs with no state */
- outV.isRepeating = inputColVec.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
LongColumnVector longColVector = (LongColumnVector) inputColVec;
+ if (inputColVec.isRepeating) {
+ if (inputColVec.noNulls || !inputColVec.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ outputColVector.vector[0] = getDateField(longColVector.vector[0]);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVec.noNulls) {
- outV.noNulls = true;
- if (selectedInUse) {
- for(int j=0; j < n; j++) {
- int i = sel[j];
- outV.vector[i] = getDateField(longColVector.vector[i]);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before the call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.vector[i] = getDateField(longColVector.vector[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.vector[i] = getDateField(longColVector.vector[i]);
+ }
}
} else {
- for(int i = 0; i < n; i++) {
- outV.vector[i] = getDateField(longColVector.vector[i]);
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != n; i++) {
+ outputColVector.vector[i] = getDateField(longColVector.vector[i]);
}
}
- } else {
- // Handle case with nulls. Don't do function if the value is null, to save time,
- // because calling the function can be expensive.
- outV.noNulls = false;
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (selectedInUse) {
for(int j=0; j < n; j++) {
int i = sel[j];
- outV.isNull[i] = inputColVec.isNull[i];
+ outputColVector.isNull[i] = inputColVec.isNull[i];
if (!inputColVec.isNull[i]) {
- outV.vector[i] = getDateField(longColVector.vector[i]);
+ outputColVector.vector[i] = getDateField(longColVector.vector[i]);
}
}
} else {
for(int i = 0; i < n; i++) {
- outV.isNull[i] = inputColVec.isNull[i];
+ outputColVector.isNull[i] = inputColVec.isNull[i];
if (!inputColVec.isNull[i]) {
- outV.vector[i] = getDateField(longColVector.vector[i]);
+ outputColVector.vector[i] = getDateField(longColVector.vector[i]);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
index c5762d1..2918546 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
@@ -106,11 +106,27 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression {
return;
}
- // true for all algebraic UDFs with no state
- outV.isRepeating = inputCol.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
+
+ if (inputCol.isRepeating) {
+ if (inputCol.noNulls || !inputCol.isNull[0]) {
+ try {
+ outV.isNull[0] = false;
+ outV.vector[0] = getField(inputCol.vector[0], inputCol.start[0], inputCol.length[0]);
+ } catch (ParseException e) {
+ outV.noNulls = false;
+ outV.isNull[0] = true;
+ }
+ } else {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
if (inputCol.noNulls) {
- outV.noNulls = true;
if (selectedInUse) {
for (int j = 0; j < n; j++) {
int i = sel[j];
@@ -133,11 +149,11 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression {
}
}
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
- // Handle case with nulls. Don't do function if the value is null, to save time,
- // because calling the function can be expensive.
+ // Carefully handle NULLs...
outV.noNulls = false;
+
if (selectedInUse) {
for (int j = 0; j < n; j++) {
int i = sel[j];