You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:41 UTC
[27/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
index a2e4a52..8326002 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -52,9 +54,9 @@ public class CastDecimalToDecimal extends VectorExpression {
* Convert input decimal value to a decimal with a possibly different precision and scale,
* at position i in the respective vectors.
*/
- protected void convert(DecimalColumnVector outV, DecimalColumnVector inV, int i) {
+ protected void convert(DecimalColumnVector outputColVector, DecimalColumnVector inputColVector, int i) {
// The set routine enforces precision and scale.
- outV.vector[i].set(inV.vector[i]);
+ outputColVector.vector[i].set(inputColVector.vector[i]);
}
/**
@@ -70,10 +72,12 @@ public class CastDecimalToDecimal extends VectorExpression {
super.evaluateChildren(batch);
}
- DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+ DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+ DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -81,51 +85,82 @@ public class CastDecimalToDecimal extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- convert(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- convert(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ outputColVector.isRepeating = true;
+ if (inputColVector.noNulls || !inputColVector.isNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[0] = false;
+ convert(outputColVector, inputColVector, 0);
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ convert(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ convert(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- convert(outV, inV, i);
+ convert(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- convert(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- convert(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ convert(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- convert(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ convert(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
index aa529ed..7ad0493 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
@@ -64,6 +64,7 @@ public class CastDecimalToLong extends FuncDecimalToLong {
outV.noNulls = false;
return;
}
+ outV.isNull[i] = false;
switch (integerPrimitiveCategory) {
case BYTE:
outV.vector[i] = decWritable.byteValue();
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
index 08abf27..5494579 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -56,7 +58,6 @@ public class CastDoubleToTimestamp extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
double[] vector = inputColVector.vector;
@@ -65,39 +66,82 @@ public class CastDoubleToTimestamp extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- setDouble(outputColVector, vector, 0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ setDouble(outputColVector, vector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- setDouble(outputColVector, vector, i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setDouble(outputColVector, vector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ setDouble(outputColVector, vector, i);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
setDouble(outputColVector, vector, i);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- setDouble(outputColVector, vector, i);
- outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setDouble(outputColVector, vector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
} else {
for(int i = 0; i != n; i++) {
- setDouble(outputColVector, vector, i);
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setDouble(outputColVector, vector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
index df25eac..a3c4212 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -56,7 +58,6 @@ public class CastLongToTimestamp extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
long[] vector = inputColVector.vector;
@@ -65,39 +66,79 @@ public class CastLongToTimestamp extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- setSeconds(outputColVector, vector, 0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ setSeconds(outputColVector, vector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- setSeconds(outputColVector, vector, i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setSeconds(outputColVector, vector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ setSeconds(outputColVector, vector, i);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
setSeconds(outputColVector, vector, i);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- setSeconds(outputColVector, vector, i);
- outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setSeconds(outputColVector, vector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
} else {
+ // Set isNull before calls in case they change their mind.
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- setSeconds(outputColVector, vector, i);
+ if (!inputIsNull[i]) {
+ setSeconds(outputColVector, vector, i);
+ }
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
index 42c34c8..6a29c62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -54,7 +56,6 @@ public class CastMillisecondsLongToTimestamp extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
long[] vector = inputColVector.vector;
@@ -63,39 +64,84 @@ public class CastMillisecondsLongToTimestamp extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- setMilliseconds(outputColVector, vector, 0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ setMilliseconds(outputColVector, vector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- setMilliseconds(outputColVector, vector, i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setMilliseconds(outputColVector, vector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ setMilliseconds(outputColVector, vector, i);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
setMilliseconds(outputColVector, vector, i);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- setMilliseconds(outputColVector, vector, i);
- outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setMilliseconds(outputColVector, vector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
} else {
+ // Set isNull before calls in case they change their mind.
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- setMilliseconds(outputColVector, vector, i);
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ setMilliseconds(outputColVector, vector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index 34269da..b55712a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hive.common.util.DateParser;
import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
/**
* Casts a string vector to a date vector.
@@ -62,7 +63,10 @@ public class CastStringToDate extends VectorExpression {
BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inV.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -70,65 +74,94 @@ public class CastStringToDate extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inV.isRepeating) {
+ if (inV.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ evaluate(outputColVector, inV, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- evaluate(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- evaluate(outV, inV, i);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluate(outputColVector, inV, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluate(outputColVector, inV, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- evaluate(outV, inV, i);
+ evaluate(outputColVector, inV, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- evaluate(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- evaluate(outV, inV, i);
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ evaluate(outputColVector, inV, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- evaluate(outV, inV, i);
+ if (!inputIsNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ evaluate(outputColVector, inV, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
}
}
}
- private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+ private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
if (dateParser.parseDate(dateString, sqlDate)) {
- outV.vector[i] = DateWritable.dateToDays(sqlDate);
+ outputColVector.vector[i] = DateWritable.dateToDays(sqlDate);
return;
}
- outV.vector[i] = 1;
- outV.isNull[i] = true;
- outV.noNulls = false;
+ outputColVector.vector[i] = 1;
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
index 41443c5..cbefa80 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
@@ -51,7 +53,7 @@ public class CastStringToDecimal extends VectorExpression {
/**
* Convert input string to a decimal, at position i in the respective vectors.
*/
- protected void func(DecimalColumnVector outV, BytesColumnVector inV, int i) {
+ protected void func(DecimalColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
String s;
try {
@@ -59,13 +61,13 @@ public class CastStringToDecimal extends VectorExpression {
* e.g. by converting to decimal from the input bytes directly without
* making a new string.
*/
- s = new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8");
- outV.vector[i].set(HiveDecimal.create(s));
+ s = new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8");
+ outputColVector.vector[i].set(HiveDecimal.create(s));
} catch (Exception e) {
// for any exception in conversion to decimal, produce NULL
- outV.noNulls = false;
- outV.isNull[i] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
}
}
@@ -76,10 +78,13 @@ public class CastStringToDecimal extends VectorExpression {
super.evaluateChildren(batch);
}
- BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+ DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -87,51 +92,82 @@ public class CastStringToDecimal extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
index 3ea1e8c..9ad442a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -51,25 +53,25 @@ public class CastStringToDouble extends VectorExpression {
/**
* Convert input string to a double, at position i in the respective vectors.
*/
- protected void func(DoubleColumnVector outV, BytesColumnVector inV, int batchIndex) {
+ protected void func(DoubleColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) {
- byte[] bytes = inV.vector[batchIndex];
- final int start = inV.start[batchIndex];
- final int length = inV.length[batchIndex];
+ byte[] bytes = inputColVector.vector[batchIndex];
+ final int start = inputColVector.start[batchIndex];
+ final int length = inputColVector.length[batchIndex];
try {
if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
- outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
+ outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
return;
}
- outV.vector[batchIndex] = StringToDouble.strtod(bytes, start, length);
+ outputColVector.vector[batchIndex] = StringToDouble.strtod(bytes, start, length);
} catch (Exception e) {
// for any exception in conversion to double, produce NULL
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
- outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
+ outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
}
}
@@ -80,10 +82,13 @@ public class CastStringToDouble extends VectorExpression {
super.evaluateChildren(batch);
}
- BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum];
+ DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -91,51 +96,82 @@ public class CastStringToDouble extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
index feb0ab6..8a64dcf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -55,10 +57,13 @@ public class CastStringToIntervalDayTime extends VectorExpression {
super.evaluateChildren(batch);
}
- BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
+ IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -66,64 +71,88 @@ public class CastStringToIntervalDayTime extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- evaluate(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- evaluate(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ evaluate(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluate(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluate(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- evaluate(outV, inV, i);
+ evaluate(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- evaluate(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- evaluate(outV, inV, i);
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ evaluate(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ // Set isNull before calls in case they change their mind.
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- evaluate(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ evaluate(outputColVector, inputColVector, i);
}
}
- outV.isRepeating = false;
}
}
}
- private void evaluate(IntervalDayTimeColumnVector outV, BytesColumnVector inV, int i) {
+ private void evaluate(IntervalDayTimeColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
try {
HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf(
- new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
- outV.set(i, interval);
+ new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8"));
+ outputColVector.set(i, interval);
} catch (Exception e) {
- outV.setNullValue(i);
- outV.isNull[i] = true;
- outV.noNulls = false;
+ outputColVector.setNullValue(i);
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
index 09dd4d9..598113f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -53,10 +55,13 @@ public class CastStringToIntervalYearMonth extends VectorExpression {
super.evaluateChildren(batch);
}
- BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -64,64 +69,96 @@ public class CastStringToIntervalYearMonth extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- evaluate(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- evaluate(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ evaluate(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluate(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ evaluate(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- evaluate(outV, inV, i);
+ evaluate(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
-
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- evaluate(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- evaluate(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluate(outputColVector, inputColVector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+ // Set isNull before calls in case they change their mind.
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- evaluate(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ evaluate(outputColVector, inputColVector, i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
}
}
}
- private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+ private void evaluate(LongColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
try {
HiveIntervalYearMonth interval = HiveIntervalYearMonth.valueOf(
- new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
- outV.vector[i] = interval.getTotalMonths();
+ new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8"));
+ outputColVector.vector[i] = interval.getTotalMonths();
} catch (Exception e) {
- outV.vector[i] = 1;
- outV.isNull[i] = true;
- outV.noNulls = false;
+ outputColVector.vector[i] = 1;
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
index a6cfee8..e3da77e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -70,11 +72,11 @@ public class CastStringToLong extends VectorExpression {
/**
* Convert input string to a long, at position i in the respective vectors.
*/
- protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex) {
+ protected void func(LongColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) {
- byte[] bytes = inV.vector[batchIndex];
- final int start = inV.start[batchIndex];
- final int length = inV.length[batchIndex];
+ byte[] bytes = inputColVector.vector[batchIndex];
+ final int start = inputColVector.start[batchIndex];
+ final int length = inputColVector.length[batchIndex];
try {
switch (integerPrimitiveCategory) {
@@ -90,8 +92,8 @@ public class CastStringToLong extends VectorExpression {
booleanValue = true;
} else {
// No boolean value match for 4 char field.
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
} else if (length == 5) {
@@ -103,8 +105,8 @@ public class CastStringToLong extends VectorExpression {
booleanValue = false;
} else {
// No boolean value match for 5 char field.
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
} else if (length == 1) {
@@ -115,50 +117,50 @@ public class CastStringToLong extends VectorExpression {
booleanValue = false;
} else {
// No boolean value match for extended 1 char field.
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
} else {
// No boolean value match for other lengths.
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
- outV.vector[batchIndex] = (booleanValue ? 1 : 0);
+ outputColVector.vector[batchIndex] = (booleanValue ? 1 : 0);
}
break;
case BYTE:
if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
- outV.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10);
+ outputColVector.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10);
break;
case SHORT:
if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
- outV.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10);
+ outputColVector.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10);
break;
case INT:
if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
- outV.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10);
+ outputColVector.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10);
break;
case LONG:
if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
return;
}
- outV.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10);
+ outputColVector.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10);
break;
default:
throw new Error("Unexpected primitive category " + integerPrimitiveCategory);
@@ -166,8 +168,8 @@ public class CastStringToLong extends VectorExpression {
} catch (Exception e) {
// for any exception in conversion to integer, produce NULL
- outV.noNulls = false;
- outV.isNull[batchIndex] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isNull[batchIndex] = true;
}
}
@@ -178,10 +180,13 @@ public class CastStringToLong extends VectorExpression {
super.evaluateChildren(batch);
}
- BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
int[] sel = batch.selected;
int n = batch.size;
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -189,51 +194,81 @@ public class CastStringToLong extends VectorExpression {
return;
}
- if (inV.noNulls) {
- outV.noNulls = true;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- func(outV, inV, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, inV, i);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, inputColVector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, inV, i);
+ func(outputColVector, inputColVector, i);
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are NULLs in the inputColVector */ {
- // Handle case with nulls. Don't do function if the value is null,
- // because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inV.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inV.isNull[0];
- if (!inV.isNull[0]) {
- func(outV, inV, 0);
- }
- } else if (batch.selectedInUse) {
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inV.isNull[i];
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
} else {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inV.isNull[i]) {
- func(outV, inV, i);
+ if (!inputColVector.isNull[i]) {
+ // Set isNull before call in case it changes its mind.
+ outputColVector.isNull[i] = false;
+ func(outputColVector, inputColVector, i);
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
index 1231cda..1836131 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
import org.apache.hadoop.hive.ql.exec.vector.*;
@@ -56,7 +58,6 @@ public class CastTimestampToBoolean extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
long[] outputVector = outputColVector.vector;
@@ -65,39 +66,51 @@ public class CastTimestampToBoolean extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- outputVector[0] = toBool(inputColVector, 0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = toBool(inputColVector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = toBool(inputColVector, i);
+ outputIsNull[i] = false;
+ outputVector[i] = toBool(inputColVector, i);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outputVector[i] = toBool(inputColVector, i);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = toBool(inputColVector, i);
outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = toBool(inputColVector, i);
}
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = toBool(inputColVector, i);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
index e696455..c11797b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -51,7 +53,6 @@ public class CastTimestampToDouble extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
double[] outputVector = outputColVector.vector;
@@ -60,39 +61,79 @@ public class CastTimestampToDouble extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- outputVector[0] = inputColVector.getDouble(0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = inputColVector.getDouble(0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inputColVector.getDouble(i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inputColVector.getDouble(i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inputColVector.getDouble(i);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inputColVector.getDouble(i);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = inputColVector.getDouble(i);
- outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ outputIsNull[i] = false;
+ outputVector[i] = inputColVector.getDouble(i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
} else {
for(int i = 0; i != n; i++) {
- outputVector[i] = inputColVector.getDouble(i);
+ if (!inputIsNull[i]) {
+ outputIsNull[i] = false;
+ outputVector[i] = inputColVector.getDouble(i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
index 36b9f13..a0f0927 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
import org.apache.hadoop.hive.ql.exec.vector.*;
@@ -48,7 +50,6 @@ public class CastTimestampToLong extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
long[] outputVector = outputColVector.vector;
@@ -57,39 +58,79 @@ public class CastTimestampToLong extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- outputVector[0] = inputColVector.getTimestampAsLong(0);
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = inputColVector.getTimestampAsLong(0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inputColVector.getTimestampAsLong(i);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inputColVector.getTimestampAsLong(i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inputColVector.getTimestampAsLong(i);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inputColVector.getTimestampAsLong(i);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = inputColVector.getTimestampAsLong(i);
- outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
+ outputIsNull[i] = false;
+ outputVector[i] = inputColVector.getTimestampAsLong(i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
} else {
for(int i = 0; i != n; i++) {
- outputVector[i] = inputColVector.getTimestampAsLong(i);
+ if (!inputIsNull[i]) {
+ outputIsNull[i] = false;
+ outputVector[i] = inputColVector.getTimestampAsLong(i);
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
index 127e431..6fb29a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -66,104 +68,121 @@ public class ColAndCol extends VectorExpression {
return;
}
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
+
long vector1Value = vector1[0];
long vector2Value = vector2[0];
if (inputColVector1.noNulls && inputColVector2.noNulls) {
+
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
// All must be selected otherwise size would be zero
// Repeating property will not change.
outV.isRepeating = true;
+ outputIsNull[0] = false;
outputVector[0] = vector1[0] & vector2[0];
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputVector[i] = vector1Value & vector2[i];
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value & vector2[i];
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputVector[i] = vector1[i] & vector2Value;
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2Value;
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outputVector[i] = vector1[i] & vector2[i];
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2[i];
}
}
- outV.isRepeating = false;
}
- outV.noNulls = true;
- } else if (inputColVector1.noNulls && !inputColVector2.noNulls) {
+ return;
+ }
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outV.noNulls = false;
+
+ if (inputColVector1.noNulls && !inputColVector2.noNulls) {
// only input 2 side has nulls
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
// All must be selected otherwise size would be zero
// Repeating property will not change.
outV.isRepeating = true;
outputVector[0] = vector1[0] & vector2[0];
- outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0];
+ outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0];
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1Value & vector2[i];
- outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value & vector2[i];
- outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] & vector2Value;
- outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
+ outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
}
} else {
+
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2Value;
- outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
+ outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] & vector2[i];
- outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2[i];
- outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
+ outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
}
}
- outV.isRepeating = false;
}
- outV.noNulls = false;
} else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
// only input 1 side has nulls
if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
@@ -171,49 +190,46 @@ public class ColAndCol extends VectorExpression {
// Repeating property will not change.
outV.isRepeating = true;
outputVector[0] = vector1[0] & vector2[0];
- outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1);
+ outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1);
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1Value & vector2[i];
- outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
+ outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value & vector2[i];
- outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
+ outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] & vector2Value;
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2Value;
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
}
}
- outV.isRepeating = false;
} else /* neither side is repeating */{
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] & vector2[i];
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2[i];
- outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
+ outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
}
}
- outV.isRepeating = false;
}
outV.noNulls = false;
} else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{
@@ -223,7 +239,7 @@ public class ColAndCol extends VectorExpression {
// Repeating property will not change.
outV.isRepeating = true;
outputVector[0] = vector1[0] & vector2[0];
- outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0])
+ outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0])
|| (inputColVector1.isNull[0] && (vector2[0] == 1))
|| (inputColVector1.isNull[0] && inputColVector2.isNull[0]);
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
@@ -231,32 +247,31 @@ public class ColAndCol extends VectorExpression {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1Value & vector2[i];
- outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[0] && (vector2[i] == 1))
|| (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1Value & vector2[i];
- outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[0] && (vector2[i] == 1))
|| (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
}
}
- outV.isRepeating = false;
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] & vector2Value;
- outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
+ outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
|| (inputColVector1.isNull[i] && (vector2[0] == 1))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2Value;
- outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
+ outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
|| (inputColVector1.isNull[i] && (vector2[0] == 1))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
}
@@ -267,21 +282,19 @@ public class ColAndCol extends VectorExpression {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] & vector2[i];
- outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[i] && (vector2[i] == 1))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector1[i] & vector2[i];
- outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
+ outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
|| (inputColVector1.isNull[i] && (vector2[i] == 1))
|| (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
}
}
- outV.isRepeating = false;
}
- outV.noNulls = false;
}
}