You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:38 UTC
[24/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
index 0991bda..67d1e76 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -60,8 +62,6 @@ public class LongColDivideLongScalar extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
- outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
long[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -71,27 +71,69 @@ public class LongColDivideLongScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
if (value == 0) {
// Denominator is zero, convert the batch to nulls
outputColVector.noNulls = false;
outputColVector.isRepeating = true;
outputIsNull[0] = true;
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+ return;
} else if (inputColVector.isRepeating) {
- outputVector[0] = vector[0] / (double) value;
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
- } else if (inputColVector.noNulls) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = vector[0] / (double) value;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] / (double) value;
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = vector[i] / (double) value;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = vector[i] / (double) value;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = vector[i] / (double) value;
}
}
} else /* there are nulls */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
index 2d66cee..608c32a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java
@@ -66,17 +66,14 @@ public class LongColEqualLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
/* Disregard nulls for processing. In other words,
- * the arithmetic operation is performed even if one or
+ * the arithmetic operation is performed even if one or
* more inputs are null. This is to improve speed by avoiding
* conditional checks in the inner loop.
*/
@@ -117,9 +114,9 @@ public class LongColEqualLongColumn extends VectorExpression {
}
}
}
-
- /* For the case when the output can have null values, follow
- * the convention that the data values must be 1 for long and
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
* NaN for double. This is to prevent possible later zero-divide errors
* in complex arithmetic expressions like col2 / (col1 - 1)
* in the case when some col1 entries are null.
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
index 242fddc..1a82e8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -25,8 +27,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
public class LongColEqualLongScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongColEqualLongScalar(int colNum, long value, int outputColumnNum) {
super(outputColumnNum);
@@ -45,6 +47,12 @@ public class LongColEqualLongScalar extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) {
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -52,55 +60,75 @@ public class LongColEqualLongScalar extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
- int n = batch.size;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
- // return immediately if batch is empty
- if (n == 0) {
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = vector[0] == value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
return;
}
- outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- outputVector[0] = vector[0] == value ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] == value ? 1 : 0;
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1"
outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = vector[0] == value ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] == value ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
index dc1a331..eb040ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java
@@ -66,17 +66,14 @@ public class LongColGreaterEqualLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
/* Disregard nulls for processing. In other words,
- * the arithmetic operation is performed even if one or
+ * the arithmetic operation is performed even if one or
* more inputs are null. This is to improve speed by avoiding
* conditional checks in the inner loop.
*/
@@ -117,9 +114,9 @@ public class LongColGreaterEqualLongColumn extends VectorExpression {
}
}
}
-
- /* For the case when the output can have null values, follow
- * the convention that the data values must be 1 for long and
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
* NaN for double. This is to prevent possible later zero-divide errors
* in complex arithmetic expressions like col2 / (col1 - 1)
* in the case when some col1 entries are null.
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
index 633015e..3f0ece7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongColGreaterEqualLongScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
- private int colNum;
- private long value;
+ protected int colNum;
+ protected long value;
public LongColGreaterEqualLongScalar(int colNum, long value, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongColGreaterEqualLongScalar extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,69 @@ public class LongColGreaterEqualLongScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = vector[0] >= value ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] >= value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1"
outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = vector[0] >= value ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] >= value ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
index e56d800..9ab9e1e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java
@@ -66,17 +66,14 @@ public class LongColGreaterLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
/* Disregard nulls for processing. In other words,
- * the arithmetic operation is performed even if one or
+ * the arithmetic operation is performed even if one or
* more inputs are null. This is to improve speed by avoiding
* conditional checks in the inner loop.
*/
@@ -117,9 +114,9 @@ public class LongColGreaterLongColumn extends VectorExpression {
}
}
}
-
- /* For the case when the output can have null values, follow
- * the convention that the data values must be 1 for long and
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
* NaN for double. This is to prevent possible later zero-divide errors
* in complex arithmetic expressions like col2 / (col1 - 1)
* in the case when some col1 entries are null.
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java
index 25c07df..4d34707 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -25,8 +27,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
public class LongColGreaterLongScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongColGreaterLongScalar(int colNum, long value, int outputColumnNum) {
super(outputColumnNum);
@@ -52,8 +54,8 @@ public class LongColGreaterLongScalar extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -63,44 +65,69 @@ public class LongColGreaterLongScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = vector[0] > value ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] > value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = (value - vector[i]) >>> 63;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = (value - vector[i]) >>> 63;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a > b" is "(b - a) >>> 63"
outputVector[i] = (value - vector[i]) >>> 63;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = vector[0] > value ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] > value ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (value - vector[i]) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (value - vector[i]) >>> 63;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java
index f052675..004bf4f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java
@@ -66,17 +66,14 @@ public class LongColLessEqualLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
/* Disregard nulls for processing. In other words,
- * the arithmetic operation is performed even if one or
+ * the arithmetic operation is performed even if one or
* more inputs are null. This is to improve speed by avoiding
* conditional checks in the inner loop.
*/
@@ -117,9 +114,9 @@ public class LongColLessEqualLongColumn extends VectorExpression {
}
}
}
-
- /* For the case when the output can have null values, follow
- * the convention that the data values must be 1 for long and
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
* NaN for double. This is to prevent possible later zero-divide errors
* in complex arithmetic expressions like col2 / (col1 - 1)
* in the case when some col1 entries are null.
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java
index 1e5b349..b50bdd3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongColLessEqualLongScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
- private int colNum;
- private long value;
+ protected int colNum;
+ protected long value;
public LongColLessEqualLongScalar(int colNum, long value, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongColLessEqualLongScalar extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,69 @@ public class LongColLessEqualLongScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = vector[0] <= value ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] <= value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1"
outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = vector[0] <= value ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] <= value ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java
index fe700c3..3a3425b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java
@@ -66,12 +66,9 @@ public class LongColLessLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java
index 2f282a9..f32de24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongColLessLongScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongColLessLongScalar(int colNum, long value, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongColLessLongScalar extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,69 @@ public class LongColLessLongScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = vector[0] < value ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] < value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = (vector[i] - value) >>> 63;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = (vector[i] - value) >>> 63;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a < b" is "(a - b) >>> 63"
outputVector[i] = (vector[i] - value) >>> 63;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = vector[0] < value ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] < value ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (vector[i] - value) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (vector[i] - value) >>> 63;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java
index 19fc3a6..cfd61a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java
@@ -71,12 +71,9 @@ public class LongColModuloLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java
index 8307e78..833b8fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java
@@ -66,17 +66,14 @@ public class LongColNotEqualLongColumn extends VectorExpression {
return;
}
- outputColVector.isRepeating =
- inputColVector1.isRepeating && inputColVector2.isRepeating
- || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
- || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
-
- // Handle nulls first
+ /*
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+ */
NullUtil.propagateNullsColCol(
inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
-
+
/* Disregard nulls for processing. In other words,
- * the arithmetic operation is performed even if one or
+ * the arithmetic operation is performed even if one or
* more inputs are null. This is to improve speed by avoiding
* conditional checks in the inner loop.
*/
@@ -117,9 +114,9 @@ public class LongColNotEqualLongColumn extends VectorExpression {
}
}
}
-
- /* For the case when the output can have null values, follow
- * the convention that the data values must be 1 for long and
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
* NaN for double. This is to prevent possible later zero-divide errors
* in complex arithmetic expressions like col2 / (col1 - 1)
* in the case when some col1 entries are null.
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java
index 0e78f8d..b4c68fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongColNotEqualLongScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongColNotEqualLongScalar(int colNum, long value, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongColNotEqualLongScalar extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,69 @@ public class LongColNotEqualLongScalar extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = vector[0] != value ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = vector[i] != value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = vector[0] != value ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] != value ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
index 6c5bb68..181aeae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
@@ -64,8 +64,8 @@ public class LongColumnInList extends VectorExpression implements ILongInExpr {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -75,49 +75,69 @@ public class LongColumnInList extends VectorExpression implements ILongInExpr {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
}
}
- } else {
- if (inputColVector.isRepeating) {
-
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outNulls[i] = nullPos[i];
- if (!nullPos[i]) {
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
}
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!nullPos[i]) {
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
index 7cdce0b..df78433 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -60,8 +62,6 @@ public class LongScalarDivideLongColumn extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
- outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
long[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -71,23 +71,51 @@ public class LongScalarDivideLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
boolean hasDivBy0 = false;
if (inputColVector.isRepeating) {
- long denom = vector[0];
- outputVector[0] = value / denom;
- hasDivBy0 = hasDivBy0 || (denom == 0);
-
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ long denom = vector[0];
+ outputVector[0] = value / denom;
+ hasDivBy0 = hasDivBy0 || (denom == 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- long denom = vector[i];
- outputVector[i] = value / denom;
- hasDivBy0 = hasDivBy0 || (denom == 0);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ long denom = vector[i];
+ outputVector[i] = value / denom;
+ hasDivBy0 = hasDivBy0 || (denom == 0);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ long denom = vector[i];
+ outputVector[i] = value / denom;
+ hasDivBy0 = hasDivBy0 || (denom == 0);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
long denom = vector[i];
outputVector[i] = value / denom;
@@ -95,6 +123,10 @@ public class LongScalarDivideLongColumn extends VectorExpression {
}
}
} else /* there are nulls */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -104,12 +136,12 @@ public class LongScalarDivideLongColumn extends VectorExpression {
outputIsNull[i] = inputIsNull[i];
}
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
long denom = vector[i];
outputVector[i] = value / denom;
hasDivBy0 = hasDivBy0 || (denom == 0);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java
index 8d915c2..2ca74c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongScalarEqualLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongScalarEqualLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongScalarEqualLongColumn extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,69 @@ public class LongScalarEqualLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = vector[0] == value ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- outputVector[0] = value == vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value == vector[i] ? 1 : 0;
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1"
outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = value == vector[0] ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = value == vector[i] ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java
index a06fb08..ac245f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -25,8 +27,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
public class LongScalarGreaterEqualLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongScalarGreaterEqualLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -52,8 +54,8 @@ public class LongScalarGreaterEqualLongColumn extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -63,44 +65,72 @@ public class LongScalarGreaterEqualLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = value >= vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value >= vector[i] ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1"
+ outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1"
+ outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
// The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1"
outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = value >= vector[0] ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = value >= vector[i] ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = ((value - vector[i]) >>> 63) ^ 1;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java
index 6610288..f8cd8a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongScalarGreaterLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- private int colNum;
- private long value;
+ protected int colNum;
+ protected long value;
public LongScalarGreaterLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongScalarGreaterLongColumn extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,72 @@ public class LongScalarGreaterLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = value > vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value > vector[i] ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ // The SIMD optimized form of "a > b" is "(b - a) >>> 63"
+ outputVector[i] = (vector[i] - value) >>> 63;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // The SIMD optimized form of "a > b" is "(b - a) >>> 63"
+ outputVector[i] = (vector[i] - value) >>> 63;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
// The SIMD optimized form of "a > b" is "(b - a) >>> 63"
outputVector[i] = (vector[i] - value) >>> 63;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = value > vector[0] ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = value > vector[i] ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (vector[i] - value) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (vector[i] - value) >>> 63;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java
index 7a305d3..7b3d4b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongScalarLessEqualLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongScalarLessEqualLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongScalarLessEqualLongColumn extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,72 @@ public class LongScalarLessEqualLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = value <= vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value <= vector[i] ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1"
+ outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1"
+ outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
// The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1"
outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = value <= vector[0] ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = value <= vector[i] ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = ((vector[i] - value) >>> 63) ^ 1;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java
index 763dfdf..948f812 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongScalarLessLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongScalarLessLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongScalarLessLongColumn extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,72 @@ public class LongScalarLessLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = value < vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value < vector[i] ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ // The SIMD optimized form of "a < b" is "(a - b) >>> 63"
+ outputVector[i] = (value - vector[i]) >>> 63;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // The SIMD optimized form of "a < b" is "(a - b) >>> 63"
+ outputVector[i] = (value - vector[i]) >>> 63;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
// The SIMD optimized form of "a < b" is "(a - b) >>> 63"
outputVector[i] = (value - vector[i]) >>> 63;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = value < vector[0] ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = value < vector[i] ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = (value - vector[i]) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = (value - vector[i]) >>> 63;
}