You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/03 20:20:24 UTC
svn commit: r1489091 [1/4] - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/
java/org/apache/hadoop/...
Author: hashutosh
Date: Mon Jun 3 18:20:22 2013
New Revision: 1489091
URL: http://svn.apache.org/r1489091
Log:
HIVE-4592 : fix failure to set output isNull to true and other NULL propagation issues; update arithmetic tests (Eric Hanson via Ashutosh Chauhan)
Added:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarDivideDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticColumn.txt
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticScalar.txt
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java Mon Jun 3 18:20:22 2013
@@ -35,6 +35,7 @@ import org.apache.hadoop.io.Writable;
public class DoubleColumnVector extends ColumnVector {
public double[] vector;
private final DoubleWritable writableObj = new DoubleWritable();
+ public static final double NULL_VALUE = Double.NaN;
/**
* Use this constructor by default. All column vectors
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java Mon Jun 3 18:20:22 2013
@@ -35,6 +35,7 @@ import org.apache.hadoop.io.Writable;
public class LongColumnVector extends ColumnVector {
public long[] vector;
private final LongWritable writableObj = new LongWritable();
+ public static final long NULL_VALUE = 1;
/**
* Use this constructor by default. All column vectors
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java Mon Jun 3 18:20:22 2013
@@ -15,13 +15,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This operation is handled as a special case because Hive
+ * long/long division returns double. This file is thus not generated
+ * from a template like the other arithmetic operations are.
+ */
public class LongColDivideLongColumn extends VectorExpression {
int colNum1;
int colNum2;
@@ -47,89 +53,26 @@ public class LongColDivideLongColumn ext
int n = batch.size;
long[] vector1 = inputColVector1.vector;
long[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
-
+
// return immediately if batch is empty
if (n == 0) {
return;
}
-
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
- if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
outputVector[0] = vector1[0] / (double) vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,16 +97,24 @@ public class LongColDivideLongColumn ext
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] / (double) vector2[i];
}
} else {
for(int i = 0; i != n; i++) {
- outputVector[i] = vector1[i] / (double) vector2[i];
+ outputVector[i] = vector1[i] / (double) vector2[i];
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
+ * NaN for double. This is to prevent possible later zero-divide errors
+ * in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java Mon Jun 3 18:20:22 2013
@@ -15,17 +15,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+/**
+ * This operation is handled as a special case because Hive
+ * long/long division returns double. This file is thus not generated
+ * from a template like the other arithmetic operations are.
+ */
public class LongColDivideLongScalar extends VectorExpression {
- private final int colNum;
- private final double value;
- private final int outputColumn;
+ private int colNum;
+ private long value;
+ private int outputColumn;
public LongColDivideLongScalar(int colNum, long value, int outputColumn) {
this.colNum = colNum;
@@ -46,56 +53,59 @@ public class LongColDivideLongScalar ext
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
long[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
-
+
// return immediately if batch is empty
if (n == 0) {
return;
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- outputVector[0] = vector[0] / value;
+ outputVector[0] = vector[0] / (double) value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
+ outputIsNull[0] = inputIsNull[0];
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] / value;
+ outputVector[i] = vector[i] / (double) value;
}
} else {
for(int i = 0; i != n; i++) {
- outputVector[i] = vector[i] / value;
+ outputVector[i] = vector[i] / (double) value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = vector[i] / value;
+ outputVector[i] = vector[i] / (double) value;
outputIsNull[i] = inputIsNull[i];
}
} else {
for(int i = 0; i != n; i++) {
- outputVector[i] = vector[i] / value;
+ outputVector[i] = vector[i] / (double) value;
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ /* Set double data vector array entries for NULL elements to the correct value.
+ * Unlike other col-scalar operations, this one doesn't benefit from carrying
+ * over NaN values from the input array.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
public int getOutputColumn() {
return outputColumn;
}
-
+
@Override
public String getOutputType() {
return "double";
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java Mon Jun 3 18:20:22 2013
@@ -15,34 +15,32 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
/**
- * Implements a vectorized arithmetic operator with a scalar on the left and a
- * column vector on the right. The result is output to an output column vector.
+ * This operation is handled as a special case because Hive
+ * long/long division returns double. This file is thus not generated
+ * from a template like the other arithmetic operations are.
*/
public class LongScalarDivideLongColumn extends VectorExpression {
- private final int colNum;
+ private int colNum;
private final double value;
- private final int outputColumn;
+ private int outputColumn;
public LongScalarDivideLongColumn(long value, int colNum, int outputColumn) {
this.colNum = colNum;
- this.value = value;
+ this.value = (double) value;
this.outputColumn = outputColumn;
}
@Override
- /**
- * Method to evaluate scalar-column operation in vectorized fashion.
- *
- * @batch a package of rows with each column stored in a vector
- */
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
@@ -55,26 +53,21 @@ public class LongScalarDivideLongColumn
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
long[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
-
+
// return immediately if batch is empty
if (n == 0) {
return;
}
if (inputColVector.isRepeating) {
-
- /*
- * All must be selected otherwise size would be zero
- * Repeating property will not change.
- */
outputVector[0] = value / vector[0];
-
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
+ outputIsNull[0] = inputIsNull[0];
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -86,8 +79,7 @@ public class LongScalarDivideLongColumn
outputVector[i] = value / vector[i];
}
}
- outputColVector.isRepeating = false;
- } else { /* there are nulls */
+ } else /* there are nulls */ {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -100,15 +92,20 @@ public class LongScalarDivideLongColumn
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ /* Set double data vector array entries for NULL elements to the correct value.
+ * Unlike other scalar-col operations, this one doesn't benefit from carrying
+ * over NaN values from the input array.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
public int getOutputColumn() {
return outputColumn;
}
-
+
@Override
public String getOutputType() {
return "double";
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1489091&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Mon Jun 3 18:20:22 2013
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+
+
+/**
+ * Utility functions to handle null propagation in vectorized expressions.
+ *
+ * Convention used throughout: when a column can contain NULLs, the data slot
+ * of every NULL entry holds a designated filler value
+ * (LongColumnVector.NULL_VALUE for long, DoubleColumnVector.NULL_VALUE for
+ * double) so that later arithmetic on that slot cannot raise a zero-divide
+ * error.
+ */
+public class NullUtil {
+
+  /**
+   * Set the data value for all NULL entries of a long column to
+   * LongColumnVector.NULL_VALUE.
+   *
+   * @param v             column whose NULL data slots are overwritten
+   * @param selectedInUse true if only the rows listed in sel are in use
+   * @param sel           selected row indexes; read only if selectedInUse
+   * @param n             number of rows in use
+   */
+  public static void setNullDataEntriesLong(
+      LongColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    if (v.noNulls) {
+      return;
+    } else if (v.isRepeating && v.isNull[0]) {
+
+      // a repeating NULL column only needs its single entry fixed up
+      v.vector[0] = LongColumnVector.NULL_VALUE;
+    } else if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (v.isNull[i]) {
+          v.vector[i] = LongColumnVector.NULL_VALUE;
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (v.isNull[i]) {
+          v.vector[i] = LongColumnVector.NULL_VALUE;
+        }
+      }
+    }
+  }
+
+  /**
+   * Fix up the data slots of NULL output entries for Column-Scalar and
+   * Scalar-Column arithmetic.
+   *
+   * @param v             output column; anything that is not a
+   *                      DoubleColumnVector is cast to LongColumnVector
+   * @param selectedInUse true if only the rows listed in sel are in use
+   * @param sel           selected row indexes; read only if selectedInUse
+   * @param n             number of rows in use
+   */
+  public static void setNullOutputEntriesColScalar(
+      ColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    if (v instanceof DoubleColumnVector) {
+
+      // No need to set null data entries because the input NaN values
+      // will automatically propagate to the output.
+      return;
+    }
+    setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n);
+  }
+
+  /**
+   * Set the data value for all NULL entries of a double column to
+   * DoubleColumnVector.NULL_VALUE (NaN).
+   *
+   * @param v             column whose NULL data slots are overwritten
+   * @param selectedInUse true if only the rows listed in sel are in use
+   * @param sel           selected row indexes; read only if selectedInUse
+   * @param n             number of rows in use
+   */
+  public static void setNullDataEntriesDouble(
+      DoubleColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    if (v.noNulls) {
+      return;
+    } else if (v.isRepeating && v.isNull[0]) {
+
+      // a repeating NULL column only needs its single entry fixed up
+      v.vector[0] = DoubleColumnVector.NULL_VALUE;
+    } else if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (v.isNull[i]) {
+          v.vector[i] = DoubleColumnVector.NULL_VALUE;
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (v.isNull[i]) {
+          v.vector[i] = DoubleColumnVector.NULL_VALUE;
+        }
+      }
+    }
+  }
+
+  /**
+   * Propagate null values for a two-input operator: an output row is NULL
+   * if either input is NULL for that row.
+   *
+   * Sets outputColVector.noNulls and, when the output can contain NULLs,
+   * the isNull flag of every row in use. If either input is a repeating
+   * NULL, every output row is NULL, so the output is marked as a repeating
+   * NULL (isNull[0] and isRepeating are set to true) and nothing else is
+   * written.
+   *
+   * @param inputColVector1 first input column
+   * @param inputColVector2 second input column
+   * @param outputColVector column whose noNulls/isNull/isRepeating are set
+   * @param sel             selected row indexes; read only if selectedInUse
+   * @param n               number of rows in use
+   * @param selectedInUse   true if only the rows listed in sel are in use
+   */
+  public static void propagateNullsColCol(ColumnVector inputColVector1,
+      ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel,
+      int n, boolean selectedInUse) {
+
+    outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls;
+    if (outputColVector.noNulls) {
+
+      // nothing to propagate; isNull is left untouched
+      return;
+    }
+
+    // A repeating input whose single entry is NULL makes every output row NULL,
+    // whether or not the other input has nulls or is repeating.
+    boolean allNull1 = !inputColVector1.noNulls
+        && inputColVector1.isRepeating && inputColVector1.isNull[0];
+    boolean allNull2 = !inputColVector2.noNulls
+        && inputColVector2.isRepeating && inputColVector2.isNull[0];
+    if (allNull1 || allNull2) {
+      outputColVector.isNull[0] = true;
+      outputColVector.isRepeating = true; // because every value will be NULL
+      return;
+    }
+
+    // From here on a repeating input is known to be non-NULL, so only
+    // non-repeating inputs that may hold nulls contribute per-row flags.
+    boolean perRow1 = !inputColVector1.noNulls && !inputColVector1.isRepeating;
+    boolean perRow2 = !inputColVector2.noNulls && !inputColVector2.isRepeating;
+
+    if (perRow1 && perRow2) {
+      if (selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
+        }
+      }
+    } else if (perRow1) {
+      copyIsNull(inputColVector1, outputColVector, sel, n, selectedInUse);
+    } else if (perRow2) {
+      copyIsNull(inputColVector2, outputColVector, sel, n, selectedInUse);
+    } else {
+
+      // No output row is NULL, but noNulls is false so isNull will be
+      // consulted: clear entry 0 (used when the output is repeating) and
+      // every row in use so that no stale flag survives.
+      outputColVector.isNull[0] = false;
+      if (selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          outputColVector.isNull[sel[j]] = false;
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          outputColVector.isNull[i] = false;
+        }
+      }
+    }
+  }
+
+  // Copy the isNull flags of the rows in use from one column to another.
+  private static void copyIsNull(ColumnVector from, ColumnVector to, int[] sel,
+      int n, boolean selectedInUse) {
+    if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        to.isNull[i] = from.isNull[i];
+      }
+    } else {
+      System.arraycopy(from.isNull, 0, to.isNull, 0, n);
+    }
+  }
+}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java Mon Jun 3 18:20:22 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -47,7 +48,6 @@ public class DoubleColAddDoubleColumn ex
int n = batch.size;
double[] vector1 = inputColVector1.vector;
double[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColAddDoubleColumn ex
return;
}
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector1[0] + vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColAddDoubleColumn ex
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] + vector2[i];
}
@@ -164,6 +102,14 @@ public class DoubleColAddDoubleColumn ex
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
+ * NaN for double. This is to prevent possible later zero-divide errors
+ * in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java Mon Jun 3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
public class DoubleColAddDoubleScalar extends VectorExpression {
private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColAddDoubleScalar ex
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColAddDoubleScalar ex
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector[0] + value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] + value;
}
@@ -74,10 +74,9 @@ public class DoubleColAddDoubleScalar ex
outputVector[i] = vector[i] + value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] + value;
outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColAddDoubleScalar ex
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java Mon Jun 3 18:20:22 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -47,7 +48,6 @@ public class DoubleColAddLongColumn exte
int n = batch.size;
double[] vector1 = inputColVector1.vector;
long[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColAddLongColumn exte
return;
}
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector1[0] + vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColAddLongColumn exte
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] + vector2[i];
}
@@ -164,6 +102,14 @@ public class DoubleColAddLongColumn exte
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
+ * NaN for double. This is to prevent possible later zero-divide errors
+ * in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java Mon Jun 3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
public class DoubleColAddLongScalar extends VectorExpression {
private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColAddLongScalar exte
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColAddLongScalar exte
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector[0] + value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] + value;
}
@@ -74,10 +74,9 @@ public class DoubleColAddLongScalar exte
outputVector[i] = vector[i] + value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] + value;
outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColAddLongScalar exte
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java Mon Jun 3 18:20:22 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -47,7 +48,6 @@ public class DoubleColDivideDoubleColumn
int n = batch.size;
double[] vector1 = inputColVector1.vector;
double[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColDivideDoubleColumn
return;
}
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector1[0] / vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColDivideDoubleColumn
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] / vector2[i];
}
@@ -164,6 +102,14 @@ public class DoubleColDivideDoubleColumn
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
+ * NaN for double. This is to prevent possible later zero-divide errors
+ * in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java Mon Jun 3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
public class DoubleColDivideDoubleScalar extends VectorExpression {
private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColDivideDoubleScalar
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColDivideDoubleScalar
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector[0] / value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] / value;
}
@@ -74,10 +74,9 @@ public class DoubleColDivideDoubleScalar
outputVector[i] = vector[i] / value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] / value;
outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColDivideDoubleScalar
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java Mon Jun 3 18:20:22 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -47,7 +48,6 @@ public class DoubleColDivideLongColumn e
int n = batch.size;
double[] vector1 = inputColVector1.vector;
long[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColDivideLongColumn e
return;
}
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector1[0] / vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColDivideLongColumn e
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] / vector2[i];
}
@@ -164,6 +102,14 @@ public class DoubleColDivideLongColumn e
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values must be 1 for long and
+ * NaN for double. This is to prevent possible later zero-divide errors
+ * in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java Mon Jun 3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
public class DoubleColDivideLongScalar extends VectorExpression {
private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColDivideLongScalar e
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColDivideLongScalar e
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector[0] / value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] / value;
}
@@ -74,10 +74,9 @@ public class DoubleColDivideLongScalar e
outputVector[i] = vector[i] / value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] / value;
outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColDivideLongScalar e
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java Mon Jun 3 18:20:22 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -47,7 +48,6 @@ public class DoubleColModuloDoubleColumn
int n = batch.size;
double[] vector1 = inputColVector1.vector;
double[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColModuloDoubleColumn
return;
}
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector1[0] % vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColModuloDoubleColumn
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] % vector2[i];
}
@@ -164,6 +102,14 @@ public class DoubleColModuloDoubleColumn
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values of null entries must be 1 for
+ * long and NaN for double. This is to prevent possible later zero-divide
+ * errors in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java Mon Jun 3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
public class DoubleColModuloDoubleScalar extends VectorExpression {
private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColModuloDoubleScalar
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColModuloDoubleScalar
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector[0] % value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] % value;
}
@@ -74,10 +74,9 @@ public class DoubleColModuloDoubleScalar
outputVector[i] = vector[i] % value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] % value;
outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColModuloDoubleScalar
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java Mon Jun 3 18:20:22 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -47,7 +48,6 @@ public class DoubleColModuloLongColumn e
int n = batch.size;
double[] vector1 = inputColVector1.vector;
long[] vector2 = inputColVector2.vector;
-
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColModuloLongColumn e
return;
}
- /* Set repeating property to false (the default).
- * It will be set to true later if needed later.
- */
- outputColVector.isRepeating = false;
-
- //Handle nulls first
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector2.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i];
- }
- }
- }
- } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
- outputColVector.noNulls = false;
- if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
- //Output will also be repeating and null
- outputColVector.isRepeating = true;
- outputColVector.isNull[0] = true;
- //return as no further processing is needed
- return;
- } else {
- if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- } else {
- for(int i = 0; i != n; i++) {
- outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
- }
- }
- }
- }
-
-
- //Disregard nulls for processing
+ outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+
+ // Handle nulls first
+ NullUtil.propagateNullsColCol(
+ inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+ /* Disregard nulls for processing. In other words,
+ * the arithmetic operation is performed even if one or
+ * more inputs are null. This is to improve speed by avoiding
+ * conditional checks in the inner loop.
+ */
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector1[0] % vector2[0];
- outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColModuloLongColumn e
}
} else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector1[i] % vector2[i];
}
@@ -164,6 +102,14 @@ public class DoubleColModuloLongColumn e
}
}
}
+
+ /* For the case when the output can have null values, follow
+ * the convention that the data values of null entries must be 1 for
+ * long and NaN for double. This is to prevent possible later zero-divide
+ * errors in complex arithmetic expressions like col2 / (col1 - 1)
+ * in the case when some col1 entries are null.
+ */
+ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java Mon Jun 3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
public class DoubleColModuloLongScalar extends VectorExpression {
private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColModuloLongScalar e
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
outputColVector.noNulls = inputColVector.noNulls;
+ outputColVector.isRepeating = inputColVector.isRepeating;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColModuloLongScalar e
}
if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
outputVector[0] = vector[0] % value;
+
// Even if there are no nulls, we always copy over entry 0. Simplifies code.
outputIsNull[0] = inputIsNull[0];
- outputColVector.isRepeating = true;
} else if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] % value;
}
@@ -74,10 +74,9 @@ public class DoubleColModuloLongScalar e
outputVector[i] = vector[i] % value;
}
}
- outputColVector.isRepeating = false;
} else /* there are nulls */ {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for(int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] % value;
outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColModuloLongScalar e
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
+
+ NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
}
@Override