You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/03 20:20:24 UTC

svn commit: r1489091 [1/4] - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/ java/org/apache/hadoop/...

Author: hashutosh
Date: Mon Jun  3 18:20:22 2013
New Revision: 1489091

URL: http://svn.apache.org/r1489091
Log:
HIVE-4592 : fix failure to set output isNull to true and other NULL propagation issues; update arithmetic tests (Eric Hanson via Ashutosh Chauhan)

Added:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarDivideDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticColumn.txt
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticScalar.txt
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java Mon Jun  3 18:20:22 2013
@@ -35,6 +35,7 @@ import org.apache.hadoop.io.Writable;
 public class DoubleColumnVector extends ColumnVector {
   public double[] vector;
   private final DoubleWritable writableObj = new DoubleWritable();
+  public static final double NULL_VALUE = Double.NaN;
 
   /**
    * Use this constructor by default. All column vectors

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java Mon Jun  3 18:20:22 2013
@@ -35,6 +35,7 @@ import org.apache.hadoop.io.Writable;
 public class LongColumnVector extends ColumnVector {
   public long[] vector;
   private final LongWritable writableObj = new LongWritable();
+  public static final long NULL_VALUE = 1;
 
   /**
    * Use this constructor by default. All column vectors

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java Mon Jun  3 18:20:22 2013
@@ -15,13 +15,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+ 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This operation is handled as a special case because Hive
+ * long/long division returns double. This file is thus not generated
+ * from a template like the other arithmetic operations are.
+ */
 public class LongColDivideLongColumn extends VectorExpression {
   int colNum1;
   int colNum2;
@@ -47,89 +53,26 @@ public class LongColDivideLongColumn ext
     int n = batch.size;
     long[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
-
+    
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
-
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
-    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
+    
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
       outputVector[0] = vector1[0] / (double) vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,16 +97,24 @@ public class LongColDivideLongColumn ext
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] / (double) vector2[i];
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] = vector1[i] / (double) vector2[i];
+          outputVector[i] = vector1[i] /  (double) vector2[i];
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java Mon Jun  3 18:20:22 2013
@@ -15,17 +15,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+ 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
+/**
+ * This operation is handled as a special case because Hive
+ * long/long division returns double. This file is thus not generated
+ * from a template like the other arithmetic operations are.
+ */
 public class LongColDivideLongScalar extends VectorExpression {
-  private final int colNum;
-  private final double value;
-  private final int outputColumn;
+  private int colNum;
+  private long value;
+  private int outputColumn;
 
   public LongColDivideLongScalar(int colNum, long value, int outputColumn) {
     this.colNum = colNum;
@@ -46,56 +53,59 @@ public class LongColDivideLongScalar ext
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
-
+    
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      outputVector[0] = vector[0] / value;
+      outputVector[0] = vector[0] / (double) value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-      outputColVector.isRepeating = true;
+      outputIsNull[0] = inputIsNull[0]; 
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = vector[i] / value;
+          outputVector[i] = vector[i] / (double) value;
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] = vector[i] / value;
+          outputVector[i] = vector[i] / (double) value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = vector[i] / value;
+          outputVector[i] = vector[i] / (double) value;
           outputIsNull[i] = inputIsNull[i];
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] = vector[i] / value;
+          outputVector[i] = vector[i] / (double) value;
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    /* Set double data vector array entries for NULL elements to the correct value.
+     * Unlike other col-scalar operations, this one doesn't benefit from carrying 
+     * over NaN values from the input array.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override
   public int getOutputColumn() {
     return outputColumn;
   }
-
+  
   @Override
   public String getOutputType() {
     return "double";

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java Mon Jun  3 18:20:22 2013
@@ -15,34 +15,32 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+ 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
- * Implements a vectorized arithmetic operator with a scalar on the left and a
- * column vector on the right. The result is output to an output column vector.
+ * This operation is handled as a special case because Hive
+ * long/long division returns double. This file is thus not generated
+ * from a template like the other arithmetic operations are.
  */
 public class LongScalarDivideLongColumn extends VectorExpression {
-  private final int colNum;
+  private int colNum;
   private final double value;
-  private final int outputColumn;
+  private int outputColumn;
 
   public LongScalarDivideLongColumn(long value, int colNum, int outputColumn) {
     this.colNum = colNum;
-    this.value = value;
+    this.value = (double) value;
     this.outputColumn = outputColumn;
   }
 
   @Override
-  /**
-   * Method to evaluate scalar-column operation in vectorized fashion.
-   *
-   * @batch a package of rows with each column stored in a vector
-   */
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
@@ -55,26 +53,21 @@ public class LongScalarDivideLongColumn 
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
-
+    
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
     if (inputColVector.isRepeating) {
-
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value / vector[0];
-
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-      outputColVector.isRepeating = true;
+      outputIsNull[0] = inputIsNull[0]; 
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -86,8 +79,7 @@ public class LongScalarDivideLongColumn 
           outputVector[i] = value / vector[i];
         }
       }
-      outputColVector.isRepeating = false;
-    } else {                         /* there are nulls */
+    } else /* there are nulls */ {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
@@ -100,15 +92,20 @@ public class LongScalarDivideLongColumn 
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    /* Set double data vector array entries for NULL elements to the correct value.
+     * Unlike other scalar-col operations, this one doesn't benefit from carrying
+     * over NaN values from the input array.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override
   public int getOutputColumn() {
     return outputColumn;
   }
-
+  
   @Override
   public String getOutputType() {
     return "double";

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1489091&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Mon Jun  3 18:20:22 2013
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+
+
+/**
+ * Utility functions to handle null propagation.
+ */
+public class NullUtil {
+
+  /**
+   * Sets the data value of every NULL entry to LongColumnVector.NULL_VALUE.
+   *
+   * @param v vector whose NULL entries are overwritten in place
+   * @param selectedInUse whether sel lists the rows to process
+   * @param sel row indices to process; read only when selectedInUse is true
+   * @param n number of rows to process
+   */
+  public static void setNullDataEntriesLong(
+      LongColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    if (v.noNulls) {
+      return;
+    } else if (v.isRepeating && v.isNull[0]) {
+      v.vector[0] = LongColumnVector.NULL_VALUE;
+    } else if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (v.isNull[i]) {
+          v.vector[i] = LongColumnVector.NULL_VALUE;
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (v.isNull[i]) {
+          v.vector[i] = LongColumnVector.NULL_VALUE;
+        }
+      }
+    }
+  }
+
+  /**
+   * Sets NULL data entries for the output of column-scalar and scalar-column
+   * arithmetic. Long vectors are delegated to setNullDataEntriesLong; double
+   * vectors are left untouched.
+   *
+   * @param v output vector; cast to LongColumnVector unless it is a DoubleColumnVector
+   * @param selectedInUse whether sel lists the rows to process
+   * @param sel row indices to process; read only when selectedInUse is true
+   * @param n number of rows to process
+   */
+  public static void setNullOutputEntriesColScalar(
+      ColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    if (v instanceof DoubleColumnVector) {
+
+      // No need to set null data entries because the input NaN values
+      // will automatically propagate to the output.
+      return;
+    }
+    setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n);
+  }
+
+  /**
+   * Sets the data value of every NULL entry to DoubleColumnVector.NULL_VALUE (NaN).
+   *
+   * @param v vector whose NULL entries are overwritten in place
+   * @param selectedInUse whether sel lists the rows to process
+   * @param sel row indices to process; read only when selectedInUse is true
+   * @param n number of rows to process
+   */
+  public static void setNullDataEntriesDouble(
+      DoubleColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    if (v.noNulls) {
+      return;
+    } else if (v.isRepeating && v.isNull[0]) {
+      v.vector[0] = DoubleColumnVector.NULL_VALUE;
+    } else if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (v.isNull[i]) {
+          v.vector[i] = DoubleColumnVector.NULL_VALUE;
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (v.isNull[i]) {
+          v.vector[i] = DoubleColumnVector.NULL_VALUE;
+        }
+      }
+    }
+  }
+
+  /**
+   * Propagates NULLs for a two-input operator: an output row is NULL when the
+   * same row of either input is NULL.
+   *
+   * Sets outputColVector.noNulls and the isNull flags of the processed rows.
+   * When a repeating input is NULL, every output row is NULL, so the output is
+   * marked repeating with isNull[0] set; otherwise outputColVector.isRepeating
+   * is left as the caller set it.
+   *
+   * @param inputColVector1 first input
+   * @param inputColVector2 second input
+   * @param outputColVector vector whose null flags are written
+   * @param sel row indices to process; read only when selectedInUse is true
+   * @param n number of rows to process
+   * @param selectedInUse whether sel lists the rows to process
+   */
+  public static void propagateNullsColCol(ColumnVector inputColVector1,
+      ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel,
+      int n, boolean selectedInUse) {
+
+    outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls;
+
+    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
+      propagateNullsFromOneInput(inputColVector2, outputColVector, sel, n, selectedInUse);
+    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
+      propagateNullsFromOneInput(inputColVector1, outputColVector, sel, n, selectedInUse);
+    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        outputColVector.isNull[0] = inputColVector1.isNull[0] || inputColVector2.isNull[0];
+        if (outputColVector.isNull[0]) {
+          outputColVector.isRepeating = true;
+          return;
+        }
+      } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
+        if (inputColVector1.isNull[0]) {
+          outputColVector.isNull[0] = true;
+          outputColVector.isRepeating = true;   // because every value will be NULL
+          return;
+        } else {
+          copyNulls(inputColVector2, outputColVector, sel, n, selectedInUse);
+        }
+      } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (inputColVector2.isNull[0]) {
+          outputColVector.isNull[0] = true;
+          outputColVector.isRepeating = true;   // because every value will be NULL
+          return;
+        } else {
+          copyNulls(inputColVector1, outputColVector, sel, n, selectedInUse);
+        }
+      } else {                      // neither side is repeating
+        if (selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
+          }
+        }
+      }
+    }
+  }
+
+  /*
+   * Propagate NULLs when exactly one input (nullableInput) can contain NULLs.
+   */
+  private static void propagateNullsFromOneInput(ColumnVector nullableInput,
+      ColumnVector outputColVector, int[] sel, int n, boolean selectedInUse) {
+    if (!nullableInput.isRepeating) {
+      copyNulls(nullableInput, outputColVector, sel, n, selectedInUse);
+    } else if (nullableInput.isNull[0]) {
+      outputColVector.isNull[0] = true;
+      outputColVector.isRepeating = true;   // because every value will be NULL
+    } else {
+
+      // The repeated value is not NULL, so no output row is NULL. The output may
+      // not be repeating, so clear the flag of every processed row, not just row 0.
+      outputColVector.isNull[0] = false;
+      if (selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          outputColVector.isNull[sel[j]] = false;
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          outputColVector.isNull[i] = false;
+        }
+      }
+    }
+  }
+
+  /*
+   * Copy the isNull flags of the processed rows from source to outputColVector.
+   */
+  private static void copyNulls(ColumnVector source, ColumnVector outputColVector,
+      int[] sel, int n, boolean selectedInUse) {
+    if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        outputColVector.isNull[i] = source.isNull[i];
+      }
+    } else {
+      System.arraycopy(source.isNull, 0, outputColVector.isNull, 0, n);
+    }
+  }
+}

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColAddDoubleColumn ex
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColAddDoubleColumn ex
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] + vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColAddDoubleColumn ex
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] + vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColAddDoubleColumn ex
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColAddDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColAddDoubleScalar ex
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColAddDoubleScalar ex
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] + value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
         }
@@ -74,10 +74,9 @@ public class DoubleColAddDoubleScalar ex
           outputVector[i] = vector[i] + value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColAddDoubleScalar ex
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColAddLongColumn exte
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColAddLongColumn exte
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] + vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColAddLongColumn exte
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] + vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColAddLongColumn exte
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColAddLongScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColAddLongScalar exte
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColAddLongScalar exte
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] + value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
         }
@@ -74,10 +74,9 @@ public class DoubleColAddLongScalar exte
           outputVector[i] = vector[i] + value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColAddLongScalar exte
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColDivideDoubleColumn
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColDivideDoubleColumn
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] / vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColDivideDoubleColumn
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] / vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColDivideDoubleColumn
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColDivideDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColDivideDoubleScalar
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColDivideDoubleScalar
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] / value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] / value;
         }
@@ -74,10 +74,9 @@ public class DoubleColDivideDoubleScalar
           outputVector[i] = vector[i] / value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] / value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColDivideDoubleScalar
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColDivideLongColumn e
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColDivideLongColumn e
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] / vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColDivideLongColumn e
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] / vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColDivideLongColumn e
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColDivideLongScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColDivideLongScalar e
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColDivideLongScalar e
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] / value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] / value;
         }
@@ -74,10 +74,9 @@ public class DoubleColDivideLongScalar e
           outputVector[i] = vector[i] / value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] / value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColDivideLongScalar e
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColModuloDoubleColumn
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColModuloDoubleColumn
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] % vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColModuloDoubleColumn
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] % vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColModuloDoubleColumn
         }
       }
     }
+    
+    /* When the output can contain nulls, follow the convention that
+     * the data value stored in each null entry is 1 for long and
+     * NaN for double. This prevents zero-divide errors later on
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColModuloDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColModuloDoubleScalar
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColModuloDoubleScalar
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] % value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] % value;
         }
@@ -74,10 +74,9 @@ public class DoubleColModuloDoubleScalar
           outputVector[i] = vector[i] % value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] % value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColModuloDoubleScalar
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColModuloLongColumn e
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColModuloLongColumn e
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] % vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColModuloLongColumn e
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] % vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColModuloLongColumn e
         }
       }
     }
+    
+    /* When the output can contain nulls, follow the convention that
+     * the data value stored in each null entry is 1 for long and
+     * NaN for double. This prevents zero-divide errors later on
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColModuloLongScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColModuloLongScalar e
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColModuloLongScalar e
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] % value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] % value;
         }
@@ -74,10 +74,9 @@ public class DoubleColModuloLongScalar e
           outputVector[i] = vector[i] % value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] % value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColModuloLongScalar e
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override