You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:36 UTC

[22/32] hive git commit: HIVE-18622: Vectorization: IF Statements, Comparisons, and more do not handle NULLs correctly (Matt McCline, reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
index 544b700..9b7005d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
@@ -71,8 +71,10 @@ public class StringUnaryUDF extends VectorExpression {
     byte[][] vector = inputColVector.vector;
     int [] start = inputColVector.start;
     int [] length = inputColVector.length;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.initBuffer();
     Text t;
 
     if (n == 0) {
@@ -86,72 +88,86 @@ public class StringUnaryUDF extends VectorExpression {
     // It's implemented in the simplest way now, just calling the
     // existing built-in function.
 
-    if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         s.set(vector[0], start[0], length[0]);
         t = func.evaluate(s);
-        setString(outV, 0, t);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
+        setString(outputColVector, 0, t);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
 
-          /* Fill output isNull with false for selected elements since there is a chance we'll
-           * convert to noNulls == false in setString();
-           */
-          outV.isNull[i] = false;
-          s.set(vector[i], start[i], length[i]);
-          t = func.evaluate(s);
-          setString(outV, i, t);
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           s.set(vector[i], start[i], length[i]);
+           t = func.evaluate(s);
+           setString(outputColVector, i, t);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            s.set(vector[i], start[i], length[i]);
+            t = func.evaluate(s);
+            setString(outputColVector, i, t);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
 
-        // Set all elements to not null. The setString call can override this.
-        Arrays.fill(outV.isNull, 0, n, false);
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           s.set(vector[i], start[i], length[i]);
           t = func.evaluate(s);
-          setString(outV, i, t);
+          setString(outputColVector, i, t);
         }
-        outV.isRepeating = false;
       }
-    } else {
-      // Handle case with nulls. Don't do function if the value is null, to save time,
-      // because calling the function can be expensive.
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0]; // setString can override this
-        if (!inputColVector.isNull[0]) {
-          s.set(vector[0], start[0], length[0]);
-          t = func.evaluate(s);
-          setString(outV, 0, t);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputColVector.isNull[i]; // setString can override this
+          outputColVector.isNull[i] = inputColVector.isNull[i]; // setString can override this
           if (!inputColVector.isNull[i]) {
             s.set(vector[i], start[i], length[i]);
             t = func.evaluate(s);
-            setString(outV, i, t);
+            setString(outputColVector, i, t);
           }
         }
-        outV.isRepeating = false;
       } else {
 
         // setString can override this null propagation
-        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
           if (!inputColVector.isNull[i]) {
             s.set(vector[i], start[i], length[i]);
             t = func.evaluate(s);
-            setString(outV, i, t);
+            setString(outputColVector, i, t);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
@@ -159,13 +175,13 @@ public class StringUnaryUDF extends VectorExpression {
   /* Set the output string entry i to the contents of Text object t.
    * If t is a null object reference, record that the value is a SQL NULL.
    */
-  private static void setString(BytesColumnVector outV, int i, Text t) {
+  private static void setString(BytesColumnVector outputColVector, int i, Text t) {
     if (t == null) {
-      outV.noNulls = false;
-      outV.isNull[i] = true;
+      outputColVector.noNulls = false;
+      outputColVector.isNull[i] = true;
       return;
     }
-    outV.setVal(i, t.getBytes(), 0, t.getLength());
+    outputColVector.setVal(i, t.getBytes(), 0, t.getLength());
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
index 2f8b627..9462347 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -44,7 +46,7 @@ abstract public class StringUnaryUDFDirect extends VectorExpression {
     inputColumn = -1;
   }
 
-  abstract protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i);
+  abstract protected void func(BytesColumnVector outputColVector, byte[][] vector, int[] start, int[] length, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -59,59 +61,82 @@ abstract public class StringUnaryUDFDirect extends VectorExpression {
     byte[][] vector = inputColVector.vector;
     int start[] = inputColVector.start;
     int length[] = inputColVector.length;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.initBuffer();
 
     if (n == 0) {
       //Nothing to do
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, vector, start, length, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, vector, start, length, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, vector, start, length, i);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           func(outputColVector, vector, start, length, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, vector, start, length, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, vector, start, length, i);
+          func(outputColVector, vector, start, length, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          func(outV, vector, start, length, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputColVector.isNull[i];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
-            func(outV, vector, start, length, i);
+            func(outputColVector, vector, start, length, i);
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
           if (!inputColVector.isNull[i]) {
-            func(outV, vector, start, length, i);
+            func(outputColVector, vector, start, length, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
index 7fb95f5..31a0ad1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java
@@ -73,8 +73,8 @@ public class TimestampColumnInList extends VectorExpression implements ITimestam
     TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol];
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
-    boolean[] outNulls = outputColVector.isNull;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
     int n = batch.size;
     long[] outputVector = outputColVector.vector;
 
@@ -83,49 +83,69 @@ public class TimestampColumnInList extends VectorExpression implements ITimestam
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
     outputColVector.isRepeating = false;
-    outputColVector.noNulls = inputColVector.noNulls;
-    if (inputColVector.noNulls) {
-      if (inputColVector.isRepeating) {
 
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0;
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
         }
       }
-    } else {
-      if (inputColVector.isRepeating) {
-
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0;
-          outNulls[0] = false;
-        } else {
-          outNulls[0] = true;
-        }
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outNulls[i] = nullPos[i];
-          if (!nullPos[i]) {
+          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
             outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
           }
         }
       } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
             outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
           }
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
index 5eb2090..13abfd3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ abstract public class TimestampToStringUnaryUDF extends VectorExpression {
     inputColumn = -1;
   }
 
-  abstract protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i);
+  abstract protected void func(BytesColumnVector outputColVector, TimestampColumnVector inV, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -56,59 +58,82 @@ abstract public class TimestampToStringUnaryUDF extends VectorExpression {
     TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.initBuffer();
 
     if (n == 0) {
       //Nothing to do
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inputColVector, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inputColVector, i);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           func(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inputColVector, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          func(outV, inputColVector, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputColVector.isNull[i];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
-            func(outV, inputColVector, i);
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
           if (!inputColVector.isNull[i]) {
-            func(outV, inputColVector, i);
+            func(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
index ea78a2e..3a560ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
@@ -35,6 +35,10 @@ public class VectorCoalesce extends VectorExpression {
 
   private final int[] inputColumns;
 
+  // The unassigned batchIndex for the rows that have not received a non-NULL value yet.
+  // A temporary work array.
+  private transient int[] unassignedBatchIndices;
+
   public VectorCoalesce(int [] inputColumns, int outputColumnNum) {
     super(outputColumnNum);
     this.inputColumns = inputColumns;
@@ -57,66 +61,174 @@ public class VectorCoalesce extends VectorExpression {
 
     int[] sel = batch.selected;
     int n = batch.size;
-    ColumnVector outputVector = batch.cols[outputColumnNum];
+    ColumnVector outputColVector = batch.cols[outputColumnNum];
+    boolean[] outputIsNull = outputColVector.isNull;
     if (n <= 0) {
       // Nothing to do
       return;
     }
 
-    outputVector.init();
+    if (unassignedBatchIndices == null || n > unassignedBatchIndices.length) {
+
+      // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE).
+      final int roundUpSize =
+          ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE)
+              * VectorizedRowBatch.DEFAULT_SIZE;
+      unassignedBatchIndices = new int[roundUpSize];
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
-    boolean noNulls = false;
+    // CONSIDER: Should we do this for all vector expressions that can
+    //           work on BytesColumnVector output columns???
+    outputColVector.init();
+
+    final int columnCount = inputColumns.length;
+
+    /*
+     * Process the input columns to find a non-NULL value for each row.
+     *
+     * We track the unassigned batchIndex of the rows that have not received
+     * a non-NULL value yet.  Similar to a selected array.
+     */
+    boolean isAllUnassigned = true;
+    int unassignedColumnCount = 0;
 
     for (int k = 0; k < inputColumns.length; k++) {
       ColumnVector cv = batch.cols[inputColumns[k]];
-      // non-nulls in any column qualifies coalesce having no nulls
-      // common case: last column is a constant & non-null
-      noNulls = noNulls || cv.noNulls;
-    }
-
-    outputVector.noNulls = noNulls;
-    outputVector.isRepeating = false;
-
-    ColumnVector first = batch.cols[inputColumns[0]];
-
-    if (first.noNulls && first.isRepeating) {
-      outputVector.isRepeating = true;
-      outputVector.isNull[0] = false;
-      outputVector.setElement(0, 0, first);
-    } else if (batch.selectedInUse) {
-      for (int j = 0; j != n; j++) {
-        int i = sel[j];
-        outputVector.isNull[i] = true;
-        for (int k = 0; k < inputColumns.length; k++) {
-          ColumnVector cv = batch.cols[inputColumns[k]];
-          if ( (cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
-            outputVector.isNull[i] = false;
-            outputVector.setElement(i, 0, cv);
-            break;
-          } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
-            outputVector.isNull[i] = false;
-            outputVector.setElement(i, i, cv);
-            break;
+      if (cv.isRepeating) {
+
+        if (cv.noNulls || !cv.isNull[0]) {
+
+          /*
+           * With a repeating value we can finish all remaining rows.
+           */
+          if (isAllUnassigned) {
+
+            // No other columns provided non-NULL values.  We can return repeated output.
+            outputIsNull[0] = false;
+            outputColVector.setElement(0, 0, cv);
+            outputColVector.isRepeating = true;
+            return;
+          } else {
+
+            // Some rows have already been assigned values. Assign the remaining.
+            // We cannot use copySelected method here.
+            for (int i = 0; i < unassignedColumnCount; i++) {
+              final int batchIndex = unassignedBatchIndices[i];
+              outputIsNull[batchIndex] = false;
+
+              // Our input is repeating (i.e. inputColNumber = 0).
+              outputColVector.setElement(batchIndex, 0, cv);
+            }
+            return;
           }
+        } else {
+
+          // Repeated NULLs -- skip this input column.
         }
-      }
-    } else {
-      for (int i = 0; i != n; i++) {
-        outputVector.isNull[i] = true;
-        for (int k = 0; k < inputColumns.length; k++) {
-          ColumnVector cv = batch.cols[inputColumns[k]];
-          if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
-            outputVector.isNull[i] = false;
-            outputVector.setElement(i, 0, cv);
-            break;
-          } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
-            outputVector.isNull[i] = false;
-            outputVector.setElement(i, i, cv);
-            break;
+      } else {
+
+        /*
+         * Non-repeating input column. Use any non-NULL values for unassigned rows.
+         */
+        if (isAllUnassigned) {
+
+          /*
+           * No other columns provided non-NULL values.  We *may* be able to finish all rows
+           * with this input column...
+           */
+          if (cv.noNulls){
+
+            // Since no NULLs, we can provide values for all rows.
+            if (batch.selectedInUse) {
+              for (int i = 0; i < n; i++) {
+                final int batchIndex = sel[i];
+                outputIsNull[batchIndex] = false;
+                outputColVector.setElement(batchIndex, batchIndex, cv);
+              }
+            } else {
+              Arrays.fill(outputIsNull, 0, n, false);
+              for (int batchIndex = 0; batchIndex < n; batchIndex++) {
+                outputColVector.setElement(batchIndex, batchIndex, cv);
+              }
+            }
+            return;
+          } else {
+
+            // We might not be able to assign all rows because of input NULLs.  Start tracking any
+            // unassigned rows.
+            boolean[] inputIsNull = cv.isNull;
+            if (batch.selectedInUse) {
+              for (int i = 0; i < n; i++) {
+                final int batchIndex = sel[i];
+                if (!inputIsNull[batchIndex]) {
+                  outputIsNull[batchIndex] = false;
+                  outputColVector.setElement(batchIndex, batchIndex, cv);
+                } else {
+                  unassignedBatchIndices[unassignedColumnCount++] = batchIndex;
+                }
+              }
+            } else {
+              for (int batchIndex = 0; batchIndex < n; batchIndex++) {
+                if (!inputIsNull[batchIndex]) {
+                  outputIsNull[batchIndex] = false;
+                  outputColVector.setElement(batchIndex, batchIndex, cv);
+                } else {
+                  unassignedBatchIndices[unassignedColumnCount++] = batchIndex;
+                }
+              }
+            }
+            if (unassignedColumnCount == 0) {
+              return;
+            }
+            isAllUnassigned = false;
+          }
+        } else {
+
+          /*
+           * We previously assigned *some* rows with non-NULL values. The batch indices of
+           * the unassigned rows were tracked.
+           */
+          if (cv.noNulls) {
+
+            // Assign all remaining rows.
+            for (int i = 0; i < unassignedColumnCount; i++) {
+              final int batchIndex = unassignedBatchIndices[i];
+              outputIsNull[batchIndex] = false;
+              outputColVector.setElement(batchIndex, batchIndex, cv);
+            }
+            return;
+          } else {
+
+            // Use any non-NULL values found; remember the remaining unassigned.
+            boolean[] inputIsNull = cv.isNull;
+            int newUnassignedColumnCount = 0;
+            for (int i = 0; i < unassignedColumnCount; i++) {
+              final int batchIndex = unassignedBatchIndices[i];
+              if (!inputIsNull[batchIndex]) {
+                outputIsNull[batchIndex] = false;
+                outputColVector.setElement(batchIndex, batchIndex, cv);
+              } else {
+                unassignedBatchIndices[newUnassignedColumnCount++] = batchIndex;
+              }
+            }
+            if (newUnassignedColumnCount == 0) {
+              return;
+            }
+            unassignedColumnCount = newUnassignedColumnCount;
           }
         }
       }
     }
+
+    // NULL out the remaining unassigned rows.
+    outputColVector.noNulls = false;
+    for (int i = 0; i < unassignedColumnCount; i++) {
+      final int batchIndex = unassignedBatchIndices[i];
+      outputIsNull[batchIndex] = true;
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
index 0dde5bd..a30a7df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java
@@ -24,6 +24,14 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/*
+ * ELT(index, string, ....) returns the string column/expression value at the specified
+ * index expression.
+ *
+ * The first argument expression indicates the index of the string to be retrieved from
+ * remaining arguments.  We return NULL when the index number is less than 1 or
+ * index number is greater than the number of the string arguments.
+ */
 public class VectorElt extends VectorExpression {
   private static final long serialVersionUID = 1L;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index f7fdb57..bd594e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -387,8 +387,35 @@ public final class VectorExpressionWriterFactory {
    * if the wrong vector column is used.
    */
   private static abstract class VectorExpressionWriterDecimal extends VectorExpressionWriterBase {
+
     @Override
     public Object writeValue(ColumnVector column, int row) throws HiveException {
+      if (column instanceof Decimal64ColumnVector) {
+        Decimal64ColumnVector d64cv = (Decimal64ColumnVector) column;
+        final long decimal64Long;
+        if (d64cv.noNulls && !d64cv.isRepeating) {
+          decimal64Long = d64cv.vector[row];
+        } else if (d64cv.noNulls && d64cv.isRepeating) {
+          decimal64Long = d64cv.vector[0];
+        } else if (!d64cv.noNulls && !d64cv.isRepeating && !d64cv.isNull[row]) {
+          decimal64Long = d64cv.vector[row];
+        } else if (!d64cv.noNulls && !d64cv.isRepeating && d64cv.isNull[row]) {
+          return null;
+        } else if (!d64cv.noNulls && d64cv.isRepeating && !d64cv.isNull[0]) {
+          decimal64Long = d64cv.vector[0];
+        } else if (!d64cv.noNulls && d64cv.isRepeating && d64cv.isNull[0]) {
+          return null;
+        } else {
+          throw new HiveException(
+              String.format(
+                  "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
+                  row, d64cv.noNulls, d64cv.isRepeating, d64cv.isNull[row], d64cv.isNull[0]));
+        }
+
+        HiveDecimalWritable scratchHiveDecimalWritable = d64cv.getScratchWritable();
+        scratchHiveDecimalWritable.deserialize64(decimal64Long, d64cv.scale);
+        return writeValue(scratchHiveDecimalWritable);
+      }
       DecimalColumnVector dcv = (DecimalColumnVector) column;
       if (dcv.noNulls && !dcv.isRepeating) {
         return writeValue(dcv.vector[row]);

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
index b2891a8..f6e9c8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
@@ -91,7 +91,9 @@ public class VectorUDFDateAddColCol extends VectorExpression {
       return;
     }
 
-    // Handle null
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
 
     switch (primitiveCategory) {

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
index e232555..7bb5c54 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hive.common.util.DateParser;
 
 import java.sql.Date;
+import java.util.Arrays;
 
 public class VectorUDFDateAddColScalar extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -77,52 +78,84 @@ public class VectorUDFDateAddColScalar extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     ColumnVector inputCol = batch.cols[this.colNum];
     /* every line below this is identical for evaluateLong & evaluateString */
     final int n = inputCol.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     switch (primitiveCategory) {
       case DATE:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateDate(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateDate(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               outputColVector.vector[i] = evaluateDate(inputCol, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateDate(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateDate(inputCol, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs..
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           }
@@ -130,35 +163,66 @@ public class VectorUDFDateAddColScalar extends VectorExpression {
         break;
 
       case TIMESTAMP:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
           if (batch.selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs..
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (batch.selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
               }
             }
           }
@@ -168,35 +232,66 @@ public class VectorUDFDateAddColScalar extends VectorExpression {
       case STRING:
       case CHAR:
       case VARCHAR:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            evaluateString(inputCol, outputColVector, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
           if (batch.selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              evaluateString(inputCol, outV, i);
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               evaluateString(inputCol, outputColVector, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                evaluateString(inputCol, outputColVector, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              evaluateString(inputCol, outV, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              evaluateString(inputCol, outputColVector, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs..
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (batch.selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
               }
             }
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
index 0aaba26..ecde39b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
@@ -30,6 +30,7 @@ import org.apache.hive.common.util.DateParser;
 import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.sql.Timestamp;
+import java.util.Arrays;
 
 
 public class VectorUDFDateAddScalarCol extends VectorExpression {
@@ -91,7 +92,8 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
     final int n = inputCol.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    boolean[] outputIsNull = outputColVector.isNull;
 
     switch (primitiveCategory) {
       case DATE:
@@ -107,15 +109,15 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
       case VARCHAR:
         boolean parsed = dateParser.parseDate(new String(stringValue, StandardCharsets.UTF_8), baseDate);
         if (!parsed) {
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
           if (selectedInUse) {
             for(int j=0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           }
           return;
@@ -130,39 +132,73 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     long baseDateDays = DateWritable.millisToDays(baseDate.getTime());
+    if (inputCol.isRepeating) {
+      if (inputCol.noNulls || !inputCol.isNull[0]) {
+        outputColVector.isNull[0] = false;
+        evaluate(baseDateDays, inputCol.vector[0], outputColVector, 0);
+      } else {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputCol.noNulls) {
-      outV.noNulls = true;
-      if (selectedInUse) {
-        for(int j=0; j < n; j++) {
-          int i = sel[j];
-          evaluate(baseDateDays, inputCol.vector[i], outV, i);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
+          }
         }
       } else {
-        for(int i = 0; i < n; i++) {
-          evaluate(baseDateDays, inputCol.vector[i], outV, i);
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
         }
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs..
+
       // Handle case with nulls. Don't do function if the value is null, to save time,
       // because calling the function can be expensive.
-      outV.noNulls = false;
+      outputColVector.noNulls = false;
+
       if (selectedInUse) {
         for(int j = 0; j < n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputCol.isNull[i];
+          outputColVector.isNull[i] = inputCol.isNull[i];
           if (!inputCol.isNull[i]) {
-            evaluate(baseDateDays, inputCol.vector[i], outV, i);
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
           }
         }
       } else {
         for(int i = 0; i < n; i++) {
-          outV.isNull[i] = inputCol.isNull[i];
+          outputColVector.isNull[i] = inputCol.isNull[i];
           if (!inputCol.isNull[i]) {
-            evaluate(baseDateDays, inputCol.vector[i], outV, i);
+            evaluate(baseDateDays, inputCol.vector[i], outputColVector, i);
           }
         }
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
index 982467e..0d794fe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
@@ -89,6 +89,9 @@ public class VectorUDFDateDiffColCol extends VectorExpression {
       return;
     }
 
+    /*
+     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
+     */
     NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
 
     LongColumnVector convertedVector1 = toDateArray(batch, inputTypeInfos[0], inputColVector1, dateVector1);

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
index 97e3669..08c91e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
@@ -35,6 +35,7 @@ import java.sql.Date;
 import java.sql.Timestamp;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 
 public class VectorUDFDateDiffColScalar extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -80,20 +81,21 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     ColumnVector inputCol = batch.cols[this.colNum];
     /* every line below this is identical for evaluateLong & evaluateString */
     final int n = inputCol.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
     switch (primitiveCategory1) {
@@ -114,15 +116,15 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
           baseDate = DateWritable.dateToDays(date);
           break;
         } catch (Exception e) {
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
           if (selectedInUse) {
             for(int j=0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           }
           return;
@@ -134,35 +136,66 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
     PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
     switch (primitiveCategory0) {
       case DATE:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateDate(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateDate(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               outputColVector.vector[i] = evaluateDate(inputCol, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateDate(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateDate(inputCol, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs..
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           }
@@ -170,35 +203,66 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
         break;
 
       case TIMESTAMP:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs..
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
               }
             }
           }
@@ -208,35 +272,66 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
       case STRING:
       case CHAR:
       case VARCHAR:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              evaluateString(inputCol, outV, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            evaluateString(inputCol, outputColVector, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               evaluateString(inputCol, outputColVector, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                evaluateString(inputCol, outputColVector, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              evaluateString(inputCol, outV, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              evaluateString(inputCol, outputColVector, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
               }
             }
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
index c575c05..c436c96 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
@@ -33,6 +33,7 @@ import java.sql.Date;
 import java.sql.Timestamp;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 
 public class VectorUDFDateDiffScalarCol extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -78,20 +79,21 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     ColumnVector inputCol = batch.cols[this.colNum];
     /* every line below this is identical for evaluateLong & evaluateString */
     final int n = inputCol.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     PrimitiveCategory primitiveCategory0 =
         ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
@@ -113,15 +115,15 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
           baseDate = DateWritable.dateToDays(date);
           break;
         } catch (Exception e) {
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
           if (selectedInUse) {
             for(int j=0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = true;
+              outputColVector.isNull[i] = true;
             }
           }
           return;
@@ -134,35 +136,66 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
         ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
     switch (primitiveCategory1) {
       case DATE:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateDate(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateDate(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               outputColVector.vector[i] = evaluateDate(inputCol, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateDate(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateDate(inputCol, i);
             }
           }
-        } else {
+        } else /* there are NULLs in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateDate(inputCol, i);
+                outputColVector.vector[i] = evaluateDate(inputCol, i);
               }
             }
           }
@@ -170,35 +203,66 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
         break;
 
       case TIMESTAMP:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
               }
             }
           }
@@ -208,35 +272,66 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
       case STRING:
       case CHAR:
       case VARCHAR:
-        if (inputCol.noNulls) {
-          outV.noNulls = true;
-          if (selectedInUse) {
-            for(int j=0; j < n; j++) {
-              int i = sel[j];
-              evaluateString(inputCol, outV, i);
+        if (inputCol.isRepeating) {
+          if (inputCol.noNulls || !inputCol.isNull[0]) {
+            outputColVector.isNull[0] = false;
+            evaluateString(inputCol, outputColVector, 0);
+          } else {
+            outputColVector.isNull[0] = true;
+            outputColVector.noNulls = false;
+          }
+          outputColVector.isRepeating = true;
+        } else if (inputCol.noNulls) {
+          if (batch.selectedInUse) {
+
+            // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+            if (!outputColVector.noNulls) {
+              for(int j = 0; j != n; j++) {
+               final int i = sel[j];
+               // Set isNull before call in case it changes its mind.
+               outputIsNull[i] = false;
+               evaluateString(inputCol, outputColVector, i);
+             }
+            } else {
+              for(int j = 0; j != n; j++) {
+                final int i = sel[j];
+                evaluateString(inputCol, outputColVector, i);
+              }
             }
           } else {
-            for(int i = 0; i < n; i++) {
-              evaluateString(inputCol, outV, i);
+            if (!outputColVector.noNulls) {
+
+              // Assume it is almost always a performance win to fill all of isNull so we can
+              // safely reset noNulls.
+              Arrays.fill(outputIsNull, false);
+              outputColVector.noNulls = true;
+            }
+            for(int i = 0; i != n; i++) {
+              evaluateString(inputCol, outputColVector, i);
             }
           }
-        } else {
+        } else /* there are nulls in the inputColVector */ {
+
+          // Carefully handle NULLs...
+
           // Handle case with nulls. Don't do function if the value is null, to save time,
           // because calling the function can be expensive.
-          outV.noNulls = false;
+          outputColVector.noNulls = false;
+
           if (selectedInUse) {
             for(int j = 0; j < n; j++) {
               int i = sel[j];
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
               }
             }
           } else {
             for(int i = 0; i < n; i++) {
-              outV.isNull[i] = inputCol.isNull[i];
+              outputColVector.isNull[i] = inputCol.isNull[i];
               if (!inputCol.isNull[i]) {
-                evaluateString(inputCol, outV, i);
+                evaluateString(inputCol, outputColVector, i);
               }
             }
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
index 9d72bdf..1f2d5cb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
@@ -55,7 +55,10 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
     // indexColumnVector includes the keys of Map
     indexColumnVector = batch.cols[indexColumnNum];
 
-    outV.noNulls = true;
+    /*
+     * Do careful maintenance of the outV.noNulls flag.
+     */
+
     int[] mapValueIndex;
     if (mapV.isRepeating) {
       if (mapV.isNull[0]) {
@@ -71,9 +74,8 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
             outV.noNulls = false;
           } else {
             // the key is found in MapColumnVector, set the value
-            outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
             outV.isNull[0] = false;
-            outV.noNulls = true;
+            outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
           }
           outV.isRepeating = true;
         } else {
@@ -97,8 +99,8 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
         outV.isNull[j] = true;
         outV.noNulls = false;
       } else {
-        outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
         outV.isNull[j] = false;
+        outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
       }
     }
     outV.isRepeating = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
index e6a86ae..a7d730b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
@@ -50,7 +50,10 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
     ColumnVector outV = batch.cols[outputColumnNum];
     MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
 
-    outV.noNulls = true;
+    /*
+     * Do careful maintenance of the outV.noNulls flag.
+     */
+
     int[] mapValueIndex;
     if (mapV.isRepeating) {
       if (mapV.isNull[0]) {
@@ -65,7 +68,6 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
         } else {
           // the key is found in MapColumnVector, set the value
           outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
-          outV.noNulls = true;
         }
       }
       outV.isRepeating = true;
@@ -77,8 +79,8 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
           outV.isNull[j] = true;
           outV.noNulls = false;
         } else {
-          outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
           outV.isNull[j] = false;
+          outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
         }
       }
       outV.isRepeating = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
index 519a4e4..eb6d6dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
 import java.util.Calendar;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -82,53 +83,85 @@ public abstract class VectorUDFTimestampFieldDate extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     ColumnVector inputColVec = batch.cols[this.colNum];
 
     /* every line below this is identical for evaluateLong & evaluateString */
     final int n = inputColVec.isRepeating ? 1 : batch.size;
     int[] sel = batch.selected;
     final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if(batch.size == 0) {
       /* n != batch.size when isRepeating */
       return;
     }
 
-    /* true for all algebraic UDFs with no state */
-    outV.isRepeating = inputColVec.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     LongColumnVector longColVector = (LongColumnVector) inputColVec;
 
+    if (inputColVec.isRepeating) {
+      if (inputColVec.noNulls || !inputColVec.isNull[0]) {
+        outputColVector.isNull[0] = false;
+        outputColVector.vector[0] = getDateField(longColVector.vector[0]);
+      } else {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVec.noNulls) {
-      outV.noNulls = true;
-      if (selectedInUse) {
-        for(int j=0; j < n; j++) {
-          int i = sel[j];
-          outV.vector[i] = getDateField(longColVector.vector[i]);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputColVector.vector[i] = getDateField(longColVector.vector[i]);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.vector[i] = getDateField(longColVector.vector[i]);
+          }
         }
       } else {
-        for(int i = 0; i < n; i++) {
-          outV.vector[i] = getDateField(longColVector.vector[i]);
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
         }
       }
-    } else {
-      // Handle case with nulls. Don't do function if the value is null, to save time,
-      // because calling the function can be expensive.
-      outV.noNulls = false;
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
       if (selectedInUse) {
         for(int j=0; j < n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputColVec.isNull[i];
+          outputColVector.isNull[i] = inputColVec.isNull[i];
           if (!inputColVec.isNull[i]) {
-            outV.vector[i] = getDateField(longColVector.vector[i]);
+            outputColVector.vector[i] = getDateField(longColVector.vector[i]);
           }
         }
       } else {
         for(int i = 0; i < n; i++) {
-          outV.isNull[i] = inputColVec.isNull[i];
+          outputColVector.isNull[i] = inputColVec.isNull[i];
           if (!inputColVec.isNull[i]) {
-            outV.vector[i] = getDateField(longColVector.vector[i]);
+            outputColVector.vector[i] = getDateField(longColVector.vector[i]);
           }
         }
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
index c5762d1..2918546 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
@@ -106,11 +106,27 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression {
       return;
     }
 
-    // true for all algebraic UDFs with no state
-    outV.isRepeating = inputCol.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
+
+    if (inputCol.isRepeating) {
+      if (inputCol.noNulls || !inputCol.isNull[0]) {
+        try {
+          outV.isNull[0] = false;
+          outV.vector[0] = getField(inputCol.vector[0], inputCol.start[0], inputCol.length[0]);
+        } catch (ParseException e) {
+          outV.noNulls = false;
+          outV.isNull[0] = true;
+        }
+      } else {
+        outV.isNull[0] = true;
+        outV.noNulls = false;
+      }
+      outV.isRepeating = true;
+      return;
+    }
 
     if (inputCol.noNulls) {
-      outV.noNulls = true;
       if (selectedInUse) {
         for (int j = 0; j < n; j++) {
           int i = sel[j];
@@ -133,11 +149,11 @@ public abstract class VectorUDFTimestampFieldString extends VectorExpression {
           }
         }
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
 
-      // Handle case with nulls. Don't do function if the value is null, to save time,
-      // because calling the function can be expensive.
+      // Carefully handle NULLs...
       outV.noNulls = false;
+
       if (selectedInUse) {
         for (int j = 0; j < n; j++) {
           int i = sel[j];