You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:37 UTC

[23/32] hive git commit: HIVE-18622: Vectorization: IF Statements, Comparisons, and more do not handle NULLs correctly (Matt McCline, reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
index aecaed2..1191b31 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongScalarNotEqualLongColumn extends VectorExpression {
 
   private static final long serialVersionUID = 1L;
 
-  private final int colNum;
-  private final long value;
+  protected final int colNum;
+  protected final long value;
 
   public LongScalarNotEqualLongColumn(long value, int colNum, int outputColumnNum) {
     super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongScalarNotEqualLongColumn extends VectorExpression {
     LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
-    boolean[] outNulls = outputColVector.isNull;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     long[] outputVector = outputColVector.vector;
@@ -64,44 +66,72 @@ public class LongScalarNotEqualLongColumn extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
     outputColVector.isRepeating = false;
-    outputColVector.noNulls = inputColVector.noNulls;
-    if (inputColVector.noNulls) {
-      if (inputColVector.isRepeating) {
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
         outputVector[0] = value != vector[0] ? 1 : 0;
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = value != vector[i] ? 1 : 0;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
+           outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
+            outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
           outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
         }
       }
-    } else {
-      if (inputColVector.isRepeating) {
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = value != vector[0] ? 1 : 0;
-          outNulls[0] = false;
-        } else {
-          outNulls[0] = true;
-        }
-        outputColVector.isRepeating = true;
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j=0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = value != vector[i] ? 1 : 0;
-          outNulls[i] = nullPos[i];
+          outputIsNull[i] = inputIsNull[i];
+          outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
         }
       } else {
-        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
           outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
index c52e337..0976f20 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ abstract public class LongToStringUnaryUDF extends VectorExpression {
     inputColumn = -1;
   }
 
-  abstract protected void func(BytesColumnVector outV, long[] vector, int i);
+  abstract protected void func(BytesColumnVector outputColVector, long[] vector, int i);
 
   @Override
   public void evaluate(VectorizedRowBatch batch) {
@@ -57,59 +59,87 @@ abstract public class LongToStringUnaryUDF extends VectorExpression {
     int[] sel = batch.selected;
     int n = batch.size;
     long[] vector = inputColVector.vector;
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
-    outV.initBuffer();
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+    outputColVector.initBuffer();
 
     if (n == 0) {
       //Nothing to do
       return;
     }
 
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, vector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, vector, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, vector, i);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           func(outputColVector, vector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, vector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, vector, i);
+          func(outputColVector, vector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
 
       // Handle case with nulls. Don't do function if the value is null,
       // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          func(outV, vector, 0);
-        }
-      } else if (batch.selectedInUse) {
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inputColVector.isNull[i];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
-            func(outV, vector, i);
+            func(outputColVector, vector, i);
           }
         }
-        outV.isRepeating = false;
+        outputColVector.isRepeating = false;
       } else {
-        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
           if (!inputColVector.isNull[i]) {
-            func(outV, vector, i);
+            func(outputColVector, vector, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
index ccc0fcb..aad408f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -64,7 +66,6 @@ public abstract class MathFuncDoubleToDouble extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -74,38 +75,69 @@ public abstract class MathFuncDoubleToDouble extends VectorExpression {
       return;
     }
 
-    if (inputColVector.isRepeating) {
-      outputVector[0] = func(vector[0]);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = func(vector[0]);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      cleanup(outputColVector, sel, batch.selectedInUse, n);
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = func(vector[i]);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] = func(vector[i]);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = func(vector[i]);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = func(vector[i]);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = func(vector[i]);
           outputIsNull[i] = inputIsNull[i];
-      }
+          outputVector[i] = func(vector[i]);
+        }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
           outputVector[i] = func(vector[i]);
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
     cleanup(outputColVector, sel, batch.selectedInUse, n);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
index 3375a56..dcebc24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -63,7 +65,6 @@ public abstract class MathFuncLongToDouble extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -73,38 +74,69 @@ public abstract class MathFuncLongToDouble extends VectorExpression {
       return;
     }
 
-    if (inputColVector.isRepeating) {
-      outputVector[0] = func(vector[0]);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = func(vector[0]);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      cleanup(outputColVector, sel, batch.selectedInUse, n);
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = func(vector[i]);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] = func(vector[i]);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = func(vector[i]);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = func(vector[i]);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = func(vector[i]);
           outputIsNull[i] = inputIsNull[i];
-      }
+          outputVector[i] = func(vector[i]);
+        }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
           outputVector[i] = func(vector[i]);
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
     cleanup(outputColVector, sel, batch.selectedInUse, n);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
index 898cf96..e5b6902 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -62,7 +64,6 @@ public abstract class MathFuncLongToLong extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     long[] outputVector = outputColVector.vector;
@@ -72,38 +73,68 @@ public abstract class MathFuncLongToLong extends VectorExpression {
       return;
     }
 
-    if (inputColVector.isRepeating) {
-      outputVector[0] = func(vector[0]);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = func(vector[0]);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = func(vector[i]);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] = func(vector[i]);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = func(vector[i]);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = func(vector[i]);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = func(vector[i]);
           outputIsNull[i] = inputIsNull[i];
-      }
+          outputVector[i] = func(vector[i]);
+        }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
           outputVector[i] = func(vector[i]);
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
index 30f20f3..be69f7f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -54,45 +56,61 @@ public class NotCol extends VectorExpression {
     long[] vector = inputColVector.vector;
     LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
     long[] outputVector = outV.vector;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outV.isNull;
 
     if (n <= 0) {
       // Nothing to do, this is EOF
       return;
     }
 
-    if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         // 0 XOR 1 yields 1, 1 XOR 1 yields 0
         outputVector[0] = vector[0] ^ 1;
-      } else if (batch.selectedInUse) {
+      } else {
+        outputIsNull[0] = true;
+        outV.noNulls = false;
+      }
+      outV.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
+          outV.isNull[i] = false;
           outputVector[i] = vector[i] ^ 1;
         }
-        outV.isRepeating = false;
       } else {
+        Arrays.fill(outV.isNull, 0, n, false);
         for (int i = 0; i != n; i++) {
           outputVector[i] = vector[i] ^ 1;
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      /*
+       * For better performance on LONG/DOUBLE we don't want the conditional
+       * statements inside the for loop.
+       */
       outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outputVector[0] = vector[0] ^ 1;
-        outV.isNull[0] = inputColVector.isNull[0];
-      } else if (batch.selectedInUse) {
-        outV.isRepeating = false;
+
+      if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] ^ 1;
           outV.isNull[i] = inputColVector.isNull[i];
         }
       } else {
-        outV.isRepeating = false;
         for (int i = 0; i != n; i++) {
           outputVector[i] = vector[i] ^ 1;
           outV.isNull[i] = inputColVector.isNull[i];

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
index eaaade6..3c18853 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
@@ -284,25 +284,56 @@ public class NullUtil {
   }
 
   /*
-   * Propagate null values for a two-input operator.
+   * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
    */
   public static void propagateNullsColCol(ColumnVector inputColVector1,
       ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel,
       int n, boolean selectedInUse) {
 
-    outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
-    if (outputColVector.noNulls) {
-      // the inputs might not always have isNull initialized for
-      // inputColVector1.isNull[i] || inputColVector2.isNull[i] to be valid
-      Arrays.fill(outputColVector.isNull, false);
-      return;
-    }
+    if (inputColVector1.noNulls && inputColVector2.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        outputColVector.isNull[0] = false;
+        outputColVector.isRepeating = true;
+      } else {
+        if (selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputColVector.isNull[i] = false;
+          }
+        } else {
+          Arrays.fill(outputColVector.isNull, 0, n, false);
+        }
+      }
+    } else if (inputColVector1.noNulls && !inputColVector2.noNulls) {
 
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      if (inputColVector2.isRepeating) {
-        outputColVector.isNull[0] = inputColVector2.isNull[0];
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (!inputColVector2.isNull[0]) {
+          outputColVector.isNull[0] = false;
+        } else {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+        }
+        outputColVector.isRepeating = true;
+      } else if (inputColVector2.isRepeating) {
+        if (!inputColVector2.isNull[0]) {
+          if (selectedInUse) {
+            for(int j = 0; j != n; j++) {
+              int i = sel[j];
+              outputColVector.isNull[i] = false;
+            }
+          } else {
+            Arrays.fill(outputColVector.isNull, 0, n, false);
+          }
+        } else {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+          outputColVector.isRepeating = true;   // Because every value will be NULL.
+        }
       } else {
+        outputColVector.noNulls = false;
         if (selectedInUse) {
           for(int j = 0; j != n; j++) {
             int i = sel[j];
@@ -313,9 +344,32 @@ public class NullUtil {
         }
       }
     } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      if (inputColVector1.isRepeating) {
-        outputColVector.isNull[0] = inputColVector1.isNull[0];
+
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (!inputColVector1.isNull[0]) {
+          outputColVector.isNull[0] = false;
+        } else {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+        }
+        outputColVector.isRepeating = true;
+      } else if (inputColVector1.isRepeating) {
+        if (!inputColVector1.isNull[0]) {
+          if (selectedInUse) {
+            for(int j = 0; j != n; j++) {
+              int i = sel[j];
+              outputColVector.isNull[i] = false;
+            }
+          } else {
+            Arrays.fill(outputColVector.isNull, 0, n, false);
+          }
+        } else {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+          outputColVector.isRepeating = true;   // Because every value will be NULL.
+        }
       } else {
+        outputColVector.noNulls = false;
         if (selectedInUse) {
           for(int j = 0; j != n; j++) {
             int i = sel[j];
@@ -326,18 +380,23 @@ public class NullUtil {
         }
       }
     } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
+
       if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
-        outputColVector.isNull[0] = inputColVector1.isNull[0] || inputColVector2.isNull[0];
-        if (outputColVector.isNull[0]) {
-          outputColVector.isRepeating = true;
-          return;
+        if (!inputColVector1.isNull[0] && !inputColVector2.isNull[0]) {
+          outputColVector.isNull[0] = false;
+        } else {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
         }
+        outputColVector.isRepeating = true;
       } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
+
         if (inputColVector1.isNull[0]) {
           outputColVector.isNull[0] = true;
-          outputColVector.isRepeating = true;   // because every value will be NULL
-          return;
+          outputColVector.noNulls = false;
+          outputColVector.isRepeating = true;   // Because every value will be NULL.
         } else {
+          outputColVector.noNulls = false;
           if (selectedInUse) {
              for(int j = 0; j != n; j++) {
                int i = sel[j];
@@ -352,9 +411,10 @@ public class NullUtil {
       } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
         if (inputColVector2.isNull[0]) {
           outputColVector.isNull[0] = true;
-          outputColVector.isRepeating = true;   // because every value will be NULL
-          return;
+          outputColVector.noNulls = false;
+          outputColVector.isRepeating = true;   // Because every value will be NULL.
         } else {
+          outputColVector.noNulls = false;
           if (selectedInUse) {
              for(int j = 0; j != n; j++) {
                int i = sel[j];
@@ -366,6 +426,7 @@ public class NullUtil {
           }
         }
       } else {                      // neither side is repeating
+        outputColVector.noNulls = false;
         if (selectedInUse) {
           for(int j = 0; j != n; j++) {
             int i = sel[j];

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
index bfd7334..62873e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -49,33 +51,68 @@ public class OctetLength extends VectorExpression {
     }
 
     BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
     int[] sel = batch.selected;
     int n = batch.size;
     int [] length = inputColVector.length;
-    long[] resultLen = outV.vector;
+    long[] resultLen = outputColVector.vector;
 
     if (n == 0) {
       //Nothing to do
       return;
     }
 
-    if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         resultLen[0] = length[0];
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          resultLen[i] = length[i];
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    /*
+     * Do careful maintenance of the outputColVector.noNulls flag.
+     */
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           resultLen[i] = length[i];
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            resultLen[i] = length[i];
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           resultLen[i] = length[i];
         }
-        outV.isRepeating = false;
       }
     } else {
 
@@ -83,30 +120,23 @@ public class OctetLength extends VectorExpression {
        * Handle case with nulls. Don't do function if the value is null, to save time,
        * because calling the function can be expensive.
        */
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          resultLen[0] = length[0];
-        }
-      } else if (batch.selectedInUse) {
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          if (!inputColVector.isNull[i]) {
+          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
             resultLen[i] = length[i];
           }
-          outV.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inputColVector.isNull[i]) {
+          if (!inputIsNull[i]) {
             resultLen[i] = length[i];
           }
-          outV.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
index 20a0a37..db684c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
@@ -15,10 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.AbstractFilterStringColLikeStringScalar.Checker;
@@ -70,42 +71,50 @@ public class SelectStringColLikeStringScalar extends VectorExpression {
 
     LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
     long[] outputVector = outV.vector;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outV.isNull;
 
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    outV.noNulls = inputColVector.noNulls;
-    outV.isRepeating = inputColVector.isRepeating;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
 
-    if (inputColVector.noNulls) {
-      if (inputColVector.isRepeating) {
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0);
-        outV.isNull[0] = false;
-      } else if (batch.selectedInUse) {
+      } else {
+        outputIsNull[0] = true;
+        outV.noNulls = false;
+      }
+      outV.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
           outV.isNull[i] = false;
+          outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
         }
       } else {
+        Arrays.fill(outV.isNull, 0, n, false);
         for (int i = 0; i != n; i++) {
           outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
-          outV.isNull[i] = false;
         }
       }
-    } else {
-      if (inputColVector.isRepeating) {
-        //All must be selected otherwise size would be zero. Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0);
-          outV.isNull[0] = false;
-        } else {
-          outputVector[0] = LongColumnVector.NULL_VALUE;
-          outV.isNull[0] = true;
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are nulls in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for (int j = 0; j != n; j++) {
           int i = sel[j];
           if (!nullPos[i]) {
@@ -114,6 +123,7 @@ public class SelectStringColLikeStringScalar extends VectorExpression {
           } else {
             outputVector[i] = LongColumnVector.NULL_VALUE;
             outV.isNull[i] = true;
+            outV.noNulls = false;
           }
         }
       } else {
@@ -124,11 +134,12 @@ public class SelectStringColLikeStringScalar extends VectorExpression {
           } else {
             outputVector[i] = LongColumnVector.NULL_VALUE;
             outV.isNull[i] = true;
+            outV.noNulls = false;
           }
         }
       }
     }
-	}
+  }
 
   private Checker borrowChecker() {
     FilterStringColLikeStringScalar fil = new FilterStringColLikeStringScalar();

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
index c889ac1..eb91321 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
@@ -74,57 +74,82 @@ public class StringColumnInList extends VectorExpression implements IStringInExp
     BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
-    boolean[] nullPos = inputColVector.isNull;
+    boolean[] inputIsNull = inputColVector.isNull;
     int n = batch.size;
     byte[][] vector = inputColVector.vector;
     int[] start = inputColVector.start;
     int[] len = inputColVector.length;
     long[] outputVector = outputColVector.vector;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     // return immediately if batch is empty
     if (n == 0) {
       return;
     }
 
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    outputColVector.noNulls = inputColVector.noNulls;
-    if (inputColVector.noNulls) {
-      if (inputColVector.isRepeating) {
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
         }
       }
-    } else {
-      if (inputColVector.isRepeating) {
+    } else /* there are nulls in the inputColVector */ {
 
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
-        if (!nullPos[0]) {
-          outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
-        }
-        outputColVector.isNull[0] = nullPos[0];
-      } else if (batch.selectedInUse) {
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          if (!nullPos[i]) {
+          outputColVector.isNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
             outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
           }
-          outputColVector.isNull[i] = nullPos[i];
         }
       } else {
-        System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n);
+        System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!nullPos[i]) {
+          if (!inputIsNull[i]) {
             outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
           }
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
index f730c9d..6c92e39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -56,12 +57,14 @@ public class StringGroupColConcatStringScalar extends VectorExpression {
     }
 
     BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     int n = batch.size;
     byte[][] vector = inputColVector.vector;
     int[] start = inputColVector.start;
     int[] length = inputColVector.length;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -70,55 +73,79 @@ public class StringGroupColConcatStringScalar extends VectorExpression {
     }
 
     // initialize output vector buffer to receive data
-    outV.initBuffer();
+    outputColVector.initBuffer();
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        outputColVector.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
 
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+          outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
 
       /*
        * Handle case with nulls. Don't do function if the value is null, to save time,
        * because calling the function can be expensive.
        */
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
-        }
-      } else if (batch.selectedInUse) {
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
-            outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+            outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
           }
-          outV.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       } else {
         for(int i = 0; i != n; i++) {
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
-            outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+            outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
           }
-          outV.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
index cbdcc76..6c40a28 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -57,6 +59,7 @@ public class StringGroupConcatColCol extends VectorExpression {
     BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1];
     BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2];
     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+    boolean[] outputIsNull = outV.isNull;
     int[] sel = batch.selected;
     int n = batch.size;
     byte[][] vector1 = inV1.vector;
@@ -81,7 +84,7 @@ public class StringGroupConcatColCol extends VectorExpression {
 
     if (inV1.noNulls && !inV2.noNulls) {
 
-      // propagate nulls
+      // Carefully handle NULLs...
 
       /* We'll assume that there *may* be nulls in the input if !noNulls is true
        * for an input vector. This is to be more forgiving of errors in loading
@@ -89,6 +92,7 @@ public class StringGroupConcatColCol extends VectorExpression {
        * isNull[0] is set if !noNulls and isRepeating are true for the vector.
        */
       outV.noNulls = false;
+
       if (inV2.isRepeating) {
         if (inV2.isNull[0]) {
 
@@ -321,8 +325,9 @@ public class StringGroupConcatColCol extends VectorExpression {
       }
     } else {      // there are no nulls in either input vector
 
-      // propagate null information
-      outV.noNulls = true;
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
 
       // perform data operation
       if (inV1.isRepeating && inV2.isRepeating) {
@@ -330,13 +335,16 @@ public class StringGroupConcatColCol extends VectorExpression {
         // All must be selected otherwise size would be zero. Repeating property will not change.
         outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]);
         outV.isRepeating = true;
+        outputIsNull[0] = false;
       } else if (inV1.isRepeating) {
         if (batch.selectedInUse) {
           for(int j = 0; j != n; j++) {
             int i = sel[j];
+            outputIsNull[i] = false;
             outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
           }
         } else {
+          Arrays.fill(outputIsNull, 0, n, false);
           for(int i = 0; i != n; i++) {
             outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
           }
@@ -345,9 +353,11 @@ public class StringGroupConcatColCol extends VectorExpression {
         if (batch.selectedInUse) {
           for(int j = 0; j != n; j++) {
             int i = sel[j];
+            outputIsNull[i] = false;
             outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
           }
         } else {
+          Arrays.fill(outputIsNull, 0, n, false);
           for(int i = 0; i != n; i++) {
             outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
           }
@@ -356,9 +366,11 @@ public class StringGroupConcatColCol extends VectorExpression {
         if (batch.selectedInUse) {
           for(int j=0; j != n; j++) {
             int i = sel[j];
+            outputIsNull[i] = false;
             outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]);
           }
         } else {
+          Arrays.fill(outputIsNull, 0, n, false);
           for(int i = 0; i != n; i++) {
             outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]);
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
index 9b9c063..f1fabb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -53,66 +55,88 @@ public class StringLength extends VectorExpression {
     }
 
     BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     int n = batch.size;
     byte[][] vector = inputColVector.vector;
     int [] start = inputColVector.start;
     int [] length = inputColVector.length;
-    long[] resultLen = outV.vector;
+    long[] resultLen = outputColVector.vector;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
       //Nothing to do
       return;
     }
 
-    if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
         resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are nulls in the inputColVector */ {
 
-      /*
-       * Handle case with nulls. Don't do function if the value is null, to save time,
-       * because calling the function can be expensive.
-       */
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
-        }
-      } else if (batch.selectedInUse) {
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
             resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
           }
-          outV.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
+        outputColVector.isRepeating = false;
       } else {
         for(int i = 0; i != n; i++) {
+          outputColVector.isNull[i] = inputColVector.isNull[i];
           if (!inputColVector.isNull[i]) {
             resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
           }
-          outV.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
index 94fbef8..a9f09dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -56,12 +57,14 @@ public class StringScalarConcatStringGroupCol extends VectorExpression {
       }
 
     BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
     int[] sel = batch.selected;
     int n = batch.size;
     byte[][] vector = inputColVector.vector;
     int[] start = inputColVector.start;
     int[] length = inputColVector.length;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -70,55 +73,79 @@ public class StringScalarConcatStringGroupCol extends VectorExpression {
     }
 
     // initialize output vector buffer to receive data
-    outV.initBuffer();
+    outputColVector.initBuffer();
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before the call in case the call changes its mind.
+        outputIsNull[0] = false;
+        outputColVector.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
 
     if (inputColVector.noNulls) {
-      outV.noNulls = true;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before the call in case the call changes its mind.
+           outputIsNull[i] = false;
+           outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+          outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
 
       /*
        * Handle case with nulls. Don't do function if the value is null, to save time,
        * because calling the function can be expensive.
        */
-      outV.noNulls = false;
-      if (inputColVector.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inputColVector.isNull[0];
-        if (!inputColVector.isNull[0]) {
-          outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
-        }
-      } else if (batch.selectedInUse) {
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
           if (!inputColVector.isNull[i]) {
-            outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+            outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
           }
-          outV.isNull[i] = inputColVector.isNull[i];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       } else {
         for(int i = 0; i != n; i++) {
           if (!inputColVector.isNull[i]) {
-            outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+            outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
           }
-          outV.isNull[i] = inputColVector.isNull[i];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
index 5934f6f..7c58838 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -125,7 +126,7 @@ public class StringSubstrColStart extends VectorExpression {
     }
 
     BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
 
     int n = batch.size;
 
@@ -137,82 +138,101 @@ public class StringSubstrColStart extends VectorExpression {
     int[] sel = batch.selected;
     int[] len = inV.length;
     int[] start = inV.start;
-    outV.initBuffer();
+    outputColVector.initBuffer();
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     if (inV.isRepeating) {
-      outV.isRepeating = true;
       if (!inV.noNulls && inV.isNull[0]) {
-        outV.isNull[0] = true;
-        outV.noNulls = false;
-        outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+        outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
         return;
       } else {
-        outV.noNulls = true;
+        outputIsNull[0] = false;
         int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx);
         if (offset != -1) {
-          outV.setVal(0, vector[0], offset, len[0] - (offset - start[0]));
+          outputColVector.setVal(0, vector[0], offset, len[0] - (offset - start[0]));
         } else {
-          outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
+          outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
         }
       }
-    } else {
-      outV.isRepeating = false;
-      if (batch.selectedInUse) {
-        if (!inV.noNulls) {
-          outV.noNulls = false;
-          for (int i = 0; i != n; ++i) {
-            int selected = sel[i];
-            if (!inV.isNull[selected]) {
-              int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected],
-                  startIdx);
-              outV.isNull[selected] = false;
-              if (offset != -1) {
-                outV.setVal(selected, vector[selected], offset,
-                    len[selected] - (offset - start[selected]));
-              } else {
-                outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
-              }
-            } else {
-              outV.isNull[selected] = true;
-            }
-          }
-        } else {
-          outV.noNulls = true;
-          for (int i = 0; i != n; ++i) {
-            int selected = sel[i];
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (batch.selectedInUse) {
+      if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+        // Carefully handle NULLs...
+
+        for (int i = 0; i != n; ++i) {
+          int selected = sel[i];
+          if (!inV.isNull[selected]) {
+            outputIsNull[selected] = false;
             int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected],
                 startIdx);
+            outputColVector.isNull[selected] = false;
             if (offset != -1) {
-              outV.setVal(selected, vector[selected], offset,
+              outputColVector.setVal(selected, vector[selected], offset,
                   len[selected] - (offset - start[selected]));
             } else {
-              outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
+              outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
             }
+          } else {
+            outputColVector.isNull[selected] = true;
+            outputColVector.noNulls = false;
           }
         }
       } else {
-        if (!inV.noNulls) {
-          outV.noNulls = false;
-          System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
-          for (int i = 0; i != n; ++i) {
-            if (!inV.isNull[i]) {
-              int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
-              if (offset != -1) {
-                outV.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
-              } else {
-                outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
-              }
-            }
+        for (int i = 0; i != n; ++i) {
+          int selected = sel[i];
+          outputColVector.isNull[selected] = false;
+          int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected],
+              startIdx);
+          if (offset != -1) {
+            outputColVector.setVal(selected, vector[selected], offset,
+                len[selected] - (offset - start[selected]));
+          } else {
+            outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
           }
-        } else {
-          outV.noNulls = true;
-          for (int i = 0; i != n; ++i) {
+        }
+      }
+    } else {
+      if (!inV.noNulls)  /* there are nulls in the inputColVector */ {
+
+        // Carefully handle NULLs...
+
+        for (int i = 0; i != n; ++i) {
+          if (!inV.isNull[i]) {
+            outputColVector.isNull[i] = false;
             int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
             if (offset != -1) {
-              outV.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
+              outputColVector.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
             } else {
-              outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
+              outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
             }
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for (int i = 0; i != n; ++i) {
+          int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
+          if (offset != -1) {
+            outputColVector.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
+          } else {
+            outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
           }
         }
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
index 9d6eccf..7c5d19a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -146,7 +147,7 @@ public class StringSubstrColStartLen extends VectorExpression {
     }
 
     BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
-    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
 
     int n = batch.size;
 
@@ -158,82 +159,98 @@ public class StringSubstrColStartLen extends VectorExpression {
     int[] sel = batch.selected;
     int[] len = inV.length;
     int[] start = inV.start;
-    outV.initBuffer();
+    outputColVector.initBuffer();
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
 
     if (inV.isRepeating) {
-      outV.isRepeating = true;
+
       if (!inV.noNulls && inV.isNull[0]) {
-        outV.isNull[0] = true;
-        outV.noNulls = false;
-        outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
-        return;
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+        outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
       } else {
-        outV.noNulls = true;
+        outputIsNull[0] = false;
         populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray);
         if (offsetArray[0] != -1) {
-          outV.setVal(0, vector[0], offsetArray[0], offsetArray[1]);
+          outputColVector.setVal(0, vector[0], offsetArray[0], offsetArray[1]);
         } else {
-          outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
+          outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
         }
       }
-    } else {
-      outV.isRepeating = false;
-      if (batch.selectedInUse) {
-        if (!inV.noNulls) {
-          outV.noNulls = false;
-          for (int i = 0; i != n; ++i) {
-            int selected = sel[i];
-            if (!inV.isNull[selected]) {
-              outV.isNull[selected] = false;
-              populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx,
-                  length, offsetArray);
-              if (offsetArray[0] != -1) {
-                outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
-              } else {
-                outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
-              }
-            } else {
-              outV.isNull[selected] = true;
-            }
-          }
-        } else {
-          outV.noNulls = true;
-          for (int i = 0; i != n; ++i) {
-            int selected = sel[i];
-            outV.isNull[selected] = false;
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (batch.selectedInUse) {
+      if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+        // Carefully handle NULLs...
+
+        for (int i = 0; i != n; ++i) {
+          int selected = sel[i];
+          if (!inV.isNull[selected]) {
+            outputIsNull[selected] = false;
             populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx,
                 length, offsetArray);
             if (offsetArray[0] != -1) {
-              outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
+              outputColVector.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
             } else {
-              outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
+              outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
             }
+          } else {
+            outputIsNull[selected] = true;
+            outputColVector.noNulls = false;
           }
         }
       } else {
-        if (!inV.noNulls) {
-          System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
-          outV.noNulls = false;
-          for (int i = 0; i != n; ++i) {
-            if (!inV.isNull[i]) {
-              populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray);
-              if (offsetArray[0] != -1) {
-                outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
-              } else {
-                outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
-              }
-            }
+        for (int i = 0; i != n; ++i) {
+          int selected = sel[i];
+          outputColVector.isNull[selected] = false;
+          populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx,
+              length, offsetArray);
+          if (offsetArray[0] != -1) {
+            outputColVector.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
+          } else {
+            outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
           }
-        } else {
-          outV.noNulls = true;
-          for (int i = 0; i != n; ++i) {
-            outV.isNull[i] = false;
+        }
+      }
+    } else {
+      if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+        // Carefully handle NULLs...
+
+        for (int i = 0; i != n; ++i) {
+          if (!inV.isNull[i]) {
+            outputIsNull[i] = false;
             populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray);
             if (offsetArray[0] != -1) {
-              outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
+              outputColVector.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
             } else {
-              outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
+              outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
             }
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for (int i = 0; i != n; ++i) {
+          populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray);
+          if (offsetArray[0] != -1) {
+            outputColVector.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
+          } else {
+            outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
           }
         }
       }