You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/22 23:06:19 UTC

svn commit: r1485422 - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/

Author: omalley
Date: Wed May 22 21:06:19 2013
New Revision: 1485422

URL: http://svn.apache.org/r1485422
Log:
HIVE-4472 OR, NOT Filter logic can lose an array, and always takes time O(VectorizedRowBatch.DEFAULT_SIZE) (Jitendra Nath Pandey via omalley)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java Wed May 22 21:06:19 2013
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 
 /**
@@ -59,7 +60,7 @@ public class DoubleColumnVector extends 
       index = 0;
     }
     if (!noNulls && isNull[index]) {
-      return null;
+      return NullWritable.get();
     } else {
       writableObj.set(vector[index]);
       return writableObj;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java Wed May 22 21:06:19 2013
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 
 /**
@@ -59,7 +60,7 @@ public class LongColumnVector extends Co
       index = 0;
     }
     if (!noNulls && isNull[index]) {
-      return null;
+      return NullWritable.get();
     } else {
       writableObj.set(vector[index]);
       return writableObj;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java Wed May 22 21:06:19 2013
@@ -24,11 +24,11 @@ import org.apache.hadoop.hive.ql.exec.ve
  * This class represents an Or expression. This applies short circuit optimization.
  */
 public class FilterExprOrExpr extends VectorExpression {
-  VectorExpression childExpr1;
-  VectorExpression childExpr2;
-  int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  private final VectorExpression childExpr1;
+  private final VectorExpression childExpr2;
+  private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
 
   public FilterExprOrExpr(VectorExpression childExpr1, VectorExpression childExpr2) {
     this.childExpr1 = childExpr1;
@@ -42,15 +42,14 @@ public class FilterExprOrExpr extends Ve
       return;
     }
     boolean prevSelectInUse = batch.selectedInUse;
-    //Clone the selected vector
-    int [] sel = batch.selected;
+
+    // Save the original selected vector
+    int[] sel = batch.selected;
     if (batch.selectedInUse) {
-      for (int i = 0; i < n; i++) {
-        tmpSelect1[i] = sel[i];
-      }
+      System.arraycopy(sel, 0, initialSelected, 0, n);
     } else {
       for (int i = 0; i < n; i++) {
-        tmpSelect1[i] = i;
+        initialSelected[i] = i;
         sel[i] = i;
       }
       batch.selectedInUse = true;
@@ -58,40 +57,44 @@ public class FilterExprOrExpr extends Ve
 
     childExpr1.evaluate(batch);
 
-    //Calculate unselected ones in last evaluate.
-    for (int i = 0; i < tmp.length; i++) {
-      tmp[i] = 0;
+    // Preserve the selected reference and size values generated
+    // after the first child is evaluated.
+    int sizeAfterFirstChild = batch.size;
+    int[] selectedAfterFirstChild = batch.selected;
+
+    // Calculate unselected ones in last evaluate.
+    for (int j = 0; j < n; j++) {
+      tmp[initialSelected[j]] = 0;
     }
     for (int j = 0; j < batch.size; j++) {
-      int i = sel[j];
-      tmp[i] = 1;
+      tmp[selectedAfterFirstChild[j]] = 1;
     }
     int unselectedSize = 0;
-    for (int j =0; j < n; j++) {
-      int i = tmpSelect1[j];
+    for (int j = 0; j < n; j++) {
+      int i = initialSelected[j];
       if (tmp[i] == 0) {
         unselected[unselectedSize++] = i;
       }
     }
-    //Preserve current selected and size
-    int currentSize = batch.size;
-    int [] currentSelected = batch.selected;
 
-    //Evaluate second child expression over unselected ones only.
+    // Evaluate second child expression over unselected ones only.
     batch.selected = unselected;
     batch.size = unselectedSize;
+
     childExpr2.evaluate(batch);
 
-    //Merge the result of last evaluate to previous evaluate.
-    int newSize = batch.size + currentSize;
-    for (int i = batch.size; i < newSize; i++ ) {
-      batch.selected[i] = currentSelected[i-batch.size];
-    }
+    // Merge the result of last evaluate to previous evaluate.
+    int newSize = batch.size + sizeAfterFirstChild;
+    System.arraycopy(selectedAfterFirstChild, 0, batch.selected, batch.size, sizeAfterFirstChild);
     batch.size = newSize;
     if (newSize == n) {
-      //Filter didn't do anything
+      // Filter didn't do anything
       batch.selectedInUse = prevSelectInUse;
     }
+
+    // unselected array is taken away by the row batch
+    // so take the row batch's original one.
+    unselected = selectedAfterFirstChild;
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java Wed May 22 21:06:19 2013
@@ -24,10 +24,10 @@ import org.apache.hadoop.hive.ql.exec.ve
  * This class represents an NOT filter expression. This applies short circuit optimization.
  */
 public class FilterNotExpr extends VectorExpression {
-  VectorExpression childExpr1;
-  int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  private final VectorExpression childExpr1;
+  private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
 
   public FilterNotExpr(VectorExpression childExpr1) {
     this.childExpr1 = childExpr1;
@@ -37,19 +37,17 @@ public class FilterNotExpr extends Vecto
   public void evaluate(VectorizedRowBatch batch) {
     int n = batch.size;
 
-    if (n<=0) {
+    if (n <= 0) {
       return;
     }
 
-    //Clone the selected vector
-    int [] sel = batch.selected;
+    // Clone the selected vector
+    int[] sel = batch.selected;
     if (batch.selectedInUse) {
-      for (int i = 0; i < n; i++) {
-        tmpSelect1[i] = sel[i];
-      }
+      System.arraycopy(sel, 0, initialSelected, 0, n);
     } else {
       for (int i = 0; i < n; i++) {
-        tmpSelect1[i] = i;
+        initialSelected[i] = i;
         sel[i] = i;
       }
       batch.selectedInUse = true;
@@ -57,25 +55,30 @@ public class FilterNotExpr extends Vecto
 
     childExpr1.evaluate(batch);
 
-    //Calculate unselected ones in last evaluate.
-    for (int i = 0; i < tmp.length; i++) {
-      tmp[i] = 0;
+    // Calculate unselected ones in last evaluate.
+    for (int i = 0; i < n; i++) {
+      tmp[initialSelected[i]] = 0;
     }
+
+    // Need to set sel reference again, because the child expression might
+    // have invalidated the earlier reference
+    sel = batch.selected;
     for (int j = 0; j < batch.size; j++) {
       int i = sel[j];
       tmp[i] = 1;
     }
     int unselectedSize = 0;
-    for (int j =0; j < n; j++) {
-      int i = tmpSelect1[j];
+    for (int j = 0; j < n; j++) {
+      int i = initialSelected[j];
       if (tmp[i] == 0) {
         unselected[unselectedSize++] = i;
       }
     }
 
-    //The unselected is the new selected
+    // The unselected is the new selected, swap the arrays
     batch.selected = unselected;
-    batch.size =  unselectedSize;
+    unselected = sel;
+    batch.size = unselectedSize;
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java Wed May 22 21:06:19 2013
@@ -21,15 +21,17 @@ package org.apache.hadoop.hive.ql.exec.v
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This expression selects a row if the given boolean column is false.
+ */
 public class SelectColumnIsFalse extends VectorExpression {
-	int colNum1;
+  private final int colNum1;
 
-	public SelectColumnIsFalse(int colNum1)
-	{
-		this.colNum1 = colNum1;
-	}
+  public SelectColumnIsFalse(int colNum1) {
+    this.colNum1 = colNum1;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
@@ -43,29 +45,29 @@ public class SelectColumnIsFalse extends
     boolean[] nullVector = inputColVector1.isNull;
 
     if (n <= 0) {
-      //Nothing to do
+      // Nothing to do
       return;
     }
 
     if (inputColVector1.noNulls) {
       if (inputColVector1.isRepeating) {
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
         if (vector1[0] == 1) {
           // All are filtered out
           batch.size = 0;
+          return;
+        } else {
+          // All are selected
+          return;
         }
       } else if (batch.selectedInUse) {
-        int[] newSelected = new int[n];
         int newSize = 0;
         for (int j = 0; j != n; j++) {
           int i = sel[j];
           if (vector1[i] == 0) {
-            newSelected[newSize++] = i;
+            sel[newSize++] = i;
           }
         }
         batch.size = newSize;
-        batch.selected = newSelected;
       } else {
         int newSize = 0;
         for (int i = 0; i != n; i++) {
@@ -80,19 +82,22 @@ public class SelectColumnIsFalse extends
       }
     } else {
       if (inputColVector1.isRepeating) {
-        //Repeating and null value
-        batch.size = 0;
+        if (nullVector[0] || (vector1[0] == 1)) {
+          // All are filtered out
+          batch.size = 0;
+        } else {
+          // All are selected
+          return;
+        }
       } else if (batch.selectedInUse) {
-        int[] newSelected = new int[n];
         int newSize = 0;
         for (int j = 0; j != n; j++) {
           int i = sel[j];
           if (vector1[i] == 0 && !nullVector[i]) {
-            newSelected[newSize++] = i;
+            sel[newSize++] = i;
           }
         }
         batch.size = newSize;
-        batch.selected = newSelected;
       } else {
         int newSize = 0;
         for (int i = 0; i != n; i++) {
@@ -106,7 +111,7 @@ public class SelectColumnIsFalse extends
         }
       }
     }
-	}
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java Wed May 22 21:06:19 2013
@@ -21,14 +21,17 @@ package org.apache.hadoop.hive.ql.exec.v
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This expression selects a row if the given column is not null.
+ */
 public class SelectColumnIsNotNull extends VectorExpression {
-	int colNum;
+  private final int colNum;
 
-	public SelectColumnIsNotNull(int colNum) {
-		this.colNum = colNum;
-	}
+  public SelectColumnIsNotNull(int colNum) {
+    this.colNum = colNum;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
@@ -37,46 +40,47 @@ public class SelectColumnIsNotNull exten
 
     ColumnVector inputColVector = batch.cols[colNum];
     int[] sel = batch.selected;
-    //Note: if type of isNull could be long[], could we just re-use this
-    //vector as the output vector. No iterations would be needed.
     boolean[] nullPos = inputColVector.isNull;
     int n = batch.size;
     if (n <= 0) {
-      //Nothing to do
+      // Nothing to do
       return;
     }
 
     if (inputColVector.noNulls) {
-      //All selected, do nothing
+      // All selected, do nothing
       return;
     } else if (inputColVector.isRepeating) {
-      //All must be null
-      batch.size = 0;
+      if (nullPos[0]) {
+        // All are null so none are selected
+        batch.size = 0;
+        return;
+      } else {
+        // None are null, so all are selected
+        return;
+      }
     } else if (batch.selectedInUse) {
-      int [] newSelected = new int[n];
-      int newSize=0;
-			for(int j=0; j != n; j++) {
-				int i = sel[j];
-				if (!nullPos[i]) {
-          newSelected[newSize++] = i;
-				}
-			}
-      batch.selected = newSelected;
+      int newSize = 0;
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (!nullPos[i]) {
+          sel[newSize++] = i;
+        }
+      }
       batch.size = newSize;
-		}
-		else {
+    } else {
       int newSize = 0;
-			for(int i = 0; i != n; i++) {
-				if (!nullPos[i]) {
-				  sel[newSize++] = i;
+      for (int i = 0; i != n; i++) {
+        if (!nullPos[i]) {
+          sel[newSize++] = i;
         }
-			}
+      }
       if (newSize < n) {
         batch.selectedInUse = true;
         batch.size = newSize;
       }
-		}
-	}
+    }
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java Wed May 22 21:06:19 2013
@@ -21,60 +21,63 @@ package org.apache.hadoop.hive.ql.exec.v
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This expression selects a row if the given column is null.
+ */
 public class SelectColumnIsNull extends VectorExpression {
-	int colNum;
+  private final int colNum;
 
-	public SelectColumnIsNull(int colNum) {
-		this.colNum = colNum;
-	}
+  public SelectColumnIsNull(int colNum) {
+    this.colNum = colNum;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
     if (childExpressions != null) {
       super.evaluateChildren(batch);
     }
     ColumnVector inputColVector = batch.cols[colNum];
     int[] sel = batch.selected;
-    //Note: if type of isNull could be long[], could we just re-use this
-    //vector as the output vector. No iterations would be needed.
     boolean[] nullPos = inputColVector.isNull;
     int n = batch.size;
     if (n <= 0) {
-      //Nothing to do
+      // Nothing to do
       return;
     }
 
     if (inputColVector.noNulls) {
       batch.size = 0;
     } else if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Selection property will not change.
-      return;
+      if (nullPos[0]) {
+        // All are null, so all must be selected.
+        return;
+      } else {
+        // None are null, so none are selected
+        batch.size = 0;
+        return;
+      }
     } else if (batch.selectedInUse) {
-      int [] newSelected = new int[n];
-      int newSize=0;
-			for(int j=0; j != n; j++) {
-				int i = sel[j];
-				if (nullPos[i]) {
-          newSelected[newSize++] = i;
-				}
-			}
-      batch.selected = newSelected;
+      int newSize = 0;
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (nullPos[i]) {
+          sel[newSize++] = i;
+        }
+      }
       batch.size = newSize;
-		}
-		else {
+    } else {
       int newSize = 0;
-			for(int i = 0; i != n; i++) {
-				if (nullPos[i]) {
-				  sel[newSize++] = i;
+      for (int i = 0; i != n; i++) {
+        if (nullPos[i]) {
+          sel[newSize++] = i;
         }
-			}
+      }
       if (newSize < n) {
         batch.selectedInUse = true;
         batch.size = newSize;
       }
-		}
-	}
+    }
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java Wed May 22 21:06:19 2013
@@ -21,14 +21,17 @@ package org.apache.hadoop.hive.ql.exec.v
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This expression selects a row if the given boolean column is true.
+ */
 public class SelectColumnIsTrue extends VectorExpression {
-	int colNum1;
+  private final int colNum1;
 
-	public SelectColumnIsTrue(int colNum1) {
-		this.colNum1 = colNum1;
-	}
+  public SelectColumnIsTrue(int colNum1) {
+    this.colNum1 = colNum1;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
     if (childExpressions != null) {
       super.evaluateChildren(batch);
@@ -40,29 +43,29 @@ public class SelectColumnIsTrue extends 
     boolean[] nullVector = inputColVector1.isNull;
 
     if (n <= 0) {
-      //Nothing to do
+      // Nothing to do
       return;
     }
 
     if (inputColVector1.noNulls) {
       if (inputColVector1.isRepeating) {
-        // All must be selected otherwise size would be zero
-        // Repeating property will not change.
         if (vector1[0] == 0) {
           // All are filtered out
           batch.size = 0;
+          return;
+        } else {
+          // All are selected
+          return;
         }
       } else if (batch.selectedInUse) {
-        int[] newSelected = new int[n];
         int newSize = 0;
         for (int j = 0; j != n; j++) {
           int i = sel[j];
           if (vector1[i] == 1) {
-            newSelected[newSize++] = i;
+            sel[newSize++] = i;
           }
         }
         batch.size = newSize;
-        batch.selected = newSelected;
       } else {
         int newSize = 0;
         for (int i = 0; i != n; i++) {
@@ -76,23 +79,27 @@ public class SelectColumnIsTrue extends 
         }
       }
     } else {
-      if (inputColVector1.isRepeating)  {
-        //Repeating null value
-        batch.size = 0;
+      if (inputColVector1.isRepeating) {
+        if (nullVector[0] || (vector1[0] == 0)) {
+          // All are filtered
+          batch.size = 0;
+          return;
+        } else {
+          // All are selected
+          return;
+        }
       } else if (batch.selectedInUse) {
-        int [] newSelected = new int[n];
-        int newSize=0;
-        for(int j=0; j != n; j++) {
+        int newSize = 0;
+        for (int j = 0; j != n; j++) {
           int i = sel[j];
           if (vector1[i] == 1 && !nullVector[i]) {
-            newSelected[newSize++] = i;
+            sel[newSize++] = i;
           }
         }
         batch.size = newSize;
-        batch.selected = newSelected;
       } else {
-        int newSize=0;
-        for(int i = 0; i != n; i++) {
+        int newSize = 0;
+        for (int i = 0; i != n; i++) {
           if (vector1[i] == 1 && !nullVector[i]) {
             sel[newSize++] = i;
           }
@@ -103,7 +110,7 @@ public class SelectColumnIsTrue extends 
         }
       }
     }
-	}
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Wed May 22 21:06:19 2013
@@ -19,19 +19,28 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.junit.Assert;
 import org.junit.Test;
 
-
+/**
+* Unit tests for logical expressions AND, OR, NOT, IsNull etc.
+*/
 public class TestVectorLogicalExpressions {
 
+  private static final int BOOLEAN_COLUMN_TEST_SIZE = 9;
+
   @Test
   public void testLongColOrLongCol() {
     VectorizedRowBatch batch = getBatchThreeBooleanCols();
-    ColOrCol expr = new ColOrCol(0,1,2);
+    ColOrCol expr = new ColOrCol(0, 1, 2);
     LongColumnVector outCol = (LongColumnVector) batch.cols[2];
     expr.evaluate(batch);
     // verify
@@ -67,9 +76,12 @@ public class TestVectorLogicalExpression
 
     // try isRepeating path (left input only), no nulls
     batch = getBatchThreeBooleanCols();
-    batch.cols[0].noNulls = true; batch.cols[0].isRepeating = true;
-    batch.cols[1].noNulls = true; batch.cols[1].isRepeating = false;
-    batch.cols[2].noNulls = false; batch.cols[2].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].noNulls = true;
+    batch.cols[1].isRepeating = false;
+    batch.cols[2].noNulls = false;
+    batch.cols[2].isRepeating = true;
     outCol = (LongColumnVector) batch.cols[2];
     expr.evaluate(batch);
 
@@ -114,21 +126,23 @@ public class TestVectorLogicalExpression
     v2.isRepeating = true; // this value should get over-written with correct value
     v2.noNulls = true; // ditto
 
-    batch.size = 9;
+    batch.size = BOOLEAN_COLUMN_TEST_SIZE;
     return batch;
   }
 
   @Test
   public void testBooleanNot() {
     VectorizedRowBatch batch = getBatchThreeBooleanCols();
-    NotCol expr = new NotCol(0,2);
+    NotCol expr = new NotCol(0, 2);
     LongColumnVector outCol = (LongColumnVector) batch.cols[2];
     expr.evaluate(batch);
 
     // Case with nulls
     Assert.assertFalse(outCol.isRepeating);
-    Assert.assertEquals(1, outCol.vector[0]);    Assert.assertFalse(outCol.isNull[0]);
-    Assert.assertEquals(0, outCol.vector[2]);    Assert.assertFalse(outCol.isNull[0]);
+    Assert.assertEquals(1, outCol.vector[0]);
+    Assert.assertFalse(outCol.isNull[0]);
+    Assert.assertEquals(0, outCol.vector[2]);
+    Assert.assertFalse(outCol.isNull[0]);
     Assert.assertTrue(outCol.isNull[4]);
 
     // No nulls case
@@ -145,7 +159,7 @@ public class TestVectorLogicalExpression
     batch.cols[0].isRepeating = true;
     batch.cols[0].isNull[0] = true;
     expr.evaluate(batch);
-    Assert.assertTrue(outCol.isRepeating);;
+    Assert.assertTrue(outCol.isRepeating);
     Assert.assertTrue(outCol.isNull[0]);
 
     // isRepeating, and no nulls
@@ -160,10 +174,10 @@ public class TestVectorLogicalExpression
   }
 
   @Test
-  public void testIsNullExpr () {
+  public void testIsNullExpr() {
     // has nulls, not repeating
     VectorizedRowBatch batch = getBatchThreeBooleanCols();
-    IsNull expr = new IsNull(0,2);
+    IsNull expr = new IsNull(0, 2);
     LongColumnVector outCol = (LongColumnVector) batch.cols[2];
     expr.evaluate(batch);
     Assert.assertEquals(0, outCol.vector[0]);
@@ -185,7 +199,8 @@ public class TestVectorLogicalExpression
     batch.cols[0].isRepeating = true;
     batch.cols[0].isNull[0] = true;
     expr.evaluate(batch);
-    Assert.assertTrue(outCol.isRepeating);;
+    Assert.assertTrue(outCol.isRepeating);
+    ;
     Assert.assertEquals(1, outCol.vector[0]);
     Assert.assertTrue(outCol.noNulls);
 
@@ -238,4 +253,162 @@ public class TestVectorLogicalExpression
     assertEquals(4, batch.selected[4]);
     assertEquals(5, batch.selected[5]);
   }
+
+  @Test
+  public void testFilterNotExpr() {
+    VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+    VectorizedRowBatch batch2 = getBatchThreeBooleanCols();
+
+    SelectColumnIsTrue expr = new SelectColumnIsTrue(0);
+    FilterNotExpr notExpr = new FilterNotExpr(expr);
+
+    notExpr.evaluate(batch1);
+
+    notExpr.evaluate(batch2);
+
+    assertEquals(batch1.size, batch2.size);
+    for (int j = 0; j < batch1.size; j++) {
+      assertEquals(batch1.selected[j], batch2.selected[j]);
+      int i = j;
+      assertEquals((((LongColumnVector) batch1.cols[0]).vector[i]),
+          (((LongColumnVector) batch2.cols[0]).vector[i]));
+    }
+  }
+
+  @Test
+  public void testFilterExprOrExpr() {
+    VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+    VectorizedRowBatch batch2 = getBatchThreeBooleanCols();
+
+    SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+    SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+    FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+
+    orExpr.evaluate(batch1);
+    orExpr.evaluate(batch2);
+
+    assertEquals(batch1.size, batch2.size);
+    for (int j = 0; j < batch1.size; j++) {
+      assertEquals(batch1.selected[j], batch2.selected[j]);
+      int i = j;
+      assertEquals((((LongColumnVector) batch1.cols[0]).vector[i]),
+          (((LongColumnVector) batch2.cols[0]).vector[i]));
+    }
+
+    assertEquals(5, batch1.size);
+    Set<Integer> expectedSet = new HashSet<Integer>();
+    expectedSet.add(0);
+    expectedSet.add(2);
+    expectedSet.add(3);
+    expectedSet.add(4);
+    expectedSet.add(7);
+
+    assertTrue(expectedSet.contains(batch1.selected[0]));
+    assertTrue(expectedSet.contains(batch1.selected[1]));
+    assertTrue(expectedSet.contains(batch1.selected[2]));
+    assertTrue(expectedSet.contains(batch1.selected[3]));
+    assertTrue(expectedSet.contains(batch1.selected[4]));
+
+    // Repeat the expression on the same batch,
+    // the result must be unchanged.
+    orExpr.evaluate(batch1);
+
+    assertEquals(5, batch1.size);
+    assertTrue(expectedSet.contains(batch1.selected[0]));
+    assertTrue(expectedSet.contains(batch1.selected[1]));
+    assertTrue(expectedSet.contains(batch1.selected[2]));
+    assertTrue(expectedSet.contains(batch1.selected[3]));
+    assertTrue(expectedSet.contains(batch1.selected[4]));
+  }
+
+  @Test
+  public void testFilterExprOrExprWithBatchReuse() {
+    VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+
+    SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+    SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+    FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+
+    orExpr.evaluate(batch1);
+
+    // Now re-initialize batch1 to simulate batch-object re-use.
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      batch1.selected[i] = 0;
+    }
+    batch1.size = BOOLEAN_COLUMN_TEST_SIZE;
+    batch1.selectedInUse = false;
+
+    // Swap column vectors to simulate change in data
+    ColumnVector tmp = batch1.cols[0];
+    batch1.cols[0] = batch1.cols[1];
+    batch1.cols[1] = tmp;
+
+    orExpr.evaluate(batch1);
+
+    assertEquals(5, batch1.size);
+    Set<Integer> expectedSet = new HashSet<Integer>();
+    expectedSet.add(0);
+    expectedSet.add(1);
+    expectedSet.add(3);
+    expectedSet.add(5);
+    expectedSet.add(6);
+
+    assertTrue(expectedSet.contains(batch1.selected[0]));
+    assertTrue(expectedSet.contains(batch1.selected[1]));
+    assertTrue(expectedSet.contains(batch1.selected[2]));
+    assertTrue(expectedSet.contains(batch1.selected[3]));
+    assertTrue(expectedSet.contains(batch1.selected[4]));
+  }
+
+  @Test
+  public void testFilterExprOrExprWithSelectInUse() {
+    VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+
+    SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+    SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+    FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+
+    // Evaluate batch1 so that temporary arrays in the expression
+    // have residual values to interfere in later computation
+    orExpr.evaluate(batch1);
+
+    // Swap column vectors, but keep selected vector unchanged
+    ColumnVector tmp = batch1.cols[0];
+    batch1.cols[0] = batch1.cols[1];
+    batch1.cols[1] = tmp;
+    // Make sure row-7 is in the output.
+    batch1.cols[1].isNull[7] = false;
+    ((LongColumnVector) batch1.cols[1]).vector[7] = 0;
+
+    orExpr.evaluate(batch1);
+
+    assertEquals(3, batch1.size);
+    Set<Integer> expectedSet = new HashSet<Integer>();
+    expectedSet.add(0);
+    expectedSet.add(3);
+    expectedSet.add(7);
+
+    assertTrue(expectedSet.contains(batch1.selected[0]));
+    assertTrue(expectedSet.contains(batch1.selected[1]));
+    assertTrue(expectedSet.contains(batch1.selected[2]));
+  }
+
+  @Test
+  public void testFilterExprAndExpr() {
+    VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+
+    SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+    SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+    FilterExprAndExpr orExpr = new FilterExprAndExpr(expr1, expr2);
+
+    orExpr.evaluate(batch1);
+
+    assertEquals(1, batch1.size);
+
+    assertEquals(2, batch1.selected[0]);
+  }
 }