You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/22 23:07:57 UTC

svn commit: r1485423 - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/expressions/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/

Author: omalley
Date: Wed May 22 21:07:57 2013
New Revision: 1485423

URL: http://svn.apache.org/r1485423
Log:
HIVE-4534 IsNotNull and NotCol incorrectly handle nulls. (Jitendra Pandey via
omalley)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java Wed May 22 21:07:57 2013
@@ -21,16 +21,20 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This expression evaluates to true if the given input column is not null.
+ * The boolean output is stored in the specified output column.
+ */
 public class IsNotNull extends VectorExpression {
-	int colNum;
-	int outputColumn;
+  int colNum;
+  int outputColumn;
 
-	public IsNotNull(int colNum, int outputColumn) {
-		this.colNum = colNum;
-		this.outputColumn = outputColumn;
-	}
+  public IsNotNull(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
@@ -44,38 +48,34 @@ public class IsNotNull extends VectorExp
     long[] outputVector = ((LongColumnVector) batch.cols[outputColumn]).vector;
 
     if (n <= 0) {
-      //Nothing to do
+      // Nothing to do
       return;
     }
 
-    if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Selection property will not change.
-      if (nullPos[0]) {
-        outputVector[0] = 0;
+    // output never has nulls for this operator
+    batch.cols[outputColumn].noNulls = true;
+    if (inputColVector.noNulls) {
+      outputVector[0] = 1;
+      batch.cols[outputColumn].isRepeating = true;
+    } else if (inputColVector.isRepeating) {
+      // All must be selected otherwise size would be zero
+      // Selection property will not change.
+      outputVector[0] = nullPos[0] ? 0 : 1;
+      batch.cols[outputColumn].isRepeating = true;
+    } else {
+      batch.cols[outputColumn].isRepeating = false;
+      if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = nullPos[i] ? 0 : 1;
+        }
       } else {
-        outputVector[0] = 1;
+        for (int i = 0; i != n; i++) {
+          outputVector[i] = nullPos[i] ? 0 : 1;
+        }
       }
-    } else if (batch.selectedInUse) {
-			for(int j=0; j != n; j++) {
-				int i = sel[j];
-				if (nullPos[i]) {
-				  outputVector[i] = 0;
-				} else {
-				  outputVector[i] = 1;
-				}
-			}
-		}
-		else {
-			for(int i = 0; i != n; i++) {
-				if (nullPos[i]) {
-				  outputVector[i] = 0;
-        } else {
-          outputVector[i] = 1;
-				}
-			}
-		}
-	}
+    }
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java Wed May 22 21:07:57 2013
@@ -21,16 +21,20 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * This expression evaluates to true if the given input column is null.
+ * The boolean output is stored in the specified output column.
+ */
 public class IsNull extends VectorExpression {
-	int colNum;
-	int outputColumn;
+  int colNum;
+  int outputColumn;
 
-	public IsNull(int colNum, int outputColumn) {
-		this.colNum = colNum;
-		this.outputColumn = outputColumn;
-	}
+  public IsNull(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
@@ -39,52 +43,36 @@ public class IsNull extends VectorExpres
 
     ColumnVector inputColVector = batch.cols[colNum];
     int[] sel = batch.selected;
-    //Note: if type of isNull could be long[], could we just re-use this
-    //vector as the output vector. No iterations would be needed.
     boolean[] nullPos = inputColVector.isNull;
     int n = batch.size;
     long[] outputVector = ((LongColumnVector) batch.cols[outputColumn]).vector;
     if (n <= 0) {
-      //Nothing to do, this is EOF
+      // Nothing to do, this is EOF
       return;
     }
 
     // output never has nulls for this operator
     batch.cols[outputColumn].noNulls = true;
-    if (inputColVector.isRepeating && inputColVector.noNulls) {
+    if (inputColVector.noNulls) {
       outputVector[0] = 0;
       batch.cols[outputColumn].isRepeating = true;
-    } else if (inputColVector.isRepeating && !inputColVector.noNulls) {
+    } else if (inputColVector.isRepeating) {
       outputVector[0] = nullPos[0] ? 1 : 0;
       batch.cols[outputColumn].isRepeating = true;
-    } else if (!inputColVector.isRepeating && inputColVector.noNulls) {
+    } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = 0;
-        }
-      }
-      else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = 0;
-        }
-      }
-      batch.cols[outputColumn].isRepeating = false;
-    } else /* !inputColVector.isRepeating && !inputColVector.noNulls */ {
-      if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for (int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = nullPos[i] ? 1 : 0;
         }
-      }
-      else {
-        for(int i = 0; i != n; i++) {
+      } else {
+        for (int i = 0; i != n; i++) {
           outputVector[i] = nullPos[i] ? 1 : 0;
         }
       }
       batch.cols[outputColumn].isRepeating = false;
     }
-	}
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java Wed May 22 21:07:57 2013
@@ -20,16 +20,19 @@ package org.apache.hadoop.hive.ql.exec.v
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
+/**
+ * Evaluates the boolean complement of the input.
+ */
 public class NotCol extends VectorExpression {
-	int colNum;
-	int outputColumn;
+  private final int colNum;
+  private final int outputColumn;
 
-	public NotCol(int colNum, int outputColumn) {
-		this.colNum = colNum;
-		this.outputColumn = outputColumn;
-	}
+  public NotCol(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
 
-	@Override
+  @Override
   public void evaluate(VectorizedRowBatch batch) {
 
     if (childExpressions != null) {
@@ -44,40 +47,50 @@ public class NotCol extends VectorExpres
     long[] outputVector = outV.vector;
 
     if (n <= 0) {
-      //Nothing to do, this is EOF
+      // Nothing to do, this is EOF
       return;
     }
 
-    if (inputColVector.isRepeating) {
-      outV.isRepeating = true;
-      // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1
-      outputVector[0] = ~vector[0] & 1;
-    } else if (batch.selectedInUse) {
-			for(int j=0; j != n; j++) {
-				int i = sel[j];
-				outputVector[i] = ~vector[i] & 1;
-			}
-			outV.isRepeating = false;
-		}
-		else {
-			for(int i = 0; i != n; i++) {
-			  outputVector[i] = ~vector[i] & 1;
-			}
-			outV.isRepeating = false;
-		}
-
-    // handle NULLs
     if (inputColVector.noNulls) {
       outV.noNulls = true;
+      if (inputColVector.isRepeating) {
+        outV.isRepeating = true;
+        // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1
+        outputVector[0] = ~vector[0] & 1;
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = ~vector[i] & 1;
+        }
+        outV.isRepeating = false;
+      } else {
+        for (int i = 0; i != n; i++) {
+          outputVector[i] = ~vector[i] & 1;
+        }
+        outV.isRepeating = false;
+      }
     } else {
       outV.noNulls = false;
       if (inputColVector.isRepeating) {
+        outV.isRepeating = true;
+        outputVector[0] = ~vector[0] & 1;
         outV.isNull[0] = inputColVector.isNull[0];
+      } else if (batch.selectedInUse) {
+        outV.isRepeating = false;
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = ~vector[i] & 1;
+          outV.isNull[i] = inputColVector.isNull[i];
+        }
       } else {
-        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        outV.isRepeating = false;
+        for (int i = 0; i != n; i++) {
+          outputVector[i] = ~vector[i] & 1;
+          outV.isNull[i] = inputColVector.isNull[i];
+        }
       }
     }
-	}
+  }
 
   @Override
   public int getOutputColumn() {

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Wed May 22 21:07:57 2013
@@ -188,10 +188,9 @@ public class TestVectorLogicalExpression
     // No nulls case, not repeating
     batch.cols[0].noNulls = true;
     expr.evaluate(batch);
-    Assert.assertFalse(outCol.isRepeating);
+    Assert.assertTrue(outCol.isRepeating);
     Assert.assertTrue(outCol.noNulls);
     Assert.assertEquals(0, outCol.vector[0]);
-    Assert.assertEquals(0, outCol.vector[4]);
 
     // isRepeating, and there are nulls
     batch = getBatchThreeBooleanCols();
@@ -200,7 +199,6 @@ public class TestVectorLogicalExpression
     batch.cols[0].isNull[0] = true;
     expr.evaluate(batch);
     Assert.assertTrue(outCol.isRepeating);
-    ;
     Assert.assertEquals(1, outCol.vector[0]);
     Assert.assertTrue(outCol.noNulls);
 
@@ -216,6 +214,46 @@ public class TestVectorLogicalExpression
   }
 
   @Test
+  public void testIsNotNullExpr() {
+    // has nulls, not repeating
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    IsNotNull expr = new IsNotNull(0, 2);
+    LongColumnVector outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+    Assert.assertEquals(1, outCol.vector[0]);
+    Assert.assertEquals(0, outCol.vector[4]);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertFalse(outCol.isRepeating);
+
+    // No nulls case, not repeating
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(1, outCol.vector[0]);
+
+    // isRepeating, and there are nulls
+    batch = getBatchThreeBooleanCols();
+    outCol = (LongColumnVector) batch.cols[2];
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertTrue(outCol.noNulls);
+
+    // isRepeating, and no nulls
+    batch = getBatchThreeBooleanCols();
+    outCol = (LongColumnVector) batch.cols[2];
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(1, outCol.vector[0]);
+  }
+
+  @Test
   public void testBooleanFiltersOnColumns() {
     VectorizedRowBatch batch = getBatchThreeBooleanCols();
 
@@ -233,25 +271,76 @@ public class TestVectorLogicalExpression
     assertEquals(0, batch.selected[0]);
     assertEquals(2, batch.selected[1]);
     assertEquals(4, batch.selected[2]);
+  }
 
-    batch = getBatchThreeBooleanCols();
-    SelectColumnIsNull expr2 = new SelectColumnIsNull(0);
-    expr2.evaluate(batch);
+  @Test
+  public void testSelectColumnIsNull() {
+    // has nulls, not repeating
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    SelectColumnIsNull expr = new SelectColumnIsNull(0);
+    expr.evaluate(batch);
     assertEquals(3, batch.size);
     assertEquals(4, batch.selected[0]);
     assertEquals(5, batch.selected[1]);
     assertEquals(8, batch.selected[2]);
 
+    // No nulls case, not repeating
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(0, batch.size);
+
+    // isRepeating, and there are nulls
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].isNull[0] = true;
+    int initialSize = batch.size;
+    expr.evaluate(batch);
+    Assert.assertEquals(initialSize, batch.size);
+
+    // isRepeating, and no nulls
     batch = getBatchThreeBooleanCols();
-    SelectColumnIsNotNull expr3 = new SelectColumnIsNotNull(1);
-    expr3.evaluate(batch);
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(0, batch.size);
+  }
+
+  @Test
+  public void testSelectColumnIsNotNull() {
+    // has nulls, not repeating
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    SelectColumnIsNotNull expr = new SelectColumnIsNotNull(0);
+    expr.evaluate(batch);
     assertEquals(6, batch.size);
     assertEquals(0, batch.selected[0]);
     assertEquals(1, batch.selected[1]);
     assertEquals(2, batch.selected[2]);
     assertEquals(3, batch.selected[3]);
-    assertEquals(4, batch.selected[4]);
-    assertEquals(5, batch.selected[5]);
+    assertEquals(6, batch.selected[4]);
+    assertEquals(7, batch.selected[5]);
+
+    // No nulls case, not repeating
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].noNulls = true;
+    int initialSize = batch.size;
+    expr.evaluate(batch);
+    Assert.assertEquals(initialSize, batch.size);
+
+    // isRepeating, and there are nulls
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(0, batch.size);
+
+    // isRepeating, and no nulls
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    initialSize = batch.size;
+    expr.evaluate(batch);
+    Assert.assertEquals(initialSize, batch.size);
   }
 
   @Test