You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by eh...@apache.org on 2014/01/18 02:32:17 UTC

svn commit: r1559303 - in /hive/trunk/ql/src: gen/vectorization/ExpressionTemplates/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/

Author: ehans
Date: Sat Jan 18 01:32:16 2014
New Revision: 1559303

URL: http://svn.apache.org/r1559303
Log:
HIVE-6186: error in vectorized Column-Column comparison filter for repeating case (Eric Hanson)

Modified:
    hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java

Modified: hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt (original)
+++ hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt Sat Jan 18 01:32:16 2014
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -64,70 +65,43 @@ public class <ClassName> extends VectorE
       return;
     }
     
-    if (inputColVector1.noNulls && inputColVector2.noNulls) {
-      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
-        //All must be selected otherwise size would be zero
-        //Repeating property will not change.
-        if (!(vector1[0] <OperatorSymbol> vector2[0])) {
-          batch.size = 0;
-        }
-      } else if (inputColVector1.isRepeating) {
-        if (batch.selectedInUse) {
-          int newSize = 0;
-          for(int j=0; j != n; j++) {
-            int i = sel[j];
-            if (vector1[0] <OperatorSymbol> vector2[i]) {
-              sel[newSize++] = i;
-            }
-          }
-          batch.size = newSize;
-        } else {
-          int newSize = 0;
-          for(int i = 0; i != n; i++) {
-            if (vector1[0] <OperatorSymbol> vector2[i]) {
-              sel[newSize++] = i;
-            }
-          }
-          if (newSize < batch.size) {
-            batch.size = newSize;
-            batch.selectedInUse = true;
-          }
-        }
-      } else if (inputColVector2.isRepeating) {
-        if (batch.selectedInUse) {
-          int newSize = 0;
-          for(int j=0; j != n; j++) {
-            int i = sel[j];
-            if (vector1[i] <OperatorSymbol> vector2[0]) {
-              sel[newSize++] = i;
-            }
-          }
-          batch.size = newSize;
-        } else {
-          int newSize = 0;
-          for(int i = 0; i != n; i++) {
-            if (vector1[i] <OperatorSymbol> vector2[0]) {
-              sel[newSize++] = i;
-            }
-          }
-          if (newSize < batch.size) {
-            batch.size = newSize;
-            batch.selectedInUse = true;
-          }
-        }
-      } else if (batch.selectedInUse) {
-        int newSize = 0;
-        for(int j=0; j != n; j++) {
+    // filter rows with NULL on left input
+    int newSize;
+    newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n);
+    if (newSize < n) {
+      n = batch.size = newSize;
+      batch.selectedInUse = true;
+    }
+    
+    // filter rows with NULL on right input
+    newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n);
+    if (newSize < n) {
+      n = batch.size = newSize;
+      batch.selectedInUse = true;
+    }
+    
+    // All rows with nulls have been filtered out, so just do normal filter for non-null case
+    if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      
+      // All must be selected otherwise size would be zero
+      // Repeating property will not change.
+      if (!(vector1[0] <OperatorSymbol> vector2[0])) {
+        batch.size = 0;
+      }
+    } else if (inputColVector1.isRepeating) {
+      if (batch.selectedInUse) {
+        newSize = 0;
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
-          if (vector1[i] <OperatorSymbol> vector2[i]) {
+          if (vector1[0] <OperatorSymbol> vector2[i]) {
             sel[newSize++] = i;
           }
         }
         batch.size = newSize;
       } else {
-        int newSize = 0;
+        newSize = 0;
         for(int i = 0; i != n; i++) {
-          if (vector1[i] <OperatorSymbol>  vector2[i]) {
+          if (vector1[0] <OperatorSymbol> vector2[i]) {
             sel[newSize++] = i;
           }
         }
@@ -136,88 +110,42 @@ public class <ClassName> extends VectorE
           batch.selectedInUse = true;
         }
       }
-    } else if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
-      if (nullPos1[0] || nullPos2[0]) {
-        batch.size = 0; 
-      } 
-    } else if (inputColVector1.isRepeating) {
-      if (nullPos1[0]) {
-        batch.size = 0;
-      } else {
-        if (batch.selectedInUse) {
-          int newSize = 0;
-          for(int j=0; j != n; j++) {
-            int i = sel[j];
-            if (!nullPos2[i]) {
-              if (vector1[0] <OperatorSymbol> vector2[i]) {
-                sel[newSize++] = i;
-              }
-            }
-          }
-          batch.size = newSize;
-        } else {
-          int newSize = 0;
-          for(int i = 0; i != n; i++) {
-            if (!nullPos2[i]) {
-              if (vector1[0] <OperatorSymbol> vector2[i]) {
-                sel[newSize++] = i;
-              }
-            }
-          }
-          if (newSize < batch.size) {
-            batch.size = newSize;
-            batch.selectedInUse = true;
+    } else if (inputColVector2.isRepeating) {
+      if (batch.selectedInUse) {
+        newSize = 0;
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (vector1[i] <OperatorSymbol> vector2[0]) {
+            sel[newSize++] = i;
           }
         }
-      }
-    } else if (inputColVector2.isRepeating) {
-      if (nullPos2[0]) {
-        batch.size = 0;
+        batch.size = newSize;
       } else {
-        if (batch.selectedInUse) {
-          int newSize = 0;
-          for(int j=0; j != n; j++) {
-            int i = sel[j];
-            if (!nullPos1[i]) {
-              if (vector1[i] <OperatorSymbol> vector2[0]) {
-                sel[newSize++] = i;
-              }
-            }
+        newSize = 0;
+        for(int i = 0; i != n; i++) {
+          if (vector1[i] <OperatorSymbol> vector2[0]) {
+            sel[newSize++] = i;
           }
+        }
+        if (newSize < batch.size) {
           batch.size = newSize;
-        } else {
-          int newSize = 0;
-          for(int i = 0; i != n; i++) {
-            if (!nullPos1[i]) {
-              if (vector1[i] <OperatorSymbol> vector2[0]) {
-                sel[newSize++] = i;
-              }
-            }
-          }
-          if (newSize < batch.size) {
-            batch.size = newSize;
-            batch.selectedInUse = true;
-          }
+          batch.selectedInUse = true;
         }
       }
     } else if (batch.selectedInUse) {
-      int newSize = 0;
-      for(int j=0; j != n; j++) {
+      newSize = 0;
+      for(int j = 0; j != n; j++) {
         int i = sel[j];
-        if (!nullPos1[i] && !nullPos2[i]) {
-          if (vector1[i] <OperatorSymbol> vector2[i]) {
-            sel[newSize++] = i;
-          }
+        if (vector1[i] <OperatorSymbol> vector2[i]) {
+          sel[newSize++] = i;
         }
       }
       batch.size = newSize;
     } else {
-      int newSize = 0;
+      newSize = 0;
       for(int i = 0; i != n; i++) {
-        if (!nullPos1[i] && !nullPos2[i]) {
-          if (vector1[i] <OperatorSymbol> vector2[i]) {
-            sel[newSize++] = i;
-          }
+        if (vector1[i] <OperatorSymbol>  vector2[i]) {
+          sel[newSize++] = i;
         }
       }
       if (newSize < batch.size) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Sat Jan 18 01:32:16 2014
@@ -336,4 +336,39 @@ public class NullUtil {
       Arrays.fill(v.isNull, 0, n, false);
     }
   }
+
+  /**
+   * Filter out rows with null values. Return the number of rows in the batch.
+   */
+  public static int filterNulls(ColumnVector v, boolean selectedInUse, int[] sel, int n) {
+    int newSize = 0;
+
+    if (v.noNulls) {
+
+      // no rows will be filtered
+      return n;
+    }
+
+    if (v.isRepeating) {
+
+      // all rows are filtered if repeating null, otherwise no rows are filtered
+      return v.isNull[0] ? 0 : n;
+    }
+
+    if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (!v.isNull[i]) {
+          sel[newSize++] = i;
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (!v.isNull[i]) {
+          sel[newSize++] = i;
+        }
+      }
+    }
+    return newSize;
+  }
 }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java Sat Jan 18 01:32:16 2014
@@ -70,31 +70,60 @@ public class TestVectorFilterExpressions
   }
 
   @Test
-  public void testFilterLongColEqualLongColumn() {
+  public void testFilterLongColGreaterLongColumn() {
     int seed = 17;
-    VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
+    VectorizedRowBatch b = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
         VectorizedRowBatch.DEFAULT_SIZE,
         2, seed);
-    LongColumnVector lcv0 = (LongColumnVector) vrg.cols[0];
-    LongColumnVector lcv1 = (LongColumnVector) vrg.cols[1];
+    LongColumnVector lcv0 = (LongColumnVector) b.cols[0];
+    LongColumnVector lcv1 = (LongColumnVector) b.cols[1];
+    b.size = 3;
     FilterLongColGreaterLongColumn expr = new FilterLongColGreaterLongColumn(0, 1);
 
-    //Basic case
-    lcv0.vector[1] = 23;
-    lcv1.vector[1] = 19;
-    lcv0.vector[5] = 23;
-    lcv1.vector[5] = 19;
-    expr.evaluate(vrg);
-    assertEquals(2, vrg.size);
-    assertEquals(1, vrg.selected[0]);
-    assertEquals(5, vrg.selected[1]);
+    // Basic case
+    lcv0.vector[0] = 10;
+    lcv0.vector[1] = 10;
+    lcv0.vector[2] = 10;
+    lcv1.vector[0] = 20;
+    lcv1.vector[1] = 1;
+    lcv1.vector[2] = 7;
+
+    expr.evaluate(b);
+    assertEquals(2, b.size);
+    assertEquals(1, b.selected[0]);
+    assertEquals(2, b.selected[1]);
 
-    //handle null
+    // handle null with selected in use
     lcv0.noNulls = false;
     lcv0.isNull[1] = true;
-    expr.evaluate(vrg);
-    assertEquals(1, vrg.size);
-    assertEquals(5, vrg.selected[0]);
+    expr.evaluate(b);
+    assertEquals(1, b.size);
+    assertEquals(2, b.selected[0]);
+
+    // handle repeating
+    b.size = 3;
+    b.selectedInUse = false;
+    lcv0.isRepeating = true;
+    lcv0.noNulls = true;
+    expr.evaluate(b);
+    assertEquals(2, b.size);
+
+    // handle repeating null
+    b.size = 3;
+    b.selectedInUse = false;
+    lcv0.isNull[0] = true;
+    lcv0.noNulls = false;
+    expr.evaluate(b);
+    assertEquals(0, b.size);
+
+    // handle null on both sizes (not repeating)
+    b.size = 3;
+    b.selectedInUse = false;
+    lcv0.isRepeating = false;
+    lcv1.noNulls = false;
+    lcv1.isNull[2] = true;
+    expr.evaluate(b);
+    assertEquals(0, b.size);
   }
 
   @Test