You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ji...@apache.org on 2014/06/20 01:00:57 UTC

svn commit: r1604044 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java

Author: jitendra
Date: Thu Jun 19 23:00:56 2014
New Revision: 1604044

URL: http://svn.apache.org/r1604044
Log:
HIVE-7188 : sum(if()) returns wrong results with vectorization (Hari Sankar via jitendra)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java?rev=1604044&r1=1604043&r2=1604044&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java Thu Jun 19 23:00:56 2014
@@ -64,75 +64,220 @@ public class ColAndCol extends VectorExp
       return;
     }
 
-    // Handle null
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outV.noNulls = false;
-      if (inputColVector2.isRepeating) {
+    if (inputColVector1.noNulls && inputColVector2.noNulls) {
+      if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
         outV.isRepeating = true;
-        outV.isNull[0] = true;
-      } else {
+        outputVector[0] = vector1[0] & vector2[0];
+      } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
-            outV.isNull[i] = inputColVector2.isNull[i];
+            outputVector[i] = vector1[0] & vector2[i];
           }
         } else {
           for (int i = 0; i != n; i++) {
-            outV.isNull[i] = inputColVector2.isNull[i];
+            outputVector[i] = vector1[0] & vector2[i];
           }
         }
+        outV.isRepeating = false;
+      } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[0];
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[0];
+          }
+        }
+        outV.isRepeating = false;
+      } else /* neither side is repeating */{
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[i];
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[i];
+          }
+        }
+        outV.isRepeating = false;
       }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outV.noNulls = false;
-      if (inputColVector1.isRepeating) {
+      outV.noNulls = true;
+    } else if (inputColVector1.noNulls && !inputColVector2.noNulls) {
+      // only input 2 side has nulls
+      if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
         outV.isRepeating = true;
-        outV.isNull[0] = true;
-      } else {
+        outputVector[0] = vector1[0] & vector2[0];
+        outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0];
+      } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
-            outV.isNull[i] = inputColVector1.isNull[i];
+            outputVector[i] = vector1[0] & vector2[i];
+            outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
           }
         } else {
           for (int i = 0; i != n; i++) {
-            outV.isNull[i] = inputColVector1.isNull[i];
+            outputVector[i] = vector1[0] & vector2[i];
+            outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
           }
         }
+        outV.isRepeating = false;
+      } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[0];
+            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[0];
+            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
+          }
+        }
+        outV.isRepeating = false;
+      } else /* neither side is repeating */{
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[i];
+            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[i];
+            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
+          }
+        }
+        outV.isRepeating = false;
       }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
       outV.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
+    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
+      // only input 1 side has nulls
+      if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
         outV.isRepeating = true;
-        outV.isNull[0] = true;
-      } else {
+        outputVector[0] = vector1[0] & vector2[0];
+        outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1);
+      } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
-            outV.isNull[i] = inputColVector1.isNull[i] && inputColVector2.isNull[i];
+            outputVector[i] = vector1[0] & vector2[i];
+            outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
           }
         } else {
           for (int i = 0; i != n; i++) {
-            outV.isNull[i] = inputColVector1.isNull[i] && inputColVector2.isNull[i];
+            outputVector[i] = vector1[0] & vector2[i];
+            outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
           }
         }
+        outV.isRepeating = false;
+      } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[0];
+            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[0];
+            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
+          }
+        }
+        outV.isRepeating = false;
+      } else /* neither side is repeating */{
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[i];
+            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[i];
+            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
+          }
+        }
+        outV.isRepeating = false;
       }
-    }
-
-    // Now disregard null in second pass.
-    if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
-      // All must be selected otherwise size would be zero
-      // Repeating property will not change.
-      outV.isRepeating = true;
-      outputVector[0] = vector1[0] & vector2[0];
-    } else if (batch.selectedInUse) {
-      for (int j = 0; j != n; j++) {
-        int i = sel[j];
-        outputVector[i] = vector1[i] & vector2[i];
-      }
-    } else {
-      for (int i = 0; i != n; i++) {
-        outputVector[i] = vector1[i] & vector2[i];
+      outV.noNulls = false;
+    } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{
+      // both input 1 and input 2 may have nulls
+      if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        outV.isRepeating = true;
+        outputVector[0] = vector1[0] & vector2[0];
+        outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0])
+            || (inputColVector1.isNull[0] && (vector2[0] == 1))
+            || (inputColVector1.isNull[0] && inputColVector2.isNull[0]);
+      } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[0] & vector2[i];
+            outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
+                || (inputColVector1.isNull[0] && (vector2[i] == 1))
+                || (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[0] & vector2[i];
+            outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
+                || (inputColVector1.isNull[0] && (vector2[i] == 1))
+                || (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
+          }
+        }
+        outV.isRepeating = false;
+      } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[0];
+            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
+                || (inputColVector1.isNull[i] && (vector2[0] == 1))
+                || (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[0];
+            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
+                || (inputColVector1.isNull[i] && (vector2[0] == 1))
+                || (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
+          }
+        }
+        outV.isRepeating = false;
+      } else /* neither side is repeating */{
+        if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = vector1[i] & vector2[i];
+            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
+                || (inputColVector1.isNull[i] && (vector2[i] == 1))
+                || (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = vector1[i] & vector2[i];
+            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
+                || (inputColVector1.isNull[i] && (vector2[i] == 1))
+                || (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
+          }
+        }
+        outV.isRepeating = false;
       }
+      outV.noNulls = false;
     }
   }
 

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1604044&r1=1604043&r2=1604044&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Thu Jun 19 23:00:56 2014
@@ -90,6 +90,65 @@ public class TestVectorLogicalExpression
     Assert.assertEquals(1, outCol.vector[3]);
   }
 
+  @Test
+  public void testLongColAndLongCol() {
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    ColAndCol expr = new ColAndCol(0, 1, 2);
+    LongColumnVector outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+
+    // verify
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(0, outCol.vector[1]);
+    Assert.assertEquals(0, outCol.vector[2]);
+    Assert.assertEquals(1, outCol.vector[3]);
+    Assert.assertEquals(0, outCol.vector[4]);
+    Assert.assertFalse(outCol.isNull[4]);
+    Assert.assertTrue(outCol.isNull[5]);    
+    Assert.assertEquals(0, outCol.vector[6]);
+    Assert.assertFalse(outCol.isNull[6]);
+    Assert.assertTrue(outCol.isNull[7]); 
+    Assert.assertTrue(outCol.isNull[8]);
+
+    Assert.assertEquals(batch.size, 9);
+    Assert.assertFalse(outCol.noNulls);
+    Assert.assertFalse(outCol.isRepeating);
+
+    // try non-null path
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].noNulls = true;
+    batch.cols[1].noNulls = true;
+    batch.cols[2].noNulls = false;
+    outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+
+    // spot check
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(0, outCol.vector[1]);
+    Assert.assertEquals(0, outCol.vector[2]);
+    Assert.assertEquals(1, outCol.vector[3]);
+
+    // try isRepeating path (left input only), no nulls
+    batch = getBatchThreeBooleanCols();
+    ((LongColumnVector)batch.cols[0]).vector[0] = 1;
+    batch.cols[0].noNulls = true;
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].noNulls = true;
+    batch.cols[1].isRepeating = false;
+    batch.cols[2].noNulls = false;
+    batch.cols[2].isRepeating = true;
+    outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+
+    // spot check
+    Assert.assertFalse(outCol.isRepeating);   
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(1, outCol.vector[1]);
+    Assert.assertEquals(0, outCol.vector[2]);
+    Assert.assertEquals(1, outCol.vector[3]);
+  }  
+  
   /**
    * Get a batch with three boolean (long) columns.
    */