You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/03 20:20:24 UTC

svn commit: r1489091 [4/4] - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/ java/org/apache/hadoop/...

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java Mon Jun  3 18:20:22 2013
@@ -24,10 +24,12 @@ import static org.junit.Assert.assertTru
 import junit.framework.Assert;
 
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
 import org.junit.Test;
 
@@ -58,25 +60,46 @@ public class TestVectorArithmeticExpress
     vrg.size = size;
     return vrg;
   }
+  
+  public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() {
+    VectorizedRowBatch batch = new VectorizedRowBatch(3);
+    LongColumnVector lcv, lcv2;
+    lcv = new LongColumnVector();
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      lcv.vector[i] = i * 37;
+    }
+    batch.cols[0] = lcv;
+    lcv2 = new LongColumnVector();
+    batch.cols[1] = lcv2;
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      lcv2.vector[i] = i * 37;
+    }    
+    batch.cols[2] = new DoubleColumnVector();
+    batch.size = VectorizedRowBatch.DEFAULT_SIZE;
+    return batch;
+  }
 
   @Test
   public void testLongColAddLongScalarWithNulls()  {
-    VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector
+    VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector
         (VectorizedRowBatch.DEFAULT_SIZE);
-    LongColumnVector lcv = (LongColumnVector) vrg.cols[0];
+    LongColumnVector lcv = (LongColumnVector) batch.cols[0];
+    LongColumnVector lcvOut = (LongColumnVector) batch.cols[1];
     TestVectorizedRowBatch.addRandomNulls(lcv);
     LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1);
-    expr.evaluate(vrg);
-    //verify
+    expr.evaluate(batch);
+    
+    // verify
     for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
       if (!lcv.isNull[i]) {
-        Assert.assertEquals(i*37+23, ((LongColumnVector)vrg.cols[1]).vector[i]);
+        Assert.assertEquals(i*37+23, lcvOut.vector[i]);
       } else {
-        Assert.assertTrue(((LongColumnVector)vrg.cols[1]).isNull[i]);
+        Assert.assertTrue(lcvOut.isNull[i]);
       }
     }
-    Assert.assertFalse(((LongColumnVector)vrg.cols[1]).noNulls);
-    Assert.assertFalse(((LongColumnVector)vrg.cols[1]).isRepeating);
+    Assert.assertFalse(lcvOut.noNulls);
+    Assert.assertFalse(lcvOut.isRepeating);
+    verifyLongNullDataVectorEntries(lcvOut, batch.selected, batch.selectedInUse, batch.size);
   }
 
   @Test
@@ -117,6 +140,36 @@ public class TestVectorArithmeticExpress
     Assert.assertTrue(out.isRepeating);
     Assert.assertFalse(out.noNulls);
     Assert.assertEquals(true, out.isNull[0]);
+    verifyLongNullDataVectorEntries(out, batch.selected, batch.selectedInUse, batch.size);
+  }
+  
+  /* Make sure all the NULL entries in this long column output vector have their data vector 
+   * element set to the correct value, as per the specification, to prevent later arithmetic 
+   * errors (e.g. zero-divide).
+   */
+  public static void verifyLongNullDataVectorEntries(
+      LongColumnVector v, int[] sel, boolean selectedInUse, int n) {
+    if (n == 0 || v.noNulls) {
+      return;
+    } else if (v.isRepeating) {
+      if (v.isNull[0]) {
+        assertEquals(LongColumnVector.NULL_VALUE, v.vector[0]);
+      }
+    }
+    else if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (v.isNull[i]) {
+          assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]);
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (v.isNull[i]) {
+          assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]);
+        }        
+      }
+    }
   }
 
   @Test
@@ -138,7 +191,7 @@ public class TestVectorArithmeticExpress
     }
     assertTrue(lcv2.noNulls);
 
-    //Now set one column nullable
+    // Now set one column nullable
     lcv1.noNulls = false;
     lcv1.isNull[1] = true;
     lcv2.isRepeating = true;   // set output isRepeating to true to make sure it gets over-written
@@ -147,8 +200,9 @@ public class TestVectorArithmeticExpress
     assertTrue(lcv2.isNull[1]);
     assertFalse(lcv2.noNulls);
     assertFalse(lcv2.isRepeating);
+    verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size);
 
-    //Now set other column nullable too
+    // Now set other column nullable too
     lcv0.noNulls = false;
     lcv0.isNull[1] = true;
     lcv0.isNull[3] = true;
@@ -156,8 +210,9 @@ public class TestVectorArithmeticExpress
     assertTrue(lcv2.isNull[1]);
     assertTrue(lcv2.isNull[3]);
     assertFalse(lcv2.noNulls);
+    verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size);
 
-    //Now test with repeating flag
+    // Now test with repeating flag
     lcv3.isRepeating = true;
     LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5);
     expr2.evaluate(vrg);
@@ -165,14 +220,15 @@ public class TestVectorArithmeticExpress
       assertEquals(seed * ( 4 + 5*(i+1)), lcv5.vector[i]);
     }
 
-    //Repeating with other as nullable
+    // Repeating with other as nullable
     lcv4.noNulls = false;
     lcv4.isNull[0] = true;
     expr2.evaluate(vrg);
     assertTrue(lcv5.isNull[0]);
     assertFalse(lcv5.noNulls);
+    verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size);
 
-    //Repeating null value
+    // Repeating null value
     lcv3.isRepeating = true;
     lcv3.noNulls = false;
     lcv3.isNull[0] = true;
@@ -180,5 +236,53 @@ public class TestVectorArithmeticExpress
     assertFalse(lcv5.noNulls);
     assertTrue(lcv5.isRepeating);
     assertTrue(lcv5.isNull[0]);
+    verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size);
+    
+    // Neither input has nulls. Verify that this propagates to output.
+    vrg.selectedInUse = false;
+    lcv0.noNulls = true;
+    lcv1.noNulls = true;
+    lcv0.isRepeating = false;
+    lcv1.isRepeating = false;   
+    lcv2.noNulls = false;         // set output noNulls to false to make sure it gets over-written
+    lcv2.isRepeating = true;      // similarly with isRepeating
+    expr.evaluate(vrg);
+    assertTrue(lcv2.noNulls);
+    assertFalse(lcv2.isRepeating);
+  }
+  
+  @Test
+  public void testLongColDivideLongColumn() {
+    
+    /* Testing for equality of doubles after a math operation is
+     * not always reliable so use this as a tolerance.
+     */
+    final double eps = 1e-7d; 
+    VectorizedRowBatch batch = getVectorizedRowBatch2LongInDoubleOut();
+    LongColDivideLongColumn expr = new LongColDivideLongColumn(0, 1, 2);
+    batch.cols[0].isNull[1] = true;
+    batch.cols[0].noNulls = false;
+    batch.cols[1].noNulls = false;
+    DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
+    
+    // Set so we can verify they are reset by operation
+    out.noNulls = true;
+    out.isRepeating = true;
+    
+    expr.evaluate(batch);
+    
+    // 0/0 for entry 0 should work but generate NaN
+    assertTrue(Double.isNaN(out.vector[0]));
+    
+    // verify NULL output in entry 1 is correct
+    assertTrue(out.isNull[1]);
+    assertTrue(Double.isNaN(out.vector[1]));
+
+    // check entries beyond first 2 
+    for (int i = 2; i != batch.size; i++) {
+      assertTrue(out.vector[i] > 1.0d - eps && out.vector[i] < 1.0d + eps);
+    }
+    assertFalse(out.noNulls);
+    assertFalse(out.isRepeating);
   }
 }

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java Mon Jun  3 18:20:22 2013
@@ -18,10 +18,15 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 import junit.framework.Assert;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarModuloLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalarDivideLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -32,6 +37,12 @@ import org.junit.Test;
  * the left and a column vector on the right.
  */
 public class TestVectorScalarColArithmetic {
+  
+  /* Testing for equality of doubles after a math operation is
+   * not always reliable so use this as a tolerance.
+   */
+  private final static double eps = 1e-7d; 
+  
   private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) {
     VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
     LongColumnVector lcv = new LongColumnVector(size);
@@ -105,6 +116,8 @@ public class TestVectorScalarColArithmet
     }
     Assert.assertFalse(((LongColumnVector)batch.cols[1]).noNulls);
     Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating);
+    TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries(
+        (LongColumnVector) batch.cols[1], batch.selected, batch.selectedInUse, batch.size); 
   }
 
   @Test
@@ -147,5 +160,62 @@ public class TestVectorScalarColArithmet
     Assert.assertTrue(out.isRepeating);
     Assert.assertFalse(out.noNulls);
     Assert.assertEquals(true, out.isNull[0]);
+    TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries(
+        out, batch.selected, batch.selectedInUse, batch.size); 
+  }
+  
+  private static boolean equalsWithinTolerance(double a, double b) {
+    return Math.abs(a - b) < eps;
+  }
+  
+  @Test
+  public void testLongScalarDivide() {
+    VectorizedRowBatch batch = 
+        TestVectorArithmeticExpressions.getVectorizedRowBatch2LongInDoubleOut();
+    LongColDivideLongScalar expr = new LongColDivideLongScalar(0, 100, 2);
+    batch.cols[0].isNull[0] = true;
+    batch.cols[0].noNulls = false;
+    DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
+    out.noNulls = true;     // set now so we can verify it changed
+    out.isRepeating = true; 
+    expr.evaluate(batch);
+    
+    // verify NULL output in entry 0 is correct
+    assertTrue(out.isNull[0]);
+    assertTrue(Double.isNaN(out.vector[0]));
+
+    // check entries beyond first one 
+    for (int i = 1; i != batch.size; i++) {
+      assertTrue(equalsWithinTolerance((i * 37) / 100d, out.vector[i]));
+    }
+    assertFalse(out.noNulls);
+    assertFalse(out.isRepeating);
+  }
+  
+  @Test 
+  public void testScalarLongDivide() {
+    VectorizedRowBatch batch = 
+        TestVectorArithmeticExpressions.getVectorizedRowBatch2LongInDoubleOut();
+    LongScalarDivideLongColumn expr = new LongScalarDivideLongColumn(100, 0, 2);
+    batch.cols[0].isNull[1] = true;
+    batch.cols[0].noNulls = false;
+    DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
+    out.noNulls = true;     // set now so we can verify it changed
+    out.isRepeating = true;
+    expr.evaluate(batch);
+    
+    // verify zero-divide result for position 0
+    assertTrue(Double.isInfinite(out.vector[0]));
+    
+    // verify NULL output in entry 1 is correct
+    assertTrue(out.isNull[1]);
+    assertTrue(Double.isNaN(out.vector[1]));
+
+    // check entries beyond 2nd one 
+    for (int i = 2; i != batch.size; i++) {
+      assertTrue(equalsWithinTolerance(100d / (i * 37), out.vector[i]));
+    }
+    assertFalse(out.noNulls);
+    assertFalse(out.isRepeating);
   }
 }