You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/03 20:20:24 UTC
svn commit: r1489091 [4/4] - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/
java/org/apache/hadoop/...
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java Mon Jun 3 18:20:22 2013
@@ -24,10 +24,12 @@ import static org.junit.Assert.assertTru
import junit.framework.Assert;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
import org.junit.Test;
@@ -58,25 +60,46 @@ public class TestVectorArithmeticExpress
vrg.size = size;
return vrg;
}
+
+ public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(3);
+ LongColumnVector lcv, lcv2;
+ lcv = new LongColumnVector();
+ for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+ lcv.vector[i] = i * 37;
+ }
+ batch.cols[0] = lcv;
+ lcv2 = new LongColumnVector();
+ batch.cols[1] = lcv2;
+ for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+ lcv2.vector[i] = i * 37;
+ }
+ batch.cols[2] = new DoubleColumnVector();
+ batch.size = VectorizedRowBatch.DEFAULT_SIZE;
+ return batch;
+ }
@Test
public void testLongColAddLongScalarWithNulls() {
- VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector
+ VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector
(VectorizedRowBatch.DEFAULT_SIZE);
- LongColumnVector lcv = (LongColumnVector) vrg.cols[0];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[0];
+ LongColumnVector lcvOut = (LongColumnVector) batch.cols[1];
TestVectorizedRowBatch.addRandomNulls(lcv);
LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1);
- expr.evaluate(vrg);
- //verify
+ expr.evaluate(batch);
+
+ // verify
for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
if (!lcv.isNull[i]) {
- Assert.assertEquals(i*37+23, ((LongColumnVector)vrg.cols[1]).vector[i]);
+ Assert.assertEquals(i*37+23, lcvOut.vector[i]);
} else {
- Assert.assertTrue(((LongColumnVector)vrg.cols[1]).isNull[i]);
+ Assert.assertTrue(lcvOut.isNull[i]);
}
}
- Assert.assertFalse(((LongColumnVector)vrg.cols[1]).noNulls);
- Assert.assertFalse(((LongColumnVector)vrg.cols[1]).isRepeating);
+ Assert.assertFalse(lcvOut.noNulls);
+ Assert.assertFalse(lcvOut.isRepeating);
+ verifyLongNullDataVectorEntries(lcvOut, batch.selected, batch.selectedInUse, batch.size);
}
@Test
@@ -117,6 +140,36 @@ public class TestVectorArithmeticExpress
Assert.assertTrue(out.isRepeating);
Assert.assertFalse(out.noNulls);
Assert.assertEquals(true, out.isNull[0]);
+ verifyLongNullDataVectorEntries(out, batch.selected, batch.selectedInUse, batch.size);
+ }
+
+ /* Make sure all the NULL entries in this long column output vector have their data vector
+ * element set to LongColumnVector.NULL_VALUE, as per the specification, to prevent later
+ * arithmetic errors (e.g. zero-divide).
+ */
+ public static void verifyLongNullDataVectorEntries(
+ LongColumnVector v, int[] sel, boolean selectedInUse, int n) {
+ if (n == 0 || v.noNulls) {
+ return;
+ } else if (v.isRepeating) {
+ if (v.isNull[0]) {
+ assertEquals(LongColumnVector.NULL_VALUE, v.vector[0]);
+ }
+ }
+ else if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (v.isNull[i]) {
+ assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]);
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (v.isNull[i]) {
+ assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]);
+ }
+ }
+ }
}
@Test
@@ -138,7 +191,7 @@ public class TestVectorArithmeticExpress
}
assertTrue(lcv2.noNulls);
- //Now set one column nullable
+ // Now set one column nullable
lcv1.noNulls = false;
lcv1.isNull[1] = true;
lcv2.isRepeating = true; // set output isRepeating to true to make sure it gets over-written
@@ -147,8 +200,9 @@ public class TestVectorArithmeticExpress
assertTrue(lcv2.isNull[1]);
assertFalse(lcv2.noNulls);
assertFalse(lcv2.isRepeating);
+ verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size);
- //Now set other column nullable too
+ // Now set other column nullable too
lcv0.noNulls = false;
lcv0.isNull[1] = true;
lcv0.isNull[3] = true;
@@ -156,8 +210,9 @@ public class TestVectorArithmeticExpress
assertTrue(lcv2.isNull[1]);
assertTrue(lcv2.isNull[3]);
assertFalse(lcv2.noNulls);
+ verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size);
- //Now test with repeating flag
+ // Now test with repeating flag
lcv3.isRepeating = true;
LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5);
expr2.evaluate(vrg);
@@ -165,14 +220,15 @@ public class TestVectorArithmeticExpress
assertEquals(seed * ( 4 + 5*(i+1)), lcv5.vector[i]);
}
- //Repeating with other as nullable
+ // Repeating with other as nullable
lcv4.noNulls = false;
lcv4.isNull[0] = true;
expr2.evaluate(vrg);
assertTrue(lcv5.isNull[0]);
assertFalse(lcv5.noNulls);
+ verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size);
- //Repeating null value
+ // Repeating null value
lcv3.isRepeating = true;
lcv3.noNulls = false;
lcv3.isNull[0] = true;
@@ -180,5 +236,53 @@ public class TestVectorArithmeticExpress
assertFalse(lcv5.noNulls);
assertTrue(lcv5.isRepeating);
assertTrue(lcv5.isNull[0]);
+ verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size);
+
+ // Neither input has nulls. Verify that this propagates to output.
+ vrg.selectedInUse = false;
+ lcv0.noNulls = true;
+ lcv1.noNulls = true;
+ lcv0.isRepeating = false;
+ lcv1.isRepeating = false;
+ lcv2.noNulls = false; // set output noNulls to false to make sure it gets over-written
+ lcv2.isRepeating = true; // similarly with isRepeating
+ expr.evaluate(vrg);
+ assertTrue(lcv2.noNulls);
+ assertFalse(lcv2.isRepeating);
+ }
+
+ @Test
+ public void testLongColDivideLongColumn() {
+
+ /* Testing for equality of doubles after a math operation is
+ * not always reliable so use this as a tolerance.
+ */
+ final double eps = 1e-7d;
+ VectorizedRowBatch batch = getVectorizedRowBatch2LongInDoubleOut();
+ LongColDivideLongColumn expr = new LongColDivideLongColumn(0, 1, 2);
+ batch.cols[0].isNull[1] = true;
+ batch.cols[0].noNulls = false;
+ batch.cols[1].noNulls = false;
+ DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
+
+ // Set so we can verify they are reset by operation
+ out.noNulls = true;
+ out.isRepeating = true;
+
+ expr.evaluate(batch);
+
+ // 0/0 for entry 0 should work but generate NaN
+ assertTrue(Double.isNaN(out.vector[0]));
+
+ // verify NULL output in entry 1 is correct
+ assertTrue(out.isNull[1]);
+ assertTrue(Double.isNaN(out.vector[1]));
+
+ // check entries beyond first 2
+ for (int i = 2; i != batch.size; i++) {
+ assertTrue(out.vector[i] > 1.0d - eps && out.vector[i] < 1.0d + eps);
+ }
+ assertFalse(out.noNulls);
+ assertFalse(out.isRepeating);
}
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java Mon Jun 3 18:20:22 2013
@@ -18,10 +18,15 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
import junit.framework.Assert;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarModuloLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalarDivideLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -32,6 +37,12 @@ import org.junit.Test;
* the left and a column vector on the right.
*/
public class TestVectorScalarColArithmetic {
+
+ /* Testing for equality of doubles after a math operation is
+ * not always reliable so use this as a tolerance.
+ */
+ private final static double eps = 1e-7d;
+
private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) {
VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
LongColumnVector lcv = new LongColumnVector(size);
@@ -105,6 +116,8 @@ public class TestVectorScalarColArithmet
}
Assert.assertFalse(((LongColumnVector)batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating);
+ TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries(
+ (LongColumnVector) batch.cols[1], batch.selected, batch.selectedInUse, batch.size);
}
@Test
@@ -147,5 +160,62 @@ public class TestVectorScalarColArithmet
Assert.assertTrue(out.isRepeating);
Assert.assertFalse(out.noNulls);
Assert.assertEquals(true, out.isNull[0]);
+ TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries(
+ out, batch.selected, batch.selectedInUse, batch.size);
+ }
+
+ private static boolean equalsWithinTolerance(double a, double b) {
+ return Math.abs(a - b) < eps;
+ }
+
+ @Test
+ public void testLongScalarDivide() {
+ VectorizedRowBatch batch =
+ TestVectorArithmeticExpressions.getVectorizedRowBatch2LongInDoubleOut();
+ LongColDivideLongScalar expr = new LongColDivideLongScalar(0, 100, 2);
+ batch.cols[0].isNull[0] = true;
+ batch.cols[0].noNulls = false;
+ DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
+ out.noNulls = true; // set now so we can verify it changed
+ out.isRepeating = true;
+ expr.evaluate(batch);
+
+ // verify NULL output in entry 0 is correct
+ assertTrue(out.isNull[0]);
+ assertTrue(Double.isNaN(out.vector[0]));
+
+ // check entries beyond first one
+ for (int i = 1; i != batch.size; i++) {
+ assertTrue(equalsWithinTolerance((i * 37) / 100d, out.vector[i]));
+ }
+ assertFalse(out.noNulls);
+ assertFalse(out.isRepeating);
+ }
+
+ @Test
+ public void testScalarLongDivide() {
+ VectorizedRowBatch batch =
+ TestVectorArithmeticExpressions.getVectorizedRowBatch2LongInDoubleOut();
+ LongScalarDivideLongColumn expr = new LongScalarDivideLongColumn(100, 0, 2);
+ batch.cols[0].isNull[1] = true;
+ batch.cols[0].noNulls = false;
+ DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
+ out.noNulls = true; // set now so we can verify it changed
+ out.isRepeating = true;
+ expr.evaluate(batch);
+
+ // verify zero-divide result for position 0
+ assertTrue(Double.isInfinite(out.vector[0]));
+
+ // verify NULL output in entry 1 is correct
+ assertTrue(out.isNull[1]);
+ assertTrue(Double.isNaN(out.vector[1]));
+
+ // check entries beyond 2nd one
+ for (int i = 2; i != batch.size; i++) {
+ assertTrue(equalsWithinTolerance(100d / (i * 37), out.vector[i]));
+ }
+ assertFalse(out.noNulls);
+ assertFalse(out.isRepeating);
}
}