You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/22 23:07:57 UTC
svn commit: r1485423 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Author: omalley
Date: Wed May 22 21:07:57 2013
New Revision: 1485423
URL: http://svn.apache.org/r1485423
Log:
HIVE-4534 IsNotNull and NotCol incorrectly handle nulls. (Jitendra Pandey via
omalley)
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java Wed May 22 21:07:57 2013
@@ -21,16 +21,20 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This expression evaluates to true if the given input column is not null.
+ * The boolean output is stored in the specified output column.
+ */
public class IsNotNull extends VectorExpression {
- int colNum;
- int outputColumn;
+ int colNum;
+ int outputColumn;
- public IsNotNull(int colNum, int outputColumn) {
- this.colNum = colNum;
- this.outputColumn = outputColumn;
- }
+ public IsNotNull(int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.outputColumn = outputColumn;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
@@ -44,38 +48,34 @@ public class IsNotNull extends VectorExp
long[] outputVector = ((LongColumnVector) batch.cols[outputColumn]).vector;
if (n <= 0) {
- //Nothing to do
+ // Nothing to do
return;
}
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Selection property will not change.
- if (nullPos[0]) {
- outputVector[0] = 0;
+ // output never has nulls for this operator
+ batch.cols[outputColumn].noNulls = true;
+ if (inputColVector.noNulls) {
+ outputVector[0] = 1;
+ batch.cols[outputColumn].isRepeating = true;
+ } else if (inputColVector.isRepeating) {
+ // All must be selected otherwise size would be zero
+ // Selection property will not change.
+ outputVector[0] = nullPos[0] ? 0 : 1;
+ batch.cols[outputColumn].isRepeating = true;
+ } else {
+ batch.cols[outputColumn].isRepeating = false;
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = nullPos[i] ? 0 : 1;
+ }
} else {
- outputVector[0] = 1;
+ for (int i = 0; i != n; i++) {
+ outputVector[i] = nullPos[i] ? 0 : 1;
+ }
}
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (nullPos[i]) {
- outputVector[i] = 0;
- } else {
- outputVector[i] = 1;
- }
- }
- }
- else {
- for(int i = 0; i != n; i++) {
- if (nullPos[i]) {
- outputVector[i] = 0;
- } else {
- outputVector[i] = 1;
- }
- }
- }
- }
+ }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java Wed May 22 21:07:57 2013
@@ -21,16 +21,20 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This expression evaluates to true if the given input column is null.
+ * The boolean output is stored in the specified output column.
+ */
public class IsNull extends VectorExpression {
- int colNum;
- int outputColumn;
+ int colNum;
+ int outputColumn;
- public IsNull(int colNum, int outputColumn) {
- this.colNum = colNum;
- this.outputColumn = outputColumn;
- }
+ public IsNull(int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.outputColumn = outputColumn;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
@@ -39,52 +43,36 @@ public class IsNull extends VectorExpres
ColumnVector inputColVector = batch.cols[colNum];
int[] sel = batch.selected;
- //Note: if type of isNull could be long[], could we just re-use this
- //vector as the output vector. No iterations would be needed.
boolean[] nullPos = inputColVector.isNull;
int n = batch.size;
long[] outputVector = ((LongColumnVector) batch.cols[outputColumn]).vector;
if (n <= 0) {
- //Nothing to do, this is EOF
+ // Nothing to do, this is EOF
return;
}
// output never has nulls for this operator
batch.cols[outputColumn].noNulls = true;
- if (inputColVector.isRepeating && inputColVector.noNulls) {
+ if (inputColVector.noNulls) {
outputVector[0] = 0;
batch.cols[outputColumn].isRepeating = true;
- } else if (inputColVector.isRepeating && !inputColVector.noNulls) {
+ } else if (inputColVector.isRepeating) {
outputVector[0] = nullPos[0] ? 1 : 0;
batch.cols[outputColumn].isRepeating = true;
- } else if (!inputColVector.isRepeating && inputColVector.noNulls) {
+ } else {
if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = 0;
- }
- }
- else {
- for(int i = 0; i != n; i++) {
- outputVector[i] = 0;
- }
- }
- batch.cols[outputColumn].isRepeating = false;
- } else /* !inputColVector.isRepeating && !inputColVector.noNulls */ {
- if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
+ for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = nullPos[i] ? 1 : 0;
}
- }
- else {
- for(int i = 0; i != n; i++) {
+ } else {
+ for (int i = 0; i != n; i++) {
outputVector[i] = nullPos[i] ? 1 : 0;
}
}
batch.cols[outputColumn].isRepeating = false;
}
- }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java Wed May 22 21:07:57 2013
@@ -20,16 +20,19 @@ package org.apache.hadoop.hive.ql.exec.v
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * Evaluates the boolean complement of the input.
+ */
public class NotCol extends VectorExpression {
- int colNum;
- int outputColumn;
+ private final int colNum;
+ private final int outputColumn;
- public NotCol(int colNum, int outputColumn) {
- this.colNum = colNum;
- this.outputColumn = outputColumn;
- }
+ public NotCol(int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.outputColumn = outputColumn;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
@@ -44,40 +47,50 @@ public class NotCol extends VectorExpres
long[] outputVector = outV.vector;
if (n <= 0) {
- //Nothing to do, this is EOF
+ // Nothing to do, this is EOF
return;
}
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1
- outputVector[0] = ~vector[0] & 1;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = ~vector[i] & 1;
- }
- outV.isRepeating = false;
- }
- else {
- for(int i = 0; i != n; i++) {
- outputVector[i] = ~vector[i] & 1;
- }
- outV.isRepeating = false;
- }
-
- // handle NULLs
if (inputColVector.noNulls) {
outV.noNulls = true;
+ if (inputColVector.isRepeating) {
+ outV.isRepeating = true;
+ // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1
+ outputVector[0] = ~vector[0] & 1;
+ } else if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = ~vector[i] & 1;
+ }
+ outV.isRepeating = false;
+ } else {
+ for (int i = 0; i != n; i++) {
+ outputVector[i] = ~vector[i] & 1;
+ }
+ outV.isRepeating = false;
+ }
} else {
outV.noNulls = false;
if (inputColVector.isRepeating) {
+ outV.isRepeating = true;
+ outputVector[0] = ~vector[0] & 1;
outV.isNull[0] = inputColVector.isNull[0];
+ } else if (batch.selectedInUse) {
+ outV.isRepeating = false;
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = ~vector[i] & 1;
+ outV.isNull[i] = inputColVector.isNull[i];
+ }
} else {
- System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ outV.isRepeating = false;
+ for (int i = 0; i != n; i++) {
+ outputVector[i] = ~vector[i] & 1;
+ outV.isNull[i] = inputColVector.isNull[i];
+ }
}
}
- }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1485423&r1=1485422&r2=1485423&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Wed May 22 21:07:57 2013
@@ -188,10 +188,9 @@ public class TestVectorLogicalExpression
// No nulls case, not repeating
batch.cols[0].noNulls = true;
expr.evaluate(batch);
- Assert.assertFalse(outCol.isRepeating);
+ Assert.assertTrue(outCol.isRepeating);
Assert.assertTrue(outCol.noNulls);
Assert.assertEquals(0, outCol.vector[0]);
- Assert.assertEquals(0, outCol.vector[4]);
// isRepeating, and there are nulls
batch = getBatchThreeBooleanCols();
@@ -200,7 +199,6 @@ public class TestVectorLogicalExpression
batch.cols[0].isNull[0] = true;
expr.evaluate(batch);
Assert.assertTrue(outCol.isRepeating);
- ;
Assert.assertEquals(1, outCol.vector[0]);
Assert.assertTrue(outCol.noNulls);
@@ -216,6 +214,46 @@ public class TestVectorLogicalExpression
}
@Test
+ public void testIsNotNullExpr() {
+ // has nulls, not repeating
+ VectorizedRowBatch batch = getBatchThreeBooleanCols();
+ IsNotNull expr = new IsNotNull(0, 2);
+ LongColumnVector outCol = (LongColumnVector) batch.cols[2];
+ expr.evaluate(batch);
+ Assert.assertEquals(1, outCol.vector[0]);
+ Assert.assertEquals(0, outCol.vector[4]);
+ Assert.assertTrue(outCol.noNulls);
+ Assert.assertFalse(outCol.isRepeating);
+
+ // No nulls case, not repeating
+ batch.cols[0].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertTrue(outCol.isRepeating);
+ Assert.assertTrue(outCol.noNulls);
+ Assert.assertEquals(1, outCol.vector[0]);
+
+ // isRepeating, and there are nulls
+ batch = getBatchThreeBooleanCols();
+ outCol = (LongColumnVector) batch.cols[2];
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].isNull[0] = true;
+ expr.evaluate(batch);
+ Assert.assertTrue(outCol.isRepeating);
+ Assert.assertEquals(0, outCol.vector[0]);
+ Assert.assertTrue(outCol.noNulls);
+
+ // isRepeating, and no nulls
+ batch = getBatchThreeBooleanCols();
+ outCol = (LongColumnVector) batch.cols[2];
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertTrue(outCol.isRepeating);
+ Assert.assertTrue(outCol.noNulls);
+ Assert.assertEquals(1, outCol.vector[0]);
+ }
+
+ @Test
public void testBooleanFiltersOnColumns() {
VectorizedRowBatch batch = getBatchThreeBooleanCols();
@@ -233,25 +271,76 @@ public class TestVectorLogicalExpression
assertEquals(0, batch.selected[0]);
assertEquals(2, batch.selected[1]);
assertEquals(4, batch.selected[2]);
+ }
- batch = getBatchThreeBooleanCols();
- SelectColumnIsNull expr2 = new SelectColumnIsNull(0);
- expr2.evaluate(batch);
+ @Test
+ public void testSelectColumnIsNull() {
+ // has nulls, not repeating
+ VectorizedRowBatch batch = getBatchThreeBooleanCols();
+ SelectColumnIsNull expr = new SelectColumnIsNull(0);
+ expr.evaluate(batch);
assertEquals(3, batch.size);
assertEquals(4, batch.selected[0]);
assertEquals(5, batch.selected[1]);
assertEquals(8, batch.selected[2]);
+ // No nulls case, not repeating
+ batch = getBatchThreeBooleanCols();
+ batch.cols[0].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(0, batch.size);
+
+ // isRepeating, and there are nulls
+ batch = getBatchThreeBooleanCols();
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].isNull[0] = true;
+ int initialSize = batch.size;
+ expr.evaluate(batch);
+ Assert.assertEquals(initialSize, batch.size);
+
+ // isRepeating, and no nulls
batch = getBatchThreeBooleanCols();
- SelectColumnIsNotNull expr3 = new SelectColumnIsNotNull(1);
- expr3.evaluate(batch);
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(0, batch.size);
+ }
+
+ @Test
+ public void testSelectColumnIsNotNull() {
+ // has nulls, not repeating
+ VectorizedRowBatch batch = getBatchThreeBooleanCols();
+ SelectColumnIsNotNull expr = new SelectColumnIsNotNull(0);
+ expr.evaluate(batch);
assertEquals(6, batch.size);
assertEquals(0, batch.selected[0]);
assertEquals(1, batch.selected[1]);
assertEquals(2, batch.selected[2]);
assertEquals(3, batch.selected[3]);
- assertEquals(4, batch.selected[4]);
- assertEquals(5, batch.selected[5]);
+ assertEquals(6, batch.selected[4]);
+ assertEquals(7, batch.selected[5]);
+
+ // No nulls case, not repeating
+ batch = getBatchThreeBooleanCols();
+ batch.cols[0].noNulls = true;
+ int initialSize = batch.size;
+ expr.evaluate(batch);
+ Assert.assertEquals(initialSize, batch.size);
+
+ // isRepeating, and there are nulls
+ batch = getBatchThreeBooleanCols();
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].isNull[0] = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(0, batch.size);
+
+ // isRepeating, and no nulls
+ batch = getBatchThreeBooleanCols();
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ initialSize = batch.size;
+ expr.evaluate(batch);
+ Assert.assertEquals(initialSize, batch.size);
}
@Test