You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by eh...@apache.org on 2014/01/18 02:32:17 UTC
svn commit: r1559303 - in /hive/trunk/ql/src:
gen/vectorization/ExpressionTemplates/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Author: ehans
Date: Sat Jan 18 01:32:16 2014
New Revision: 1559303
URL: http://svn.apache.org/r1559303
Log:
HIVE-6186: error in vectorized Column-Column comparison filter for repeating case (Eric Hanson)
Modified:
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
Modified: hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt (original)
+++ hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt Sat Jan 18 01:32:16 2014
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
import org.apache.hadoop.hive.ql.exec.vector.*;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -64,70 +65,43 @@ public class <ClassName> extends VectorE
return;
}
- if (inputColVector1.noNulls && inputColVector2.noNulls) {
- if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!(vector1[0] <OperatorSymbol> vector2[0])) {
- batch.size = 0;
- }
- } else if (inputColVector1.isRepeating) {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
- }
- } else if (inputColVector2.isRepeating) {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
- batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
- }
- } else if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
+ // filter rows with NULL on left input
+ int newSize;
+ newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n);
+ if (newSize < n) {
+ n = batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+
+ // filter rows with NULL on right input
+ newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n);
+ if (newSize < n) {
+ n = batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+
+ // All rows with nulls have been filtered out, so just do normal filter for non-null case
+ if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ if (!(vector1[0] <OperatorSymbol> vector2[0])) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (batch.selectedInUse) {
+ newSize = 0;
+ for(int j = 0; j != n; j++) {
int i = sel[j];
- if (vector1[i] <OperatorSymbol> vector2[i]) {
+ if (vector1[0] <OperatorSymbol> vector2[i]) {
sel[newSize++] = i;
}
}
batch.size = newSize;
} else {
- int newSize = 0;
+ newSize = 0;
for(int i = 0; i != n; i++) {
- if (vector1[i] <OperatorSymbol> vector2[i]) {
+ if (vector1[0] <OperatorSymbol> vector2[i]) {
sel[newSize++] = i;
}
}
@@ -136,88 +110,42 @@ public class <ClassName> extends VectorE
batch.selectedInUse = true;
}
}
- } else if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- if (nullPos1[0] || nullPos2[0]) {
- batch.size = 0;
- }
- } else if (inputColVector1.isRepeating) {
- if (nullPos1[0]) {
- batch.size = 0;
- } else {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (!nullPos2[i]) {
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- }
- batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (!nullPos2[i]) {
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (vector1[i] <OperatorSymbol> vector2[0]) {
+ sel[newSize++] = i;
}
}
- }
- } else if (inputColVector2.isRepeating) {
- if (nullPos2[0]) {
- batch.size = 0;
+ batch.size = newSize;
} else {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (!nullPos1[i]) {
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
+ newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (vector1[i] <OperatorSymbol> vector2[0]) {
+ sel[newSize++] = i;
}
+ }
+ if (newSize < batch.size) {
batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (!nullPos1[i]) {
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
+ batch.selectedInUse = true;
}
}
} else if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
+ newSize = 0;
+ for(int j = 0; j != n; j++) {
int i = sel[j];
- if (!nullPos1[i] && !nullPos2[i]) {
- if (vector1[i] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
+ if (vector1[i] <OperatorSymbol> vector2[i]) {
+ sel[newSize++] = i;
}
}
batch.size = newSize;
} else {
- int newSize = 0;
+ newSize = 0;
for(int i = 0; i != n; i++) {
- if (!nullPos1[i] && !nullPos2[i]) {
- if (vector1[i] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
+ if (vector1[i] <OperatorSymbol> vector2[i]) {
+ sel[newSize++] = i;
}
}
if (newSize < batch.size) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Sat Jan 18 01:32:16 2014
@@ -336,4 +336,39 @@ public class NullUtil {
Arrays.fill(v.isNull, 0, n, false);
}
}
+
+ /**
+ * Filter out rows with null values. Return the number of rows in the batch.
+ */
+ public static int filterNulls(ColumnVector v, boolean selectedInUse, int[] sel, int n) {
+ int newSize = 0;
+
+ if (v.noNulls) {
+
+ // no rows will be filtered
+ return n;
+ }
+
+ if (v.isRepeating) {
+
+ // all rows are filtered if repeating null, otherwise no rows are filtered
+ return v.isNull[0] ? 0 : n;
+ }
+
+ if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!v.isNull[i]) {
+ sel[newSize++] = i;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!v.isNull[i]) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ return newSize;
+ }
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java Sat Jan 18 01:32:16 2014
@@ -70,31 +70,60 @@ public class TestVectorFilterExpressions
}
@Test
- public void testFilterLongColEqualLongColumn() {
+ public void testFilterLongColGreaterLongColumn() {
int seed = 17;
- VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
+ VectorizedRowBatch b = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
VectorizedRowBatch.DEFAULT_SIZE,
2, seed);
- LongColumnVector lcv0 = (LongColumnVector) vrg.cols[0];
- LongColumnVector lcv1 = (LongColumnVector) vrg.cols[1];
+ LongColumnVector lcv0 = (LongColumnVector) b.cols[0];
+ LongColumnVector lcv1 = (LongColumnVector) b.cols[1];
+ b.size = 3;
FilterLongColGreaterLongColumn expr = new FilterLongColGreaterLongColumn(0, 1);
- //Basic case
- lcv0.vector[1] = 23;
- lcv1.vector[1] = 19;
- lcv0.vector[5] = 23;
- lcv1.vector[5] = 19;
- expr.evaluate(vrg);
- assertEquals(2, vrg.size);
- assertEquals(1, vrg.selected[0]);
- assertEquals(5, vrg.selected[1]);
+ // Basic case
+ lcv0.vector[0] = 10;
+ lcv0.vector[1] = 10;
+ lcv0.vector[2] = 10;
+ lcv1.vector[0] = 20;
+ lcv1.vector[1] = 1;
+ lcv1.vector[2] = 7;
+
+ expr.evaluate(b);
+ assertEquals(2, b.size);
+ assertEquals(1, b.selected[0]);
+ assertEquals(2, b.selected[1]);
- //handle null
+ // handle null with selected in use
lcv0.noNulls = false;
lcv0.isNull[1] = true;
- expr.evaluate(vrg);
- assertEquals(1, vrg.size);
- assertEquals(5, vrg.selected[0]);
+ expr.evaluate(b);
+ assertEquals(1, b.size);
+ assertEquals(2, b.selected[0]);
+
+ // handle repeating
+ b.size = 3;
+ b.selectedInUse = false;
+ lcv0.isRepeating = true;
+ lcv0.noNulls = true;
+ expr.evaluate(b);
+ assertEquals(2, b.size);
+
+ // handle repeating null
+ b.size = 3;
+ b.selectedInUse = false;
+ lcv0.isNull[0] = true;
+ lcv0.noNulls = false;
+ expr.evaluate(b);
+ assertEquals(0, b.size);
+
+ // handle null on both sizes (not repeating)
+ b.size = 3;
+ b.selectedInUse = false;
+ lcv0.isRepeating = false;
+ lcv1.noNulls = false;
+ lcv1.isNull[2] = true;
+ expr.evaluate(b);
+ assertEquals(0, b.size);
}
@Test