You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/22 23:06:19 UTC
svn commit: r1485422 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Author: omalley
Date: Wed May 22 21:06:19 2013
New Revision: 1485422
URL: http://svn.apache.org/r1485422
Log:
HIVE-4472 OR, NOT Filter logic can lose an array, and always takes time O(VectorizedRowBatch.DEFAULT_SIZE) (Jitendra Nath Pandey via omalley)
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java Wed May 22 21:06:19 2013
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
/**
@@ -59,7 +60,7 @@ public class DoubleColumnVector extends
index = 0;
}
if (!noNulls && isNull[index]) {
- return null;
+ return NullWritable.get();
} else {
writableObj.set(vector[index]);
return writableObj;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java Wed May 22 21:06:19 2013
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
/**
@@ -59,7 +60,7 @@ public class LongColumnVector extends Co
index = 0;
}
if (!noNulls && isNull[index]) {
- return null;
+ return NullWritable.get();
} else {
writableObj.set(vector[index]);
return writableObj;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java Wed May 22 21:06:19 2013
@@ -24,11 +24,11 @@ import org.apache.hadoop.hive.ql.exec.ve
* This class represents an Or expression. This applies short circuit optimization.
*/
public class FilterExprOrExpr extends VectorExpression {
- VectorExpression childExpr1;
- VectorExpression childExpr2;
- int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE];
- int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
- int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private final VectorExpression childExpr1;
+ private final VectorExpression childExpr2;
+ private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
public FilterExprOrExpr(VectorExpression childExpr1, VectorExpression childExpr2) {
this.childExpr1 = childExpr1;
@@ -42,15 +42,14 @@ public class FilterExprOrExpr extends Ve
return;
}
boolean prevSelectInUse = batch.selectedInUse;
- //Clone the selected vector
- int [] sel = batch.selected;
+
+ // Save the original selected vector
+ int[] sel = batch.selected;
if (batch.selectedInUse) {
- for (int i = 0; i < n; i++) {
- tmpSelect1[i] = sel[i];
- }
+ System.arraycopy(sel, 0, initialSelected, 0, n);
} else {
for (int i = 0; i < n; i++) {
- tmpSelect1[i] = i;
+ initialSelected[i] = i;
sel[i] = i;
}
batch.selectedInUse = true;
@@ -58,40 +57,44 @@ public class FilterExprOrExpr extends Ve
childExpr1.evaluate(batch);
- //Calculate unselected ones in last evaluate.
- for (int i = 0; i < tmp.length; i++) {
- tmp[i] = 0;
+ // Preserve the selected reference and size values generated
+ // after the first child is evaluated.
+ int sizeAfterFirstChild = batch.size;
+ int[] selectedAfterFirstChild = batch.selected;
+
+ // Calculate unselected ones in last evaluate.
+ for (int j = 0; j < n; j++) {
+ tmp[initialSelected[j]] = 0;
}
for (int j = 0; j < batch.size; j++) {
- int i = sel[j];
- tmp[i] = 1;
+ tmp[selectedAfterFirstChild[j]] = 1;
}
int unselectedSize = 0;
- for (int j =0; j < n; j++) {
- int i = tmpSelect1[j];
+ for (int j = 0; j < n; j++) {
+ int i = initialSelected[j];
if (tmp[i] == 0) {
unselected[unselectedSize++] = i;
}
}
- //Preserve current selected and size
- int currentSize = batch.size;
- int [] currentSelected = batch.selected;
- //Evaluate second child expression over unselected ones only.
+ // Evaluate second child expression over unselected ones only.
batch.selected = unselected;
batch.size = unselectedSize;
+
childExpr2.evaluate(batch);
- //Merge the result of last evaluate to previous evaluate.
- int newSize = batch.size + currentSize;
- for (int i = batch.size; i < newSize; i++ ) {
- batch.selected[i] = currentSelected[i-batch.size];
- }
+ // Merge the result of last evaluate to previous evaluate.
+ int newSize = batch.size + sizeAfterFirstChild;
+ System.arraycopy(selectedAfterFirstChild, 0, batch.selected, batch.size, sizeAfterFirstChild);
batch.size = newSize;
if (newSize == n) {
- //Filter didn't do anything
+ // Filter didn't do anything
batch.selectedInUse = prevSelectInUse;
}
+
+ // The row batch now owns the array that was 'unselected' (it became batch.selected),
+ // so reuse the batch's previous selected array as our scratch buffer from now on.
+ unselected = selectedAfterFirstChild;
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java Wed May 22 21:06:19 2013
@@ -24,10 +24,10 @@ import org.apache.hadoop.hive.ql.exec.ve
* This class represents a NOT filter expression. This applies short circuit optimization.
*/
public class FilterNotExpr extends VectorExpression {
- VectorExpression childExpr1;
- int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE];
- int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
- int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private final VectorExpression childExpr1;
+ private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
public FilterNotExpr(VectorExpression childExpr1) {
this.childExpr1 = childExpr1;
@@ -37,19 +37,17 @@ public class FilterNotExpr extends Vecto
public void evaluate(VectorizedRowBatch batch) {
int n = batch.size;
- if (n<=0) {
+ if (n <= 0) {
return;
}
- //Clone the selected vector
- int [] sel = batch.selected;
+ // Clone the selected vector
+ int[] sel = batch.selected;
if (batch.selectedInUse) {
- for (int i = 0; i < n; i++) {
- tmpSelect1[i] = sel[i];
- }
+ System.arraycopy(sel, 0, initialSelected, 0, n);
} else {
for (int i = 0; i < n; i++) {
- tmpSelect1[i] = i;
+ initialSelected[i] = i;
sel[i] = i;
}
batch.selectedInUse = true;
@@ -57,25 +55,30 @@ public class FilterNotExpr extends Vecto
childExpr1.evaluate(batch);
- //Calculate unselected ones in last evaluate.
- for (int i = 0; i < tmp.length; i++) {
- tmp[i] = 0;
+ // Calculate unselected ones in last evaluate.
+ for (int i = 0; i < n; i++) {
+ tmp[initialSelected[i]] = 0;
}
+
+ // Need to set sel reference again, because the child expression might
+ // have invalidated the earlier reference
+ sel = batch.selected;
for (int j = 0; j < batch.size; j++) {
int i = sel[j];
tmp[i] = 1;
}
int unselectedSize = 0;
- for (int j =0; j < n; j++) {
- int i = tmpSelect1[j];
+ for (int j = 0; j < n; j++) {
+ int i = initialSelected[j];
if (tmp[i] == 0) {
unselected[unselectedSize++] = i;
}
}
- //The unselected is the new selected
+ // The unselected is the new selected, swap the arrays
batch.selected = unselected;
- batch.size = unselectedSize;
+ unselected = sel;
+ batch.size = unselectedSize;
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java Wed May 22 21:06:19 2013
@@ -21,15 +21,17 @@ package org.apache.hadoop.hive.ql.exec.v
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This expression selects a row if the given boolean column is false.
+ */
public class SelectColumnIsFalse extends VectorExpression {
- int colNum1;
+ private final int colNum1;
- public SelectColumnIsFalse(int colNum1)
- {
- this.colNum1 = colNum1;
- }
+ public SelectColumnIsFalse(int colNum1) {
+ this.colNum1 = colNum1;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
@@ -43,29 +45,29 @@ public class SelectColumnIsFalse extends
boolean[] nullVector = inputColVector1.isNull;
if (n <= 0) {
- //Nothing to do
+ // Nothing to do
return;
}
if (inputColVector1.noNulls) {
if (inputColVector1.isRepeating) {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
if (vector1[0] == 1) {
// All are filtered out
batch.size = 0;
+ return;
+ } else {
+ // All are selected;
+ return;
}
} else if (batch.selectedInUse) {
- int[] newSelected = new int[n];
int newSize = 0;
for (int j = 0; j != n; j++) {
int i = sel[j];
if (vector1[i] == 0) {
- newSelected[newSize++] = i;
+ sel[newSize++] = i;
}
}
batch.size = newSize;
- batch.selected = newSelected;
} else {
int newSize = 0;
for (int i = 0; i != n; i++) {
@@ -80,19 +82,22 @@ public class SelectColumnIsFalse extends
}
} else {
if (inputColVector1.isRepeating) {
- //Repeating and null value
- batch.size = 0;
+ if (nullVector[0] || (vector1[0] == 1)) {
+ // All are filtered out
+ batch.size = 0;
+ } else {
+ // All are selected;
+ return;
+ }
} else if (batch.selectedInUse) {
- int[] newSelected = new int[n];
int newSize = 0;
for (int j = 0; j != n; j++) {
int i = sel[j];
if (vector1[i] == 0 && !nullVector[i]) {
- newSelected[newSize++] = i;
+ sel[newSize++] = i;
}
}
batch.size = newSize;
- batch.selected = newSelected;
} else {
int newSize = 0;
for (int i = 0; i != n; i++) {
@@ -106,7 +111,7 @@ public class SelectColumnIsFalse extends
}
}
}
- }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java Wed May 22 21:06:19 2013
@@ -21,14 +21,17 @@ package org.apache.hadoop.hive.ql.exec.v
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This expression selects a row if the given column is not null.
+ */
public class SelectColumnIsNotNull extends VectorExpression {
- int colNum;
+ private final int colNum;
- public SelectColumnIsNotNull(int colNum) {
- this.colNum = colNum;
- }
+ public SelectColumnIsNotNull(int colNum) {
+ this.colNum = colNum;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
@@ -37,46 +40,47 @@ public class SelectColumnIsNotNull exten
ColumnVector inputColVector = batch.cols[colNum];
int[] sel = batch.selected;
- //Note: if type of isNull could be long[], could we just re-use this
- //vector as the output vector. No iterations would be needed.
boolean[] nullPos = inputColVector.isNull;
int n = batch.size;
if (n <= 0) {
- //Nothing to do
+ // Nothing to do
return;
}
if (inputColVector.noNulls) {
- //All selected, do nothing
+ // All selected, do nothing
return;
} else if (inputColVector.isRepeating) {
- //All must be null
- batch.size = 0;
+ if (nullPos[0]) {
+ // All are null so none are selected
+ batch.size = 0;
+ return;
+ } else {
+ // None are null, so all are selected
+ return;
+ }
} else if (batch.selectedInUse) {
- int [] newSelected = new int[n];
- int newSize=0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (!nullPos[i]) {
- newSelected[newSize++] = i;
- }
- }
- batch.selected = newSelected;
+ int newSize = 0;
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos[i]) {
+ sel[newSize++] = i;
+ }
+ }
batch.size = newSize;
- }
- else {
+ } else {
int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (!nullPos[i]) {
- sel[newSize++] = i;
+ for (int i = 0; i != n; i++) {
+ if (!nullPos[i]) {
+ sel[newSize++] = i;
}
- }
+ }
if (newSize < n) {
batch.selectedInUse = true;
batch.size = newSize;
}
- }
- }
+ }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java Wed May 22 21:06:19 2013
@@ -21,60 +21,63 @@ package org.apache.hadoop.hive.ql.exec.v
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This expression selects a row if the given column is null.
+ */
public class SelectColumnIsNull extends VectorExpression {
- int colNum;
+ private final int colNum;
- public SelectColumnIsNull(int colNum) {
- this.colNum = colNum;
- }
+ public SelectColumnIsNull(int colNum) {
+ this.colNum = colNum;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
super.evaluateChildren(batch);
}
ColumnVector inputColVector = batch.cols[colNum];
int[] sel = batch.selected;
- //Note: if type of isNull could be long[], could we just re-use this
- //vector as the output vector. No iterations would be needed.
boolean[] nullPos = inputColVector.isNull;
int n = batch.size;
if (n <= 0) {
- //Nothing to do
+ // Nothing to do
return;
}
if (inputColVector.noNulls) {
batch.size = 0;
} else if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Selection property will not change.
- return;
+ if (nullPos[0]) {
+ // All are null, so all must be selected.
+ return;
+ } else {
+ // None are null, so none are selected
+ batch.size = 0;
+ return;
+ }
} else if (batch.selectedInUse) {
- int [] newSelected = new int[n];
- int newSize=0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (nullPos[i]) {
- newSelected[newSize++] = i;
- }
- }
- batch.selected = newSelected;
+ int newSize = 0;
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (nullPos[i]) {
+ sel[newSize++] = i;
+ }
+ }
batch.size = newSize;
- }
- else {
+ } else {
int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (nullPos[i]) {
- sel[newSize++] = i;
+ for (int i = 0; i != n; i++) {
+ if (nullPos[i]) {
+ sel[newSize++] = i;
}
- }
+ }
if (newSize < n) {
batch.selectedInUse = true;
batch.size = newSize;
}
- }
- }
+ }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java Wed May 22 21:06:19 2013
@@ -21,14 +21,17 @@ package org.apache.hadoop.hive.ql.exec.v
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+/**
+ * This expression selects a row if the given boolean column is true.
+ */
public class SelectColumnIsTrue extends VectorExpression {
- int colNum1;
+ private final int colNum1;
- public SelectColumnIsTrue(int colNum1) {
- this.colNum1 = colNum1;
- }
+ public SelectColumnIsTrue(int colNum1) {
+ this.colNum1 = colNum1;
+ }
- @Override
+ @Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
super.evaluateChildren(batch);
@@ -40,29 +43,29 @@ public class SelectColumnIsTrue extends
boolean[] nullVector = inputColVector1.isNull;
if (n <= 0) {
- //Nothing to do
+ // Nothing to do
return;
}
if (inputColVector1.noNulls) {
if (inputColVector1.isRepeating) {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
if (vector1[0] == 0) {
// All are filtered out
batch.size = 0;
+ return;
+ } else {
+ // All are selected
+ return;
}
} else if (batch.selectedInUse) {
- int[] newSelected = new int[n];
int newSize = 0;
for (int j = 0; j != n; j++) {
int i = sel[j];
if (vector1[i] == 1) {
- newSelected[newSize++] = i;
+ sel[newSize++] = i;
}
}
batch.size = newSize;
- batch.selected = newSelected;
} else {
int newSize = 0;
for (int i = 0; i != n; i++) {
@@ -76,23 +79,27 @@ public class SelectColumnIsTrue extends
}
}
} else {
- if (inputColVector1.isRepeating) {
- //Repeating null value
- batch.size = 0;
+ if (inputColVector1.isRepeating) {
+ if (nullVector[0] || (vector1[0] == 0)) {
+ // All are filtered
+ batch.size = 0;
+ return;
+ } else {
+ // All are selected
+ return;
+ }
} else if (batch.selectedInUse) {
- int [] newSelected = new int[n];
- int newSize=0;
- for(int j=0; j != n; j++) {
+ int newSize = 0;
+ for (int j = 0; j != n; j++) {
int i = sel[j];
if (vector1[i] == 1 && !nullVector[i]) {
- newSelected[newSize++] = i;
+ sel[newSize++] = i;
}
}
batch.size = newSize;
- batch.selected = newSelected;
} else {
- int newSize=0;
- for(int i = 0; i != n; i++) {
+ int newSize = 0;
+ for (int i = 0; i != n; i++) {
if (vector1[i] == 1 && !nullVector[i]) {
sel[newSize++] = i;
}
@@ -103,7 +110,7 @@ public class SelectColumnIsTrue extends
}
}
}
- }
+ }
@Override
public int getOutputColumn() {
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1485422&r1=1485421&r2=1485422&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Wed May 22 21:06:19 2013
@@ -19,19 +19,28 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.junit.Assert;
import org.junit.Test;
-
+/**
+* Unit tests for logical expressions AND, OR, NOT, IsNull etc.
+*/
public class TestVectorLogicalExpressions {
+ private static final int BOOLEAN_COLUMN_TEST_SIZE = 9;
+
@Test
public void testLongColOrLongCol() {
VectorizedRowBatch batch = getBatchThreeBooleanCols();
- ColOrCol expr = new ColOrCol(0,1,2);
+ ColOrCol expr = new ColOrCol(0, 1, 2);
LongColumnVector outCol = (LongColumnVector) batch.cols[2];
expr.evaluate(batch);
// verify
@@ -67,9 +76,12 @@ public class TestVectorLogicalExpression
// try isRepeating path (left input only), no nulls
batch = getBatchThreeBooleanCols();
- batch.cols[0].noNulls = true; batch.cols[0].isRepeating = true;
- batch.cols[1].noNulls = true; batch.cols[1].isRepeating = false;
- batch.cols[2].noNulls = false; batch.cols[2].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].noNulls = true;
+ batch.cols[1].isRepeating = false;
+ batch.cols[2].noNulls = false;
+ batch.cols[2].isRepeating = true;
outCol = (LongColumnVector) batch.cols[2];
expr.evaluate(batch);
@@ -114,21 +126,23 @@ public class TestVectorLogicalExpression
v2.isRepeating = true; // this value should get over-written with correct value
v2.noNulls = true; // ditto
- batch.size = 9;
+ batch.size = BOOLEAN_COLUMN_TEST_SIZE;
return batch;
}
@Test
public void testBooleanNot() {
VectorizedRowBatch batch = getBatchThreeBooleanCols();
- NotCol expr = new NotCol(0,2);
+ NotCol expr = new NotCol(0, 2);
LongColumnVector outCol = (LongColumnVector) batch.cols[2];
expr.evaluate(batch);
// Case with nulls
Assert.assertFalse(outCol.isRepeating);
- Assert.assertEquals(1, outCol.vector[0]); Assert.assertFalse(outCol.isNull[0]);
- Assert.assertEquals(0, outCol.vector[2]); Assert.assertFalse(outCol.isNull[0]);
+ Assert.assertEquals(1, outCol.vector[0]);
+ Assert.assertFalse(outCol.isNull[0]);
+ Assert.assertEquals(0, outCol.vector[2]);
+ Assert.assertFalse(outCol.isNull[0]);
Assert.assertTrue(outCol.isNull[4]);
// No nulls case
@@ -145,7 +159,7 @@ public class TestVectorLogicalExpression
batch.cols[0].isRepeating = true;
batch.cols[0].isNull[0] = true;
expr.evaluate(batch);
- Assert.assertTrue(outCol.isRepeating);;
+ Assert.assertTrue(outCol.isRepeating);
Assert.assertTrue(outCol.isNull[0]);
// isRepeating, and no nulls
@@ -160,10 +174,10 @@ public class TestVectorLogicalExpression
}
@Test
- public void testIsNullExpr () {
+ public void testIsNullExpr() {
// has nulls, not repeating
VectorizedRowBatch batch = getBatchThreeBooleanCols();
- IsNull expr = new IsNull(0,2);
+ IsNull expr = new IsNull(0, 2);
LongColumnVector outCol = (LongColumnVector) batch.cols[2];
expr.evaluate(batch);
Assert.assertEquals(0, outCol.vector[0]);
@@ -185,7 +199,8 @@ public class TestVectorLogicalExpression
batch.cols[0].isRepeating = true;
batch.cols[0].isNull[0] = true;
expr.evaluate(batch);
- Assert.assertTrue(outCol.isRepeating);;
+ Assert.assertTrue(outCol.isRepeating);
+ ;
Assert.assertEquals(1, outCol.vector[0]);
Assert.assertTrue(outCol.noNulls);
@@ -238,4 +253,162 @@ public class TestVectorLogicalExpression
assertEquals(4, batch.selected[4]);
assertEquals(5, batch.selected[5]);
}
+
+ @Test
+ public void testFilterNotExpr() {
+ VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+ VectorizedRowBatch batch2 = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr = new SelectColumnIsTrue(0);
+ FilterNotExpr notExpr = new FilterNotExpr(expr);
+
+ notExpr.evaluate(batch1);
+
+ notExpr.evaluate(batch2);
+
+ assertEquals(batch1.size, batch2.size);
+ for (int j = 0; j < batch1.size; j++) {
+ assertEquals(batch1.selected[j], batch2.selected[j]);
+ int i = j;
+ assertEquals((((LongColumnVector) batch1.cols[0]).vector[i]),
+ (((LongColumnVector) batch2.cols[0]).vector[i]));
+ }
+ }
+
+ @Test
+ public void testFilterExprOrExpr() {
+ VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+ VectorizedRowBatch batch2 = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+ SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+ FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+
+ orExpr.evaluate(batch1);
+ orExpr.evaluate(batch2);
+
+ assertEquals(batch1.size, batch2.size);
+ for (int j = 0; j < batch1.size; j++) {
+ assertEquals(batch1.selected[j], batch2.selected[j]);
+ int i = j;
+ assertEquals((((LongColumnVector) batch1.cols[0]).vector[i]),
+ (((LongColumnVector) batch2.cols[0]).vector[i]));
+ }
+
+ assertEquals(5, batch1.size);
+ Set<Integer> expectedSet = new HashSet<Integer>();
+ expectedSet.add(0);
+ expectedSet.add(2);
+ expectedSet.add(3);
+ expectedSet.add(4);
+ expectedSet.add(7);
+
+ assertTrue(expectedSet.contains(batch1.selected[0]));
+ assertTrue(expectedSet.contains(batch1.selected[1]));
+ assertTrue(expectedSet.contains(batch1.selected[2]));
+ assertTrue(expectedSet.contains(batch1.selected[3]));
+ assertTrue(expectedSet.contains(batch1.selected[4]));
+
+ // Repeat the expression on the same batch,
+ // the result must be unchanged.
+ orExpr.evaluate(batch1);
+
+ assertEquals(5, batch1.size);
+ assertTrue(expectedSet.contains(batch1.selected[0]));
+ assertTrue(expectedSet.contains(batch1.selected[1]));
+ assertTrue(expectedSet.contains(batch1.selected[2]));
+ assertTrue(expectedSet.contains(batch1.selected[3]));
+ assertTrue(expectedSet.contains(batch1.selected[4]));
+ }
+
+ @Test
+ public void testFilterExprOrExprWithBatchReuse() {
+ VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+ SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+ FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+
+ orExpr.evaluate(batch1);
+
+ // Now re-initialize batch1 to simulate batch-object re-use.
+ for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+ batch1.selected[i] = 0;
+ }
+ batch1.size = BOOLEAN_COLUMN_TEST_SIZE;
+ batch1.selectedInUse = false;
+
+ // Swap column vectors to simulate change in data
+ ColumnVector tmp = batch1.cols[0];
+ batch1.cols[0] = batch1.cols[1];
+ batch1.cols[1] = tmp;
+
+ orExpr.evaluate(batch1);
+
+ assertEquals(5, batch1.size);
+ Set<Integer> expectedSet = new HashSet<Integer>();
+ expectedSet.add(0);
+ expectedSet.add(1);
+ expectedSet.add(3);
+ expectedSet.add(5);
+ expectedSet.add(6);
+
+ assertTrue(expectedSet.contains(batch1.selected[0]));
+ assertTrue(expectedSet.contains(batch1.selected[1]));
+ assertTrue(expectedSet.contains(batch1.selected[2]));
+ assertTrue(expectedSet.contains(batch1.selected[3]));
+ assertTrue(expectedSet.contains(batch1.selected[4]));
+ }
+
+ @Test
+ public void testFilterExprOrExprWithSelectInUse() {
+ VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+ SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+ FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+
+ // Evaluate batch1 so that temporary arrays in the expression
+ // have residual values to interfere in later computation
+ orExpr.evaluate(batch1);
+
+ // Swap column vectors, but keep selected vector unchanged
+ ColumnVector tmp = batch1.cols[0];
+ batch1.cols[0] = batch1.cols[1];
+ batch1.cols[1] = tmp;
+ // Make sure row-7 is in the output.
+ batch1.cols[1].isNull[7] = false;
+ ((LongColumnVector) batch1.cols[1]).vector[7] = 0;
+
+ orExpr.evaluate(batch1);
+
+ assertEquals(3, batch1.size);
+ Set<Integer> expectedSet = new HashSet<Integer>();
+ expectedSet.add(0);
+ expectedSet.add(3);
+ expectedSet.add(7);
+
+ assertTrue(expectedSet.contains(batch1.selected[0]));
+ assertTrue(expectedSet.contains(batch1.selected[1]));
+ assertTrue(expectedSet.contains(batch1.selected[2]));
+ }
+
+ @Test
+ public void testFilterExprAndExpr() {
+ VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
+ SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
+
+ FilterExprAndExpr orExpr = new FilterExprAndExpr(expr1, expr2);
+
+ orExpr.evaluate(batch1);
+
+ assertEquals(1, batch1.size);
+
+ assertEquals(2, batch1.selected[0]);
+ }
}