You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/30 17:11:06 UTC
svn commit: r1487887 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/exec/vector/
test/org/apache/hadoop/hive/ql/exec/vector/
test/org/apache/hadoop/hive/ql/exec/vector/util/
Author: omalley
Date: Thu May 30 15:11:06 2013
New Revision: 1487887
URL: http://svn.apache.org/r1487887
Log:
HIVE-4596 Support strings in GROUP BY keys (Remus Rusanu via omalley)
Added:
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java
Removed:
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromIterables.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java Thu May 30 15:11:06 2013
@@ -20,29 +20,38 @@ package org.apache.hadoop.hive.ql.exec;
import java.util.Arrays;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
* A hash map key wrapper for vectorized processing.
* It stores the key values as primitives in arrays for each supported primitive type.
- * This works in conjunction with
+ * This works in conjunction with
* {@link org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch}
- * to hash vectorized processing units (batches).
+ * to hash vectorized processing units (batches).
*/
public class VectorHashKeyWrapper extends KeyWrapper {
-
+
private long[] longValues;
private double[] doubleValues;
+
+ private byte[][] byteValues;
+ private int[] byteStarts;
+ private int[] byteLengths;
+
private boolean[] isNull;
private int hashcode;
-
- public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount) {
+
+ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount) {
longValues = new long[longValuesCount];
doubleValues = new double[doubleValuesCount];
- isNull = new boolean[longValuesCount + doubleValuesCount];
+ byteValues = new byte[byteValuesCount][];
+ byteStarts = new int[byteValuesCount];
+ byteLengths = new int[byteValuesCount];
+ isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount];
}
-
+
private VectorHashKeyWrapper() {
}
@@ -56,32 +65,90 @@ public class VectorHashKeyWrapper extend
hashcode = Arrays.hashCode(longValues) ^
Arrays.hashCode(doubleValues) ^
Arrays.hashCode(isNull);
+
+ // This code, with branches and all, is not executed if there are no string keys
+ for (int i = 0; i < byteValues.length; ++i) {
+ /*
+ * Hashing the string is potentially expensive, so it is better to branch.
+ * Additionally, not looking at values for nulls means we do not have to reset them.
+ */
+ if (!isNull[longValues.length + doubleValues.length + i]) {
+ byte[] bytes = byteValues[i];
+ int start = byteStarts[i];
+ int length = byteLengths[i];
+ if (length == bytes.length && start == 0) {
+ hashcode ^= Arrays.hashCode(bytes);
+ }
+ else {
+ // Unfortunately there is no Arrays.hashCode(byte[], start, length)
+ for(int j = start; j < start + length; ++j) {
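+ // use 461 as it is a (sexy!) prime. NOTE(review): this XOR fold does not match Arrays.hashCode above for the same bytes - confirm equal strings at different offsets hash alike.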
+ hashcode ^= 461 * bytes[j];
+ }
+ }
+ }
+ }
}
-
+
@Override
public int hashCode() {
return hashcode;
}
-
- @Override
+
+ @Override
public boolean equals(Object that) {
if (that instanceof VectorHashKeyWrapper) {
VectorHashKeyWrapper keyThat = (VectorHashKeyWrapper)that;
return hashcode == keyThat.hashcode &&
Arrays.equals(longValues, keyThat.longValues) &&
Arrays.equals(doubleValues, keyThat.doubleValues) &&
- Arrays.equals(isNull, keyThat.isNull);
+ Arrays.equals(isNull, keyThat.isNull) &&
+ byteValues.length == keyThat.byteValues.length &&
+ (0 == byteValues.length || bytesEquals(keyThat));
}
return false;
}
-
+
+ private boolean bytesEquals(VectorHashKeyWrapper keyThat) {
+ // By the time we enter here the byteValues.length and isNull must have already been compared
+ for (int i = 0; i < byteValues.length; ++i) {
+ // the byte comparison is potentially expensive, so it is better to branch on null
+ if (!isNull[longValues.length + doubleValues.length + i]) {
+ if (0 != StringExpr.compare(
+ byteValues[i],
+ byteStarts[i],
+ byteLengths[i],
+ keyThat.byteValues[i],
+ keyThat.byteStarts[i],
+ keyThat.byteLengths[i])) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
@Override
protected Object clone() {
VectorHashKeyWrapper clone = new VectorHashKeyWrapper();
clone.longValues = longValues.clone();
clone.doubleValues = doubleValues.clone();
clone.isNull = isNull.clone();
+
+ clone.byteValues = new byte[byteValues.length][];
+ clone.byteStarts = new int[byteValues.length];
+ clone.byteLengths = byteLengths.clone();
+ for (int i = 0; i < byteValues.length; ++i) {
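+ // avoid allocation/copy of nulls, because it is potentially expensive; branch instead. NOTE(review): other methods index isNull at longValues.length + doubleValues.length + i - confirm isNull[i] below.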
+ if (!isNull[i]) {
+ clone.byteValues[i] = Arrays.copyOfRange(
+ byteValues[i],
+ byteStarts[i],
+ byteStarts[i] + byteLengths[i]);
+ }
+ }
clone.hashcode = hashcode;
+ assert clone.equals(this);
return clone;
}
@@ -121,19 +188,32 @@ public class VectorHashKeyWrapper extend
longValues[index] = 0; // assign 0 to simplify hashcode
isNull[index] = true;
}
-
+
+ public void assignString(int index, byte[] bytes, int start, int length) {
+ byteValues[index] = bytes;
+ byteStarts[index] = start;
+ byteLengths[index] = length;
+ isNull[longValues.length + doubleValues.length + index] = false;
+ }
+
+ public void assignNullString(int index) {
+ // We do not assign the value to [] because the value is never used on null
+ isNull[longValues.length + doubleValues.length + index] = true;
+ }
+
@Override
- public String toString()
+ public String toString()
{
- return String.format("%d[%s] %d[%s]",
+ return String.format("%d[%s] %d[%s] %d[%s]",
longValues.length, Arrays.toString(longValues),
- doubleValues.length, Arrays.toString(doubleValues));
+ doubleValues.length, Arrays.toString(doubleValues),
+ byteValues.length, Arrays.toString(byteValues));
}
public boolean getIsNull(int i) {
return isNull[i];
}
-
+
public long getLongValue(int i) {
return longValues[i];
}
@@ -142,4 +222,18 @@ public class VectorHashKeyWrapper extend
return doubleValues[i - longValues.length];
}
+ public byte[] getBytes(int i) {
+ return byteValues[i - longValues.length - doubleValues.length];
+ }
+
+ public int getByteStart(int i) {
+ return byteStarts[i - longValues.length - doubleValues.length];
+ }
+
+ public int getByteLength(int i) {
+ return byteLengths[i - longValues.length - doubleValues.length];
+ }
+
+
}
+
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java Thu May 30 15:11:06 2013
@@ -19,22 +19,25 @@
package org.apache.hadoop.hive.ql.exec;
import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
/**
- * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
+ * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
* row batch in a vectorized fashion.
* This class stores additional information about keys needed to evaluate and output the key values.
*
*/
public class VectorHashKeyWrapperBatch {
-
+
/**
* Helper class for looking up a key value based on key index
*
@@ -42,53 +45,61 @@ public class VectorHashKeyWrapperBatch {
private static class KeyLookupHelper {
public int longIndex;
public int doubleIndex;
+ public int stringIndex;
}
-
+
/**
* The key expressions that require evaluation and output the primitive values for each key.
*/
private VectorExpression[] keyExpressions;
-
+
/**
* indices of LONG primitive keys
*/
private int[] longIndices;
-
+
/**
* indices of DOUBLE primitive keys
*/
private int[] doubleIndices;
-
+
+ /*
+ * indices of string (byte[]) primitive keys
+ */
+ private int[] stringIndices;
+
/**
- * pre-allocated batch size vector of keys wrappers.
+ * pre-allocated batch size vector of keys wrappers.
* N.B. these keys are **mutable** and should never be used in a HashMap.
- * Always clone the key wrapper to obtain an immutable keywrapper suitable
+ * Always clone the key wrapper to obtain an immutable keywrapper suitable
* to use a key in a HashMap.
*/
private VectorHashKeyWrapper[] vectorHashKeyWrappers;
-
+
/**
* lookup vector to map from key index to primitive type index
*/
private KeyLookupHelper[] indexLookup;
-
+
/**
- * preallocated and reused LongWritable objects for emiting row mode key values
+ * preallocated and reused LongWritable objects for emiting row mode key values
*/
private LongWritable[] longKeyValueOutput;
-
+
/**
* preallocated and reused DoubleWritable objects for emiting row mode key values
*/
private DoubleWritable[] doubleKeyValueOutput;
-
+
+ private BytesWritable[] stringKeyValueOutput;
+
/**
- * Accessor for the batch-sized array of key wrappers
+ * Accessor for the batch-sized array of key wrappers
*/
public VectorHashKeyWrapper[] getVectorHashKeyWrappers() {
return vectorHashKeyWrappers;
}
-
+
/**
* Processes a batch:
* <ul>
@@ -96,71 +107,191 @@ public class VectorHashKeyWrapperBatch {
* <li>Copies out each key's primitive values into the key wrappers</li>
* <li>computes the hashcode of the key wrappers</li>
* </ul>
- * @param vrb
+ * @param batch
* @throws HiveException
*/
- public void evaluateBatch (VectorizedRowBatch vrb) throws HiveException {
+ public void evaluateBatch (VectorizedRowBatch batch) throws HiveException {
for(int i = 0; i < keyExpressions.length; ++i) {
- keyExpressions[i].evaluate(vrb);
+ keyExpressions[i].evaluate(batch);
}
for(int i = 0; i< longIndices.length; ++i) {
int keyIndex = longIndices[i];
int columnIndex = keyExpressions[keyIndex].getOutputColumn();
- LongColumnVector columnVector = (LongColumnVector) vrb.cols[columnIndex];
- if (columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
- assignLongNoNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
- } else if (columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
- assignLongNoNullsNoRepeatingSelection(i, vrb.size, columnVector, vrb.selected);
+ LongColumnVector columnVector = (LongColumnVector) batch.cols[columnIndex];
+ if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+ } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
} else if (columnVector.noNulls && columnVector.isRepeating) {
- assignLongNoNullsRepeating(i, vrb.size, columnVector);
- } else if (!columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
- assignLongNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
+ assignLongNoNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignLongNullsNoRepeatingNoSelection(i, batch.size, columnVector);
} else if (!columnVector.noNulls && columnVector.isRepeating) {
- assignLongNullsRepeating(i, vrb.size, columnVector);
- } else if (!columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
- assignLongNullsNoRepeatingSelection (i, vrb.size, columnVector, vrb.selected);
+ assignLongNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignLongNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
} else {
throw new HiveException (String.format("Unimplemented Long null/repeat/selected combination %b/%b/%b",
- columnVector.noNulls, columnVector.isRepeating, vrb.selectedInUse));
+ columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
}
}
for(int i=0;i<doubleIndices.length; ++i) {
int keyIndex = doubleIndices[i];
int columnIndex = keyExpressions[keyIndex].getOutputColumn();
- DoubleColumnVector columnVector = (DoubleColumnVector) vrb.cols[columnIndex];
- if (columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
- assignDoubleNoNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
- } else if (columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
- assignDoubleNoNullsNoRepeatingSelection(i, vrb.size, columnVector, vrb.selected);
+ DoubleColumnVector columnVector = (DoubleColumnVector) batch.cols[columnIndex];
+ if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignDoubleNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+ } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignDoubleNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
} else if (columnVector.noNulls && columnVector.isRepeating) {
- assignDoubleNoNullsRepeating(i, vrb.size, columnVector);
- } else if (!columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
- assignDoubleNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
+ assignDoubleNoNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignDoubleNullsNoRepeatingNoSelection(i, batch.size, columnVector);
} else if (!columnVector.noNulls && columnVector.isRepeating) {
- assignDoubleNullsRepeating(i, vrb.size, columnVector);
- } else if (!columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
- assignDoubleNullsNoRepeatingSelection (i, vrb.size, columnVector, vrb.selected);
+ assignDoubleNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignDoubleNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
} else {
throw new HiveException (String.format("Unimplemented Double null/repeat/selected combination %b/%b/%b",
- columnVector.noNulls, columnVector.isRepeating, vrb.selectedInUse));
+ columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
+ }
+ }
+ for(int i=0;i<stringIndices.length; ++i) {
+ int keyIndex = stringIndices[i];
+ int columnIndex = keyExpressions[keyIndex].getOutputColumn();
+ BytesColumnVector columnVector = (BytesColumnVector) batch.cols[columnIndex];
+ if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignStringNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+ } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignStringNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
+ } else if (columnVector.noNulls && columnVector.isRepeating) {
+ assignStringNoNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignStringNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && columnVector.isRepeating) {
+ assignStringNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignStringNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
+ } else {
+ throw new HiveException (String.format("Unimplemented String null/repeat/selected combination %b/%b/%b",
+ columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
}
}
- for(int i=0;i<vrb.size;++i) {
+ for(int i=0;i<batch.size;++i) {
vectorHashKeyWrappers[i].setHashKey();
}
}
-
+
+ /**
+ * Helper method to assign values from a vector column into the key wrapper.
+ * Optimized for string type, possible nulls, no repeat values, batch selection vector.
+ */
+ private void assignStringNullsNoRepeatingSelection(int index, int size,
+ BytesColumnVector columnVector, int[] selected) {
+ for(int i=0; i<size; ++i) {
+ int row = selected[i];
+ if (columnVector.isNull[row]) {
+ vectorHashKeyWrappers[i].assignNullString(index);
+ } else {
+ vectorHashKeyWrappers[i].assignString(index,
+ columnVector.vector[row],
+ columnVector.start[row],
+ columnVector.length[row]);
+ }
+ }
+ }
+
+ /**
+ * Helper method to assign values from a vector column into the key wrapper.
+ * Optimized for string type, possible nulls, repeat values.
+ */
+ private void assignStringNullsRepeating(int index, int size, BytesColumnVector columnVector) {
+ if (columnVector.isNull[0]) {
+ for(int i = 0; i < size; ++i) {
+ vectorHashKeyWrappers[i].assignNullString(index);
+ }
+ } else {
+ for(int i = 0; i < size; ++i) {
+ vectorHashKeyWrappers[i].assignString(index,
+ columnVector.vector[0],
+ columnVector.start[0],
+ columnVector.length[0]);
+ }
+ }
+ }
+
+ /**
+ * Helper method to assign values from a vector column into the key wrapper.
+ * Optimized for string type, possible nulls, no repeat values, no selection vector.
+ */
+ private void assignStringNullsNoRepeatingNoSelection(int index, int size,
+ BytesColumnVector columnVector) {
+ for(int i=0; i<size; ++i) {
+ if (columnVector.isNull[i]) {
+ vectorHashKeyWrappers[i].assignNullString(index);
+ } else {
+ vectorHashKeyWrappers[i].assignString(index,
+ columnVector.vector[i],
+ columnVector.start[i],
+ columnVector.length[i]);
+ }
+ }
+ }
+
+ /**
+ * Helper method to assign values from a vector column into the key wrapper.
+ * Optimized for string type, no nulls, repeat values.
+ */
+ private void assignStringNoNullsRepeating(int index, int size, BytesColumnVector columnVector) {
+ for(int i = 0; i < size; ++i) {
+ vectorHashKeyWrappers[i].assignString(index,
+ columnVector.vector[0],
+ columnVector.start[0],
+ columnVector.length[0]);
+ }
+ }
+
+ /**
+ * Helper method to assign values from a vector column into the key wrapper.
+ * Optimized for string type, no nulls, no repeat values, batch selection vector.
+ */
+ private void assignStringNoNullsNoRepeatingSelection(int index, int size,
+ BytesColumnVector columnVector, int[] selected) {
+ for(int i=0; i<size; ++i) {
+ int row = selected[i];
+ vectorHashKeyWrappers[i].assignString(index,
+ columnVector.vector[row],
+ columnVector.start[row],
+ columnVector.length[row]);
+ }
+ }
+
+ /**
+ * Helper method to assign values from a vector column into the key wrapper.
+ * Optimized for string type, no nulls, no repeat values, no selection vector.
+ */
+ private void assignStringNoNullsNoRepeatingNoSelection(int index, int size,
+ BytesColumnVector columnVector) {
+ for(int i=0; i<size; ++i) {
+ vectorHashKeyWrappers[i].assignString(index,
+ columnVector.vector[i],
+ columnVector.start[i],
+ columnVector.length[i]);
+ }
+ }
+
/**
* Helper method to assign values from a vector column into the key wrapper.
* Optimized for double type, possible nulls, no repeat values, batch selection vector.
*/
private void assignDoubleNullsNoRepeatingSelection(int index, int size,
DoubleColumnVector columnVector, int[] selected) {
- for(int r = 0; r < size; ++r) {
- if (!columnVector.isNull[r]) {
- vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[selected[r]]);
+ for(int i = 0; i < size; ++i) {
+ int row = selected[i];
+ if (!columnVector.isNull[row]) {
+ vectorHashKeyWrappers[i].assignDouble(index, columnVector.vector[row]);
} else {
- vectorHashKeyWrappers[r].assignNullDouble(index);
+ vectorHashKeyWrappers[i].assignNullDouble(index);
}
}
}
@@ -173,7 +304,7 @@ public class VectorHashKeyWrapperBatch {
DoubleColumnVector columnVector) {
for(int r = 0; r < size; ++r) {
vectorHashKeyWrappers[r].assignNullDouble(index);
- }
+ }
}
/**
@@ -188,7 +319,7 @@ public class VectorHashKeyWrapperBatch {
} else {
vectorHashKeyWrappers[r].assignNullDouble(index);
}
- }
+ }
}
/**
@@ -222,18 +353,19 @@ public class VectorHashKeyWrapperBatch {
vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[r]);
}
}
-
+
/**
* Helper method to assign values from a vector column into the key wrapper.
* Optimized for double type, possible nulls, no repeat values, batch selection vector.
*/
private void assignLongNullsNoRepeatingSelection(int index, int size,
LongColumnVector columnVector, int[] selected) {
- for(int r = 0; r < size; ++r) {
- if (!columnVector.isNull[selected[r]]) {
- vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[selected[r]]);
+ for(int i = 0; i < size; ++i) {
+ int row = selected[i];
+ if (!columnVector.isNull[row]) {
+ vectorHashKeyWrappers[i].assignLong(index, columnVector.vector[row]);
} else {
- vectorHashKeyWrappers[r].assignNullLong(index);
+ vectorHashKeyWrappers[i].assignNullLong(index);
}
}
}
@@ -261,7 +393,7 @@ public class VectorHashKeyWrapperBatch {
} else {
vectorHashKeyWrappers[r].assignNullLong(index);
}
- }
+ }
}
/**
@@ -306,30 +438,41 @@ public class VectorHashKeyWrapperBatch {
throws HiveException {
VectorHashKeyWrapperBatch compiledKeyWrapperBatch = new VectorHashKeyWrapperBatch();
compiledKeyWrapperBatch.keyExpressions = keyExpressions;
-
+
// We'll overallocate and then shrink the array for each type
int[] longIndices = new int[keyExpressions.length];
int longIndicesIndex = 0;
int[] doubleIndices = new int[keyExpressions.length];
int doubleIndicesIndex = 0;
+ int[] stringIndices = new int[keyExpressions.length];
+ int stringIndicesIndex = 0;
KeyLookupHelper[] indexLookup = new KeyLookupHelper[keyExpressions.length];
-
+
// Inspect the output type of each key expression.
for(int i=0; i < keyExpressions.length; ++i) {
indexLookup[i] = new KeyLookupHelper();
String outputType = keyExpressions[i].getOutputType();
- if (outputType.equalsIgnoreCase("long") ||
- outputType.equalsIgnoreCase("bigint")) {
+ if (outputType.equalsIgnoreCase("long") ||
+ outputType.equalsIgnoreCase("bigint") ||
+ outputType.equalsIgnoreCase("int")) {
longIndices[longIndicesIndex] = i;
indexLookup[i].longIndex = longIndicesIndex;
indexLookup[i].doubleIndex = -1;
+ indexLookup[i].stringIndex = -1;
++longIndicesIndex;
} else if (outputType.equalsIgnoreCase("double")) {
doubleIndices[doubleIndicesIndex] = i;
indexLookup[i].longIndex = -1;
indexLookup[i].doubleIndex = doubleIndicesIndex;
+ indexLookup[i].stringIndex = -1;
++doubleIndicesIndex;
- } else {
+ } else if (outputType.equalsIgnoreCase("string")) {
+ indexLookup[i].longIndex = -1;
+ indexLookup[i].doubleIndex = -1;
+ stringIndices[i]= stringIndicesIndex;
+ ++stringIndicesIndex;
+ }
+ else {
throw new HiveException("Unsuported vector output type: " + outputType);
}
}
@@ -342,13 +485,18 @@ public class VectorHashKeyWrapperBatch {
for (int i=0; i < doubleIndicesIndex; ++i) {
compiledKeyWrapperBatch.doubleKeyValueOutput[i] = new DoubleWritable();
}
+ compiledKeyWrapperBatch.stringKeyValueOutput = new BytesWritable[stringIndicesIndex];
+ for (int i = 0; i < stringIndicesIndex; ++i) {
+ compiledKeyWrapperBatch.stringKeyValueOutput[i] = new BytesWritable();
+ }
compiledKeyWrapperBatch.longIndices = Arrays.copyOf(longIndices, longIndicesIndex);
compiledKeyWrapperBatch.doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex);
- compiledKeyWrapperBatch.vectorHashKeyWrappers =
+ compiledKeyWrapperBatch.stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex);
+ compiledKeyWrapperBatch.vectorHashKeyWrappers =
new VectorHashKeyWrapper[VectorizedRowBatch.DEFAULT_SIZE];
for(int i=0;i<VectorizedRowBatch.DEFAULT_SIZE; ++i) {
- compiledKeyWrapperBatch.vectorHashKeyWrappers[i] =
- new VectorHashKeyWrapper(longIndicesIndex, doubleIndicesIndex);
+ compiledKeyWrapperBatch.vectorHashKeyWrappers[i] =
+ new VectorHashKeyWrapper(longIndicesIndex, doubleIndicesIndex, stringIndicesIndex);
}
return compiledKeyWrapperBatch;
}
@@ -356,7 +504,7 @@ public class VectorHashKeyWrapperBatch {
/**
* Get the row-mode writable object value of a key from a key wrapper
*/
- public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i)
+ public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i)
throws HiveException {
if (kw.getIsNull(i)) {
return null;
@@ -368,11 +516,15 @@ public class VectorHashKeyWrapperBatch {
} else if (klh.doubleIndex >= 0) {
doubleKeyValueOutput[klh.doubleIndex].set(kw.getDoubleValue(i));
return doubleKeyValueOutput[klh.doubleIndex];
+ } else if (klh.stringIndex >= 0) {
+ stringKeyValueOutput[klh.stringIndex].set(
+ kw.getBytes(i), kw.getByteStart(i), kw.getByteLength(i));
+ return stringKeyValueOutput[klh.stringIndex];
} else {
throw new HiveException(String.format(
- "Internal inconsistent KeyLookupHelper at index [%d]:%d %d",
- i, klh.longIndex, klh.doubleIndex));
+ "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d",
+ i, klh.longIndex, klh.doubleIndex, klh.stringIndex));
}
- }
+ }
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Thu May 30 15:11:06 2013
@@ -1063,14 +1063,17 @@ public class VectorizationContext {
}
}
- public ObjectInspector createObjectInspector(VectorExpression vectorExpression)
+ public ObjectInspector createObjectInspector(VectorExpression vectorExpression)
throws HiveException {
String columnType = vectorExpression.getOutputType();
if (columnType.equalsIgnoreCase("long") ||
- columnType.equalsIgnoreCase("bigint")) {
+ columnType.equalsIgnoreCase("bigint") ||
+ columnType.equalsIgnoreCase("int")) {
return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
} else if (columnType.equalsIgnoreCase("double")) {
return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ } else if (columnType.equalsIgnoreCase("string")) {
+ return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
} else {
throw new HiveException(String.format("Must implement type %s", columnType));
}
Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java?rev=1487887&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java Thu May 30 15:11:06 2013
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchBase;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Test helper class that creates vectorized execution batches from arbitrary type iterables.
+ */
+public class FakeVectorRowBatchFromObjectIterables extends FakeVectorRowBatchBase {
+
+ private final String[] types;
+ private final List<Iterator<Object>> iterators;
+ private final VectorizedRowBatch batch;
+ private boolean eof;
+ private final int batchSize;
+
+ /**
+ * Helper interface for assigning values to primitive vector column types.
+ */
+ private static interface ColumnVectorAssign
+ {
+ public void assign(
+ ColumnVector columnVector,
+ int row,
+ Object value);
+ }
+
+ private final ColumnVectorAssign[] columnAssign;
+
+ public FakeVectorRowBatchFromObjectIterables(int batchSize, String[] types,
+ Iterable<Object> ...iterables) throws HiveException {
+ this.types = types;
+ this.batchSize = batchSize;
+ iterators = new ArrayList<Iterator<Object>>(types.length);
+ columnAssign = new ColumnVectorAssign[types.length];
+
+ batch = new VectorizedRowBatch(types.length, batchSize);
+ for(int i=0; i< types.length; ++i) {
+ if (types[i].equalsIgnoreCase("long")) {
+ batch.cols[i] = new LongColumnVector(batchSize);
+ columnAssign[i] = new ColumnVectorAssign() {
+ @Override
+ public void assign(
+ ColumnVector columnVector,
+ int row,
+ Object value) {
+ LongColumnVector lcv = (LongColumnVector) columnVector;
+ lcv.vector[row] = (Long) value;
+ }
+ };
+ } else if (types[i].equalsIgnoreCase("string")) {
+ batch.cols[i] = new BytesColumnVector(batchSize);
+ columnAssign[i] = new ColumnVectorAssign() {
+ @Override
+ public void assign(
+ ColumnVector columnVector,
+ int row,
+ Object value) {
+ BytesColumnVector bcv = (BytesColumnVector) columnVector;
+ String s = (String) value;
+ byte[] bytes = s.getBytes();
+ bcv.vector[row] = bytes;
+ bcv.start[row] = 0;
+ bcv.length[row] = bytes.length;
+ }
+ };
+ } else if (types[i].equalsIgnoreCase("double")) {
+ batch.cols[i] = new DoubleColumnVector(batchSize);
+ columnAssign[i] = new ColumnVectorAssign() {
+ @Override
+ public void assign(
+ ColumnVector columnVector,
+ int row,
+ Object value) {
+ DoubleColumnVector dcv = (DoubleColumnVector) columnVector;
+ dcv.vector[row] = (Double) value;
+ }
+ };
+ } else {
+ throw new HiveException("Unimplemented type " + types[i]);
+ }
+ iterators.add(iterables[i].iterator());
+ }
+ }
+
+ @Override
+ public VectorizedRowBatch produceNextBatch() {
+ batch.size = 0;
+ batch.selectedInUse = false;
+ for (int i=0; i < types.length; ++i) {
+ ColumnVector col = batch.cols[i];
+ col.noNulls = true;
+ col.isRepeating = false;
+ }
+ while (!eof && batch.size < this.batchSize){
+ int r = batch.size;
+ for (int i=0; i < types.length; ++i) {
+ Iterator<Object> it = iterators.get(i);
+ if (!it.hasNext()) {
+ eof = true;
+ break;
+ }
+ Object value = it.next();
+ if (null == value) {
+ batch.cols[i].isNull[batch.size] = true;
+ batch.cols[i].noNulls = false;
+ } else {
+ columnAssign[i].assign(batch.cols[i], batch.size, value);
+ }
+ }
+ if (!eof) {
+ batch.size += 1;
+ }
+ }
+ return batch;
+ }
+}
+
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java Thu May 30 15:11:06 2013
@@ -35,7 +35,7 @@ import java.util.Set;
import org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator;
import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromConcat;
-import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromIterables;
+import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromLongIterables;
import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromRepeats;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
@@ -43,7 +43,9 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.junit.Test;
@@ -54,9 +56,11 @@ public class TestVectorGroupByOperator {
private static ExprNodeDesc buildColumnDesc(
VectorizationContext ctx,
- String column) {
+ String column,
+ TypeInfo typeInfo) {
+
return new ExprNodeColumnDesc(
- TypeInfoFactory.longTypeInfo, column, "table", false);
+ typeInfo, column, "table", false);
}
private static AggregationDesc buildAggregationDesc(
@@ -64,7 +68,7 @@ public class TestVectorGroupByOperator {
String aggregate,
String column) {
- ExprNodeDesc inputColumn = buildColumnDesc(ctx, column);
+ ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, TypeInfoFactory.longTypeInfo);
ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
params.add(inputColumn);
@@ -99,19 +103,40 @@ public class TestVectorGroupByOperator {
VectorizationContext ctx,
String aggregate,
String column,
+ TypeInfo typeInfo,
String key) {
GroupByDesc desc = buildGroupByDesc(ctx, aggregate, column);
-
- ExprNodeDesc keyExp = buildColumnDesc(ctx, key);
+
+ ExprNodeDesc keyExp = buildColumnDesc(ctx, key, typeInfo);
ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>();
keys.add(keyExp);
desc.setKeys(keys);
-
+
return desc;
}
@Test
+  public void testMinLongNullStringKeys() throws HiveException {
+    // MIN over a long column grouped by a string key, where the null keys are
+    // expected to be reported as a group of their own.
+    List<Object> keyColumn = Arrays.asList(new Object[]{"A", null, "A", null});
+    List<Object> valueColumn = Arrays.asList(new Object[]{13L, 5L, 7L, 19L});
+    HashMap<Object, Object> expectedMins = buildHashMap("A", 7L, null, 5L);
+    testAggregateStringKeyAggregate("min", 2, keyColumn, valueColumn, expectedMins);
+  }
+
+  /**
+   * MIN over a long column grouped by a non-null string key, fed in batches of two rows.
+   */
+  @Test
+  public void testMinLongStringKeys() throws HiveException {
+    List<Object> keyColumn = Arrays.asList(new Object[]{"A", "B", "A", "B"});
+    List<Object> valueColumn = Arrays.asList(new Object[]{13L, 5L, 7L, 19L});
+    HashMap<Object, Object> expectedMins = buildHashMap("A", 7L, "B", 5L);
+    testAggregateStringKeyAggregate("min", 2, keyColumn, valueColumn, expectedMins);
+  }
+
+ @Test
public void testMinLongKeyGroupByCompactBatch() throws HiveException {
testAggregateLongKeyAggregate(
"min",
@@ -120,7 +145,7 @@ public class TestVectorGroupByOperator {
Arrays.asList(new Long[]{13L,5L,7L,19L}),
buildHashMap(1L, 5L, 2L, 7L));
}
-
+
@Test
public void testMinLongKeyGroupBySingleBatch() throws HiveException {
testAggregateLongKeyAggregate(
@@ -130,7 +155,7 @@ public class TestVectorGroupByOperator {
Arrays.asList(new Long[]{13L,5L,7L,19L}),
buildHashMap(1L, 5L, 2L, 7L));
}
-
+
@Test
public void testMinLongKeyGroupByCrossBatch() throws HiveException {
testAggregateLongKeyAggregate(
@@ -170,7 +195,7 @@ public class TestVectorGroupByOperator {
Arrays.asList(new Long[]{13L,5L,7L,19L}),
buildHashMap(null, 13L, 2L, 19L));
}
-
+
@Test
public void testCountLongNullKeyGroupBySingleBatch() throws HiveException {
testAggregateLongKeyAggregate(
@@ -180,7 +205,7 @@ public class TestVectorGroupByOperator {
Arrays.asList(new Long[]{13L,5L,7L,19L}),
buildHashMap(null, 2L, 2L, 2L));
}
-
+
@Test
public void testSumLongNullKeyGroupBySingleBatch() throws HiveException {
testAggregateLongKeyAggregate(
@@ -190,7 +215,7 @@ public class TestVectorGroupByOperator {
Arrays.asList(new Long[]{13L,5L,7L,19L}),
buildHashMap(null, 20L, 2L, 24L));
}
-
+
@Test
public void testAvgLongNullKeyGroupBySingleBatch() throws HiveException {
testAggregateLongKeyAggregate(
@@ -210,7 +235,7 @@ public class TestVectorGroupByOperator {
Arrays.asList(new Long[]{13L, 5L,18L,19L,12L,15L}),
buildHashMap(null, 0.0, 2L, 49.0, 01L, 6.0));
}
-
+
@Test
public void testMinNullLongNullKeyGroupBy() throws HiveException {
testAggregateLongKeyAggregate(
@@ -230,7 +255,7 @@ public class TestVectorGroupByOperator {
5L);
}
-
+
@Test
public void testMinLongSimple() throws HiveException {
testAggregateLongAggregate(
@@ -408,7 +433,7 @@ public class TestVectorGroupByOperator {
new FakeVectorRowBatchFromConcat(
new FakeVectorRowBatchFromRepeats(
new Long[] {19L}, 10, 2),
- new FakeVectorRowBatchFromIterables(
+ new FakeVectorRowBatchFromLongIterables(
3,
Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
7L);
@@ -485,7 +510,7 @@ public class TestVectorGroupByOperator {
new FakeVectorRowBatchFromConcat(
new FakeVectorRowBatchFromRepeats(
new Long[] {19L}, 10, 2),
- new FakeVectorRowBatchFromIterables(
+ new FakeVectorRowBatchFromLongIterables(
3,
Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
14L);
@@ -561,7 +586,7 @@ public class TestVectorGroupByOperator {
new FakeVectorRowBatchFromConcat(
new FakeVectorRowBatchFromRepeats(
new Long[] {19L}, 10, 2),
- new FakeVectorRowBatchFromIterables(
+ new FakeVectorRowBatchFromLongIterables(
3,
Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
19L*10L + 13L + 7L + 23L +29L);
@@ -692,7 +717,7 @@ public class TestVectorGroupByOperator {
new FakeVectorRowBatchFromConcat(
new FakeVectorRowBatchFromRepeats(
new Long[] {19L}, 10, 2),
- new FakeVectorRowBatchFromIterables(
+ new FakeVectorRowBatchFromLongIterables(
3,
Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
(double) (19L*10L + 13L + 7L + 23L +29L) / (double) 14 );
@@ -873,7 +898,7 @@ public class TestVectorGroupByOperator {
new Long[] {value}, repeat, batchSize);
testAggregateLongIterable (aggregateName, fdr, expected);
}
-
+
public HashMap<Object, Object> buildHashMap(Object... pairs) {
HashMap<Object, Object> map = new HashMap<Object, Object>();
for(int i = 0; i < pairs.length; i += 2) {
@@ -882,19 +907,35 @@ public class TestVectorGroupByOperator {
return map;
}
+  /**
+   * Builds a two-column ("string" key, "long" value) batch source from the given
+   * iterables and runs the string-key aggregation check against it.
+   *
+   * @param aggregateName name of the aggregate under test
+   * @param batchSize number of rows per generated batch
+   * @param list values of the key column
+   * @param values values of the aggregated column
+   * @param expected expected aggregate result per key
+   * @throws HiveException propagated from the batch source or the operator
+   */
+  public void testAggregateStringKeyAggregate (
+      String aggregateName,
+      int batchSize,
+      Iterable<Object> list,
+      Iterable<Object> values,
+      HashMap<Object, Object> expected) throws HiveException {
+
+    String[] columnTypes = {"string", "long"};
+    @SuppressWarnings("unchecked")
+    FakeVectorRowBatchFromObjectIterables batchSource =
+        new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, list, values);
+    testAggregateStringKeyIterable(aggregateName, batchSource, expected);
+  }
+
public void testAggregateLongKeyAggregate (
String aggregateName,
int batchSize,
- Iterable<Long> keys,
+ List<Long> list,
Iterable<Long> values,
HashMap<Object, Object> expected) throws HiveException {
@SuppressWarnings("unchecked")
- FakeVectorRowBatchFromIterables fdr = new FakeVectorRowBatchFromIterables(batchSize, keys, values);
+ FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, list, values);
testAggregateLongKeyIterable (aggregateName, fdr, expected);
}
-
+
public void testAggregateLongAggregate (
String aggregateName,
int batchSize,
@@ -902,7 +943,7 @@ public class TestVectorGroupByOperator {
Object expected) throws HiveException {
@SuppressWarnings("unchecked")
- FakeVectorRowBatchFromIterables fdr = new FakeVectorRowBatchFromIterables(batchSize, values);
+ FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, values);
testAggregateLongIterable (aggregateName, fdr, expected);
}
@@ -1085,19 +1126,20 @@ public class TestVectorGroupByOperator {
VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
Set<Object> keys = new HashSet<Object>();
- GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value", "Key");
+ GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
+ TypeInfoFactory.longTypeInfo, "Key");
VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc);
FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo);
vgo.initialize(null, null);
out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {
-
+
private int rowIndex;
private String aggregateName;
private HashMap<Object,Object> expected;
private Set<Object> keys;
-
+
@Override
public void inspectRow(Object row, int tag) throws HiveException {
assertTrue(row instanceof Object[]);
@@ -1117,7 +1159,72 @@ public class TestVectorGroupByOperator {
validator.validate(expectedValue, new Object[] {value});
keys.add(keyValue);
}
-
+
+ private FakeCaptureOutputOperator.OutputInspector init(
+ String aggregateName, HashMap<Object,Object> expected, Set<Object> keys) {
+ this.aggregateName = aggregateName;
+ this.expected = expected;
+ this.keys = keys;
+ return this;
+ }
+ }.init(aggregateName, expected, keys));
+
+ for (VectorizedRowBatch unit: data) {
+ vgo.process(unit, 0);
+ }
+ vgo.close(false);
+
+ List<Object> outBatchList = out.getCapturedRows();
+ assertNotNull(outBatchList);
+ assertEquals(expected.size(), outBatchList.size());
+ assertEquals(expected.size(), keys.size());
+ }
+
+ public void testAggregateStringKeyIterable (
+ String aggregateName,
+ Iterable<VectorizedRowBatch> data,
+ HashMap<Object,Object> expected) throws HiveException {
+ Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
+ mapColumnNames.put("Key", 0);
+ mapColumnNames.put("Value", 1);
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+ Set<Object> keys = new HashSet<Object>();
+
+ GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
+ TypeInfoFactory.stringTypeInfo, "Key");
+
+ VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc);
+
+ FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo);
+ vgo.initialize(null, null);
+ out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {
+
+ private int rowIndex;
+ private String aggregateName;
+ private HashMap<Object,Object> expected;
+ private Set<Object> keys;
+
+      /**
+       * Validates one captured output row, expected to be a two-element
+       * {@code Object[]} of {key, aggregate value}. A null key is looked up in
+       * the expected map under {@code null}; any other key must be a
+       * {@code BytesWritable} and is decoded to a String before the lookup.
+       *
+       * @param row the captured row
+       * @param tag not used by this inspector
+       * @throws HiveException declared by the inspector interface
+       */
+      @Override
+      public void inspectRow(Object row, int tag) throws HiveException {
+        assertTrue(row instanceof Object[]);
+        Object[] fields = (Object[]) row;
+        assertEquals(2, fields.length);
+        Object key = fields[0];
+        String keyValue = null;
+        if (null != key) {
+          assertTrue(key instanceof BytesWritable);
+          BytesWritable bwKey = (BytesWritable) key;
+          // Decode only the valid prefix: the backing array of a BytesWritable
+          // may be longer than getLength(), and the deprecated get() exposes
+          // the whole array.
+          keyValue = new String(bwKey.getBytes(), 0, bwKey.getLength());
+        }
+        assertTrue(expected.containsKey(keyValue));
+        Object expectedValue = expected.get(keyValue);
+        Object value = fields[1];
+        Validator validator = getValidator(aggregateName);
+        validator.validate(expectedValue, new Object[] {value});
+        keys.add(keyValue);
+      }
+
private FakeCaptureOutputOperator.OutputInspector init(
String aggregateName, HashMap<Object,Object> expected, Set<Object> keys) {
this.aggregateName = aggregateName;
@@ -1131,11 +1238,13 @@ public class TestVectorGroupByOperator {
vgo.process(unit, 0);
}
vgo.close(false);
-
+
List<Object> outBatchList = out.getCapturedRows();
assertNotNull(outBatchList);
assertEquals(expected.size(), outBatchList.size());
assertEquals(expected.size(), keys.size());
}
+
+
}
Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java?rev=1487887&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java Thu May 30 15:11:06 2013
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.util;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Test-only source of VectorizedRowBatch objects in which every column is a
+ * long column fed from its own iterable of (possibly null) Long values.
+ */
+public class FakeVectorRowBatchFromLongIterables extends FakeVectorRowBatchBase {
+  private VectorizedRowBatch batch;
+  private final int numCols;
+  private final int batchSize;
+  private List<Iterator<Long>> iterators;
+  private boolean eof;
+
+  /**
+   * @param batchSize maximum number of rows placed in one batch
+   * @param iterables one iterable per column, in column order
+   */
+  public FakeVectorRowBatchFromLongIterables(int batchSize, Iterable<Long>...iterables) {
+    this.numCols = iterables.length;
+    this.batchSize = batchSize;
+    this.iterators = new ArrayList<Iterator<Long>>();
+    this.batch = new VectorizedRowBatch(numCols, batchSize);
+    int colIndex = 0;
+    for (Iterable<Long> source : iterables) {
+      batch.cols[colIndex++] = new LongColumnVector(batchSize);
+      iterators.add(source.iterator());
+    }
+  }
+
+  /**
+   * Resets the held batch and fills it with rows until it is full or one of
+   * the column iterators has no more values.
+   *
+   * @return the batch held by this source
+   */
+  @Override
+  public VectorizedRowBatch produceNextBatch() {
+    batch.size = 0;
+    batch.selectedInUse = false;
+    for (int c = 0; c < numCols; c++) {
+      ColumnVector column = batch.cols[c];
+      column.noNulls = true;
+      column.isRepeating = false;
+    }
+    while (!eof && batch.size < batchSize) {
+      if (readRow(batch.size)) {
+        batch.size++;
+      }
+    }
+    return batch;
+  }
+
+  /**
+   * Pulls one value per column into the given row of the batch.
+   *
+   * @return false (after setting eof) when some column had no value left
+   */
+  private boolean readRow(int row) {
+    for (int c = 0; c < numCols; c++) {
+      Iterator<Long> source = iterators.get(c);
+      if (!source.hasNext()) {
+        eof = true;
+        return false;
+      }
+      LongColumnVector column = (LongColumnVector) batch.cols[c];
+      Long value = source.next();
+      boolean missing = (value == null);
+      if (missing) {
+        column.noNulls = false;
+      } else {
+        column.vector[row] = value;
+      }
+      column.isNull[row] = missing;
+    }
+    return true;
+  }
+}
+