You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/30 17:11:06 UTC

svn commit: r1487887 - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/exec/vector/ test/org/apache/hadoop/hive/ql/exec/vector/ test/org/apache/hadoop/hive/ql/exec/vector/util/

Author: omalley
Date: Thu May 30 15:11:06 2013
New Revision: 1487887

URL: http://svn.apache.org/r1487887
Log:
HIVE-4596 Support strings in GROUP BY keys (Remus Rusanu via omalley)

Added:
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java
Removed:
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromIterables.java
Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java Thu May 30 15:11:06 2013
@@ -20,29 +20,38 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.util.Arrays;
 
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
 /**
  * A hash map key wrapper for vectorized processing.
  * It stores the key values as primitives in arrays for each supported primitive type.
- * This works in conjunction with 
+ * This works in conjunction with
  * {@link org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch}
- * to hash vectorized processing units (batches). 
+ * to hash vectorized processing units (batches).
  */
 public class VectorHashKeyWrapper extends KeyWrapper {
-  
+
   private long[] longValues;
   private double[] doubleValues;
+
+  private byte[][] byteValues;
+  private int[] byteStarts;
+  private int[] byteLengths;
+
   private boolean[] isNull;
   private int hashcode;
-  
-  public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount) {
+
+  public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount) {
     longValues = new long[longValuesCount];
     doubleValues = new double[doubleValuesCount];
-    isNull = new boolean[longValuesCount + doubleValuesCount];
+    byteValues = new byte[byteValuesCount][];
+    byteStarts = new int[byteValuesCount];
+    byteLengths = new int[byteValuesCount];
+    isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount];
   }
-  
+
   private VectorHashKeyWrapper() {
   }
 
@@ -56,32 +65,90 @@ public class VectorHashKeyWrapper extend
     hashcode = Arrays.hashCode(longValues) ^
         Arrays.hashCode(doubleValues) ^
         Arrays.hashCode(isNull);
+
+    // This code, with branches and all, is not executed if there are no string keys
+    for (int i = 0; i < byteValues.length; ++i) {
+      /*
+       *  Hashing a string is potentially expensive, so it is better to branch on null.
+       *  Skipping null entries also means stale values never need to be reset.
+       */
+      if (!isNull[longValues.length + doubleValues.length + i]) {
+        byte[] bytes = byteValues[i];
+        int start = byteStarts[i];
+        int length = byteLengths[i];
+        /*
+         *  Arrays.hashCode has no (start, length) overload, so hash the slice by hand.
+         *  One code path for every layout keeps hashCode consistent with equals:
+         *  equal strings hash alike whether they fill their array or are a slice of it.
+         */
+        int stringHash = 1;
+        for(int j = start; j < start + length; ++j) {
+          stringHash = 31 * stringHash + bytes[j];
+        }
+        hashcode ^= stringHash;
+      }
+    }
   }
-  
+
   @Override
   public int hashCode() {
     return hashcode;
   }
-  
-  @Override 
+
+  @Override
   public boolean equals(Object that) {
     if (that instanceof VectorHashKeyWrapper) {
       VectorHashKeyWrapper keyThat = (VectorHashKeyWrapper)that;
       return hashcode == keyThat.hashcode &&
           Arrays.equals(longValues, keyThat.longValues) &&
           Arrays.equals(doubleValues, keyThat.doubleValues) &&
-          Arrays.equals(isNull, keyThat.isNull);
+          Arrays.equals(isNull, keyThat.isNull) &&
+          byteValues.length == keyThat.byteValues.length &&
+          (0 == byteValues.length || bytesEquals(keyThat));
     }
     return false;
   }
-  
+
+  private boolean bytesEquals(VectorHashKeyWrapper keyThat) {
+    // By the time we enter here, byteValues.length and isNull must already have been compared
+    for (int i = 0; i < byteValues.length; ++i) {
+      // the byte comparison is potentially expensive, so it is better to branch on null
+      if (!isNull[longValues.length + doubleValues.length + i]) {
+        if (0 != StringExpr.compare(
+            byteValues[i],
+            byteStarts[i],
+            byteLengths[i],
+            keyThat.byteValues[i],
+            keyThat.byteStarts[i],
+            keyThat.byteLengths[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
   @Override
   protected Object clone() {
     VectorHashKeyWrapper clone = new VectorHashKeyWrapper();
     clone.longValues = longValues.clone();
     clone.doubleValues = doubleValues.clone();
     clone.isNull = isNull.clone();
+    // Deep-copy each string slice into an exact-size array, so the copy starts at offset 0.
+    clone.byteValues = new byte[byteValues.length][];
+    clone.byteStarts = new int[byteValues.length];
+    clone.byteLengths = byteLengths.clone();
+    for (int i = 0; i < byteValues.length; ++i) {
+      // String flags follow the long and double flags in isNull; skip copying null strings.
+      if (!isNull[longValues.length + doubleValues.length + i]) {
+        clone.byteValues[i] = Arrays.copyOfRange(
+            byteValues[i],
+            byteStarts[i],
+            byteStarts[i] + byteLengths[i]);
+      }
+    }
     clone.hashcode = hashcode;
+    assert clone.equals(this);
     return clone;
   }
 
@@ -121,19 +188,32 @@ public class VectorHashKeyWrapper extend
     longValues[index] = 0; // assign 0 to simplify hashcode
     isNull[index] = true;
   }
-  
+
+  public void assignString(int index, byte[] bytes, int start, int length) {
+    byteValues[index] = bytes;
+    byteStarts[index] = start;
+    byteLengths[index] = length;
+    isNull[longValues.length + doubleValues.length + index] = false;
+  }
+
+  public void assignNullString(int index) {
+    // We do not assign the value to [] because the value is never used on null
+    isNull[longValues.length + doubleValues.length + index] = true;
+  }
+
   @Override
-  public String toString() 
+  public String toString()
   {
-    return String.format("%d[%s] %d[%s]", 
+    return String.format("%d[%s] %d[%s] %d[%s]",
         longValues.length, Arrays.toString(longValues),
-        doubleValues.length, Arrays.toString(doubleValues));
+        doubleValues.length, Arrays.toString(doubleValues),
+        byteValues.length, Arrays.toString(byteValues));
   }
 
   public boolean getIsNull(int i) {
     return isNull[i];
   }
-  
+
   public long getLongValue(int i) {
     return longValues[i];
   }
@@ -142,4 +222,18 @@ public class VectorHashKeyWrapper extend
     return doubleValues[i - longValues.length];
   }
 
+  public byte[] getBytes(int i) {
+    return byteValues[i - longValues.length - doubleValues.length];
+  }
+
+  public int getByteStart(int i) {
+    return byteStarts[i - longValues.length - doubleValues.length];
+  }
+
+  public int getByteLength(int i) {
+    return byteLengths[i - longValues.length - doubleValues.length];
+  }
+
+
 }
+

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java Thu May 30 15:11:06 2013
@@ -19,22 +19,25 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 
 /**
- * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a 
+ * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
  * row batch in a vectorized fashion.
  * This class stores additional information about keys needed to evaluate and output the key values.
  *
  */
 public class VectorHashKeyWrapperBatch {
-  
+
   /**
    * Helper class for looking up a key value based on key index
    *
@@ -42,53 +45,61 @@ public class VectorHashKeyWrapperBatch {
   private static class KeyLookupHelper {
     public int longIndex;
     public int doubleIndex;
+    public int stringIndex;
   }
-  
+
   /**
    * The key expressions that require evaluation and output the primitive values for each key.
    */
   private VectorExpression[] keyExpressions;
-  
+
   /**
    * indices of LONG primitive keys
    */
   private int[] longIndices;
-  
+
   /**
    * indices of DOUBLE primitive keys
    */
   private int[] doubleIndices;
-  
+
+  /**
+   * indices of string (byte[]) primitive keys
+   */
+  private int[] stringIndices;
+
   /**
-   * pre-allocated batch size vector of keys wrappers. 
+   * pre-allocated batch size vector of keys wrappers.
    * N.B. these keys are **mutable** and should never be used in a HashMap.
-   * Always clone the key wrapper to obtain an immutable keywrapper suitable 
+   * Always clone the key wrapper to obtain an immutable keywrapper suitable
    * to use a key in a HashMap.
    */
   private VectorHashKeyWrapper[] vectorHashKeyWrappers;
-  
+
   /**
    * lookup vector to map from key index to primitive type index
    */
   private KeyLookupHelper[] indexLookup;
-  
+
   /**
-   * preallocated and reused LongWritable objects for emiting row mode key values 
+   * preallocated and reused LongWritable objects for emiting row mode key values
    */
   private LongWritable[] longKeyValueOutput;
-  
+
   /**
    * preallocated and reused DoubleWritable objects for emiting row mode key values
    */
   private DoubleWritable[] doubleKeyValueOutput;
-  
+
+  private BytesWritable[] stringKeyValueOutput;
+
   /**
-   * Accessor for the batch-sized array of key wrappers 
+   * Accessor for the batch-sized array of key wrappers
    */
   public VectorHashKeyWrapper[] getVectorHashKeyWrappers() {
     return vectorHashKeyWrappers;
   }
-  
+
   /**
    * Processes a batch:
    * <ul>
@@ -96,71 +107,191 @@ public class VectorHashKeyWrapperBatch {
    * <li>Copies out each key's primitive values into the key wrappers</li>
    * <li>computes the hashcode of the key wrappers</li>
    * </ul>
-   * @param vrb
+   * @param batch
    * @throws HiveException
    */
-  public void evaluateBatch (VectorizedRowBatch vrb) throws HiveException {
+  public void evaluateBatch (VectorizedRowBatch batch) throws HiveException {
     for(int i = 0; i < keyExpressions.length; ++i) {
-      keyExpressions[i].evaluate(vrb);
+      keyExpressions[i].evaluate(batch);
     }
     for(int i = 0; i< longIndices.length; ++i) {
       int keyIndex = longIndices[i];
       int columnIndex = keyExpressions[keyIndex].getOutputColumn();
-      LongColumnVector columnVector = (LongColumnVector) vrb.cols[columnIndex];
-      if (columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
-        assignLongNoNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
-      } else if (columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
-        assignLongNoNullsNoRepeatingSelection(i, vrb.size, columnVector, vrb.selected);
+      LongColumnVector columnVector = (LongColumnVector) batch.cols[columnIndex];
+      if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+      } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
       } else if (columnVector.noNulls && columnVector.isRepeating) {
-        assignLongNoNullsRepeating(i, vrb.size, columnVector);
-      } else if (!columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
-        assignLongNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
+        assignLongNoNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignLongNullsNoRepeatingNoSelection(i, batch.size, columnVector);
       } else if (!columnVector.noNulls && columnVector.isRepeating) {
-        assignLongNullsRepeating(i, vrb.size, columnVector);
-      } else if (!columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
-        assignLongNullsNoRepeatingSelection (i, vrb.size, columnVector, vrb.selected);
+        assignLongNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignLongNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
       } else {
         throw new HiveException (String.format("Unimplemented Long null/repeat/selected combination %b/%b/%b",
-            columnVector.noNulls, columnVector.isRepeating, vrb.selectedInUse));
+            columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
       }
     }
     for(int i=0;i<doubleIndices.length; ++i) {
       int keyIndex = doubleIndices[i];
       int columnIndex = keyExpressions[keyIndex].getOutputColumn();
-      DoubleColumnVector columnVector = (DoubleColumnVector) vrb.cols[columnIndex];
-      if (columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
-        assignDoubleNoNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
-      } else if (columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
-        assignDoubleNoNullsNoRepeatingSelection(i, vrb.size, columnVector, vrb.selected);
+      DoubleColumnVector columnVector = (DoubleColumnVector) batch.cols[columnIndex];
+      if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignDoubleNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+      } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignDoubleNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
       } else if (columnVector.noNulls && columnVector.isRepeating) {
-        assignDoubleNoNullsRepeating(i, vrb.size, columnVector);
-      } else if (!columnVector.noNulls && !columnVector.isRepeating && !vrb.selectedInUse) {
-        assignDoubleNullsNoRepeatingNoSelection(i, vrb.size, columnVector);
+        assignDoubleNoNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignDoubleNullsNoRepeatingNoSelection(i, batch.size, columnVector);
       } else if (!columnVector.noNulls && columnVector.isRepeating) {
-        assignDoubleNullsRepeating(i, vrb.size, columnVector);
-      } else if (!columnVector.noNulls && !columnVector.isRepeating && vrb.selectedInUse) {
-        assignDoubleNullsNoRepeatingSelection (i, vrb.size, columnVector, vrb.selected);
+        assignDoubleNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignDoubleNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
       } else {
         throw new HiveException (String.format("Unimplemented Double null/repeat/selected combination %b/%b/%b",
-            columnVector.noNulls, columnVector.isRepeating, vrb.selectedInUse));
+            columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
+      }
+    }
+    for(int i=0;i<stringIndices.length; ++i) {
+      int keyIndex = stringIndices[i];
+      int columnIndex = keyExpressions[keyIndex].getOutputColumn();
+      BytesColumnVector columnVector = (BytesColumnVector) batch.cols[columnIndex];
+      if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignStringNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+      } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignStringNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
+      } else if (columnVector.noNulls && columnVector.isRepeating) {
+        assignStringNoNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignStringNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && columnVector.isRepeating) {
+        assignStringNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignStringNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
+      } else {
+        throw new HiveException (String.format("Unimplemented String null/repeat/selected combination %b/%b/%b",
+            columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
       }
     }
-    for(int i=0;i<vrb.size;++i) {
+    for(int i=0;i<batch.size;++i) {
       vectorHashKeyWrappers[i].setHashKey();
     }
   }
-  
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, possible nulls, no repeat values, batch selection vector.
+   */
+  private void assignStringNullsNoRepeatingSelection(int index, int size,
+      BytesColumnVector columnVector, int[] selected) {
+    for(int i=0; i<size; ++i) {
+      int row = selected[i];
+      if (columnVector.isNull[row]) {
+        vectorHashKeyWrappers[i].assignNullString(index);
+      } else {
+        vectorHashKeyWrappers[i].assignString(index,
+            columnVector.vector[row],
+            columnVector.start[row],
+            columnVector.length[row]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, possible nulls, repeat values.
+   */
+  private void assignStringNullsRepeating(int index, int size, BytesColumnVector columnVector) {
+    if (columnVector.isNull[0]) {
+      for(int i = 0; i < size; ++i) {
+        vectorHashKeyWrappers[i].assignNullString(index);
+      }
+    } else {
+      for(int i = 0; i < size; ++i) {
+        vectorHashKeyWrappers[i].assignString(index,
+            columnVector.vector[0],
+            columnVector.start[0],
+            columnVector.length[0]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, possible nulls, no repeat values, no selection vector.
+   */
+  private void assignStringNullsNoRepeatingNoSelection(int index, int size,
+      BytesColumnVector columnVector) {
+    for(int i=0; i<size; ++i) {
+      if (columnVector.isNull[i]) {
+        vectorHashKeyWrappers[i].assignNullString(index);
+      } else {
+        vectorHashKeyWrappers[i].assignString(index,
+            columnVector.vector[i],
+            columnVector.start[i],
+            columnVector.length[i]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, no nulls, repeat values, no selection vector.
+   */
+  private void assignStringNoNullsRepeating(int index, int size, BytesColumnVector columnVector) {
+    for(int i = 0; i < size; ++i) {
+      vectorHashKeyWrappers[i].assignString(index,
+          columnVector.vector[0],
+          columnVector.start[0],
+          columnVector.length[0]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, no nulls, no repeat values, batch selection vector.
+   */
+  private void assignStringNoNullsNoRepeatingSelection(int index, int size,
+      BytesColumnVector columnVector, int[] selected) {
+    for(int i=0; i<size; ++i) {
+      int row = selected[i];
+      vectorHashKeyWrappers[i].assignString(index,
+          columnVector.vector[row],
+          columnVector.start[row],
+          columnVector.length[row]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, no nulls, no repeat values, no selection vector.
+   */
+  private void assignStringNoNullsNoRepeatingNoSelection(int index, int size,
+      BytesColumnVector columnVector) {
+    for(int i=0; i<size; ++i) {
+      vectorHashKeyWrappers[i].assignString(index,
+          columnVector.vector[i],
+          columnVector.start[i],
+          columnVector.length[i]);
+    }
+  }
+
   /**
    * Helper method to assign values from a vector column into the key wrapper.
    * Optimized for double type, possible nulls, no repeat values, batch selection vector.
    */
   private void assignDoubleNullsNoRepeatingSelection(int index, int size,
       DoubleColumnVector columnVector, int[] selected) {
-    for(int r = 0; r < size; ++r) {
-      if (!columnVector.isNull[r]) {
-        vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[selected[r]]);
+    for(int i = 0; i < size; ++i) {
+      int row = selected[i];
+      if (!columnVector.isNull[row]) {
+        vectorHashKeyWrappers[i].assignDouble(index, columnVector.vector[row]);
       } else {
-        vectorHashKeyWrappers[r].assignNullDouble(index);
+        vectorHashKeyWrappers[i].assignNullDouble(index);
       }
     }
   }
@@ -173,7 +304,7 @@ public class VectorHashKeyWrapperBatch {
       DoubleColumnVector columnVector) {
     for(int r = 0; r < size; ++r) {
       vectorHashKeyWrappers[r].assignNullDouble(index);
-    }    
+    }
   }
 
   /**
@@ -188,7 +319,7 @@ public class VectorHashKeyWrapperBatch {
       } else {
         vectorHashKeyWrappers[r].assignNullDouble(index);
       }
-    }    
+    }
   }
 
   /**
@@ -222,18 +353,19 @@ public class VectorHashKeyWrapperBatch {
       vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[r]);
     }
   }
-  
+
   /**
    * Helper method to assign values from a vector column into the key wrapper.
    * Optimized for double type, possible nulls, no repeat values, batch selection vector.
    */
   private void assignLongNullsNoRepeatingSelection(int index, int size,
       LongColumnVector columnVector, int[] selected) {
-    for(int r = 0; r < size; ++r) {
-      if (!columnVector.isNull[selected[r]]) {
-        vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[selected[r]]);
+    for(int i = 0; i < size; ++i) {
+      int row = selected[i];
+      if (!columnVector.isNull[row]) {
+        vectorHashKeyWrappers[i].assignLong(index, columnVector.vector[row]);
       } else {
-        vectorHashKeyWrappers[r].assignNullLong(index);
+        vectorHashKeyWrappers[i].assignNullLong(index);
       }
     }
   }
@@ -261,7 +393,7 @@ public class VectorHashKeyWrapperBatch {
       } else {
         vectorHashKeyWrappers[r].assignNullLong(index);
       }
-    }    
+    }
   }
 
   /**
@@ -306,30 +438,41 @@ public class VectorHashKeyWrapperBatch {
     throws HiveException {
     VectorHashKeyWrapperBatch compiledKeyWrapperBatch = new VectorHashKeyWrapperBatch();
     compiledKeyWrapperBatch.keyExpressions = keyExpressions;
-    
+
     // We'll overallocate and then shrink the array for each type
     int[] longIndices = new int[keyExpressions.length];
     int longIndicesIndex = 0;
     int[] doubleIndices = new int[keyExpressions.length];
     int doubleIndicesIndex  = 0;
+    int[] stringIndices = new int[keyExpressions.length];
+    int stringIndicesIndex = 0;
     KeyLookupHelper[] indexLookup = new KeyLookupHelper[keyExpressions.length];
-    
+
     // Inspect the output type of each key expression.
     for(int i=0; i < keyExpressions.length; ++i) {
       indexLookup[i] = new KeyLookupHelper();
       String outputType = keyExpressions[i].getOutputType();
-      if (outputType.equalsIgnoreCase("long") || 
-          outputType.equalsIgnoreCase("bigint")) {
+      if (outputType.equalsIgnoreCase("long") ||
+          outputType.equalsIgnoreCase("bigint") ||
+          outputType.equalsIgnoreCase("int")) {
         longIndices[longIndicesIndex] = i;
         indexLookup[i].longIndex = longIndicesIndex;
         indexLookup[i].doubleIndex = -1;
+        indexLookup[i].stringIndex = -1;
         ++longIndicesIndex;
       } else if (outputType.equalsIgnoreCase("double")) {
         doubleIndices[doubleIndicesIndex] = i;
         indexLookup[i].longIndex = -1;
         indexLookup[i].doubleIndex = doubleIndicesIndex;
+        indexLookup[i].stringIndex = -1;
         ++doubleIndicesIndex;
-      } else {
+      } else if (outputType.equalsIgnoreCase("string")) {
+        stringIndices[stringIndicesIndex] = i; // key position held by this string slot
+        indexLookup[i].longIndex = -1;
+        indexLookup[i].doubleIndex = -1;
+        indexLookup[i].stringIndex = stringIndicesIndex; // lets key lookup find the slot
+        ++stringIndicesIndex;
+      } else {
         throw new HiveException("Unsuported vector output type: " + outputType);
       }
     }
@@ -342,13 +485,18 @@ public class VectorHashKeyWrapperBatch {
     for (int i=0; i < doubleIndicesIndex; ++i) {
       compiledKeyWrapperBatch.doubleKeyValueOutput[i] = new DoubleWritable();
     }
+    compiledKeyWrapperBatch.stringKeyValueOutput = new BytesWritable[stringIndicesIndex];
+    for (int i = 0; i < stringIndicesIndex; ++i) {
+      compiledKeyWrapperBatch.stringKeyValueOutput[i] = new BytesWritable();
+    }
     compiledKeyWrapperBatch.longIndices = Arrays.copyOf(longIndices, longIndicesIndex);
     compiledKeyWrapperBatch.doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex);
-    compiledKeyWrapperBatch.vectorHashKeyWrappers = 
+    compiledKeyWrapperBatch.stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex);
+    compiledKeyWrapperBatch.vectorHashKeyWrappers =
         new VectorHashKeyWrapper[VectorizedRowBatch.DEFAULT_SIZE];
     for(int i=0;i<VectorizedRowBatch.DEFAULT_SIZE; ++i) {
-      compiledKeyWrapperBatch.vectorHashKeyWrappers[i] = 
-          new VectorHashKeyWrapper(longIndicesIndex, doubleIndicesIndex);
+      compiledKeyWrapperBatch.vectorHashKeyWrappers[i] =
+          new VectorHashKeyWrapper(longIndicesIndex, doubleIndicesIndex, stringIndicesIndex);
     }
     return compiledKeyWrapperBatch;
   }
@@ -356,7 +504,7 @@ public class VectorHashKeyWrapperBatch {
   /**
    * Get the row-mode writable object value of a key from a key wrapper
    */
-  public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i) 
+  public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i)
     throws HiveException {
     if (kw.getIsNull(i)) {
       return null;
@@ -368,11 +516,15 @@ public class VectorHashKeyWrapperBatch {
     } else if (klh.doubleIndex >= 0) {
       doubleKeyValueOutput[klh.doubleIndex].set(kw.getDoubleValue(i));
       return doubleKeyValueOutput[klh.doubleIndex];
+    } else if (klh.stringIndex >= 0) {
+      stringKeyValueOutput[klh.stringIndex].set(
+          kw.getBytes(i), kw.getByteStart(i), kw.getByteLength(i));
+      return stringKeyValueOutput[klh.stringIndex];
     } else {
       throw new HiveException(String.format(
-          "Internal inconsistent KeyLookupHelper at index [%d]:%d %d",
-          i, klh.longIndex, klh.doubleIndex));
+          "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d",
+          i, klh.longIndex, klh.doubleIndex, klh.stringIndex));
     }
-  }  
+  }
 }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Thu May 30 15:11:06 2013
@@ -1063,14 +1063,17 @@ public class VectorizationContext {
     }
   }
 
-  public ObjectInspector createObjectInspector(VectorExpression vectorExpression) 
+  public ObjectInspector createObjectInspector(VectorExpression vectorExpression)
       throws HiveException {
     String columnType = vectorExpression.getOutputType();
     if (columnType.equalsIgnoreCase("long") ||
-        columnType.equalsIgnoreCase("bigint")) {
+        columnType.equalsIgnoreCase("bigint") ||
+        columnType.equalsIgnoreCase("int")) {
       return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
     } else if (columnType.equalsIgnoreCase("double")) {
       return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+    } else if (columnType.equalsIgnoreCase("string")) {
+      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
     } else {
       throw new HiveException(String.format("Must implement type %s", columnType));
     }

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java?rev=1487887&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/FakeVectorRowBatchFromObjectIterables.java Thu May 30 15:11:06 2013
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchBase;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Test helper class that creates vectorized execution batches from arbitrary type iterables.
+ * Supported column types are "long", "string" and "double" (matched case-insensitively).
+ */
+public class FakeVectorRowBatchFromObjectIterables extends FakeVectorRowBatchBase {
+
+  private final String[] types;
+  private final List<Iterator<Object>> iterators;
+  private final VectorizedRowBatch batch;
+  private boolean eof;
+  private final int batchSize;
+
+  /** Helper interface for assigning values to primitive vector column types. */
+  private static interface ColumnVectorAssign {
+    public void assign(ColumnVector columnVector, int row, Object value);
+  }
+
+  private final ColumnVectorAssign[] columnAssign;
+
+  /**
+   * Creates a batch source that reads one value per column for each row.
+   * @param batchSize maximum number of rows placed in each produced batch
+   * @param types column type names, one per column
+   * @param iterables value sources, one per entry in types; a null element becomes a null
+   *        row value, and production stops as soon as any iterable runs out
+   * @throws HiveException if a type is unsupported or too few iterables are supplied
+   */
+  public FakeVectorRowBatchFromObjectIterables(int batchSize, String[] types,
+      Iterable<Object> ...iterables) throws HiveException {
+    if (iterables.length < types.length) {
+      throw new HiveException("Expected " + types.length + " iterables, got " + iterables.length);
+    }
+    this.types = types;
+    this.batchSize = batchSize;
+    iterators = new ArrayList<Iterator<Object>>(types.length);
+    columnAssign = new ColumnVectorAssign[types.length];
+
+    batch = new VectorizedRowBatch(types.length, batchSize);
+    for(int i=0; i< types.length; ++i) {
+      if (types[i].equalsIgnoreCase("long")) {
+        batch.cols[i] = new LongColumnVector(batchSize);
+        columnAssign[i] = new ColumnVectorAssign() {
+          @Override
+          public void assign(ColumnVector columnVector, int row, Object value) {
+            ((LongColumnVector) columnVector).vector[row] = (Long) value;
+          }
+        };
+      } else if (types[i].equalsIgnoreCase("string")) {
+        batch.cols[i] = new BytesColumnVector(batchSize);
+        columnAssign[i] = new ColumnVectorAssign() {
+          @Override
+          public void assign(ColumnVector columnVector, int row, Object value) {
+            BytesColumnVector bcv = (BytesColumnVector) columnVector;
+            // Encode explicitly as UTF-8 instead of relying on the platform default charset.
+            byte[] bytes = ((String) value).getBytes(java.nio.charset.Charset.forName("UTF-8"));
+            bcv.vector[row] = bytes;
+            bcv.start[row] = 0;
+            bcv.length[row] = bytes.length;
+          }
+        };
+      } else if (types[i].equalsIgnoreCase("double")) {
+        batch.cols[i] = new DoubleColumnVector(batchSize);
+        columnAssign[i] = new ColumnVectorAssign() {
+          @Override
+          public void assign(ColumnVector columnVector, int row, Object value) {
+            ((DoubleColumnVector) columnVector).vector[row] = (Double) value;
+          }
+        };
+      } else {
+        throw new HiveException("Unimplemented type " + types[i]);
+      }
+      iterators.add(iterables[i].iterator());
+    }
+  }
+
+  /**
+   * Refills the shared batch with up to batchSize rows and returns it. Once any input
+   * iterable is exhausted, later calls return the batch with size 0.
+   */
+  @Override
+  public VectorizedRowBatch produceNextBatch() {
+    batch.size = 0;
+    batch.selectedInUse = false;
+    for (int i=0; i < types.length; ++i) {
+      ColumnVector col = batch.cols[i];
+      col.noNulls = true;
+      col.isRepeating = false;
+    }
+    while (!eof && batch.size < this.batchSize){
+      for (int i=0; i < types.length; ++i) {
+        Iterator<Object> it = iterators.get(i);
+        if (!it.hasNext()) {
+          eof = true;
+          break;
+        }
+        Object value = it.next();
+        ColumnVector col = batch.cols[i];
+        // The batch object is reused, so overwrite any flag left by an earlier batch.
+        col.isNull[batch.size] = (null == value);
+        if (null == value) {
+          col.noNulls = false;
+        } else {
+          columnAssign[i].assign(col, batch.size, value);
+        }
+      }
+      if (!eof) {
+        batch.size += 1;
+      }
+    }
+    return batch;
+  }
+}
+

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java?rev=1487887&r1=1487886&r2=1487887&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java Thu May 30 15:11:06 2013
@@ -35,7 +35,7 @@ import java.util.Set;
 
 import org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator;
 import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromConcat;
-import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromIterables;
+import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromLongIterables;
 import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromRepeats;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
@@ -43,7 +43,9 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.junit.Test;
 
@@ -54,9 +56,11 @@ public class TestVectorGroupByOperator {
 
   private static ExprNodeDesc buildColumnDesc(
       VectorizationContext ctx,
-      String column) {
+      String column,
+      TypeInfo typeInfo) {
+
     return new ExprNodeColumnDesc(
-        TypeInfoFactory.longTypeInfo, column, "table", false);
+        typeInfo, column, "table", false);
   }
 
   private static AggregationDesc buildAggregationDesc(
@@ -64,7 +68,7 @@ public class TestVectorGroupByOperator {
       String aggregate,
       String column) {
 
-    ExprNodeDesc inputColumn = buildColumnDesc(ctx, column);
+    ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, TypeInfoFactory.longTypeInfo);
 
     ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
     params.add(inputColumn);
@@ -99,19 +103,40 @@ public class TestVectorGroupByOperator {
       VectorizationContext ctx,
       String aggregate,
       String column,
+      TypeInfo typeInfo,
       String key) {
 
     GroupByDesc desc = buildGroupByDesc(ctx, aggregate, column);
-    
-    ExprNodeDesc keyExp = buildColumnDesc(ctx, key);
+
+    ExprNodeDesc keyExp = buildColumnDesc(ctx, key, typeInfo);
     ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>();
     keys.add(keyExp);
     desc.setKeys(keys);
-    
+
     return desc;
   }
 
   @Test
+  public void testMinLongNullStringKeys() throws HiveException {
+    testAggregateStringKeyAggregate(
+        "min",
+        2,
+        Arrays.asList(new Object[]{"A",null,"A",null}),
+        Arrays.asList(new Object[]{13L, 5L, 7L,19L}),
+        buildHashMap("A", 7L, null, 5L));
+  }
+
+  @Test
+  public void testMinLongStringKeys() throws HiveException {
+    testAggregateStringKeyAggregate(
+        "min",
+        2,
+        Arrays.asList(new Object[]{"A","B","A","B"}),
+        Arrays.asList(new Object[]{13L, 5L, 7L,19L}),
+        buildHashMap("A", 7L, "B", 5L));
+  }
+
+  @Test
   public void testMinLongKeyGroupByCompactBatch() throws HiveException {
     testAggregateLongKeyAggregate(
         "min",
@@ -120,7 +145,7 @@ public class TestVectorGroupByOperator {
         Arrays.asList(new Long[]{13L,5L,7L,19L}),
         buildHashMap(1L, 5L, 2L, 7L));
   }
-  
+
   @Test
   public void testMinLongKeyGroupBySingleBatch() throws HiveException {
     testAggregateLongKeyAggregate(
@@ -130,7 +155,7 @@ public class TestVectorGroupByOperator {
         Arrays.asList(new Long[]{13L,5L,7L,19L}),
         buildHashMap(1L, 5L, 2L, 7L));
   }
-  
+
   @Test
   public void testMinLongKeyGroupByCrossBatch() throws HiveException {
     testAggregateLongKeyAggregate(
@@ -170,7 +195,7 @@ public class TestVectorGroupByOperator {
         Arrays.asList(new Long[]{13L,5L,7L,19L}),
         buildHashMap(null, 13L, 2L, 19L));
   }
-  
+
   @Test
   public void testCountLongNullKeyGroupBySingleBatch() throws HiveException {
     testAggregateLongKeyAggregate(
@@ -180,7 +205,7 @@ public class TestVectorGroupByOperator {
         Arrays.asList(new Long[]{13L,5L,7L,19L}),
         buildHashMap(null, 2L, 2L, 2L));
   }
-  
+
   @Test
   public void testSumLongNullKeyGroupBySingleBatch() throws HiveException {
     testAggregateLongKeyAggregate(
@@ -190,7 +215,7 @@ public class TestVectorGroupByOperator {
         Arrays.asList(new Long[]{13L,5L,7L,19L}),
         buildHashMap(null, 20L, 2L, 24L));
   }
-  
+
   @Test
   public void testAvgLongNullKeyGroupBySingleBatch() throws HiveException {
     testAggregateLongKeyAggregate(
@@ -210,7 +235,7 @@ public class TestVectorGroupByOperator {
         Arrays.asList(new Long[]{13L, 5L,18L,19L,12L,15L}),
         buildHashMap(null, 0.0, 2L, 49.0, 01L, 6.0));
   }
-  
+
   @Test
   public void testMinNullLongNullKeyGroupBy() throws HiveException {
     testAggregateLongKeyAggregate(
@@ -230,7 +255,7 @@ public class TestVectorGroupByOperator {
         5L);
   }
 
-  
+
   @Test
   public void testMinLongSimple() throws HiveException {
     testAggregateLongAggregate(
@@ -408,7 +433,7 @@ public class TestVectorGroupByOperator {
         new FakeVectorRowBatchFromConcat(
             new FakeVectorRowBatchFromRepeats(
                 new Long[] {19L}, 10, 2),
-            new FakeVectorRowBatchFromIterables(
+            new FakeVectorRowBatchFromLongIterables(
                 3,
                 Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
          7L);
@@ -485,7 +510,7 @@ public class TestVectorGroupByOperator {
         new FakeVectorRowBatchFromConcat(
             new FakeVectorRowBatchFromRepeats(
                 new Long[] {19L}, 10, 2),
-            new FakeVectorRowBatchFromIterables(
+            new FakeVectorRowBatchFromLongIterables(
                 3,
                 Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
          14L);
@@ -561,7 +586,7 @@ public class TestVectorGroupByOperator {
         new FakeVectorRowBatchFromConcat(
             new FakeVectorRowBatchFromRepeats(
                 new Long[] {19L}, 10, 2),
-            new FakeVectorRowBatchFromIterables(
+            new FakeVectorRowBatchFromLongIterables(
                 3,
                 Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
          19L*10L + 13L + 7L + 23L +29L);
@@ -692,7 +717,7 @@ public class TestVectorGroupByOperator {
         new FakeVectorRowBatchFromConcat(
             new FakeVectorRowBatchFromRepeats(
                 new Long[] {19L}, 10, 2),
-            new FakeVectorRowBatchFromIterables(
+            new FakeVectorRowBatchFromLongIterables(
                 3,
                 Arrays.asList(new Long[]{13L, 7L, 23L, 29L}))),
          (double) (19L*10L + 13L + 7L + 23L +29L) / (double) 14 );
@@ -873,7 +898,7 @@ public class TestVectorGroupByOperator {
         new Long[] {value}, repeat, batchSize);
     testAggregateLongIterable (aggregateName, fdr, expected);
   }
-  
+
   public HashMap<Object, Object> buildHashMap(Object... pairs) {
     HashMap<Object, Object> map = new HashMap<Object, Object>();
     for(int i = 0; i < pairs.length; i += 2) {
@@ -882,19 +907,35 @@ public class TestVectorGroupByOperator {
     return map;
   }
 
+  /**
+   * Feeds a string key column ("list") and a long value column ("values") through the
+   * named aggregate in batches of batchSize rows, then checks the per-key results
+   * against "expected" via testAggregateStringKeyIterable.
+   */
+  public void testAggregateStringKeyAggregate (String aggregateName, int batchSize,
+      Iterable<Object> list, Iterable<Object> values,
+      HashMap<Object, Object> expected) throws HiveException {
+    final String[] columnTypes = {"string", "long"};
+
+    @SuppressWarnings("unchecked")
+    FakeVectorRowBatchFromObjectIterables batches =
+        new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, list, values);
+    testAggregateStringKeyIterable (aggregateName, batches, expected);
+  }
+
 
   public void testAggregateLongKeyAggregate (
       String aggregateName,
       int batchSize,
-      Iterable<Long> keys,
+      List<Long> list,
       Iterable<Long> values,
       HashMap<Object, Object> expected) throws HiveException {
 
     @SuppressWarnings("unchecked")
-    FakeVectorRowBatchFromIterables fdr = new FakeVectorRowBatchFromIterables(batchSize, keys, values);
+    FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, list, values);
     testAggregateLongKeyIterable (aggregateName, fdr, expected);
   }
-  
+
   public void testAggregateLongAggregate (
       String aggregateName,
       int batchSize,
@@ -902,7 +943,7 @@ public class TestVectorGroupByOperator {
       Object expected) throws HiveException {
 
     @SuppressWarnings("unchecked")
-    FakeVectorRowBatchFromIterables fdr = new FakeVectorRowBatchFromIterables(batchSize, values);
+    FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, values);
     testAggregateLongIterable (aggregateName, fdr, expected);
   }
 
@@ -1085,19 +1126,20 @@ public class TestVectorGroupByOperator {
     VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
     Set<Object> keys = new HashSet<Object>();
 
-    GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value", "Key");
+    GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
+        TypeInfoFactory.longTypeInfo, "Key");
 
     VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc);
 
     FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo);
     vgo.initialize(null, null);
     out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {
-      
+
       private int rowIndex;
       private String aggregateName;
       private HashMap<Object,Object> expected;
       private Set<Object> keys;
-      
+
       @Override
       public void inspectRow(Object row, int tag) throws HiveException {
         assertTrue(row instanceof Object[]);
@@ -1117,7 +1159,72 @@ public class TestVectorGroupByOperator {
         validator.validate(expectedValue, new Object[] {value});
         keys.add(keyValue);
       }
-      
+
+      private FakeCaptureOutputOperator.OutputInspector init(
+          String aggregateName, HashMap<Object,Object> expected, Set<Object> keys) {
+        this.aggregateName = aggregateName;
+        this.expected = expected;
+        this.keys = keys;
+        return this;
+      }
+    }.init(aggregateName, expected, keys));
+
+    for (VectorizedRowBatch unit: data) {
+      vgo.process(unit,  0);
+    }
+    vgo.close(false);
+
+    List<Object> outBatchList = out.getCapturedRows();
+    assertNotNull(outBatchList);
+    assertEquals(expected.size(), outBatchList.size());
+    assertEquals(expected.size(), keys.size());
+  }
+
+  public void testAggregateStringKeyIterable (
+      String aggregateName,
+      Iterable<VectorizedRowBatch> data,
+      HashMap<Object,Object> expected) throws HiveException {
+    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
+    mapColumnNames.put("Key", 0);
+    mapColumnNames.put("Value", 1);
+    VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+    Set<Object> keys = new HashSet<Object>();
+
+    GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
+        TypeInfoFactory.stringTypeInfo, "Key");
+
+    VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc);
+
+    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo);
+    vgo.initialize(null, null);
+    out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {
+
+      private int rowIndex;
+      private String aggregateName;
+      private HashMap<Object,Object> expected;
+      private Set<Object> keys;
+
+      // Verifies one output row: its key must be expected and its aggregate must validate.
+      @Override
+      public void inspectRow(Object row, int tag) throws HiveException {
+        assertTrue(row instanceof Object[]);
+        Object[] fields = (Object[]) row;
+        assertEquals(2, fields.length);
+        Object key = fields[0];
+        String keyValue = null;
+        if (null != key) {
+          assertTrue(key instanceof BytesWritable);
+          BytesWritable bwKey = (BytesWritable)key;
+          // Decode only the getLength() valid bytes; the backing array can be longer.
+          keyValue = new String(bwKey.getBytes(), 0, bwKey.getLength(),
+              java.nio.charset.Charset.forName("UTF-8"));
+        }
+        assertTrue(expected.containsKey(keyValue));
+        Validator validator = getValidator(aggregateName);
+        validator.validate(expected.get(keyValue), new Object[] {fields[1]});
+        keys.add(keyValue);
+      }
+
       private FakeCaptureOutputOperator.OutputInspector init(
           String aggregateName, HashMap<Object,Object> expected, Set<Object> keys) {
         this.aggregateName = aggregateName;
@@ -1131,11 +1238,13 @@ public class TestVectorGroupByOperator {
       vgo.process(unit,  0);
     }
     vgo.close(false);
-    
+
     List<Object> outBatchList = out.getCapturedRows();
     assertNotNull(outBatchList);
     assertEquals(expected.size(), outBatchList.size());
     assertEquals(expected.size(), keys.size());
   }
+
+
 }
 

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java?rev=1487887&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromLongIterables.java Thu May 30 15:11:06 2013
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.util;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * VectorizedRowBatch test source that fills long columns from one iterable per column.
+ * Used in unit test only.
+ */
+public class FakeVectorRowBatchFromLongIterables extends FakeVectorRowBatchBase {
+  private VectorizedRowBatch batch;
+  private final int numCols;
+  private final int batchSize;
+  private List<Iterator<Long>> iterators;
+  private boolean eof;
+
+  /** Allocates one LongColumnVector and opens one iterator per supplied iterable. */
+  public FakeVectorRowBatchFromLongIterables(int batchSize, Iterable<Long>...iterables) {
+    this.numCols = iterables.length;
+    this.batchSize = batchSize;
+    this.iterators = new ArrayList<Iterator<Long>>();
+    this.batch = new VectorizedRowBatch(numCols, batchSize);
+    int c = 0;
+    for (Iterable<Long> source : iterables) {
+      batch.cols[c++] = new LongColumnVector(batchSize);
+      iterators.add(source.iterator());
+    }
+  }
+
+  /** Refills the reused batch with up to batchSize rows; ends once any iterator is empty. */
+  @Override
+  public VectorizedRowBatch produceNextBatch() {
+    batch.size = 0;
+    batch.selectedInUse = false;
+    for (int c = 0; c < numCols; c++) {
+      batch.cols[c].noNulls = true;
+      batch.cols[c].isRepeating = false;
+    }
+    while (!eof && batch.size < batchSize) {
+      final int row = batch.size;
+      for (int c = 0; c < numCols && !eof; c++) {
+        Iterator<Long> source = iterators.get(c);
+        if (source.hasNext()) {
+          LongColumnVector column = (LongColumnVector) batch.cols[c];
+          Long boxed = source.next();
+          column.isNull[row] = (boxed == null);
+          if (boxed == null) {
+            column.noNulls = false;
+          } else {
+            column.vector[row] = boxed;
+          }
+        } else {
+          eof = true;
+        }
+      }
+      if (!eof) {
+        batch.size = row + 1;
+      }
+    }
+    return batch;
+  }
+}
+