You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/04 18:30:00 UTC
svn commit: r1489506 - in
/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec: ./
vector/
Author: hashutosh
Date: Tue Jun 4 16:29:51 2013
New Revision: 1489506
URL: http://svn.apache.org/r1489506
Log:
HIVE-4652 : VectorHashKeyWrapperBatch.java should be in vector package (instead of exec) (Remus Rusanu via Ashutosh Chauhan)
Added:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
Removed:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java?rev=1489506&r1=1489505&r2=1489506&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java Tue Jun 4 16:29:51 2013
@@ -22,9 +22,9 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
public abstract class KeyWrapper {
- abstract void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException;
- abstract void setHashKey();
- abstract KeyWrapper copyKey();
- abstract void copyKey(KeyWrapper oldWrapper);
- abstract Object[] getKeyArray();
+ public abstract void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException;
+ public abstract void setHashKey();
+ public abstract KeyWrapper copyKey();
+ public abstract void copyKey(KeyWrapper oldWrapper);
+ public abstract Object[] getKeyArray();
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java?rev=1489506&r1=1489505&r2=1489506&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java Tue Jun 4 16:29:51 2013
@@ -30,8 +30,6 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.KeyWrapper;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapper;
-import org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java?rev=1489506&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java Tue Jun 4 16:29:51 2013
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.KeyWrapper;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * A hash map key wrapper for vectorized processing.
+ * It stores the key values as primitives in arrays for each supported primitive type.
+ * This works in conjunction with
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch}
+ * to hash vectorized processing units (batches).
+ *
+ * <p>The null flags of all keys share one {@code isNull} array laid out by type group:
+ * long keys first, then double keys, then string (byte[]) keys. Code that touches the
+ * flag of a double or string key must therefore add the sizes of the preceding groups.
+ */
+public class VectorHashKeyWrapper extends KeyWrapper {
+
+  private long[] longValues;
+  private double[] doubleValues;
+
+  private byte[][] byteValues;
+  private int[] byteStarts;
+  private int[] byteLengths;
+
+  private boolean[] isNull;
+  private int hashcode;
+
+  /**
+   * Allocates storage for the given number of keys of each primitive type.
+   *
+   * @param longValuesCount number of long keys
+   * @param doubleValuesCount number of double keys
+   * @param byteValuesCount number of string (byte[]) keys
+   */
+  public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount) {
+    longValues = new long[longValuesCount];
+    doubleValues = new double[doubleValuesCount];
+    byteValues = new byte[byteValuesCount][];
+    byteStarts = new int[byteValuesCount];
+    byteLengths = new int[byteValuesCount];
+    isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount];
+  }
+
+  /** Used only by {@link #clone()}, which fills in every field itself. */
+  private VectorHashKeyWrapper() {
+  }
+
+  /**
+   * Row-mode entry point; not supported by the vectorized wrapper.
+   *
+   * @throws HiveException always
+   */
+  @Override
+  public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException {
+    throw new HiveException("Should not be called");
+  }
+
+  /**
+   * Recomputes the cached hash code from the current key values.
+   * Must be called after the last assign* call and before the wrapper is hashed or compared,
+   * because {@link #equals(Object)} compares the cached hash codes first.
+   */
+  @Override
+  public void setHashKey() {
+    hashcode = Arrays.hashCode(longValues) ^
+        Arrays.hashCode(doubleValues) ^
+        Arrays.hashCode(isNull);
+
+    // This loop, with branches and all, is not executed if there are no string keys.
+    for (int i = 0; i < byteValues.length; ++i) {
+      /*
+       * Hashing the string is potentially expensive so it is better to branch.
+       * Additionally, not looking at the values of nulls allows us not to reset them.
+       */
+      if (!isNull[stringNullIndex(i)]) {
+        // The hash must depend only on the bytes of the value, never on where the value
+        // sits inside its backing array: the same string may arrive as a whole array in
+        // one row and as a slice of a shared buffer in another.
+        hashcode ^= hashBytes(byteValues[i], byteStarts[i], byteLengths[i]);
+      }
+    }
+  }
+
+  /**
+   * Hashes {@code length} bytes of {@code bytes} starting at {@code start}, using the same
+   * polynomial as {@link Arrays#hashCode(byte[])}; for a range covering the whole array the
+   * two results are identical.
+   */
+  private static int hashBytes(byte[] bytes, int start, int length) {
+    int result = 1;
+    int end = start + length;
+    for (int j = start; j < end; ++j) {
+      result = 31 * result + bytes[j];
+    }
+    return result;
+  }
+
+  /** Maps the ordinal of a string key to its position in the shared {@code isNull} array. */
+  private int stringNullIndex(int stringIndex) {
+    return longValues.length + doubleValues.length + stringIndex;
+  }
+
+  /** Returns the hash code cached by the last call to {@link #setHashKey()}. */
+  @Override
+  public int hashCode() {
+    return hashcode;
+  }
+
+  /**
+   * Two wrappers are equal when their cached hash codes, null flags and all non-null key
+   * values match. String keys are compared by content, not by backing array identity.
+   */
+  @Override
+  public boolean equals(Object that) {
+    if (that instanceof VectorHashKeyWrapper) {
+      VectorHashKeyWrapper keyThat = (VectorHashKeyWrapper)that;
+      return hashcode == keyThat.hashcode &&
+          Arrays.equals(longValues, keyThat.longValues) &&
+          Arrays.equals(doubleValues, keyThat.doubleValues) &&
+          Arrays.equals(isNull, keyThat.isNull) &&
+          byteValues.length == keyThat.byteValues.length &&
+          (0 == byteValues.length || bytesEquals(keyThat));
+    }
+    return false;
+  }
+
+  /**
+   * Compares the non-null string keys of this wrapper with those of {@code keyThat}.
+   * By the time we enter here byteValues.length and isNull must already have been compared.
+   */
+  private boolean bytesEquals(VectorHashKeyWrapper keyThat) {
+    for (int i = 0; i < byteValues.length; ++i) {
+      // the byte comparison is potentially expensive so it is better to branch on null
+      if (!isNull[stringNullIndex(i)]) {
+        if (0 != StringExpr.compare(
+            byteValues[i],
+            byteStarts[i],
+            byteLengths[i],
+            keyThat.byteValues[i],
+            keyThat.byteStarts[i],
+            keyThat.byteLengths[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Creates a deep copy of this wrapper. Every non-null string key is copied into a
+   * right-sized array of its own (start 0), so the copy no longer references the
+   * arrays that were handed to {@link #assignString(int, byte[], int, int)}.
+   */
+  @Override
+  protected Object clone() {
+    VectorHashKeyWrapper clone = new VectorHashKeyWrapper();
+    clone.longValues = longValues.clone();
+    clone.doubleValues = doubleValues.clone();
+    clone.isNull = isNull.clone();
+
+    clone.byteValues = new byte[byteValues.length][];
+    clone.byteStarts = new int[byteValues.length];
+    clone.byteLengths = byteLengths.clone();
+    for (int i = 0; i < byteValues.length; ++i) {
+      // Avoid allocation/copy of nulls, because it is potentially expensive; branch instead.
+      // The flag of string key i lives after the long and double flags, not at isNull[i].
+      if (!isNull[stringNullIndex(i)]) {
+        clone.byteValues[i] = Arrays.copyOfRange(
+            byteValues[i],
+            byteStarts[i],
+            byteStarts[i] + byteLengths[i]);
+      }
+    }
+    clone.hashcode = hashcode;
+    assert clone.equals(this);
+    return clone;
+  }
+
+  /** Returns a deep copy of this key, see {@link #clone()}. */
+  @Override
+  public KeyWrapper copyKey() {
+    return (KeyWrapper) clone();
+  }
+
+  /** Not supported by the vectorized wrapper. */
+  @Override
+  public void copyKey(KeyWrapper oldWrapper) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by the vectorized wrapper. */
+  @Override
+  public Object[] getKeyArray() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Sets double key {@code index} (ordinal among the double keys) to a non-null value. */
+  public void assignDouble(int index, double d) {
+    doubleValues[index] = d;
+    isNull[longValues.length + index] = false;
+  }
+
+  /** Marks double key {@code index} (ordinal among the double keys) as null. */
+  public void assignNullDouble(int index) {
+    doubleValues[index] = 0; // assign 0 to simplify hashcode
+    isNull[longValues.length + index] = true;
+  }
+
+  /** Sets long key {@code index} (ordinal among the long keys) to a non-null value. */
+  public void assignLong(int index, long v) {
+    longValues[index] = v;
+    isNull[index] = false;
+  }
+
+  /** Marks long key {@code index} (ordinal among the long keys) as null. */
+  public void assignNullLong(int index) {
+    longValues[index] = 0; // assign 0 to simplify hashcode
+    isNull[index] = true;
+  }
+
+  /**
+   * Sets string key {@code index} (ordinal among the string keys) to a non-null value.
+   * Only the reference to {@code bytes} is stored; the bytes are not copied.
+   */
+  public void assignString(int index, byte[] bytes, int start, int length) {
+    byteValues[index] = bytes;
+    byteStarts[index] = start;
+    byteLengths[index] = length;
+    isNull[stringNullIndex(index)] = false;
+  }
+
+  /** Marks string key {@code index} (ordinal among the string keys) as null. */
+  public void assignNullString(int index) {
+    // We do not assign the value to [] because the value is never used on null
+    isNull[stringNullIndex(index)] = true;
+  }
+
+  @Override
+  public String toString()
+  {
+    return String.format("%d[%s] %d[%s] %d[%s]",
+        longValues.length, Arrays.toString(longValues),
+        doubleValues.length, Arrays.toString(doubleValues),
+        byteValues.length, Arrays.toString(byteValues));
+  }
+
+  /**
+   * Returns the null flag at position {@code i} of the shared flag array
+   * (long keys first, then double keys, then string keys).
+   */
+  public boolean getIsNull(int i) {
+    return isNull[i];
+  }
+
+  /** Returns the long key at position {@code i}; long keys occupy the first positions. */
+  public long getLongValue(int i) {
+    return longValues[i];
+  }
+
+  /**
+   * Returns the double key at position {@code i} of the shared layout, i.e.
+   * {@code i} is the ordinal among the double keys plus the number of long keys.
+   */
+  public double getDoubleValue(int i) {
+    return doubleValues[i - longValues.length];
+  }
+
+  /**
+   * Returns the backing array of the string key at position {@code i} of the shared layout,
+   * i.e. {@code i} is the ordinal among the string keys plus the number of long and double keys.
+   */
+  public byte[] getBytes(int i) {
+    return byteValues[i - longValues.length - doubleValues.length];
+  }
+
+  /** Returns the start offset of the string key at position {@code i} of the shared layout. */
+  public int getByteStart(int i) {
+    return byteStarts[i - longValues.length - doubleValues.length];
+  }
+
+  /** Returns the byte length of the string key at position {@code i} of the shared layout. */
+  public int getByteLength(int i) {
+    return byteLengths[i - longValues.length - doubleValues.length];
+  }
+
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java?rev=1489506&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java Tue Jun 4 16:29:51 2013
@@ -0,0 +1,510 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
+ * row batch in a vectorized fashion.
+ * This class stores additional information about keys needed to evaluate and output the key values.
+ *
+ * <p>Each {@link VectorHashKeyWrapper} groups its keys by primitive type: long keys first,
+ * then double keys, then string keys. {@code indexLookup} maps the ordinal of a key
+ * expression to its ordinal inside its type group.
+ */
+public class VectorHashKeyWrapperBatch {
+
+  /**
+   * Helper class for looking up a key value based on key index.
+   * Exactly one of the three indices is non-negative for a compiled key; the other two are -1.
+   */
+  private static class KeyLookupHelper {
+    public int longIndex;
+    public int doubleIndex;
+    public int stringIndex;
+  }
+
+  /**
+   * The key expressions that require evaluation and output the primitive values for each key.
+   */
+  private VectorExpression[] keyExpressions;
+
+  /**
+   * indices of LONG primitive keys
+   */
+  private int[] longIndices;
+
+  /**
+   * indices of DOUBLE primitive keys
+   */
+  private int[] doubleIndices;
+
+  /**
+   * indices of string (byte[]) primitive keys
+   */
+  private int[] stringIndices;
+
+  /**
+   * pre-allocated batch size vector of keys wrappers.
+   * N.B. these keys are **mutable** and should never be used in a HashMap.
+   * Always clone the key wrapper to obtain an immutable keywrapper suitable
+   * to use a key in a HashMap.
+   */
+  private VectorHashKeyWrapper[] vectorHashKeyWrappers;
+
+  /**
+   * lookup vector to map from key index to primitive type index
+   */
+  private KeyLookupHelper[] indexLookup;
+
+  /**
+   * Accessor for the batch-sized array of key wrappers
+   */
+  public VectorHashKeyWrapper[] getVectorHashKeyWrappers() {
+    return vectorHashKeyWrappers;
+  }
+
+  /**
+   * Processes a batch:
+   * <ul>
+   * <li>Evaluates each key vector expression.</li>
+   * <li>Copies out each key's primitive values into the key wrappers</li>
+   * <li>computes the hashcode of the key wrappers</li>
+   * </ul>
+   * Wrapper {@code r} receives the keys of the r-th row of the batch (after applying the
+   * selection vector when it is in use).
+   * @param batch the row batch whose key columns are evaluated
+   * @throws HiveException
+   */
+  public void evaluateBatch (VectorizedRowBatch batch) throws HiveException {
+    for (int i = 0; i < keyExpressions.length; ++i) {
+      keyExpressions[i].evaluate(batch);
+    }
+    for (int i = 0; i < longIndices.length; ++i) {
+      int columnIndex = keyExpressions[longIndices[i]].getOutputColumn();
+      assignLongKeys(i, batch, (LongColumnVector) batch.cols[columnIndex]);
+    }
+    for (int i = 0; i < doubleIndices.length; ++i) {
+      int columnIndex = keyExpressions[doubleIndices[i]].getOutputColumn();
+      assignDoubleKeys(i, batch, (DoubleColumnVector) batch.cols[columnIndex]);
+    }
+    for (int i = 0; i < stringIndices.length; ++i) {
+      int columnIndex = keyExpressions[stringIndices[i]].getOutputColumn();
+      assignStringKeys(i, batch, (BytesColumnVector) batch.cols[columnIndex]);
+    }
+    for (int i = 0; i < batch.size; ++i) {
+      vectorHashKeyWrappers[i].setHashKey();
+    }
+  }
+
+  /**
+   * Picks the specialized copy loop for a long key column based on the
+   * repeating / nulls / selection state of the column and the batch.
+   */
+  private void assignLongKeys(int index, VectorizedRowBatch batch,
+      LongColumnVector columnVector) {
+    if (columnVector.isRepeating) {
+      // A repeating column holds its single value at position 0; selection is irrelevant.
+      if (columnVector.noNulls) {
+        assignLongNoNullsRepeating(index, batch.size, columnVector);
+      } else {
+        assignLongNullsRepeating(index, batch.size, columnVector);
+      }
+    } else if (columnVector.noNulls) {
+      if (batch.selectedInUse) {
+        assignLongNoNullsNoRepeatingSelection(index, batch.size, columnVector, batch.selected);
+      } else {
+        assignLongNoNullsNoRepeatingNoSelection(index, batch.size, columnVector);
+      }
+    } else if (batch.selectedInUse) {
+      assignLongNullsNoRepeatingSelection(index, batch.size, columnVector, batch.selected);
+    } else {
+      assignLongNullsNoRepeatingNoSelection(index, batch.size, columnVector);
+    }
+  }
+
+  /**
+   * Picks the specialized copy loop for a double key column based on the
+   * repeating / nulls / selection state of the column and the batch.
+   */
+  private void assignDoubleKeys(int index, VectorizedRowBatch batch,
+      DoubleColumnVector columnVector) {
+    if (columnVector.isRepeating) {
+      if (columnVector.noNulls) {
+        assignDoubleNoNullsRepeating(index, batch.size, columnVector);
+      } else {
+        assignDoubleNullsRepeating(index, batch.size, columnVector);
+      }
+    } else if (columnVector.noNulls) {
+      if (batch.selectedInUse) {
+        assignDoubleNoNullsNoRepeatingSelection(index, batch.size, columnVector, batch.selected);
+      } else {
+        assignDoubleNoNullsNoRepeatingNoSelection(index, batch.size, columnVector);
+      }
+    } else if (batch.selectedInUse) {
+      assignDoubleNullsNoRepeatingSelection(index, batch.size, columnVector, batch.selected);
+    } else {
+      assignDoubleNullsNoRepeatingNoSelection(index, batch.size, columnVector);
+    }
+  }
+
+  /**
+   * Picks the specialized copy loop for a string key column based on the
+   * repeating / nulls / selection state of the column and the batch.
+   */
+  private void assignStringKeys(int index, VectorizedRowBatch batch,
+      BytesColumnVector columnVector) {
+    if (columnVector.isRepeating) {
+      if (columnVector.noNulls) {
+        assignStringNoNullsRepeating(index, batch.size, columnVector);
+      } else {
+        assignStringNullsRepeating(index, batch.size, columnVector);
+      }
+    } else if (columnVector.noNulls) {
+      if (batch.selectedInUse) {
+        assignStringNoNullsNoRepeatingSelection(index, batch.size, columnVector, batch.selected);
+      } else {
+        assignStringNoNullsNoRepeatingNoSelection(index, batch.size, columnVector);
+      }
+    } else if (batch.selectedInUse) {
+      assignStringNullsNoRepeatingSelection(index, batch.size, columnVector, batch.selected);
+    } else {
+      assignStringNullsNoRepeatingNoSelection(index, batch.size, columnVector);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, possible nulls, no repeat values, batch selection vector.
+   */
+  private void assignStringNullsNoRepeatingSelection(int index, int size,
+      BytesColumnVector columnVector, int[] selected) {
+    for (int i = 0; i < size; ++i) {
+      int row = selected[i];
+      if (columnVector.isNull[row]) {
+        vectorHashKeyWrappers[i].assignNullString(index);
+      } else {
+        vectorHashKeyWrappers[i].assignString(index,
+            columnVector.vector[row],
+            columnVector.start[row],
+            columnVector.length[row]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, possible nulls, repeat values.
+   */
+  private void assignStringNullsRepeating(int index, int size, BytesColumnVector columnVector) {
+    if (columnVector.isNull[0]) {
+      for (int i = 0; i < size; ++i) {
+        vectorHashKeyWrappers[i].assignNullString(index);
+      }
+    } else {
+      for (int i = 0; i < size; ++i) {
+        vectorHashKeyWrappers[i].assignString(index,
+            columnVector.vector[0],
+            columnVector.start[0],
+            columnVector.length[0]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, possible nulls, no repeat values, no selection vector.
+   */
+  private void assignStringNullsNoRepeatingNoSelection(int index, int size,
+      BytesColumnVector columnVector) {
+    for (int i = 0; i < size; ++i) {
+      if (columnVector.isNull[i]) {
+        vectorHashKeyWrappers[i].assignNullString(index);
+      } else {
+        vectorHashKeyWrappers[i].assignString(index,
+            columnVector.vector[i],
+            columnVector.start[i],
+            columnVector.length[i]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, no nulls, repeat values.
+   */
+  private void assignStringNoNullsRepeating(int index, int size, BytesColumnVector columnVector) {
+    for (int i = 0; i < size; ++i) {
+      vectorHashKeyWrappers[i].assignString(index,
+          columnVector.vector[0],
+          columnVector.start[0],
+          columnVector.length[0]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, no nulls, no repeat values, batch selection vector.
+   */
+  private void assignStringNoNullsNoRepeatingSelection(int index, int size,
+      BytesColumnVector columnVector, int[] selected) {
+    for (int i = 0; i < size; ++i) {
+      int row = selected[i];
+      vectorHashKeyWrappers[i].assignString(index,
+          columnVector.vector[row],
+          columnVector.start[row],
+          columnVector.length[row]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for string type, no nulls, no repeat values, no selection vector.
+   */
+  private void assignStringNoNullsNoRepeatingNoSelection(int index, int size,
+      BytesColumnVector columnVector) {
+    for (int i = 0; i < size; ++i) {
+      vectorHashKeyWrappers[i].assignString(index,
+          columnVector.vector[i],
+          columnVector.start[i],
+          columnVector.length[i]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for double type, possible nulls, no repeat values, batch selection vector.
+   */
+  private void assignDoubleNullsNoRepeatingSelection(int index, int size,
+      DoubleColumnVector columnVector, int[] selected) {
+    for (int i = 0; i < size; ++i) {
+      int row = selected[i];
+      if (!columnVector.isNull[row]) {
+        vectorHashKeyWrappers[i].assignDouble(index, columnVector.vector[row]);
+      } else {
+        vectorHashKeyWrappers[i].assignNullDouble(index);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for double type, possible nulls, repeat values.
+   */
+  private void assignDoubleNullsRepeating(int index, int size,
+      DoubleColumnVector columnVector) {
+    // noNulls == false only says the column may contain nulls; whether the repeated
+    // value itself is null is decided by isNull[0], exactly as for string keys.
+    if (columnVector.isNull[0]) {
+      for (int r = 0; r < size; ++r) {
+        vectorHashKeyWrappers[r].assignNullDouble(index);
+      }
+    } else {
+      for (int r = 0; r < size; ++r) {
+        vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[0]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for double type, possible nulls, no repeat values, no selection vector.
+   */
+  private void assignDoubleNullsNoRepeatingNoSelection(int index, int size,
+      DoubleColumnVector columnVector) {
+    for (int r = 0; r < size; ++r) {
+      if (!columnVector.isNull[r]) {
+        vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[r]);
+      } else {
+        vectorHashKeyWrappers[r].assignNullDouble(index);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for double type, no nulls, repeat values.
+   */
+  private void assignDoubleNoNullsRepeating(int index, int size, DoubleColumnVector columnVector) {
+    for (int r = 0; r < size; ++r) {
+      vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[0]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for double type, no nulls, no repeat values, batch selection vector.
+   */
+  private void assignDoubleNoNullsNoRepeatingSelection(int index, int size,
+      DoubleColumnVector columnVector, int[] selected) {
+    for (int r = 0; r < size; ++r) {
+      vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[selected[r]]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for double type, no nulls, no repeat values, no selection vector.
+   */
+  private void assignDoubleNoNullsNoRepeatingNoSelection(int index, int size,
+      DoubleColumnVector columnVector) {
+    for (int r = 0; r < size; ++r) {
+      vectorHashKeyWrappers[r].assignDouble(index, columnVector.vector[r]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for long type, possible nulls, no repeat values, batch selection vector.
+   */
+  private void assignLongNullsNoRepeatingSelection(int index, int size,
+      LongColumnVector columnVector, int[] selected) {
+    for (int i = 0; i < size; ++i) {
+      int row = selected[i];
+      if (!columnVector.isNull[row]) {
+        vectorHashKeyWrappers[i].assignLong(index, columnVector.vector[row]);
+      } else {
+        vectorHashKeyWrappers[i].assignNullLong(index);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for long type, possible nulls, repeat values.
+   */
+  private void assignLongNullsRepeating(int index, int size,
+      LongColumnVector columnVector) {
+    // noNulls == false only says the column may contain nulls; whether the repeated
+    // value itself is null is decided by isNull[0], exactly as for string keys.
+    if (columnVector.isNull[0]) {
+      for (int r = 0; r < size; ++r) {
+        vectorHashKeyWrappers[r].assignNullLong(index);
+      }
+    } else {
+      for (int r = 0; r < size; ++r) {
+        vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[0]);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for long type, possible nulls, no repeat values, no selection vector.
+   */
+  private void assignLongNullsNoRepeatingNoSelection(int index, int size,
+      LongColumnVector columnVector) {
+    for (int r = 0; r < size; ++r) {
+      if (!columnVector.isNull[r]) {
+        vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[r]);
+      } else {
+        vectorHashKeyWrappers[r].assignNullLong(index);
+      }
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for long type, no nulls, repeat values.
+   */
+  private void assignLongNoNullsRepeating(int index, int size, LongColumnVector columnVector) {
+    for (int r = 0; r < size; ++r) {
+      vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[0]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for long type, no nulls, no repeat values, batch selection vector.
+   */
+  private void assignLongNoNullsNoRepeatingSelection(int index, int size,
+      LongColumnVector columnVector, int[] selected) {
+    for (int r = 0; r < size; ++r) {
+      vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[selected[r]]);
+    }
+  }
+
+  /**
+   * Helper method to assign values from a vector column into the key wrapper.
+   * Optimized for long type, no nulls, no repeat values, no selection vector.
+   */
+  private void assignLongNoNullsNoRepeatingNoSelection(int index, int size,
+      LongColumnVector columnVector) {
+    for (int r = 0; r < size; ++r) {
+      vectorHashKeyWrappers[r].assignLong(index, columnVector.vector[r]);
+    }
+  }
+
+  /**
+   * Prepares a VectorHashKeyWrapperBatch to work for a specific set of keys.
+   * Computes the fast access lookup indices, preallocates all needed internal arrays.
+   * This step is done only once per query, not once per batch. The information computed now
+   * will be used to generate proper individual VectorHashKeyWrapper objects.
+   *
+   * @param keyExpressions the key expressions, in key order
+   * @return a batch holding {@code VectorizedRowBatch.DEFAULT_SIZE} preallocated key wrappers
+   * @throws HiveException if a key expression has an output type that is not supported
+   */
+  public static VectorHashKeyWrapperBatch compileKeyWrapperBatch(VectorExpression[] keyExpressions)
+      throws HiveException {
+    VectorHashKeyWrapperBatch compiledKeyWrapperBatch = new VectorHashKeyWrapperBatch();
+    compiledKeyWrapperBatch.keyExpressions = keyExpressions;
+
+    // We'll overallocate and then shrink the array for each type
+    int[] longIndices = new int[keyExpressions.length];
+    int longIndicesIndex = 0;
+    int[] doubleIndices = new int[keyExpressions.length];
+    int doubleIndicesIndex = 0;
+    int[] stringIndices = new int[keyExpressions.length];
+    int stringIndicesIndex = 0;
+    KeyLookupHelper[] indexLookup = new KeyLookupHelper[keyExpressions.length];
+
+    // Inspect the output type of each key expression.
+    for (int i = 0; i < keyExpressions.length; ++i) {
+      indexLookup[i] = new KeyLookupHelper();
+      String outputType = keyExpressions[i].getOutputType();
+      if (outputType.equalsIgnoreCase("tinyint") ||
+          outputType.equalsIgnoreCase("smallint") ||
+          outputType.equalsIgnoreCase("int") ||
+          outputType.equalsIgnoreCase("bigint") ||
+          outputType.equalsIgnoreCase("timestamp") ||
+          outputType.equalsIgnoreCase("boolean")) {
+        longIndices[longIndicesIndex] = i;
+        indexLookup[i].longIndex = longIndicesIndex;
+        indexLookup[i].doubleIndex = -1;
+        indexLookup[i].stringIndex = -1;
+        ++longIndicesIndex;
+      } else if (outputType.equalsIgnoreCase("double") ||
+          outputType.equalsIgnoreCase("float")) {
+        doubleIndices[doubleIndicesIndex] = i;
+        indexLookup[i].longIndex = -1;
+        indexLookup[i].doubleIndex = doubleIndicesIndex;
+        indexLookup[i].stringIndex = -1;
+        ++doubleIndicesIndex;
+      } else if (outputType.equalsIgnoreCase("string")) {
+        // Same shape as the long/double branches: the per-type array maps the ordinal
+        // among the string keys to the key index, and the lookup maps it back.
+        stringIndices[stringIndicesIndex] = i;
+        indexLookup[i].longIndex = -1;
+        indexLookup[i].doubleIndex = -1;
+        indexLookup[i].stringIndex = stringIndicesIndex;
+        ++stringIndicesIndex;
+      } else {
+        throw new HiveException("Unsuported vector output type: " + outputType);
+      }
+    }
+    compiledKeyWrapperBatch.indexLookup = indexLookup;
+    compiledKeyWrapperBatch.longIndices = Arrays.copyOf(longIndices, longIndicesIndex);
+    compiledKeyWrapperBatch.doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex);
+    compiledKeyWrapperBatch.stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex);
+    compiledKeyWrapperBatch.vectorHashKeyWrappers =
+        new VectorHashKeyWrapper[VectorizedRowBatch.DEFAULT_SIZE];
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; ++i) {
+      compiledKeyWrapperBatch.vectorHashKeyWrappers[i] =
+          new VectorHashKeyWrapper(longIndicesIndex, doubleIndicesIndex, stringIndicesIndex);
+    }
+    return compiledKeyWrapperBatch;
+  }
+
+  /**
+   * Get the row-mode writable object value of a key from a key wrapper.
+   *
+   * @param kw the key wrapper to read from
+   * @param i the index of the key, in key expression order
+   * @param keyOutputWriter the writer used to convert the primitive value
+   * @return the value produced by the writer, or null when the key is null
+   * @throws HiveException if the lookup entry for key {@code i} names no primitive type
+   */
+  public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i,
+      VectorExpressionWriter keyOutputWriter)
+    throws HiveException {
+    KeyLookupHelper klh = indexLookup[i];
+    // The wrapper stores keys grouped by type (longs, then doubles, then strings), so the
+    // key ordinal i must be translated into a position in that layout before it is used
+    // for either the null flag or the value.
+    if (klh.longIndex >= 0) {
+      int position = klh.longIndex;
+      if (kw.getIsNull(position)) {
+        return null;
+      }
+      return keyOutputWriter.writeValue(kw.getLongValue(position));
+    } else if (klh.doubleIndex >= 0) {
+      int position = longIndices.length + klh.doubleIndex;
+      if (kw.getIsNull(position)) {
+        return null;
+      }
+      return keyOutputWriter.writeValue(kw.getDoubleValue(position));
+    } else if (klh.stringIndex >= 0) {
+      int position = longIndices.length + doubleIndices.length + klh.stringIndex;
+      if (kw.getIsNull(position)) {
+        return null;
+      }
+      return keyOutputWriter.writeValue(
+          kw.getBytes(position), kw.getByteStart(position), kw.getByteLength(position));
+    } else {
+      throw new HiveException(String.format(
+          "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d",
+          i, klh.longIndex, klh.doubleIndex, klh.stringIndex));
+    }
+  }
+}
+