You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/10/29 16:14:07 UTC
svn commit: r1635171 [1/2] - in /hive/trunk: itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/
ql/src/java/org/apache/hadoop/hive/ql...
Author: hashutosh
Date: Wed Oct 29 15:14:06 2014
New Revision: 1635171
URL: http://svn.apache.org/r1635171
Log:
HIVE-8587 : Vectorized Extract operator needs to update the Vectorization Context column map (Matt McCline via Jitendra Nath Pandey)
Added:
hive/trunk/ql/src/test/queries/clientpositive/vector_bucket.q
hive/trunk/ql/src/test/results/clientpositive/tez/vector_bucket.q.out
hive/trunk/ql/src/test/results/clientpositive/vector_bucket.q.out
Modified:
hive/trunk/itests/src/test/resources/testconfiguration.properties
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_2.q.out
Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Wed Oct 29 15:14:06 2014
@@ -156,6 +156,7 @@ minitez.query.files.shared=alter_merge_2
update_where_partitioned.q,\
update_two_cols.q,\
vector_between_in.q,\
+ vector_bucket.q,\
vector_cast_constant.q,\
vector_char_4.q,\
vector_char_simple.q,\
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Wed Oct 29 15:14:06 2014
@@ -429,20 +429,20 @@ public final class Utilities {
}
}
- public static Map<String, Map<Integer, String>> getScratchColumnVectorTypes(Configuration hiveConf) {
+ public static Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
BaseWork baseWork = getMapWork(hiveConf);
if (baseWork == null) {
baseWork = getReduceWork(hiveConf);
}
- return baseWork.getScratchColumnVectorTypes();
+ return baseWork.getAllScratchColumnVectorTypeMaps();
}
- public static Map<String, Map<String, Integer>> getScratchColumnMap(Configuration hiveConf) {
+ public static Map<String, Map<String, Integer>> getAllColumnVectorMaps(Configuration hiveConf) {
BaseWork baseWork = getMapWork(hiveConf);
if (baseWork == null) {
baseWork = getReduceWork(hiveConf);
}
- return baseWork.getScratchColumnMap();
+ return baseWork.getAllColumnVectorMaps();
}
public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java Wed Oct 29 15:14:06 2014
@@ -101,7 +101,7 @@ public class ReduceRecordProcessor exte
sources[tag] = new ReduceRecordSource();
sources[tag].init(jconf, reducer, redWork.getVectorMode(), keyTableDesc, valueTableDesc,
reader, tag == position, (byte) tag,
- redWork.getScratchColumnVectorTypes());
+ redWork.getAllScratchColumnVectorTypeMaps());
ois[tag] = sources[tag].getObjectInspector();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java Wed Oct 29 15:14:06 2014
@@ -18,85 +18,118 @@
package org.apache.hadoop.hive.ql.exec.vector;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
+import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
/**
- * Vectorized extract operator implementation. Consumes rows and outputs a
- * vectorized batch of subobjects.
+ * Vectorized extract operator implementation.
**/
-public class VectorExtractOperator extends ExtractOperator {
+public class VectorExtractOperator extends ExtractOperator implements VectorizationContextRegion {
private static final long serialVersionUID = 1L;
- private int keyColCount;
- private int valueColCount;
-
- private transient VectorizedRowBatch outputBatch;
- private transient int remainingColCount;
+ private List<TypeInfo> reduceTypeInfos;
+
+ // Create a new outgoing vectorization context because we will project just the values.
+ private VectorizationContext vOutContext;
+
+ private int[] projectedColumns;
+ private String removeValueDotPrefix(String columnName) {
+ return columnName.substring("VALUE.".length());
+ }
public VectorExtractOperator(VectorizationContext vContext, OperatorDesc conf)
throws HiveException {
this();
this.conf = (ExtractDesc) conf;
+
+ List<String> reduceColumnNames = vContext.getProjectionColumnNames();
+ int reduceColCount = reduceColumnNames.size();
+
+ /*
+ * Create a new vectorization context as a projection of just the value columns, but
+ * inherit the same output column manager so the scratch columns are still tracked.
+ */
+ vOutContext = new VectorizationContext(vContext);
+
+ // Set a fileKey with vectorization context.
+ vOutContext.setFileKey(vContext.getFileKey() + "/_EXTRACT_");
+
+ // Remove "VALUE." prefix from value columns and create a new projection
+ vOutContext.resetProjectionColumns();
+ for (int i = 0; i < reduceColCount; i++) {
+ String columnName = reduceColumnNames.get(i);
+ if (columnName.startsWith("VALUE.")) {
+ vOutContext.addProjectionColumn(removeValueDotPrefix(columnName), i);
+ }
+ }
}
public VectorExtractOperator() {
super();
}
+ /*
+ * Called by the Vectorizer class to pass the types from reduce shuffle.
+ */
+ public void setReduceTypeInfos(List<TypeInfo> reduceTypeInfos) {
+ this.reduceTypeInfos = reduceTypeInfos;
+ }
+
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
- StructObjectInspector structInputObjInspector = (StructObjectInspector) inputObjInspectors[0];
- List<? extends StructField> fields = structInputObjInspector.getAllStructFieldRefs();
- ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
- ArrayList<String> colNames = new ArrayList<String>();
- for (int i = keyColCount; i < fields.size(); i++) {
- StructField field = fields.get(i);
- String fieldName = field.getFieldName();
-
- // Remove "VALUE." prefix.
- int dotIndex = fieldName.indexOf(".");
- colNames.add(fieldName.substring(dotIndex + 1));
- ois.add(field.getFieldObjectInspector());
+ // Create the projection of the values and the output object inspector
+ // for just the values without their "VALUE." prefix.
+ int projectionSize = vOutContext.getProjectedColumns().size();
+ projectedColumns = new int[projectionSize];
+ List<String> columnNames = new ArrayList<String>();
+ List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+ for (int i = 0; i < projectionSize; i++) {
+ int projectedIndex = vOutContext.getProjectedColumns().get(i);
+ projectedColumns[i] = projectedIndex;
+ String colName = vOutContext.getProjectionColumnNames().get(i);
+ columnNames.add(colName);
+ TypeInfo typeInfo = reduceTypeInfos.get(projectedIndex);
+ ObjectInspector oi = TypeInfoUtils
+ .getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
+ ois.add(oi);
}
- outputObjInspector = ObjectInspectorFactory
- .getStandardStructObjectInspector(colNames, ois);
- remainingColCount = fields.size() - keyColCount;
- outputBatch = new VectorizedRowBatch(remainingColCount);
+ outputObjInspector = ObjectInspectorFactory.
+ getStandardStructObjectInspector(columnNames, ois);
initializeChildren(hconf);
}
- public void setKeyAndValueColCounts(int keyColCount, int valueColCount) {
- this.keyColCount = keyColCount;
- this.valueColCount = valueColCount;
- }
@Override
// Remove the key columns and forward the values (and scratch columns).
public void processOp(Object row, int tag) throws HiveException {
- VectorizedRowBatch inputBatch = (VectorizedRowBatch) row;
+ VectorizedRowBatch vrg = (VectorizedRowBatch) row;
- // Copy references to the input columns array starting after the keys...
- for (int i = 0; i < remainingColCount; i++) {
- outputBatch.cols[i] = inputBatch.cols[keyColCount + i];
- }
- outputBatch.size = inputBatch.size;
+ int[] originalProjections = vrg.projectedColumns;
+ int originalProjectionSize = vrg.projectionSize;
+
+ // Temporarily substitute our projection.
+ vrg.projectionSize = projectedColumns.length;
+ vrg.projectedColumns = projectedColumns;
- forward(outputBatch, outputObjInspector);
+ forward(vrg, null);
+
+ // Revert the projected columns back, because vrg will be re-used.
+ vrg.projectionSize = originalProjectionSize;
+ vrg.projectedColumns = originalProjections;
+ }
+
+ @Override
+ public VectorizationContext getOuputVectorizationContext() {
+ return vOutContext;
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java Wed Oct 29 15:14:06 2014
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
/**
@@ -50,10 +51,22 @@ public class VectorFileSinkOperator exte
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
- super.initializeOp(hconf);
- valueWriters = VectorExpressionWriterFactory.getExpressionWriters(
- (StructObjectInspector) inputObjInspectors[0]);
+ // We need an input object inspector that is for the row we will extract out of the
+ // vectorized row batch, not for example, an original inspector for an ORC table, etc.
+ VectorExpressionWriterFactory.processVectorInspector(
+ (StructObjectInspector) inputObjInspectors[0],
+ new VectorExpressionWriterFactory.SingleOIDClosure() {
+ @Override
+ public void assign(VectorExpressionWriter[] writers,
+ ObjectInspector objectInspector) {
+ valueWriters = writers;
+ inputObjInspectors[0] = objectInspector;
+ }
+ });
singleRow = new Object[valueWriters.length];
+
+ // Call FileSinkOperator with new input inspector.
+ super.initializeOp(hconf);
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java Wed Oct 29 15:14:06 2014
@@ -32,11 +32,8 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.KeyWrapper;
-import org.apache.hadoop.hive.ql.exec.KeyWrapperFactory;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
@@ -46,12 +43,9 @@ import org.apache.hadoop.hive.ql.plan.Ag
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.DataOutputBuffer;
@@ -760,13 +754,7 @@ public class VectorGroupByOperator exten
isVectorOutput = desc.getVectorDesc().isVectorOutput();
- List<String> outColNames = desc.getOutputColumnNames();
- Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
- int outColIndex = 0;
- for(String outCol: outColNames) {
- mapOutCols.put(outCol, outColIndex++);
- }
- vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+ vOutContext = new VectorizationContext(desc.getOutputColumnNames());
vOutContext.setFileKey(vContext.getFileKey() + "/_GROUPBY_");
fileKey = vOutContext.getFileKey();
}
@@ -811,7 +799,7 @@ public class VectorGroupByOperator exten
vrbCtx.init(hconf, fileKey, (StructObjectInspector) outputObjInspector);
outputBatch = vrbCtx.createVectorizedRowBatch();
vectorColumnAssign = VectorColumnAssignFactory.buildAssigners(
- outputBatch, outputObjInspector, vOutContext.getColumnMap(), conf.getOutputColumnNames());
+ outputBatch, outputObjInspector, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
}
} catch (HiveException he) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java Wed Oct 29 15:14:06 2014
@@ -28,11 +28,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
@@ -41,8 +37,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -116,16 +110,8 @@ public class VectorMapJoinOperator exten
Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
- List<String> outColNames = desc.getOutputColumnNames();
-
- Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
-
- int outColIndex = 0;
- for(String outCol: outColNames) {
- mapOutCols.put(outCol, outColIndex++);
- }
-
- vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+ // We are making a new output vectorized row batch.
+ vOutContext = new VectorizationContext(desc.getOutputColumnNames());
vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias());
this.fileKey = vOutContext.getFileKey();
}
@@ -207,7 +193,7 @@ public class VectorMapJoinOperator exten
Object[] values = (Object[]) row;
VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
if (null == vcas) {
- Map<String, Map<String, Integer>> allColumnMaps = Utilities.getScratchColumnMap(hconf);
+ Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
vcas = VectorColumnAssignFactory.buildAssigners(
outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java Wed Oct 29 15:14:06 2014
@@ -27,16 +27,13 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -116,19 +113,9 @@ public class VectorSMBMapJoinOperator ex
Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
-
- // Vectorized join operators need to create a new vectorization region for child operators.
-
- List<String> outColNames = desc.getOutputColumnNames();
-
- Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
-
- int outColIndex = 0;
- for(String outCol: outColNames) {
- mapOutCols.put(outCol, outColIndex++);
- }
- vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+ // We are making a new output vectorized row batch.
+ vOutContext = new VectorizationContext(desc.getOutputColumnNames());
vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
this.fileKey = vOutContext.getFileKey();
}
@@ -285,7 +272,7 @@ public class VectorSMBMapJoinOperator ex
Object[] values = (Object[]) row;
VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
if (null == vcas) {
- Map<String, Map<String, Integer>> allColumnMaps = Utilities.getScratchColumnMap(hconf);
+ Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
vcas = VectorColumnAssignFactory.buildAssigners(
outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java Wed Oct 29 15:14:06 2014
@@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -63,20 +62,20 @@ public class VectorSelectOperator extend
}
/**
- * Create a new vectorization context to update the column map but same output column manager
- * must be inherited to track the scratch the columns.
+ * Create a new vectorization context to create a new projection, but inherit
+ * the same output column manager so the scratch columns are still tracked.
*/
vOutContext = new VectorizationContext(vContext);
// Set a fileKey, although this operator doesn't use it.
vOutContext.setFileKey(vContext.getFileKey() + "/_SELECT_");
- // Update column map
- vOutContext.getColumnMap().clear();
+ vOutContext.resetProjectionColumns();
for (int i=0; i < colList.size(); ++i) {
String columnName = this.conf.getOutputColumnNames().get(i);
VectorExpression ve = vExpressions[i];
- vOutContext.addToColumnMap(columnName, ve.getOutputColumn());
+ vOutContext.addProjectionColumn(columnName,
+ ve.getOutputColumn());
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Oct 29 15:14:06 2014
@@ -23,11 +23,13 @@ import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
@@ -123,9 +125,98 @@ public class VectorizationContext {
VectorExpressionDescriptor vMap;
+ private List<Integer> projectedColumns;
+ private List<String> projectionColumnNames;
+ private Map<String, Integer> projectionColumnMap;
+
//columnName to column position map
- private final Map<String, Integer> columnMap;
- private final int firstOutputColumnIndex;
+ // private final Map<String, Integer> columnMap;
+ private int firstOutputColumnIndex;
+
+ // Convenience constructor for initial batch creation: takes
+ // a list of column names and maps them to 0..n-1 indices.
+ public VectorizationContext(List<String> initialColumnNames) {
+ this.projectionColumnNames = initialColumnNames;
+
+ projectedColumns = new ArrayList<Integer>();
+ projectionColumnMap = new HashMap<String, Integer>();
+ for (int i = 0; i < this.projectionColumnNames.size(); i++) {
+ projectedColumns.add(i);
+ projectionColumnMap.put(projectionColumnNames.get(i), i);
+ }
+ int firstOutputColumnIndex = projectedColumns.size();
+ this.ocm = new OutputColumnManager(firstOutputColumnIndex);
+ this.firstOutputColumnIndex = firstOutputColumnIndex;
+ vMap = new VectorExpressionDescriptor();
+ }
+
+ // Constructor to use with the individual addInitialColumn method,
+ // followed by a call to finishedAddingInitialColumns.
+ public VectorizationContext() {
+ projectedColumns = new ArrayList<Integer>();
+ projectionColumnNames = new ArrayList<String>();
+ projectionColumnMap = new HashMap<String, Integer>();
+ this.ocm = new OutputColumnManager(0);
+ this.firstOutputColumnIndex = 0;
+ vMap = new VectorExpressionDescriptor();
+ }
+
+ // Constructor useful for making a projection vectorization context.
+ // Use with resetProjectionColumns and addProjectionColumn.
+ // Keeps existing output column map, etc.
+ public VectorizationContext(VectorizationContext vContext) {
+ this.projectedColumns = new ArrayList<Integer>();
+ this.projectionColumnNames = new ArrayList<String>();
+ this.projectionColumnMap = new HashMap<String, Integer>();
+
+ this.ocm = vContext.ocm;
+ this.firstOutputColumnIndex = vContext.firstOutputColumnIndex;
+ vMap = new VectorExpressionDescriptor();
+ }
+
+ // Add an initial column to a vectorization context when
+ // a vectorized row batch is being created.
+ public void addInitialColumn(String columnName) {
+ int index = projectedColumns.size();
+ projectedColumns.add(index);
+ projectionColumnNames.add(columnName);
+ projectionColumnMap.put(columnName, index);
+ }
+
+ // Finishes the vectorization context after all the initial
+ // columns have been added.
+ public void finishedAddingInitialColumns() {
+ int firstOutputColumnIndex = projectedColumns.size();
+ this.ocm = new OutputColumnManager(firstOutputColumnIndex);
+ this.firstOutputColumnIndex = firstOutputColumnIndex;
+ }
+
+ // Empties the projection columns.
+ public void resetProjectionColumns() {
+ projectedColumns = new ArrayList<Integer>();
+ projectionColumnNames = new ArrayList<String>();
+ projectionColumnMap = new HashMap<String, Integer>();
+ }
+
+ // Add a projection column to a projection vectorization context.
+ public void addProjectionColumn(String columnName, int vectorBatchColIndex) {
+ projectedColumns.add(vectorBatchColIndex);
+ projectionColumnNames.add(columnName);
+ projectionColumnMap.put(columnName, vectorBatchColIndex);
+ }
+
+ public List<Integer> getProjectedColumns() {
+ return projectedColumns;
+ }
+
+ public List<String> getProjectionColumnNames() {
+ return projectionColumnNames;
+ }
+
+ public Map<String, Integer> getProjectionColumnMap() {
+ return projectionColumnMap;
+ }
+
public static final Pattern decimalTypePattern = Pattern.compile("decimal.*",
Pattern.CASE_INSENSITIVE);
@@ -140,7 +231,7 @@ public class VectorizationContext {
Pattern.CASE_INSENSITIVE);
//Map column number to type
- private final OutputColumnManager ocm;
+ private OutputColumnManager ocm;
// File key is used by operators to retrieve the scratch vectors
// from mapWork at runtime. The operators that modify the structure of
@@ -170,27 +261,6 @@ public class VectorizationContext {
castExpressionUdfs.add(UDFToShort.class);
}
- public VectorizationContext(Map<String, Integer> columnMap,
- int initialOutputCol) {
- this.columnMap = columnMap;
- this.ocm = new OutputColumnManager(initialOutputCol);
- this.firstOutputColumnIndex = initialOutputCol;
- vMap = new VectorExpressionDescriptor();
- }
-
- /**
- * This constructor inherits the OutputColumnManger and from
- * the 'parent' constructor, therefore this should be used only by operators
- * that don't create a new vectorized row batch. This should be used only by
- * operators that want to modify the columnName map without changing the row batch.
- */
- public VectorizationContext(VectorizationContext parent) {
- this.columnMap = new HashMap<String, Integer>(parent.columnMap);
- this.ocm = parent.ocm;
- this.firstOutputColumnIndex = parent.firstOutputColumnIndex;
- vMap = new VectorExpressionDescriptor();
- }
-
public String getFileKey() {
return fileKey;
}
@@ -199,16 +269,19 @@ public class VectorizationContext {
this.fileKey = fileKey;
}
- protected int getInputColumnIndex(String name) {
- if (!columnMap.containsKey(name)) {
- LOG.error(String.format("The column %s is not in the vectorization context column map %s.",
- name, columnMap.toString()));
+ protected int getInputColumnIndex(String name) throws HiveException {
+ if (name == null) {
+ throw new HiveException("Null column name");
+ }
+ if (!projectionColumnMap.containsKey(name)) {
+ throw new HiveException(String.format("The column %s is not in the vectorization context column map %s.",
+ name, projectionColumnMap.toString()));
}
- return columnMap.get(name);
+ return projectionColumnMap.get(name);
}
protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
- return columnMap.get(colExpr.getColumn());
+ return projectionColumnMap.get(colExpr.getColumn());
}
private static class OutputColumnManager {
@@ -280,7 +353,7 @@ public class VectorizationContext {
}
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc
- exprDesc, Mode mode) {
+ exprDesc, Mode mode) throws HiveException {
int columnNum = getInputColumnIndex(exprDesc.getColumn());
VectorExpression expr = null;
switch (mode) {
@@ -1988,7 +2061,7 @@ public class VectorizationContext {
"\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")");
}
- public Map<Integer, String> getOutputColumnTypeMap() {
+ public Map<Integer, String> getScratchColumnTypeMap() {
Map<Integer, String> map = new HashMap<Integer, String>();
for (int i = 0; i < ocm.outputColCount; i++) {
String type = ocm.outputColumnsTypes[i];
@@ -1997,15 +2070,26 @@ public class VectorizationContext {
return map;
}
- public Map<String, Integer> getColumnMap() {
- return columnMap;
- }
+ public String toString() {
+ StringBuilder sb = new StringBuilder(32);
+ sb.append("Context key ").append(getFileKey()).append(", ");
+
+ Comparator<Integer> comparerInteger = new Comparator<Integer>() {
+ @Override
+ public int compare(Integer o1, Integer o2) {
+ return o1.compareTo(o2);
+ }};
- public void addToColumnMap(String columnName, int outputColumn) throws HiveException {
- if (columnMap.containsKey(columnName) && (columnMap.get(columnName) != outputColumn)) {
- throw new HiveException(String.format("Column %s is already mapped to %d. Cannot remap to %d.",
- columnName, columnMap.get(columnName), outputColumn));
+ Map<Integer, String> sortedColumnMap = new TreeMap<Integer, String>(comparerInteger);
+ for (Map.Entry<String, Integer> entry : projectionColumnMap.entrySet()) {
+ sortedColumnMap.put(entry.getValue(), entry.getKey());
}
- columnMap.put(columnName, outputColumn);
+ sb.append("sortedProjectionColumnMap ").append(sortedColumnMap).append(", ");
+
+ Map<Integer, String> sortedScratchColumnTypeMap = new TreeMap<Integer, String>(comparerInteger);
+ sortedScratchColumnTypeMap.putAll(getScratchColumnTypeMap());
+ sb.append("sortedScratchColumnTypeMap ").append(sortedScratchColumnTypeMap);
+
+ return sb.toString();
}
- }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Wed Oct 29 15:14:06 2014
@@ -139,7 +139,7 @@ public class VectorizedRowBatchCtx {
public void init(Configuration hiveConf, String fileKey,
StructObjectInspector rowOI) {
Map<String, Map<Integer, String>> scratchColumnVectorTypes =
- Utilities.getScratchColumnVectorTypes(hiveConf);
+ Utilities.getAllScratchColumnVectorTypeMaps(hiveConf);
columnTypeMap = scratchColumnVectorTypes.get(fileKey);
this.rowOI= rowOI;
this.rawRowOI = rowOI;
@@ -190,7 +190,7 @@ public class VectorizedRowBatchCtx {
String partitionPath = split.getPath().getParent().toString();
columnTypeMap = Utilities
- .getScratchColumnVectorTypes(hiveConf)
+ .getAllScratchColumnVectorTypeMaps(hiveConf)
.get(partitionPath);
Properties partProps =
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Wed Oct 29 15:14:06 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimi
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
@@ -28,6 +29,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
+import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
@@ -66,12 +68,10 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
@@ -119,7 +119,11 @@ import org.apache.hadoop.hive.serde2.Des
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.util.ReflectionUtils;
public class Vectorizer implements PhysicalPlanResolver {
@@ -280,13 +284,13 @@ public class Vectorizer implements Physi
private PhysicalContext pctx;
- private int keyColCount;
- private int valueColCount;
+ private List<String> reduceColumnNames;
+ private List<TypeInfo> reduceTypeInfos;
public VectorizationDispatcher(PhysicalContext pctx) {
this.pctx = pctx;
- keyColCount = 0;
- valueColCount = 0;
+ reduceColumnNames = null;
+ reduceTypeInfos = null;
}
@Override
@@ -374,14 +378,13 @@ public class Vectorizer implements Physi
HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
ogw.startWalking(topNodes, nodeOutput);
- Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
- mapWork.setScratchColumnVectorTypes(columnVectorTypes);
- Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
- mapWork.setScratchColumnMap(columnMap);
+ Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps();
+ mapWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps);
+ Map<String, Map<String, Integer>> allColumnVectorMaps = vnp.getAllColumnVectorMaps();
+ mapWork.setAllColumnVectorMaps(allColumnVectorMaps);
if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
- LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+ debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps);
}
return;
@@ -402,7 +405,7 @@ public class Vectorizer implements Physi
return false;
}
StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
- keyColCount = keyStructObjectInspector.getAllStructFieldRefs().size();
+ List<? extends StructField> keyFields = keyStructObjectInspector.getAllStructFieldRefs();
// Tez doesn't use tagging...
if (reduceWork.getNeedsTagging()) {
@@ -415,9 +418,20 @@ public class Vectorizer implements Physi
!(valueObjectInspector instanceof StructObjectInspector)) {
return false;
}
- StructObjectInspector valueStructObjectInspector =
- (StructObjectInspector)valueObjectInspector;
- valueColCount = valueStructObjectInspector.getAllStructFieldRefs().size();
+ StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+ List<? extends StructField> valueFields = valueStructObjectInspector.getAllStructFieldRefs();
+
+ reduceColumnNames = new ArrayList<String>();
+ reduceTypeInfos = new ArrayList<TypeInfo>();
+
+ for (StructField field: keyFields) {
+ reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
+ reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+ }
+ for (StructField field: valueFields) {
+ reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
+ reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
+ }
} catch (Exception e) {
throw new SemanticException(e);
}
@@ -467,7 +481,7 @@ public class Vectorizer implements Physi
// VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
ReduceWorkVectorizationNodeProcessor vnp =
- new ReduceWorkVectorizationNodeProcessor(reduceWork, keyColCount, valueColCount);
+ new ReduceWorkVectorizationNodeProcessor(reduceColumnNames);
addReduceWorkRules(opRules, vnp);
Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
GraphWalker ogw = new PreOrderWalker(disp);
@@ -484,18 +498,17 @@ public class Vectorizer implements Physi
Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
if (reducer.getType().equals(OperatorType.EXTRACT)) {
- ((VectorExtractOperator)reducer).setKeyAndValueColCounts(keyColCount, valueColCount);
+ ((VectorExtractOperator)reducer).setReduceTypeInfos(reduceTypeInfos);
}
- Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
- reduceWork.setScratchColumnVectorTypes(columnVectorTypes);
- Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
- reduceWork.setScratchColumnMap(columnMap);
+ Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps();
+ reduceWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps);
+ Map<String, Map<String, Integer>> allColumnVectorMaps = vnp.getAllColumnVectorMaps();
+ reduceWork.setAllColumnVectorMaps(allColumnVectorMaps);
if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
- LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+ debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps);
}
}
}
@@ -560,26 +573,26 @@ public class Vectorizer implements Physi
protected final Set<Operator<? extends OperatorDesc>> opsDone =
new HashSet<Operator<? extends OperatorDesc>>();
- public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
- Map<String, Map<Integer, String>> scratchColumnVectorTypes =
+ public Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps() {
+ Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps =
new HashMap<String, Map<Integer, String>>();
for (String onefile : scratchColumnContext.keySet()) {
VectorizationContext vc = scratchColumnContext.get(onefile);
- Map<Integer, String> cmap = vc.getOutputColumnTypeMap();
- scratchColumnVectorTypes.put(onefile, cmap);
+ Map<Integer, String> cmap = vc.getScratchColumnTypeMap();
+ allScratchColumnVectorTypeMaps.put(onefile, cmap);
}
- return scratchColumnVectorTypes;
+ return allScratchColumnVectorTypeMaps;
}
- public Map<String, Map<String, Integer>> getScratchColumnMap() {
- Map<String, Map<String, Integer>> scratchColumnMap =
+ public Map<String, Map<String, Integer>> getAllColumnVectorMaps() {
+ Map<String, Map<String, Integer>> allColumnVectorMaps =
new HashMap<String, Map<String, Integer>>();
for(String oneFile: scratchColumnContext.keySet()) {
VectorizationContext vc = scratchColumnContext.get(oneFile);
- Map<String, Integer> cmap = vc.getColumnMap();
- scratchColumnMap.put(oneFile, cmap);
+ Map<String, Integer> cmap = vc.getProjectionColumnMap();
+ allColumnVectorMaps.put(oneFile, cmap);
}
- return scratchColumnMap;
+ return allColumnVectorMaps;
}
public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack,
@@ -665,10 +678,7 @@ public class Vectorizer implements Physi
vContext.setFileKey(onefile);
scratchColumnContext.put(onefile, vContext);
if (LOG.isDebugEnabled()) {
- LOG.debug("Vectorized MapWork operator " + op.getName() +
- " with vectorization context key=" + vContext.getFileKey() +
- ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
- ", columnMap: " + vContext.getColumnMap().toString());
+ LOG.debug("Vectorized MapWork operator " + op.getName() + " vectorization context " + vContext.toString());
}
break;
}
@@ -699,17 +709,11 @@ public class Vectorizer implements Physi
Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
if (LOG.isDebugEnabled()) {
- LOG.debug("Vectorized MapWork operator " + vectorOp.getName() +
- " with vectorization context key=" + vContext.getFileKey() +
- ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
- ", columnMap: " + vContext.getColumnMap().toString());
+ LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString());
if (vectorOp instanceof VectorizationContextRegion) {
VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
- LOG.debug("Vectorized MapWork operator " + vectorOp.getName() +
- " added new vectorization context key=" + vOutContext.getFileKey() +
- ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() +
- ", columnMap: " + vOutContext.getColumnMap().toString());
+ LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vOutContext.toString()); // report the operator's new output context, not the input context logged above
}
}
@@ -719,10 +723,7 @@ public class Vectorizer implements Physi
class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
- private final ReduceWork rWork;
- private int keyColCount;
- private int valueColCount;
- private Map<String, Integer> reduceColumnNameMap;
+ private List<String> reduceColumnNames;
private VectorizationContext reduceShuffleVectorizationContext;
@@ -732,12 +733,8 @@ public class Vectorizer implements Physi
return rootVectorOp;
}
- public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount,
- int valueColCount) {
- this.rWork = rWork;
- reduceColumnNameMap = rWork.getReduceColumnNameMap();
- this.keyColCount = keyColCount;
- this.valueColCount = valueColCount;
+ public ReduceWorkVectorizationNodeProcessor(List<String> reduceColumnNames) {
+ this.reduceColumnNames = reduceColumnNames;
rootVectorOp = null;
reduceShuffleVectorizationContext = null;
}
@@ -755,17 +752,16 @@ public class Vectorizer implements Physi
boolean saveRootVectorOp = false;
if (op.getParentOperators().size() == 0) {
- vContext = getReduceVectorizationContext(reduceColumnNameMap);
+ LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString());
+
+ vContext = new VectorizationContext(reduceColumnNames);
vContext.setFileKey("_REDUCE_SHUFFLE_");
scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext);
reduceShuffleVectorizationContext = vContext;
saveRootVectorOp = true;
if (LOG.isDebugEnabled()) {
- LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context key=" +
- vContext.getFileKey() +
- ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
- ", columnMap: " + vContext.getColumnMap().toString());
+ LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString());
}
} else {
vContext = walkStackToFindVectorizationContext(stack, op);
@@ -791,17 +787,11 @@ public class Vectorizer implements Physi
Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
if (LOG.isDebugEnabled()) {
- LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() +
- " with vectorization context key=" + vContext.getFileKey() +
- ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() +
- ", columnMap: " + vContext.getColumnMap().toString());
+ LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString());
if (vectorOp instanceof VectorizationContextRegion) {
VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
- LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() +
- " added new vectorization context key=" + vOutContext.getFileKey() +
- ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() +
- ", columnMap: " + vOutContext.getColumnMap().toString());
+ LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vOutContext.toString()); // report the operator's new output context, not the input context logged above
}
}
if (vectorOp instanceof VectorGroupByOperator) {
@@ -819,7 +809,7 @@ public class Vectorizer implements Physi
private static class ValidatorVectorizationContext extends VectorizationContext {
private ValidatorVectorizationContext() {
- super(null, -1);
+ super();
}
@Override
@@ -1222,21 +1212,17 @@ public class Vectorizer implements Physi
PhysicalContext pctx) {
RowSchema rs = op.getSchema();
- Map<String, Integer> cmap = new HashMap<String, Integer>();
- int columnCount = 0;
+ // Add all non-virtual columns to make a vectorization context for
+ // the TableScan operator.
+ VectorizationContext vContext = new VectorizationContext();
for (ColumnInfo c : rs.getSignature()) {
// Earlier, validation code should have eliminated virtual columns usage (HIVE-5560).
if (!isVirtualColumn(c)) {
- cmap.put(c.getInternalName(), columnCount++);
+ vContext.addInitialColumn(c.getInternalName());
}
}
-
- return new VectorizationContext(cmap, columnCount);
- }
-
- private VectorizationContext getReduceVectorizationContext(
- Map<String, Integer> reduceColumnNameMap) {
- return new VectorizationContext(reduceColumnNameMap, reduceColumnNameMap.size());
+ vContext.finishedAddingInitialColumns();
+ return vContext;
}
private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
@@ -1292,4 +1278,41 @@ public class Vectorizer implements Physi
}
return false;
}
+
+ public void debugDisplayAllMaps(Map<String, Map<String, Integer>> allColumnVectorMaps,
+ Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps) {
+ // Debug-only helper: logs every context's column map and scratch column type map, shortest context key first.
+ // Context keys grow in length since they are a path; equal-length keys are ordered by content so none is dropped.
+ Comparator<String> comparerShorterString = new Comparator<String>() {
+ @Override
+ public int compare(String o1, String o2) {
+ Integer length1 = o1.length();
+ Integer length2 = o2.length();
+ return length1.equals(length2) ? o1.compareTo(o2) : length1.compareTo(length2);
+ }};
+
+ Comparator<Integer> comparerInteger = new Comparator<Integer>() {
+ @Override
+ public int compare(Integer o1, Integer o2) {
+ return o1.compareTo(o2);
+ }};
+
+ Map<String, Map<Integer, String>> sortedAllColumnVectorMaps = new TreeMap<String, Map<Integer, String>>(comparerShorterString);
+ for (Map.Entry<String, Map<String, Integer>> entry : allColumnVectorMaps.entrySet()) {
+ Map<Integer, String> sortedColumnMap = new TreeMap<Integer, String>(comparerInteger);
+ for (Map.Entry<String, Integer> innerEntry : entry.getValue().entrySet()) {
+ sortedColumnMap.put(innerEntry.getValue(), innerEntry.getKey());
+ }
+ sortedAllColumnVectorMaps.put(entry.getKey(), sortedColumnMap);
+ }
+ LOG.debug("sortedAllColumnVectorMaps " + sortedAllColumnVectorMaps);
+
+ Map<String, Map<Integer, String>> sortedAllScratchColumnVectorTypeMap = new TreeMap<String, Map<Integer, String>>(comparerShorterString);
+ for (Map.Entry<String, Map<Integer, String>> entry : allScratchColumnVectorTypeMaps.entrySet()) {
+ Map<Integer, String> sortedScratchColumnTypeMap = new TreeMap<Integer, String>(comparerInteger);
+ sortedScratchColumnTypeMap.putAll(entry.getValue());
+ sortedAllScratchColumnVectorTypeMap.put(entry.getKey(), sortedScratchColumnTypeMap);
+ }
+ LOG.debug("sortedAllScratchColumnVectorTypeMap " + sortedAllScratchColumnVectorTypeMap);
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Wed Oct 29 15:14:06 2014
@@ -56,8 +56,8 @@ public abstract class BaseWork extends A
private String name;
// Vectorization.
- protected Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
- protected Map<String, Map<String, Integer>> scratchColumnMap = null;
+ protected Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = null;
+ protected Map<String, Map<String, Integer>> allColumnVectorMaps = null;
protected boolean vectorMode = false;
public void setGatheringStats(boolean gatherStats) {
@@ -115,21 +115,21 @@ public abstract class BaseWork extends A
return returnSet;
}
- public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
- return scratchColumnVectorTypes;
+ public Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps() {
+ return allScratchColumnVectorTypeMaps;
}
- public void setScratchColumnVectorTypes(
- Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
- this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+ public void setAllScratchColumnVectorTypeMaps(
+ Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps) {
+ this.allScratchColumnVectorTypeMaps = allScratchColumnVectorTypeMaps;
}
- public Map<String, Map<String, Integer>> getScratchColumnMap() {
- return scratchColumnMap;
+ public Map<String, Map<String, Integer>> getAllColumnVectorMaps() {
+ return allColumnVectorMaps;
}
- public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
- this.scratchColumnMap = scratchColumnMap;
+ public void setAllColumnVectorMaps(Map<String, Map<String, Integer>> allColumnVectorMaps) {
+ this.allColumnVectorMaps = allColumnVectorMaps;
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Wed Oct 29 15:14:06 2014
@@ -95,8 +95,6 @@ public class ReduceWork extends BaseWork
private ObjectInspector keyObjectInspector = null;
private ObjectInspector valueObjectInspector = null;
- private final Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
-
/**
* If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
* to keySerializeInfo of the ReduceSink
@@ -142,58 +140,6 @@ public class ReduceWork extends BaseWork
return valueObjectInspector;
}
- private int addToReduceColumnNameMap(StructObjectInspector structObjectInspector, int startIndex, String prefix) {
- List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
- int index = startIndex;
- for (StructField field: fields) {
- reduceColumnNameMap.put(prefix + "." + field.getFieldName(), index);
- index++;
- }
- return index;
- }
-
- public Boolean fillInReduceColumnNameMap() {
- ObjectInspector keyObjectInspector = getKeyObjectInspector();
- if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
- return false;
- }
- StructObjectInspector keyStructObjectInspector = (StructObjectInspector) keyObjectInspector;
-
- ObjectInspector valueObjectInspector = getValueObjectInspector();
- if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
- return false;
- }
- StructObjectInspector valueStructObjectInspector = (StructObjectInspector) valueObjectInspector;
-
- int keyCount = addToReduceColumnNameMap(keyStructObjectInspector, 0, Utilities.ReduceField.KEY.toString());
- addToReduceColumnNameMap(valueStructObjectInspector, keyCount, Utilities.ReduceField.VALUE.toString());
- return true;
- }
-
- public Map<String, Integer> getReduceColumnNameMap() {
- if (needsTagging) {
- return null;
- }
- if (reduceColumnNameMap.size() == 0) {
- if (!fillInReduceColumnNameMap()) {
- return null;
- }
- }
- return reduceColumnNameMap;
- }
-
- public List<String> getReduceColumnNames() {
- if (needsTagging) {
- return null;
- }
- if (reduceColumnNameMap.size() == 0) {
- if (!fillInReduceColumnNameMap()) {
- return null;
- }
- }
- return new ArrayList<String>(reduceColumnNameMap.keySet());
- }
-
public List<TableDesc> getTagToValueDesc() {
return tagToValueDesc;
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java Wed Oct 29 15:14:06 2014
@@ -18,7 +18,9 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import junit.framework.Assert;
@@ -83,9 +85,9 @@ public class TestVectorFilterOperator {
private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
- Map<String, Integer> columnMap = new HashMap<String, Integer>();
- columnMap.put("col1", 1);
- VectorizationContext vc = new VectorizationContext(columnMap, 1);
+ List<String> columns = new ArrayList<String>();
+ columns.add("col1");
+ VectorizationContext vc = new VectorizationContext(columns);
FilterDesc fdesc = new FilterDesc();
fdesc.setPredicate(col1Expr);
return new VectorFilterOperator(vc, fdesc);
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java Wed Oct 29 15:14:06 2014
@@ -170,10 +170,10 @@ public class TestVectorGroupByOperator {
@Test
public void testMemoryPressureFlush() throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("Key", 0);
- mapColumnNames.put("Value", 1);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("Key");
+ mapColumnNames.add("Value");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildKeyGroupByDesc (ctx, "max",
"Value", TypeInfoFactory.longTypeInfo,
@@ -1710,7 +1710,7 @@ public class TestVectorGroupByOperator {
mapColumnNames.put("value", i);
outputColumnNames.add("value");
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, i+1);
+ VectorizationContext ctx = new VectorizationContext(outputColumnNames);
ArrayList<AggregationDesc> aggs = new ArrayList(1);
aggs.add(
@@ -1818,10 +1818,10 @@ public class TestVectorGroupByOperator {
FakeVectorRowBatchFromObjectIterables data,
Map<Object, Object> expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("Key", 0);
- mapColumnNames.put("Value", 1);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("Key");
+ mapColumnNames.add("Value");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
Set<Object> keys = new HashSet<Object>();
AggregationDesc agg = buildAggregationDesc(ctx, aggregateName,
@@ -2233,9 +2233,9 @@ public class TestVectorGroupByOperator {
public void testAggregateCountStarIterable (
Iterable<VectorizedRowBatch> data,
Object expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("A", 0);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("A");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildGroupByDescCountStar (ctx);
@@ -2262,9 +2262,9 @@ public class TestVectorGroupByOperator {
public void testAggregateCountReduceIterable (
Iterable<VectorizedRowBatch> data,
Object expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("A", 0);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("A");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo);
VectorGroupByDesc vectorDesc = desc.getVectorDesc();
@@ -2294,9 +2294,9 @@ public class TestVectorGroupByOperator {
String aggregateName,
Iterable<VectorizedRowBatch> data,
Object expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("A", 0);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("A");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A",
TypeInfoFactory.stringTypeInfo);
@@ -2325,9 +2325,9 @@ public class TestVectorGroupByOperator {
String aggregateName,
Iterable<VectorizedRowBatch> data,
Object expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("A", 0);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("A");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A",
TypeInfoFactory.getDecimalTypeInfo(30, 4));
@@ -2357,9 +2357,9 @@ public class TestVectorGroupByOperator {
String aggregateName,
Iterable<VectorizedRowBatch> data,
Object expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("A", 0);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("A");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A",
TypeInfoFactory.doubleTypeInfo);
@@ -2388,9 +2388,9 @@ public class TestVectorGroupByOperator {
String aggregateName,
Iterable<VectorizedRowBatch> data,
Object expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("A", 0);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("A");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.longTypeInfo);
@@ -2418,10 +2418,11 @@ public class TestVectorGroupByOperator {
String aggregateName,
Iterable<VectorizedRowBatch> data,
HashMap<Object,Object> expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("Key", 0);
- mapColumnNames.put("Value", 1);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("Key");
+ mapColumnNames.add("Value");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
+
Set<Object> keys = new HashSet<Object>();
GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
@@ -2484,10 +2485,10 @@ public class TestVectorGroupByOperator {
Iterable<VectorizedRowBatch> data,
TypeInfo dataTypeInfo,
HashMap<Object,Object> expected) throws HiveException {
- Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
- mapColumnNames.put("Key", 0);
- mapColumnNames.put("Value", 1);
- VectorizationContext ctx = new VectorizationContext(mapColumnNames, 2);
+ List<String> mapColumnNames = new ArrayList<String>();
+ mapColumnNames.add("Key");
+ mapColumnNames.add("Value");
+ VectorizationContext ctx = new VectorizationContext(mapColumnNames);
Set<Object> keys = new HashSet<Object>();
GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value",
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java?rev=1635171&r1=1635170&r2=1635171&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java Wed Oct 29 15:14:06 2014
@@ -84,9 +84,11 @@ public class TestVectorSelectOperator {
@Test
public void testSelectOperator() throws HiveException {
- Map<String, Integer> columnMap = new HashMap<String, Integer>();
- columnMap.put("a", 0); columnMap.put("b", 1); columnMap.put("c", 2);
- VectorizationContext vc = new VectorizationContext(columnMap, 3);
+ List<String> columns = new ArrayList<String>();
+ columns.add("a");
+ columns.add("b");
+ columns.add("c");
+ VectorizationContext vc = new VectorizationContext(columns);
SelectDesc selDesc = new SelectDesc(false);
List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();