You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/10/31 16:44:57 UTC
svn commit: r1635800 - in
/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec:
Utilities.java vector/VectorGroupByOperator.java
vector/VectorMapJoinOperator.java vector/VectorSMBMapJoinOperator.java
vector/VectorizedRowBatchCtx.java
Author: hashutosh
Date: Fri Oct 31 15:44:57 2014
New Revision: 1635800
URL: http://svn.apache.org/r1635800
Log:
HIVE-8663 : Fetching Vectorization scratch column map in Reduce-Side stop working (Matt McCline via Ashutosh Chauhan)
Modified:
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Oct 31 15:44:57 2014
@@ -429,20 +429,9 @@ public final class Utilities {
}
}
- public static Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
- BaseWork baseWork = getMapWork(hiveConf);
- if (baseWork == null) {
- baseWork = getReduceWork(hiveConf);
- }
- return baseWork.getAllScratchColumnVectorTypeMaps();
- }
-
- public static Map<String, Map<String, Integer>> getAllColumnVectorMaps(Configuration hiveConf) {
- BaseWork baseWork = getMapWork(hiveConf);
- if (baseWork == null) {
- baseWork = getReduceWork(hiveConf);
- }
- return baseWork.getAllColumnVectorMaps();
+ public static Map<String, Map<Integer, String>> getMapWorkAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
+ MapWork mapWork = getMapWork(hiveConf);
+ return mapWork.getAllScratchColumnVectorTypeMaps();
}
public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) {
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java Fri Oct 31 15:44:57 2014
@@ -76,8 +76,6 @@ public class VectorGroupByOperator exten
// Create a new outgoing vectorization context because column name map will change.
private VectorizationContext vOutContext = null;
- private String fileKey;
-
// The above members are initialized by the constructor and must not be
// transient.
//---------------------------------------------------------------------------
@@ -756,7 +754,6 @@ public class VectorGroupByOperator exten
vOutContext = new VectorizationContext(desc.getOutputColumnNames());
vOutContext.setFileKey(vContext.getFileKey() + "/_GROUPBY_");
- fileKey = vOutContext.getFileKey();
}
public VectorGroupByOperator() {
@@ -796,7 +793,7 @@ public class VectorGroupByOperator exten
outputFieldNames, objectInspectors);
if (isVectorOutput) {
vrbCtx = new VectorizedRowBatchCtx();
- vrbCtx.init(hconf, fileKey, (StructObjectInspector) outputObjInspector);
+ vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) outputObjInspector);
outputBatch = vrbCtx.createVectorizedRowBatch();
vectorColumnAssign = VectorColumnAssignFactory.buildAssigners(
outputBatch, outputObjInspector, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java Fri Oct 31 15:44:57 2014
@@ -28,7 +28,6 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
@@ -53,19 +52,16 @@ public class VectorMapJoinOperator exten
*/
private static final long serialVersionUID = 1L;
- /**
- * Vectorizaiton context key
- * Used to retrieve column map from the MapTask scratch
- */
- private String fileKey;
- private int tagLen;
-
private VectorExpression[] keyExpressions;
- private transient VectorHashKeyWrapperBatch keyWrapperBatch;
- private transient VectorExpressionWriter[] keyOutputWriters;
private VectorExpression[] bigTableFilterExpressions;
private VectorExpression[] bigTableValueExpressions;
+
+ private VectorizationContext vOutContext;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
private transient VectorizedRowBatch outputBatch;
private transient VectorExpressionWriter[] valueWriters;
@@ -76,8 +72,9 @@ public class VectorMapJoinOperator exten
//
private transient int batchIndex;
private transient VectorHashKeyWrapper[] keyValues;
-
- private transient VectorizationContext vOutContext = null;
+ private transient VectorHashKeyWrapperBatch keyWrapperBatch;
+ private transient VectorExpressionWriter[] keyOutputWriters;
+
private transient VectorizedRowBatchCtx vrbCtx = null;
public VectorMapJoinOperator() {
@@ -96,7 +93,6 @@ public class VectorMapJoinOperator exten
numAliases = desc.getExprs().size();
posBigTable = (byte) desc.getPosBigTable();
filterMaps = desc.getFilterMap();
- tagLen = desc.getTagLength();
noOuterJoin = desc.isNoOuterJoin();
Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
@@ -113,7 +109,6 @@ public class VectorMapJoinOperator exten
// We are making a new output vectorized row batch.
vOutContext = new VectorizationContext(desc.getOutputColumnNames());
vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias());
- this.fileKey = vOutContext.getFileKey();
}
@Override
@@ -124,7 +119,7 @@ public class VectorMapJoinOperator exten
keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
vrbCtx = new VectorizedRowBatchCtx();
- vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+ vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
outputBatch = vrbCtx.createVectorizedRowBatch();
@@ -193,10 +188,8 @@ public class VectorMapJoinOperator exten
Object[] values = (Object[]) row;
VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
if (null == vcas) {
- Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
- Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
vcas = VectorColumnAssignFactory.buildAssigners(
- outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
+ outputBatch, outputOI, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
outputVectorAssigners.put(outputOI, vcas);
}
for (int i=0; i<values.length; ++i) {
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java Fri Oct 31 15:44:57 2014
@@ -28,7 +28,6 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
@@ -52,14 +51,6 @@ public class VectorSMBMapJoinOperator ex
private static final long serialVersionUID = 1L;
- private int tagLen;
-
- private transient VectorizedRowBatch outputBatch;
- private transient VectorizationContext vOutContext = null;
- private transient VectorizedRowBatchCtx vrbCtx = null;
-
- private String fileKey;
-
private VectorExpression[] bigTableValueExpressions;
private VectorExpression[] bigTableFilterExpressions;
@@ -68,6 +59,16 @@ public class VectorSMBMapJoinOperator ex
private VectorExpressionWriter[] keyOutputWriters;
+ private VectorizationContext vOutContext;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ private transient VectorizedRowBatch outputBatch;
+
+ private transient VectorizedRowBatchCtx vrbCtx = null;
+
private transient VectorHashKeyWrapperBatch keyWrapperBatch;
private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
@@ -98,7 +99,6 @@ public class VectorSMBMapJoinOperator ex
numAliases = desc.getExprs().size();
posBigTable = (byte) desc.getPosBigTable();
filterMaps = desc.getFilterMap();
- tagLen = desc.getTagLength();
noOuterJoin = desc.isNoOuterJoin();
// Must obtain vectorized equivalents for filter and value expressions
@@ -117,7 +117,6 @@ public class VectorSMBMapJoinOperator ex
// We are making a new output vectorized row batch.
vOutContext = new VectorizationContext(desc.getOutputColumnNames());
vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
- this.fileKey = vOutContext.getFileKey();
}
@Override
@@ -135,7 +134,7 @@ public class VectorSMBMapJoinOperator ex
super.initializeOp(hconf);
vrbCtx = new VectorizedRowBatchCtx();
- vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+ vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
outputBatch = vrbCtx.createVectorizedRowBatch();
@@ -272,10 +271,8 @@ public class VectorSMBMapJoinOperator ex
Object[] values = (Object[]) row;
VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
if (null == vcas) {
- Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
- Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
vcas = VectorColumnAssignFactory.buildAssigners(
- outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
+ outputBatch, outputOI, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
outputVectorAssigners.put(outputOI, vcas);
}
for (int i = 0; i < values.length; ++i) {
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Fri Oct 31 15:44:57 2014
@@ -96,7 +96,7 @@ public class VectorizedRowBatchCtx {
// list does not contain partition columns
private List<Integer> colsToInclude;
- private Map<Integer, String> columnTypeMap = null;
+ private Map<Integer, String> scratchColumnTypeMap = null;
/**
* Constructor for VectorizedRowBatchCtx
@@ -126,36 +126,17 @@ public class VectorizedRowBatchCtx {
public VectorizedRowBatchCtx() {
}
-
- /**
- * Initializes the VectorizedRowBatch context based on an arbitrary object inspector
- * Used by non-tablescan operators when they change the vectorization context
- * @param hiveConf
- * @param fileKey
- * The key on which to retrieve the extra column mapping from the map/reduce scratch
- * @param rowOI
- * Object inspector that shapes the column types
- */
- public void init(Configuration hiveConf, String fileKey,
- StructObjectInspector rowOI) {
- Map<String, Map<Integer, String>> scratchColumnVectorTypes =
- Utilities.getAllScratchColumnVectorTypeMaps(hiveConf);
- columnTypeMap = scratchColumnVectorTypes.get(fileKey);
- this.rowOI= rowOI;
- this.rawRowOI = rowOI;
- }
-
/**
* Initializes the VectorizedRowBatch context based on an scratch column type map and
* object inspector.
- * @param columnTypeMap
+ * @param scratchColumnTypeMap
* @param rowOI
* Object inspector that shapes the column types
*/
- public void init(Map<Integer, String> columnTypeMap,
+ public void init(Map<Integer, String> scratchColumnTypeMap,
StructObjectInspector rowOI) {
- this.columnTypeMap = columnTypeMap;
+ this.scratchColumnTypeMap = scratchColumnTypeMap;
this.rowOI= rowOI;
this.rawRowOI = rowOI;
}
@@ -179,7 +160,8 @@ public class VectorizedRowBatchCtx {
IOException,
SerDeException,
InstantiationException,
- IllegalAccessException, HiveException {
+ IllegalAccessException,
+ HiveException {
Map<String, PartitionDesc> pathToPartitionInfo = Utilities
.getMapRedWork(hiveConf).getMapWork().getPathToPartitionInfo();
@@ -189,8 +171,8 @@ public class VectorizedRowBatchCtx {
split.getPath(), IOPrepareCache.get().getPartitionDescMap());
String partitionPath = split.getPath().getParent().toString();
- columnTypeMap = Utilities
- .getAllScratchColumnVectorTypeMaps(hiveConf)
+ scratchColumnTypeMap = Utilities
+ .getMapWorkAllScratchColumnVectorTypeMaps(hiveConf)
.get(partitionPath);
Properties partProps =
@@ -613,12 +595,12 @@ public class VectorizedRowBatchCtx {
}
private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException {
- if (columnTypeMap != null && !columnTypeMap.isEmpty()) {
+ if (scratchColumnTypeMap != null && !scratchColumnTypeMap.isEmpty()) {
int origNumCols = vrb.numCols;
- int newNumCols = vrb.cols.length+columnTypeMap.keySet().size();
+ int newNumCols = vrb.cols.length+scratchColumnTypeMap.keySet().size();
vrb.cols = Arrays.copyOf(vrb.cols, newNumCols);
for (int i = origNumCols; i < newNumCols; i++) {
- String typeName = columnTypeMap.get(i);
+ String typeName = scratchColumnTypeMap.get(i);
if (typeName == null) {
throw new HiveException("No type found for column type entry " + i);
}