You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/10/31 16:44:57 UTC
svn commit: r1635800 - in /hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec: Utilities.java vector/VectorGroupByOperator.java vector/VectorMapJoinOperator.java vector/VectorSMBMapJoinOperator.java vector/VectorizedRowBatchCtx.java

Author: hashutosh
Date: Fri Oct 31 15:44:57 2014
New Revision: 1635800

URL: http://svn.apache.org/r1635800
Log:
HIVE-8663 : Fetching Vectorization scratch column map in Reduce-Side stops working (Matt McCline via Ashutosh Chauhan)

Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Oct 31 15:44:57 2014
@@ -429,20 +429,9 @@ public final class Utilities {
     }
   }
 
-  public static Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
-    BaseWork baseWork = getMapWork(hiveConf);
-    if (baseWork == null) {
-      baseWork = getReduceWork(hiveConf);
-    }
-    return baseWork.getAllScratchColumnVectorTypeMaps();
-  }
-
-  public static Map<String, Map<String, Integer>> getAllColumnVectorMaps(Configuration hiveConf) {
-    BaseWork baseWork = getMapWork(hiveConf);
-    if (baseWork == null) {
-      baseWork = getReduceWork(hiveConf);
-    }
-    return baseWork.getAllColumnVectorMaps();
+  public static Map<String, Map<Integer, String>> getMapWorkAllScratchColumnVectorTypeMaps(Configuration hiveConf) {
+    MapWork mapWork = getMapWork(hiveConf);
+    return mapWork.getAllScratchColumnVectorTypeMaps();
   }
 
   public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) {

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java Fri Oct 31 15:44:57 2014
@@ -76,8 +76,6 @@ public class VectorGroupByOperator exten
   // Create a new outgoing vectorization context because column name map will change.
   private VectorizationContext vOutContext = null;
 
-  private String fileKey;
-
   // The above members are initialized by the constructor and must not be
   // transient.
   //---------------------------------------------------------------------------
@@ -756,7 +754,6 @@ public class VectorGroupByOperator exten
 
     vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/_GROUPBY_");
-    fileKey = vOutContext.getFileKey();
   }
 
   public VectorGroupByOperator() {
@@ -796,7 +793,7 @@ public class VectorGroupByOperator exten
           outputFieldNames, objectInspectors);
       if (isVectorOutput) {
           vrbCtx = new VectorizedRowBatchCtx();
-          vrbCtx.init(hconf, fileKey, (StructObjectInspector) outputObjInspector);
+          vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) outputObjInspector);
           outputBatch = vrbCtx.createVectorizedRowBatch();
           vectorColumnAssign = VectorColumnAssignFactory.buildAssigners(
               outputBatch, outputObjInspector, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java Fri Oct 31 15:44:57 2014
@@ -28,7 +28,6 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
@@ -53,19 +52,16 @@ public class VectorMapJoinOperator exten
    */
   private static final long serialVersionUID = 1L;
 
-  /**
-   * Vectorizaiton context key
-   * Used to retrieve column map from the MapTask scratch
-   */
-  private String fileKey;
-  private int tagLen;
-
   private VectorExpression[] keyExpressions;
-  private transient VectorHashKeyWrapperBatch keyWrapperBatch;
-  private transient VectorExpressionWriter[] keyOutputWriters;
 
   private VectorExpression[] bigTableFilterExpressions;
   private VectorExpression[] bigTableValueExpressions;
+  
+  private VectorizationContext vOutContext;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
 
   private transient VectorizedRowBatch outputBatch;
   private transient VectorExpressionWriter[] valueWriters;
@@ -76,8 +72,9 @@ public class VectorMapJoinOperator exten
   //
   private transient int batchIndex;
   private transient VectorHashKeyWrapper[] keyValues;
-  
-  private transient VectorizationContext vOutContext = null;
+  private transient VectorHashKeyWrapperBatch keyWrapperBatch;
+  private transient VectorExpressionWriter[] keyOutputWriters;
+
   private transient VectorizedRowBatchCtx vrbCtx = null;
   
   public VectorMapJoinOperator() {
@@ -96,7 +93,6 @@ public class VectorMapJoinOperator exten
     numAliases = desc.getExprs().size();
     posBigTable = (byte) desc.getPosBigTable();
     filterMaps = desc.getFilterMap();
-    tagLen = desc.getTagLength();
     noOuterJoin = desc.isNoOuterJoin();
 
     Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
@@ -113,7 +109,6 @@ public class VectorMapJoinOperator exten
     // We are making a new output vectorized row batch.
     vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias());
-    this.fileKey = vOutContext.getFileKey();
   }
 
   @Override
@@ -124,7 +119,7 @@ public class VectorMapJoinOperator exten
     keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
 
     vrbCtx = new VectorizedRowBatchCtx();
-    vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
 
     outputBatch = vrbCtx.createVectorizedRowBatch();
 
@@ -193,10 +188,8 @@ public class VectorMapJoinOperator exten
     Object[] values = (Object[]) row;
     VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
     if (null == vcas) {
-      Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
-      Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
       vcas = VectorColumnAssignFactory.buildAssigners(
-          outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
+          outputBatch, outputOI, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
       outputVectorAssigners.put(outputOI, vcas);
     }
     for (int i=0; i<values.length; ++i) {

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java Fri Oct 31 15:44:57 2014
@@ -28,7 +28,6 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
@@ -52,14 +51,6 @@ public class VectorSMBMapJoinOperator ex
   
   private static final long serialVersionUID = 1L;
 
-  private int tagLen;
-  
-  private transient VectorizedRowBatch outputBatch;  
-  private transient VectorizationContext vOutContext = null;
-  private transient VectorizedRowBatchCtx vrbCtx = null;  
-  
-  private String fileKey;
-
   private VectorExpression[] bigTableValueExpressions;
 
   private VectorExpression[] bigTableFilterExpressions;
@@ -68,6 +59,16 @@ public class VectorSMBMapJoinOperator ex
 
   private VectorExpressionWriter[] keyOutputWriters;
 
+  private VectorizationContext vOutContext;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorizedRowBatch outputBatch;  
+
+  private transient VectorizedRowBatchCtx vrbCtx = null;
+
   private transient VectorHashKeyWrapperBatch keyWrapperBatch;
 
   private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
@@ -98,7 +99,6 @@ public class VectorSMBMapJoinOperator ex
     numAliases = desc.getExprs().size();
     posBigTable = (byte) desc.getPosBigTable();
     filterMaps = desc.getFilterMap();
-    tagLen = desc.getTagLength();
     noOuterJoin = desc.isNoOuterJoin();
 
     // Must obtain vectorized equivalents for filter and value expressions
@@ -117,7 +117,6 @@ public class VectorSMBMapJoinOperator ex
     // We are making a new output vectorized row batch.
     vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
-    this.fileKey = vOutContext.getFileKey();
   }
   
   @Override
@@ -135,7 +134,7 @@ public class VectorSMBMapJoinOperator ex
     super.initializeOp(hconf);
 
     vrbCtx = new VectorizedRowBatchCtx();
-    vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
     
     outputBatch = vrbCtx.createVectorizedRowBatch();
     
@@ -272,10 +271,8 @@ public class VectorSMBMapJoinOperator ex
     Object[] values = (Object[]) row;
     VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
     if (null == vcas) {
-      Map<String, Map<String, Integer>> allColumnMaps = Utilities.getAllColumnVectorMaps(hconf);
-      Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
       vcas = VectorColumnAssignFactory.buildAssigners(
-          outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
+          outputBatch, outputOI, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
       outputVectorAssigners.put(outputOI, vcas);
     }
     for (int i = 0; i < values.length; ++i) {

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1635800&r1=1635799&r2=1635800&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Fri Oct 31 15:44:57 2014
@@ -96,7 +96,7 @@ public class VectorizedRowBatchCtx {
   // list does not contain partition columns
   private List<Integer> colsToInclude;
 
-  private Map<Integer, String> columnTypeMap = null;
+  private Map<Integer, String> scratchColumnTypeMap = null;
 
   /**
    * Constructor for VectorizedRowBatchCtx
@@ -126,36 +126,17 @@ public class VectorizedRowBatchCtx {
   public VectorizedRowBatchCtx() {
 
   }
-  
-  /**
-   * Initializes the VectorizedRowBatch context based on an arbitrary object inspector
-   * Used by non-tablescan operators when they change the vectorization context 
-   * @param hiveConf
-   * @param fileKey 
-   *          The key on which to retrieve the extra column mapping from the map/reduce scratch
-   * @param rowOI
-   *          Object inspector that shapes the column types
-   */
-  public void init(Configuration hiveConf, String fileKey,
-      StructObjectInspector rowOI) {
-    Map<String, Map<Integer, String>> scratchColumnVectorTypes =
-            Utilities.getAllScratchColumnVectorTypeMaps(hiveConf);
-    columnTypeMap = scratchColumnVectorTypes.get(fileKey);
-    this.rowOI= rowOI;
-    this.rawRowOI = rowOI;
-  }
-  
 
   /**
    * Initializes the VectorizedRowBatch context based on an scratch column type map and
    * object inspector.
-   * @param columnTypeMap
+   * @param scratchColumnTypeMap
    * @param rowOI
    *          Object inspector that shapes the column types
    */
-  public void init(Map<Integer, String> columnTypeMap,
+  public void init(Map<Integer, String> scratchColumnTypeMap,
       StructObjectInspector rowOI) {
-    this.columnTypeMap = columnTypeMap;
+    this.scratchColumnTypeMap = scratchColumnTypeMap;
     this.rowOI= rowOI;
     this.rawRowOI = rowOI;
   }
@@ -179,7 +160,8 @@ public class VectorizedRowBatchCtx {
       IOException,
       SerDeException,
       InstantiationException,
-      IllegalAccessException, HiveException {
+      IllegalAccessException,
+      HiveException {
 
     Map<String, PartitionDesc> pathToPartitionInfo = Utilities
         .getMapRedWork(hiveConf).getMapWork().getPathToPartitionInfo();
@@ -189,8 +171,8 @@ public class VectorizedRowBatchCtx {
             split.getPath(), IOPrepareCache.get().getPartitionDescMap());
 
     String partitionPath = split.getPath().getParent().toString();
-    columnTypeMap = Utilities
-        .getAllScratchColumnVectorTypeMaps(hiveConf)
+    scratchColumnTypeMap = Utilities
+        .getMapWorkAllScratchColumnVectorTypeMaps(hiveConf)
         .get(partitionPath);
 
     Properties partProps =
@@ -613,12 +595,12 @@ public class VectorizedRowBatchCtx {
   }
 
   private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException {
-    if (columnTypeMap != null && !columnTypeMap.isEmpty()) {
+    if (scratchColumnTypeMap != null && !scratchColumnTypeMap.isEmpty()) {
       int origNumCols = vrb.numCols;
-      int newNumCols = vrb.cols.length+columnTypeMap.keySet().size();
+      int newNumCols = vrb.cols.length+scratchColumnTypeMap.keySet().size();
       vrb.cols = Arrays.copyOf(vrb.cols, newNumCols);
       for (int i = origNumCols; i < newNumCols; i++) {
-       String typeName = columnTypeMap.get(i);
+       String typeName = scratchColumnTypeMap.get(i);
        if (typeName == null) {
          throw new HiveException("No type found for column type entry " + i);
        }