Posted to commits@hive.apache.org by gu...@apache.org on 2013/10/01 08:47:16 UTC

svn commit: r1527937 - in /hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql: optimizer/physical/Vectorizer.java parse/TezCompiler.java

Author: gunther
Date: Tue Oct  1 06:47:15 2013
New Revision: 1527937

URL: http://svn.apache.org/r1527937
Log:
HIVE-5409: Enable vectorization for Tez (Gunther Hagleitner)
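
For context: the change refactors Vectorizer so that validation and vectorization operate on a MapWork instead of a MapRedTask, which lets the same code handle the MapWork vertices of a Tez DAG, and it hooks the Vectorizer into TezCompiler.optimizeTaskPlan behind the existing vectorization switch. A minimal sketch of flipping that switch programmatically, assuming the standard HiveConf accessors (this snippet is not part of the commit):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class EnableVectorizationExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // With this commit, TezCompiler.optimizeTaskPlan runs the Vectorizer
        // physical resolver whenever this flag is true.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
        System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED));
      }
    }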

Modified:
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1527937&r1=1527936&r2=1527937&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Oct  1 06:47:15 2013
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -62,11 +63,13 @@ import org.apache.hadoop.hive.ql.parse.R
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TezWork;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
@@ -196,16 +199,26 @@ public class Vectorizer implements Physi
         throws SemanticException {
       Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
       if (currTask instanceof MapRedTask) {
-        boolean ret = validateMRTask((MapRedTask) currTask);
-        if (ret) {
-          vectorizeMRTask((MapRedTask) currTask);
+        convertMapWork(((MapRedTask) currTask).getWork().getMapWork());
+      } else if (currTask instanceof TezTask) {
+        TezWork work = ((TezTask) currTask).getWork();
+        for (BaseWork w: work.getAllWork()) {
+          if (w instanceof MapWork) {
+            convertMapWork((MapWork)w);
+          }
         }
       }
       return null;
     }
 
-    private boolean validateMRTask(MapRedTask mrTask) throws SemanticException {
-      MapWork mapWork = mrTask.getWork().getMapWork();
+    private void convertMapWork(MapWork mapWork) throws SemanticException {
+      boolean ret = validateMapWork(mapWork);
+      if (ret) {
+        vectorizeMapWork(mapWork);
+      }
+    }
+
+    private boolean validateMapWork(MapWork mapWork) throws SemanticException {
 
       // Validate the input format
       for (String path : mapWork.getPathToPartitionInfo().keySet()) {
@@ -243,12 +256,11 @@ public class Vectorizer implements Physi
       return true;
     }
 
-    private void vectorizeMRTask(MapRedTask mrTask) throws SemanticException {
+    private void vectorizeMapWork(MapWork mapWork) throws SemanticException {
       System.err.println("Going down the vectorized path");
-      MapWork mapWork = mrTask.getWork().getMapWork();
       mapWork.setVectorMode(true);
       Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-      VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mrTask);
+      VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mapWork);
       opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" +
           ReduceSinkOperator.getOperatorName()), vnp);
       opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
@@ -298,8 +310,8 @@ public class Vectorizer implements Physi
     private final Set<Operator<? extends OperatorDesc>> opsDone =
         new HashSet<Operator<? extends OperatorDesc>>();
 
-    public VectorizationNodeProcessor(MapRedTask mrTask) {
-      this.mWork = mrTask.getWork().getMapWork();
+    public VectorizationNodeProcessor(MapWork mWork) {
+      this.mWork = mWork;
     }
 
     public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {

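The key structural difference on the Tez side is that a TezTask wraps a TezWork DAG with potentially many vertices, so the resolver now walks getAllWork() and only hands MapWork vertices to convertMapWork. A condensed sketch of the new dispatch, restated from the hunks above (method bodies abridged, not a standalone class):

    // inside Vectorizer's dispatch over root tasks (abridged from the diff)
    if (currTask instanceof MapRedTask) {
      // MapReduce plans carry exactly one MapWork
      convertMapWork(((MapRedTask) currTask).getWork().getMapWork());
    } else if (currTask instanceof TezTask) {
      // Tez plans are DAGs; vectorize each map vertex independently
      TezWork work = ((TezTask) currTask).getWork();
      for (BaseWork w : work.getAllWork()) {
        if (w instanceof MapWork) {
          convertMapWork((MapWork) w);
        }
      }
    }

    private void convertMapWork(MapWork mapWork) throws SemanticException {
      // validate the map-side plan first; vectorize only if all checks pass
      if (validateMapWork(mapWork)) {
        vectorizeMapWork(mapWork);
      }
    }
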
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1527937&r1=1527936&r2=1527937&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Tue Oct  1 06:47:15 2013
@@ -29,6 +29,7 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -50,6 +51,8 @@ import org.apache.hadoop.hive.ql.lib.Rul
 import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
 import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
 import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
@@ -238,7 +241,11 @@ public class TezCompiler extends TaskCom
   @Override
   protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
       Context ctx) throws SemanticException {
-    // no additional optimization needed
+    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
+       pCtx.getFetchTask());
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+      (new Vectorizer()).resolve(physicalCtx);
+    }
     return;
   }
 }