You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2013/10/01 08:47:16 UTC
svn commit: r1527937 - in /hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql: optimizer/physical/Vectorizer.java parse/TezCompiler.java
Author: gunther
Date: Tue Oct 1 06:47:15 2013
New Revision: 1527937
URL: http://svn.apache.org/r1527937
Log:
HIVE-5409: Enable vectorization for Tez (Gunther Hagleitner)
Modified:
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1527937&r1=1527936&r2=1527937&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Oct 1 06:47:15 2013
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -62,11 +63,13 @@ import org.apache.hadoop.hive.ql.parse.R
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFHour;
@@ -196,16 +199,26 @@ public class Vectorizer implements Physi
throws SemanticException {
Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
if (currTask instanceof MapRedTask) {
- boolean ret = validateMRTask((MapRedTask) currTask);
- if (ret) {
- vectorizeMRTask((MapRedTask) currTask);
+ convertMapWork(((MapRedTask) currTask).getWork().getMapWork());
+ } else if (currTask instanceof TezTask) {
+ TezWork work = ((TezTask) currTask).getWork();
+ for (BaseWork w: work.getAllWork()) {
+ if (w instanceof MapWork) {
+ convertMapWork((MapWork)w);
+ }
}
}
return null;
}
- private boolean validateMRTask(MapRedTask mrTask) throws SemanticException {
- MapWork mapWork = mrTask.getWork().getMapWork();
+ private void convertMapWork(MapWork mapWork) throws SemanticException {
+ boolean ret = validateMapWork(mapWork);
+ if (ret) {
+ vectorizeMapWork(mapWork);
+ }
+ }
+
+ private boolean validateMapWork(MapWork mapWork) throws SemanticException {
// Validate the input format
for (String path : mapWork.getPathToPartitionInfo().keySet()) {
@@ -243,12 +256,11 @@ public class Vectorizer implements Physi
return true;
}
- private void vectorizeMRTask(MapRedTask mrTask) throws SemanticException {
+ private void vectorizeMapWork(MapWork mapWork) throws SemanticException {
System.err.println("Going down the vectorized path");
- MapWork mapWork = mrTask.getWork().getMapWork();
mapWork.setVectorMode(true);
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
- VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mrTask);
+ VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mapWork);
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" +
ReduceSinkOperator.getOperatorName()), vnp);
opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
@@ -298,8 +310,8 @@ public class Vectorizer implements Physi
private final Set<Operator<? extends OperatorDesc>> opsDone =
new HashSet<Operator<? extends OperatorDesc>>();
- public VectorizationNodeProcessor(MapRedTask mrTask) {
- this.mWork = mrTask.getWork().getMapWork();
+ public VectorizationNodeProcessor(MapWork mWork) {
+ this.mWork = mWork;
}
public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1527937&r1=1527936&r2=1527937&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Tue Oct 1 06:47:15 2013
@@ -29,6 +29,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -50,6 +51,8 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
@@ -238,7 +241,11 @@ public class TezCompiler extends TaskCom
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
Context ctx) throws SemanticException {
- // no additional optimization needed
+ PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
+ pCtx.getFetchTask());
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+ (new Vectorizer()).resolve(physicalCtx);
+ }
return;
}
}