You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/08/26 20:37:37 UTC
svn commit: r1620688 - in /hive/branches/spark: itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/exec/spark/
ql/src/java/org/apache/hadoop/hive/ql/parse/spark/
ql/src/test/results/clientpositive/spark/
Author: brock
Date: Tue Aug 26 18:37:37 2014
New Revision: 1620688
URL: http://svn.apache.org/r1620688
Log:
HIVE-7844 - optimize_nullscan.q fails due to differences in explain plan [Spark Branch] (Venki Korukanti via Brock)
Added:
hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
Modified:
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1620688&r1=1620687&r2=1620688&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Tue Aug 26 18:37:37 2014
@@ -373,6 +373,7 @@ spark.query.files=alter_merge_orc.q \
merge1.q \
merge2.q \
metadata_only_queries.q \
+ optimize_nullscan.q \
order.q \
order2.q \
ptf_decimal.q \
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java?rev=1620688&r1=1620687&r2=1620688&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java Tue Aug 26 18:37:37 2014
@@ -19,9 +19,10 @@
package org.apache.hadoop.hive.ql.exec.spark;
import java.io.IOException;
+import java.util.Collection;
import java.util.List;
-import org.apache.hadoop.conf.Configuration;
+import com.google.common.collect.Lists;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.DriverContext;
@@ -34,8 +35,10 @@ import org.apache.hadoop.hive.ql.exec.sp
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.UnionWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.JobConf;
@@ -65,9 +68,9 @@ public class SparkTask extends Task<Spar
// Spark configurations are updated, so close the existing session
if(conf.getSparkConfigUpdated()){
- sparkSessionManager.closeSession(sparkSession);
- sparkSession = null;
- conf.setSparkConfigUpdated(false);
+ sparkSessionManager.closeSession(sparkSession);
+ sparkSession = null;
+ conf.setSparkConfigUpdated(false);
}
sparkSession = sparkSessionManager.getSession(sparkSession, conf, true);
SessionState.get().setSparkSession(sparkSession);
@@ -128,6 +131,30 @@ public class SparkTask extends Task<Spar
return "SPARK";
}
+ @Override
+ public Collection<MapWork> getMapWork() {
+ List<MapWork> result = Lists.newArrayList();
+ SparkWork work = getWork();
+
+ // framework expects MapWork instances that have no physical parents (i.e.: union parent is
+ // fine, broadcast parent isn't)
+ for (BaseWork w: work.getAllWorkUnsorted()) {
+ if (w instanceof MapWork) {
+ List<BaseWork> parents = work.getParents(w);
+ boolean candidate = true;
+ for (BaseWork parent: parents) {
+ if (!(parent instanceof UnionWork)) {
+ candidate = false;
+ }
+ }
+ if (candidate) {
+ result.add((MapWork)w);
+ }
+ }
+ }
+ return result;
+ }
+
/**
* Set the number of reducers for the spark work.
*/
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1620688&r1=1620687&r2=1620688&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Tue Aug 26 18:37:37 2014
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
+import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
@@ -245,17 +246,29 @@ public class SparkCompiler extends TaskC
PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
pCtx.getFetchTask());
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
+ physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
+ } else {
+ LOG.debug("Skipping null scan query optimization");
+ }
+
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
physicalCtx = new CrossProductCheck().resolve(physicalCtx);
+ } else {
+ LOG.debug("Skipping cross product analysis");
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
(new Vectorizer()).resolve(physicalCtx);
+ } else {
+ LOG.debug("Skipping vectorization");
}
+
if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
(new StageIDsRearranger()).resolve(physicalCtx);
+ } else {
+ LOG.debug("Skipping stage id rearranger");
}
return;
}
-
}
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out?rev=1620688&view=auto
==============================================================================
Files hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out (added) and hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out Tue Aug 26 18:37:37 2014 differ