You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/08/26 20:37:37 UTC

svn commit: r1620688 - in /hive/branches/spark: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/ ql/src/test/results/clientpositive/spark/

Author: brock
Date: Tue Aug 26 18:37:37 2014
New Revision: 1620688

URL: http://svn.apache.org/r1620688
Log:
HIVE-7844 - optimize_nullscan.q fails due to differences in explain plan [Spark Branch] (Venki Korukanti via Brock)

Added:
    hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
Modified:
    hive/branches/spark/itests/src/test/resources/testconfiguration.properties
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java

Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1620688&r1=1620687&r2=1620688&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Tue Aug 26 18:37:37 2014
@@ -373,6 +373,7 @@ spark.query.files=alter_merge_orc.q \
   merge1.q \
   merge2.q \
   metadata_only_queries.q \
+  optimize_nullscan.q \
   order.q \
   order2.q \
   ptf_decimal.q \

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java?rev=1620688&r1=1620687&r2=1620688&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java Tue Aug 26 18:37:37 2014
@@ -19,9 +19,10 @@
 package org.apache.hadoop.hive.ql.exec.spark;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.List;
 
-import org.apache.hadoop.conf.Configuration;
+import com.google.common.collect.Lists;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.DriverContext;
@@ -34,8 +35,10 @@ import org.apache.hadoop.hive.ql.exec.sp
 import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.UnionWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.mapred.JobConf;
@@ -65,9 +68,9 @@ public class SparkTask extends Task<Spar
       
      // If Spark configurations have been updated, close the existing session
       if(conf.getSparkConfigUpdated()){
-	sparkSessionManager.closeSession(sparkSession);
-	sparkSession =  null;
-	conf.setSparkConfigUpdated(false);
+        sparkSessionManager.closeSession(sparkSession);
+        sparkSession =  null;
+        conf.setSparkConfigUpdated(false);
       }
       sparkSession = sparkSessionManager.getSession(sparkSession, conf, true);
       SessionState.get().setSparkSession(sparkSession);
@@ -128,6 +131,30 @@ public class SparkTask extends Task<Spar
     return "SPARK";
   }
 
+  @Override
+  public Collection<MapWork> getMapWork() {
+    List<MapWork> result = Lists.newArrayList();
+    SparkWork work = getWork();
+
+    // framework expects MapWork instances that have no physical parents (i.e.: union parent is
+    // fine, broadcast parent isn't)
+    for (BaseWork w: work.getAllWorkUnsorted()) {
+      if (w instanceof MapWork) {
+        List<BaseWork> parents = work.getParents(w);
+        boolean candidate = true;
+        for (BaseWork parent: parents) {
+          if (!(parent instanceof UnionWork)) {
+            candidate = false;
+          }
+        }
+        if (candidate) {
+          result.add((MapWork)w);
+        }
+      }
+    }
+    return result;
+  }
+
   /**
    * Set the number of reducers for the spark work.
    */

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1620688&r1=1620687&r2=1620688&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Tue Aug 26 18:37:37 2014
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.lib.Rul
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
+import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
@@ -245,17 +246,29 @@ public class SparkCompiler extends TaskC
     PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
        pCtx.getFetchTask());
 
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
+      physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping null scan query optimization");
+    }
+
     if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
       physicalCtx = new CrossProductCheck().resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping cross product analysis");
     }
 
     if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
       (new Vectorizer()).resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping vectorization");
     }
+
     if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
       (new StageIDsRearranger()).resolve(physicalCtx);
+    } else {
+      LOG.debug("Skipping stage id rearranger");
     }
     return;
   }
-
 }

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out?rev=1620688&view=auto
==============================================================================
Files hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out (added) and hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out Tue Aug 26 18:37:37 2014 differ