Posted to commits@spark.apache.org by ma...@apache.org on 2016/01/12 20:09:27 UTC

spark git commit: [SPARK-9843][SQL] Make catalyst optimizer pass pluggable at runtime

Repository: spark
Updated Branches:
  refs/heads/master 1d8887953 -> 508592b1b


[SPARK-9843][SQL] Make catalyst optimizer pass pluggable at runtime

Let me know whether you'd like to see it in another place.

Author: Robert Kruszewski <ro...@palantir.com>

Closes #10210 from robert3005/feature/pluggable-optimizer.
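
For context, a sketch of how a user-supplied rule can be registered once this
change is in. The NoOpRule below is hypothetical (it mirrors the DummyRule in
the test suite) and assumes an existing SQLContext named sqlContext:

  import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
  import org.apache.spark.sql.catalyst.rules.Rule

  // Hypothetical rule for illustration: returns the plan unchanged.
  object NoOpRule extends Rule[LogicalPlan] {
    def apply(plan: LogicalPlan): LogicalPlan = plan
  }

  // Register the rule at runtime; the optimizer picks it up on its next run.
  sqlContext.experimental.extraOptimizations = Seq(NoOpRule)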


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/508592b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/508592b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/508592b1

Branch: refs/heads/master
Commit: 508592b1bae3b2c88350ddfc1d909892f236ce5f
Parents: 1d88879
Author: Robert Kruszewski <ro...@palantir.com>
Authored: Tue Jan 12 11:09:28 2016 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Tue Jan 12 11:09:28 2016 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/ExperimentalMethods.scala  |  5 ++++
 .../scala/org/apache/spark/sql/SQLContext.scala |  4 +--
 .../spark/sql/execution/SparkOptimizer.scala    | 27 ++++++++++++++++++++
 .../org/apache/spark/sql/SQLContextSuite.scala  | 12 +++++++++
 4 files changed, 46 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/508592b1/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 717709e..deed45d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -18,6 +18,8 @@
 package org.apache.spark.sql
 
 import org.apache.spark.annotation.Experimental
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 
 /**
  * :: Experimental ::
@@ -42,4 +44,7 @@ class ExperimentalMethods protected[sql](sqlContext: SQLContext) {
   @Experimental
   var extraStrategies: Seq[Strategy] = Nil
 
+  @Experimental
+  var extraOptimizations: Seq[Rule[LogicalPlan]] = Nil
+
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/508592b1/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 61c74f8..6721d9c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.encoders.encoderFor
 import org.apache.spark.sql.catalyst.errors.DialectException
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer}
+import org.apache.spark.sql.catalyst.optimizer.Optimizer
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.execution._
@@ -202,7 +202,7 @@ class SQLContext private[sql](
     }
 
   @transient
-  protected[sql] lazy val optimizer: Optimizer = DefaultOptimizer
+  protected[sql] lazy val optimizer: Optimizer = new SparkOptimizer(this)
 
   @transient
   protected[sql] val ddlParser = new DDLParser(sqlParser.parse(_))

http://git-wip-us.apache.org/repos/asf/spark/blob/508592b1/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala
new file mode 100644
index 0000000..edaf3b3
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.catalyst.optimizer._
+
+class SparkOptimizer(val sqlContext: SQLContext)
+    extends Optimizer {
+  override def batches: Seq[Batch] = super.batches :+ Batch(
+    "User Provided Optimizers", FixedPoint(100), sqlContext.experimental.extraOptimizations: _*)
+}
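
The user batch runs after all built-in batches and iterates to a fixed point
(at most 100 passes), so a rule may fire repeatedly as the plan shrinks. And
since batches is a def, extraOptimizations is re-read on every optimizer run,
so rules assigned after the SQLContext is created still take effect. A sketch
of a rule that actually rewrites the plan (hypothetical, not part of this
commit):

  import org.apache.spark.sql.catalyst.expressions.Literal
  import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
  import org.apache.spark.sql.catalyst.rules.Rule
  import org.apache.spark.sql.types.BooleanType

  // Hypothetical rule: drop Filter nodes whose condition is literally `true`.
  object RemoveTrivialFilter extends Rule[LogicalPlan] {
    def apply(plan: LogicalPlan): LogicalPlan = plan transform {
      case Filter(Literal(true, BooleanType), child) => child
    }
  }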

http://git-wip-us.apache.org/repos/asf/spark/blob/508592b1/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
index 1994dac..14b9448 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
@@ -18,9 +18,15 @@
 package org.apache.spark.sql
 
 import org.apache.spark.{SharedSparkContext, SparkFunSuite}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 
 class SQLContextSuite extends SparkFunSuite with SharedSparkContext{
 
+  object DummyRule extends Rule[LogicalPlan] {
+    def apply(p: LogicalPlan): LogicalPlan = p
+  }
+
   test("getOrCreate instantiates SQLContext") {
     val sqlContext = SQLContext.getOrCreate(sc)
     assert(sqlContext != null, "SQLContext.getOrCreate returned null")
@@ -65,4 +71,10 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext{
       session2.sql("select myadd(1, 2)").explain()
     }
   }
+
+  test("Catalyst optimization passes are modifiable at runtime") {
+    val sqlContext = SQLContext.getOrCreate(sc)
+    sqlContext.experimental.extraOptimizations = Seq(DummyRule)
+    assert(sqlContext.optimizer.batches.flatMap(_.rules).contains(DummyRule))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org