You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/12/28 21:46:24 UTC
spark git commit: [SPARK-7727][SQL] Avoid inner classes in RuleExecutor

Repository: spark
Updated Branches:
  refs/heads/master 07165ca06 -> a6a481243


[SPARK-7727][SQL] Avoid inner classes in RuleExecutor

Moved (case) classes Strategy, Once, FixedPoint and Batch to the companion object. This is necessary if we want the Optimizer to be easily extendable in the following sense: usually a user wants to add additional rules and simply reuse the ones that are already there. However, inner classes made that impossible because the code did not compile.

This allows easy extension of existing Optimizers; see the OptimizerExtendableSuite for a corresponding test case.

Author: Stephan Kessler <st...@sap.com>

Closes #10174 from stephankessler/SPARK-7727.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a6a48124
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a6a48124
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a6a48124

Branch: refs/heads/master
Commit: a6a4812434c6f43cd4742437f957fecd86220255
Parents: 07165ca
Author: Stephan Kessler <st...@sap.com>
Authored: Mon Dec 28 12:46:20 2015 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Dec 28 12:46:20 2015 -0800

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/Optimizer.scala      | 19 +++++--
 .../spark/sql/catalyst/rules/RuleExecutor.scala |  2 +-
 .../optimizer/OptimizerExtendableSuite.scala    | 58 ++++++++++++++++++++
 3 files changed, 74 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a6a48124/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index f608869..0b1c742 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -28,10 +28,12 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.types._
 
-abstract class Optimizer extends RuleExecutor[LogicalPlan]
-
-object DefaultOptimizer extends Optimizer {
-  val batches =
+/**
+  * Abstract class all optimizers should inherit from. Contains the standard batches (extending
+  * Optimizers can override this).
+  */
+abstract class Optimizer extends RuleExecutor[LogicalPlan] {
+  def batches: Seq[Batch] = {
     // SubQueries are only needed for analysis and can be removed before execution.
     Batch("Remove SubQueries", FixedPoint(100),
       EliminateSubQueries) ::
@@ -66,9 +68,18 @@ object DefaultOptimizer extends Optimizer {
       DecimalAggregates) ::
     Batch("LocalRelation", FixedPoint(100),
       ConvertToLocalRelation) :: Nil
+  }
 }
 
 /**
+  * Non-abstract representation of the standard Spark optimizing strategies
+  *
+  * To ensure extendability, we leave the standard rules in the abstract optimizer, while
+  * specific rules go to the subclasses.
+  */
+object DefaultOptimizer extends Optimizer
+
+/**
  * Pushes operations down into a Sample.
  */
 object SamplePushDown extends Rule[LogicalPlan] {

http://git-wip-us.apache.org/repos/asf/spark/blob/a6a48124/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala
index f80d2a9..62ea731 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala
@@ -59,7 +59,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging {
   protected case class Batch(name: String, strategy: Strategy, rules: Rule[TreeType]*)
 
   /** Defines a sequence of rule batches, to be overridden by the implementation. */
-  protected val batches: Seq[Batch]
+  protected def batches: Seq[Batch]
 
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/a6a48124/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerExtendableSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerExtendableSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerExtendableSuite.scala
new file mode 100644
index 0000000..7e3da6b
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerExtendableSuite.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.optimizer.Optimizer
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+
+/**
+  * Test for SPARK-7727: verifies that the Optimizer remains extendable.
+  */
+class OptimizerExtendableSuite extends SparkFunSuite {
+
+  /**
+    * Dummy rule for test batches
+    */
+  object DummyRule extends Rule[LogicalPlan] {
+    def apply(p: LogicalPlan): LogicalPlan = p
+  }
+
+  /**
+    * This class represents a dummy extended optimizer that takes the batches of the
+    * Optimizer and adds custom ones.
+    */
+  class ExtendedOptimizer extends Optimizer {
+
+    // Rules are set to DummyRule; they would not be executed anyway.
+    val myBatches: Seq[Batch] = {
+      Batch("once", Once,
+        DummyRule) ::
+      Batch("fixedPoint", FixedPoint(100),
+        DummyRule) :: Nil
+    }
+
+    override def batches: Seq[Batch] = super.batches ++ myBatches
+  }
+
+  test("Extending batches possible") {
+    // test simply instantiates the new extended optimizer
+    val extendedOptimizer = new ExtendedOptimizer()
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org