You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2015/06/19 20:40:16 UTC

spark git commit: [SPARK-8461] [SQL] fix codegen with REPL class loader

Repository: spark
Updated Branches:
  refs/heads/master 4a462c282 -> e41e2fd6c


[SPARK-8461] [SQL] fix codegen with REPL class loader

The ExecutorClassLoader used by the REPL causes Janino to fail to find classes in java.lang, so switch Janino to the default class loader, which will also help performance.

cc liancheng yhuai

Author: Davies Liu <da...@databricks.com>

Closes #6898 from davies/fix_class_loader and squashes the following commits:

24276d4 [Davies Liu] add regression test
4ff0457 [Davies Liu] address comment, refactor
7f5ffbe [Davies Liu] fix REPL class loader with codegen


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e41e2fd6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e41e2fd6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e41e2fd6

Branch: refs/heads/master
Commit: e41e2fd6c61076f870de03b85c5da6c12b8da038
Parents: 4a462c2
Author: Davies Liu <da...@databricks.com>
Authored: Fri Jun 19 11:40:04 2015 -0700
Committer: Davies Liu <da...@databricks.com>
Committed: Fri Jun 19 11:40:04 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/repl/ReplSuite.scala | 11 ++++++++++
 .../expressions/codegen/CodeGenerator.scala     | 22 ++++++++++++--------
 .../codegen/GenerateMutableProjection.scala     |  8 ++-----
 .../expressions/codegen/GenerateOrdering.scala  |  7 +------
 .../expressions/codegen/GeneratePredicate.scala |  8 +------
 .../codegen/GenerateProjection.scala            |  7 +------
 6 files changed, 29 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e41e2fd6/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
----------------------------------------------------------------------
diff --git a/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 50fd43a..f150fec 100644
--- a/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -267,6 +267,17 @@ class ReplSuite extends SparkFunSuite {
     assertDoesNotContain("Exception", output)
   }
 
+  test("SPARK-8461 SQL with codegen") {
+    val output = runInterpreter("local",
+    """
+      |val sqlContext = new org.apache.spark.sql.SQLContext(sc)
+      |sqlContext.setConf("spark.sql.codegen", "true")
+      |sqlContext.range(0, 100).filter('id > 50).count()
+    """.stripMargin)
+    assertContains("Long = 49", output)
+    assertDoesNotContain("java.lang.ClassNotFoundException", output)
+  }
+
   test("SPARK-2632 importing a method from non serializable class and not using it.") {
     val output = runInterpreter("local",
     """

http://git-wip-us.apache.org/repos/asf/spark/blob/e41e2fd6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index ab850d1..bd5475d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -203,6 +203,11 @@ class CodeGenContext {
   def isPrimitiveType(dt: DataType): Boolean = primitiveTypes.contains(dt)
 }
 
+
+abstract class GeneratedClass {
+  def generate(expressions: Array[Expression]): Any
+}
+
 /**
  * A base class for generators of byte code to perform expression evaluation.  Includes a set of
  * helpers for referring to Catalyst types and building trees that perform evaluation of individual
@@ -215,11 +220,6 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
   protected val genericMutableRowType: String = classOf[GenericMutableRow].getName
 
   /**
-   * Can be flipped on manually in the console to add (expensive) expression evaluation trace code.
-   */
-  var debugLogging = false
-
-  /**
    * Generates a class for a given input expression.  Called when there is not cached code
    * already available.
    */
@@ -239,10 +239,14 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
    *
    * It will track the time used to compile
    */
-  protected def compile(code: String): Class[_] = {
+  protected def compile(code: String): GeneratedClass = {
     val startTime = System.nanoTime()
-    val clazz = try {
-      new ClassBodyEvaluator(code).getClazz()
+    val evaluator = new ClassBodyEvaluator()
+    evaluator.setParentClassLoader(getClass.getClassLoader)
+    evaluator.setDefaultImports(Array("org.apache.spark.sql.catalyst.InternalRow"))
+    evaluator.setExtendedClass(classOf[GeneratedClass])
+    try {
+      evaluator.cook(code)
     } catch {
       case e: Exception =>
         logError(s"failed to compile:\n $code", e)
@@ -251,7 +255,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
     val endTime = System.nanoTime()
     def timeMs: Double = (endTime - startTime).toDouble / 1000000
     logDebug(s"Code (${code.size} bytes) compiled in $timeMs ms")
-    clazz
+    evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/e41e2fd6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index 573a9ea..e75e82d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -47,9 +47,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
         """
     }.mkString("\n")
     val code = s"""
-      import org.apache.spark.sql.catalyst.InternalRow;
-
-      public SpecificProjection generate($exprType[] expr) {
+      public Object generate($exprType[] expr) {
         return new SpecificProjection(expr);
       }
 
@@ -85,10 +83,8 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
     logDebug(s"code for ${expressions.mkString(",")}:\n$code")
 
     val c = compile(code)
-    // fetch the only one method `generate(Expression[])`
-    val m = c.getDeclaredMethods()(0)
     () => {
-      m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[BaseMutableProjection]
+      c.generate(ctx.references.toArray).asInstanceOf[MutableProjection]
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e41e2fd6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 3e9ee60..7ed2c5a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -76,8 +76,6 @@ object GenerateOrdering
     }.mkString("\n")
 
     val code = s"""
-      import org.apache.spark.sql.catalyst.InternalRow;
-
       public SpecificOrdering generate($exprType[] expr) {
         return new SpecificOrdering(expr);
       }
@@ -100,9 +98,6 @@ object GenerateOrdering
 
     logDebug(s"Generated Ordering: $code")
 
-    val c = compile(code)
-    // fetch the only one method `generate(Expression[])`
-    val m = c.getDeclaredMethods()(0)
-    m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[BaseOrdering]
+    compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e41e2fd6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
index dad4364..3ebc2c1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.catalyst.expressions.codegen
 
-import org.apache.spark.sql.catalyst
 import org.apache.spark.sql.catalyst.expressions._
 
 /**
@@ -41,8 +40,6 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
     val ctx = newCodeGenContext()
     val eval = predicate.gen(ctx)
     val code = s"""
-      import org.apache.spark.sql.catalyst.InternalRow;
-
       public SpecificPredicate generate($exprType[] expr) {
         return new SpecificPredicate(expr);
       }
@@ -62,10 +59,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
 
     logDebug(s"Generated predicate '$predicate':\n$code")
 
-    val c = compile(code)
-    // fetch the only one method `generate(Expression[])`
-    val m = c.getDeclaredMethods()(0)
-    val p = m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[Predicate]
+    val p = compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]
     (r: InternalRow) => p.eval(r)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e41e2fd6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
index 8b5dc19..2e20eda 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
@@ -147,8 +147,6 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
     }.mkString("\n")
 
     val code = s"""
-    import org.apache.spark.sql.catalyst.InternalRow;
-
     public SpecificProjection generate($exprType[] expr) {
       return new SpecificProjection(expr);
     }
@@ -220,9 +218,6 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
 
     logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n${code}")
 
-    val c = compile(code)
-    // fetch the only one method `generate(Expression[])`
-    val m = c.getDeclaredMethods()(0)
-    m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[Projection]
+    compile(code).generate(ctx.references.toArray).asInstanceOf[Projection]
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org