You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/07/23 17:38:07 UTC
spark git commit: [SPARK-20871][SQL] limit logging of Janino code

Repository: spark
Updated Branches:
  refs/heads/master cecd285a2 -> 2a53fbfce


[SPARK-20871][SQL] limit logging of Janino code

## What changes were proposed in this pull request?

When the code that is generated is greater than 64k, then Janino compile will fail and CodeGenerator.scala will log the entire code at Error level.
SPARK-20871 suggests only logging the code at Debug level.
Since, the code is already logged at debug level, this Pull Request proposes not including the formatted code in the Error logging and exception message at all.
When an exception occurs, the code will be logged at Info level but truncated if it is more than 1000 lines long.

## How was this patch tested?

Existing tests were run.
An extra test test case was added to CodeFormatterSuite to test the new maxLines parameter,

Author: pj.fanning <pj...@workday.com>

Closes #18658 from pjfanning/SPARK-20871.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a53fbfc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a53fbfc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a53fbfc

Branch: refs/heads/master
Commit: 2a53fbfce72b3faef020e39a1e8628d68bc95beb
Parents: cecd285
Author: pj.fanning <pj...@workday.com>
Authored: Sun Jul 23 10:38:03 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Sun Jul 23 10:38:03 2017 -0700

----------------------------------------------------------------------
 .../expressions/codegen/CodeFormatter.scala     | 10 ++++-
 .../expressions/codegen/CodeGenerator.scala     | 13 ++++---
 .../org/apache/spark/sql/internal/SQLConf.scala | 10 +++++
 .../codegen/CodeFormatterSuite.scala            | 40 +++++++++++++++++---
 4 files changed, 61 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 05b7c96..60e600d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -28,14 +28,20 @@ import java.util.regex.Matcher
 object CodeFormatter {
   val commentHolder = """\/\*(.+?)\*\/""".r
 
-  def format(code: CodeAndComment): String = {
+  def format(code: CodeAndComment, maxLines: Int = -1): String = {
     val formatter = new CodeFormatter
-    code.body.split("\n").foreach { line =>
+    val lines = code.body.split("\n")
+    val needToTruncate = maxLines >= 0 && lines.length > maxLines
+    val filteredLines = if (needToTruncate) lines.take(maxLines) else lines
+    filteredLines.foreach { line =>
       val commentReplaced = commentHolder.replaceAllIn(
         line.trim,
         m => code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0)))
       formatter.addLine(commentReplaced)
     }
+    if (needToTruncate) {
+      formatter.addLine(s"[truncated to $maxLines lines (total lines is ${lines.length})]")
+    }
     formatter.result()
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 7cf9daf..a014e2a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.types._
@@ -1037,12 +1038,10 @@ object CodeGenerator extends Logging {
     ))
     evaluator.setExtendedClass(classOf[GeneratedClass])
 
-    lazy val formatted = CodeFormatter.format(code)
-
     logDebug({
       // Only add extra debugging info to byte code when we are going to print the source code.
       evaluator.setDebuggingInformation(true, true, false)
-      s"\n$formatted"
+      s"\n${CodeFormatter.format(code)}"
     })
 
     try {
@@ -1050,12 +1049,16 @@ object CodeGenerator extends Logging {
       recordCompilationStats(evaluator)
     } catch {
       case e: JaninoRuntimeException =>
-        val msg = s"failed to compile: $e\n$formatted"
+        val msg = s"failed to compile: $e"
         logError(msg, e)
+        val maxLines = SQLConf.get.loggingMaxLinesForCodegen
+        logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
         throw new JaninoRuntimeException(msg, e)
       case e: CompileException =>
-        val msg = s"failed to compile: $e\n$formatted"
+        val msg = s"failed to compile: $e"
         logError(msg, e)
+        val maxLines = SQLConf.get.loggingMaxLinesForCodegen
+        logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
         throw new CompileException(msg, e.getLocation)
     }
     evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]

http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 824908d..a819cdd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -564,6 +564,14 @@ object SQLConf {
     .intConf
     .createWithDefault(20)
 
+  val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
+    .internal()
+    .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
+    .intConf
+    .checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " +
+      "disable logging or -1 to apply no limit.")
+    .createWithDefault(1000)
+
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
     .longConf
@@ -1004,6 +1012,8 @@ class SQLConf extends Serializable with Logging {
 
   def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
 
+  def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
+
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index bc5a8f0..9d0a416 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.util._
 
-
 class CodeFormatterSuite extends SparkFunSuite {
 
-  def testCase(name: String)(
-      input: String, comment: Map[String, String] = Map.empty)(expected: String): Unit = {
+  def testCase(name: String)(input: String,
+      comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: String): Unit = {
     test(name) {
       val sourceCode = new CodeAndComment(input.trim, comment)
-      if (CodeFormatter.format(sourceCode).trim !== expected.trim) {
+      if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) {
         fail(
           s"""
              |== FAIL: Formatted code doesn't match ===
-             |${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")}
+             |${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim,
+                 expected.trim).mkString("\n")}
            """.stripMargin)
       }
     }
@@ -129,6 +129,36 @@ class CodeFormatterSuite extends SparkFunSuite {
     """.stripMargin
   }
 
+  testCase("function calls with maxLines=0") (
+    """
+      |foo(
+      |a,
+      |b,
+      |c)
+    """.stripMargin,
+    maxLines = 0
+  ) {
+    """
+      |/* 001 */ [truncated to 0 lines (total lines is 4)]
+    """.stripMargin
+  }
+
+  testCase("function calls with maxLines=2") (
+    """
+      |foo(
+      |a,
+      |b,
+      |c)
+    """.stripMargin,
+    maxLines = 2
+  ) {
+    """
+      |/* 001 */ foo(
+      |/* 002 */   a,
+      |/* 003 */   [truncated to 2 lines (total lines is 4)]
+    """.stripMargin
+  }
+
   testCase("single line comments") {
     """
       |// This is a comment about class A { { { ( (


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org