You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2015/07/25 04:35:31 UTC

spark git commit: [SPARK-9331][SQL] Add a code formatter to auto-format generated code.

Repository: spark
Updated Branches:
  refs/heads/master f99cb5615 -> c84acd4aa


[SPARK-9331][SQL] Add a code formatter to auto-format generated code.

The generated expression code can be hard to read since they are not indented well. This patch adds a code formatter that formats code automatically when we output them to the screen.

Author: Reynold Xin <rx...@databricks.com>

Closes #7656 from rxin/codeformatter and squashes the following commits:

5ba0e90 [Reynold Xin] [SPARK-9331][SQL] Add a code formatter to auto-format generated code.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c84acd4a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c84acd4a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c84acd4a

Branch: refs/heads/master
Commit: c84acd4aa4f8bee98baa550cd6801c797a8a7a25
Parents: f99cb56
Author: Reynold Xin <rx...@databricks.com>
Authored: Fri Jul 24 19:35:24 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Jul 24 19:35:24 2015 -0700

----------------------------------------------------------------------
 .../expressions/codegen/CodeFormatter.scala     | 60 ++++++++++++++++
 .../expressions/codegen/CodeGenerator.scala     |  2 +-
 .../codegen/GenerateMutableProjection.scala     | 11 +--
 .../expressions/codegen/GenerateOrdering.scala  |  2 +-
 .../expressions/codegen/GeneratePredicate.scala |  2 +-
 .../codegen/GenerateProjection.scala            |  3 +-
 .../codegen/GenerateUnsafeProjection.scala      |  2 +-
 .../codegen/CodeFormatterSuite.scala            | 76 ++++++++++++++++++++
 8 files changed, 148 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
new file mode 100644
index 0000000..2087cc7
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.codegen
+
+/**
+ * An utility class that indents a block of code based on the curly braces.
+ *
+ * This is used to prettify generated code when in debug mode (or exceptions).
+ *
+ * Written by Matei Zaharia.
+ */
+object CodeFormatter {
+  def format(code: String): String = new CodeFormatter().addLines(code).result()
+}
+
+private class CodeFormatter {
+  private val code = new StringBuilder
+  private var indentLevel = 0
+  private val indentSize = 2
+  private var indentString = ""
+
+  private def addLine(line: String): Unit = {
+    val indentChange = line.count(_ == '{') - line.count(_ == '}')
+    val newIndentLevel = math.max(0, indentLevel + indentChange)
+    // Lines starting with '}' should be de-indented even if they contain '{' after;
+    // in addition, lines ending with ':' are typically labels
+    val thisLineIndent = if (line.startsWith("}") || line.endsWith(":")) {
+      " " * (indentSize * (indentLevel - 1))
+    } else {
+      indentString
+    }
+    code.append(thisLineIndent)
+    code.append(line)
+    code.append("\n")
+    indentLevel = newIndentLevel
+    indentString = " " * (indentSize * newIndentLevel)
+  }
+
+  private def addLines(code: String): CodeFormatter = {
+    code.split('\n').foreach(s => addLine(s.trim()))
+    this
+  }
+
+  private def result(): String = code.result()
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 4a90f1b..508882a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -299,7 +299,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
       evaluator.cook(code)
     } catch {
       case e: Exception =>
-        val msg = s"failed to compile:\n $code"
+        val msg = "failed to compile:\n " + CodeFormatter.format(code)
         logError(msg, e)
         throw new Exception(msg, e)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index d838268..825031a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -17,11 +17,11 @@
 
 package org.apache.spark.sql.catalyst.expressions.codegen
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp
 
-import scala.collection.mutable.ArrayBuffer
-
 // MutableProjection is not accessible in Java
 abstract class BaseMutableProjection extends MutableProjection
 
@@ -45,10 +45,11 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
         val evaluationCode = e.gen(ctx)
         evaluationCode.code +
           s"""
-            if(${evaluationCode.isNull})
+            if (${evaluationCode.isNull}) {
               mutableRow.setNullAt($i);
-            else
+            } else {
               ${ctx.setColumn("mutableRow", e.dataType, i, evaluationCode.primitive)};
+            }
           """
     }
     // collect projections into blocks as function has 64kb codesize limit in JVM
@@ -119,7 +120,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
       }
     """
 
-    logDebug(s"code for ${expressions.mkString(",")}:\n$code")
+    logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
 
     val c = compile(code)
     () => {

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 2e6f9e2..dbd4616 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -107,7 +107,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
         }
       }"""
 
-    logDebug(s"Generated Ordering: $code")
+    logDebug(s"Generated Ordering: ${CodeFormatter.format(code)}")
 
     compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
index 1dda599..dfd593f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
@@ -60,7 +60,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
         }
       }"""
 
-    logDebug(s"Generated predicate '$predicate':\n$code")
+    logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}")
 
     val p = compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]
     (r: InternalRow) => p.eval(r)

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
index f0efc4b..a361b21 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
@@ -230,7 +230,8 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
     }
     """
 
-    logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n${code}")
+    logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n" +
+      CodeFormatter.format(code))
 
     compile(code).generate(ctx.references.toArray).asInstanceOf[Projection]
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index d65e5c3..0320bcb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -114,7 +114,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
     }
     """
 
-    logDebug(s"code for ${expressions.mkString(",")}:\n$code")
+    logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
 
     val c = compile(code)
     c.generate(ctx.references.toArray).asInstanceOf[UnsafeProjection]

http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
new file mode 100644
index 0000000..478702f
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.codegen
+
+import org.apache.spark.SparkFunSuite
+
+
+class CodeFormatterSuite extends SparkFunSuite {
+
+  def testCase(name: String)(input: String)(expected: String): Unit = {
+    test(name) {
+      assert(CodeFormatter.format(input).trim === expected.trim)
+    }
+  }
+
+  testCase("basic example") {
+    """
+      |class A {
+      |blahblah;
+      |}
+    """.stripMargin
+  }{
+    """
+      |class A {
+      |  blahblah;
+      |}
+    """.stripMargin
+  }
+
+  testCase("nested example") {
+    """
+      |class A {
+      | if (c) {
+      |duh;
+      |}
+      |}
+    """.stripMargin
+  } {
+    """
+      |class A {
+      |  if (c) {
+      |    duh;
+      |  }
+      |}
+    """.stripMargin
+  }
+
+  testCase("single line") {
+    """
+      |class A {
+      | if (c) {duh;}
+      |}
+    """.stripMargin
+  }{
+    """
+      |class A {
+      |  if (c) {duh;}
+      |}
+    """.stripMargin
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org