Posted to commits@spark.apache.org by pw...@apache.org on 2014/03/25 05:20:46 UTC

git commit: SPARK-1094 Support MiMa for reporting binary compatibility across versions.

Repository: spark
Updated Branches:
  refs/heads/master 8043b7bc7 -> dc126f212


SPARK-1094 Support MiMa for reporting binary compatibility across versions.

This adds some changes on top of the initial work by @scrapcodes in #20:

The goal here is to do automated checking of Spark commits to determine whether they break binary compatibility; a usage sketch follows the list below.

1. Special case for inner classes of package-private objects.
2. Made tools classes accessible when running `spark-class`.
3. Made some declared types in MLLib more general.
4. Various other improvements to exclude-generation script.
5. In-code documentation.
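
For reference, the check runs in two steps, both wired into dev/run-tests in this commit:
the ignore-generation tool first writes a .mima-excludes file, and sbt then runs the MiMa
report against the previous release artifacts. From the Spark root directory:

  ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
  sbt/sbt mima-report-binary-issues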

Author: Patrick Wendell <pw...@gmail.com>
Author: Prashant Sharma <pr...@imaginea.com>
Author: Prashant Sharma <sc...@gmail.com>

Closes #207 from pwendell/mima and squashes the following commits:

22ae267 [Patrick Wendell] New binary changes after upmerge
6c2030d [Patrick Wendell] Merge remote-tracking branch 'apache/master' into mima
3666cf1 [Patrick Wendell] Minor style change
0e0f570 [Patrick Wendell] Small fix and removing directory listings
647c547 [Patrick Wendell] Review feedback.
c39f3b5 [Patrick Wendell] Some enhancements to binary checking.
4c771e0 [Prashant Sharma] Added a tool to generate mima excludes and also adapted build to pick automatically.
b551519 [Prashant Sharma] adding a new exclude after rebasing with master
651844c [Prashant Sharma] Support MiMa for reporting binary compatibility across versions.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dc126f21
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dc126f21
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dc126f21

Branch: refs/heads/master
Commit: dc126f2121d0cd1dc0caa50ae0c4cb9137d42562
Parents: 8043b7b
Author: Patrick Wendell <pw...@gmail.com>
Authored: Mon Mar 24 21:20:23 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Mon Mar 24 21:20:23 2014 -0700

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 bin/compute-classpath.sh                        |   1 +
 bin/spark-class                                 |   3 +-
 dev/run-tests                                   |   7 +
 project/MimaBuild.scala                         |  83 ++++++++++++
 project/SparkBuild.scala                        |  27 +++-
 project/plugins.sbt                             |   2 +
 .../apache/spark/tools/GenerateMIMAIgnore.scala | 132 +++++++++++++++++++
 8 files changed, 249 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index e9d8dc3..3a68abd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 sbt/*.jar
 .settings
 .cache
+.mima-excludes
 /build/
 work/
 out/

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/bin/compute-classpath.sh
----------------------------------------------------------------------
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 0624117..5f54391 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -58,6 +58,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/bin/spark-class
----------------------------------------------------------------------
diff --git a/bin/spark-class b/bin/spark-class
index 229ae2c..a3efa2f 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -137,8 +137,7 @@ fi
 
 # Compute classpath using external script
 CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
-
-if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
+if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/dev/run-tests
----------------------------------------------------------------------
diff --git a/dev/run-tests b/dev/run-tests
index 6e78cad..6f115d2 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -59,3 +59,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then
   export PYSPARK_PYTHON=/usr/local/bin/python2.7
 fi
 ./python/run-tests
+
+echo "========================================================================="
+echo "Detecting binary incompatibilites with MiMa"
+echo "========================================================================="
+./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+sbt/sbt mima-report-binary-issues
+

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/project/MimaBuild.scala
----------------------------------------------------------------------
diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala
new file mode 100644
index 0000000..e7c9c47
--- /dev/null
+++ b/project/MimaBuild.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
+import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
+import sbt._
+
+object MimaBuild {
+
+  def ignoredABIProblems(base: File) = {
+    import com.typesafe.tools.mima.core._
+    import com.typesafe.tools.mima.core.ProblemFilters._
+
+    // Excludes placed here will be used for all Spark versions
+    val defaultExcludes = Seq()
+
+    // Read package-private excludes from file
+    val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
+    val excludeFile = file(excludeFilePath) 
+    val packagePrivateList: Seq[String] =
+      if (!excludeFile.exists()) {
+        Seq()
+      } else {
+        IO.read(excludeFile).split("\n")
+      }
+
+    def excludeClass(className: String) = {
+      Seq(
+        excludePackage(className), 
+        ProblemFilters.exclude[MissingClassProblem](className),
+        ProblemFilters.exclude[MissingTypesProblem](className),
+        excludePackage(className + "$"), 
+        ProblemFilters.exclude[MissingClassProblem](className + "$"),
+        ProblemFilters.exclude[MissingTypesProblem](className + "$")
+      )
+    }
+    def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className)
+
+    val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)
+
+    /* Excludes specific to a given version of Spark. When comparing the given version against
+       its immediate predecessor, the excludes listed here will be applied. */
+    val versionExcludes =
+      SparkBuild.SPARK_VERSION match {
+        case v if v.startsWith("1.0") =>
+          Seq(
+             excludePackage("org.apache.spark.api.java"),
+             excludePackage("org.apache.spark.streaming.api.java"),
+             excludePackage("org.apache.spark.mllib")
+           ) ++
+           excludeSparkClass("rdd.ClassTags") ++
+           excludeSparkClass("util.XORShiftRandom") ++
+           excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
+           excludeSparkClass("mllib.optimization.SquaredGradient") ++
+           excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
+           excludeSparkClass("mllib.regression.LassoWithSGD") ++
+           excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
+        case _ => Seq()
+      }
+
+    defaultExcludes ++ packagePrivateExcludes ++ versionExcludes
+  }
+
+  def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
+    previousArtifact := None,
+    binaryIssueFilters ++= ignoredABIProblems(sparkHome)
+  )
+
+}
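
As an illustration of how the version-specific excludes above are meant to grow, a future
release would add another branch to the match on SparkBuild.SPARK_VERSION. This is a
hypothetical sketch; the class and package names are invented for the example:

  case v if v.startsWith("1.1") =>
    Seq(excludePackage("org.apache.spark.somePackage")) ++  // hypothetical package
    excludeSparkClass("util.SomeRenamedClass")              // hypothetical class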

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 1969486..21d2779 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -22,6 +22,7 @@ import sbtassembly.Plugin._
 import AssemblyKeys._
 import scala.util.Properties
 import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
+import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
 
 import scala.collection.JavaConversions._
 
@@ -29,6 +30,8 @@ import scala.collection.JavaConversions._
 //import com.jsuereth.pgp.sbtplugin.PgpKeys._
 
 object SparkBuild extends Build {
+  val SPARK_VERSION = "1.0.0-SNAPSHOT" 
+
   // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
   // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
   // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
@@ -146,9 +149,9 @@ object SparkBuild extends Build {
   lazy val allProjects = packageProjects ++ allExternalRefs ++
     Seq[ProjectReference](examples, tools, assemblyProj, hive) ++ maybeJava8Tests
 
-  def sharedSettings = Defaults.defaultSettings ++ Seq(
+  def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
     organization       := "org.apache.spark",
-    version            := "1.0.0-SNAPSHOT",
+    version            := SPARK_VERSION,
     scalaVersion       := "2.10.3",
     scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
       "-target:" + SCALAC_JVM_VERSION),
@@ -284,9 +287,14 @@ object SparkBuild extends Build {
   val excludeSLF4J = ExclusionRule(organization = "org.slf4j")
   val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap")
 
+  def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark", 
+      version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = {
+    val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion
+    Some(organization % fullId % version) // the artifact to compare binary compatibility with
+  }
+
   def coreSettings = sharedSettings ++ Seq(
     name := "spark-core",
-
     libraryDependencies ++= Seq(
         "com.google.guava"           % "guava"            % "14.0.1",
         "com.google.code.findbugs"   % "jsr305"           % "1.3.9",
@@ -325,7 +333,7 @@ object SparkBuild extends Build {
     publish := {}
   )
 
- def replSettings = sharedSettings ++ Seq(
+  def replSettings = sharedSettings ++ Seq(
     name := "spark-repl",
    libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "scala-compiler" % v ),
    libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "jline"          % v ),
@@ -354,17 +362,20 @@ object SparkBuild extends Build {
 
   def graphxSettings = sharedSettings ++ Seq(
     name := "spark-graphx",
+    previousArtifact := sparkPreviousArtifact("spark-graphx"),
     libraryDependencies ++= Seq(
       "org.jblas" % "jblas" % "1.2.3"
     )
   )
 
   def bagelSettings = sharedSettings ++ Seq(
-    name := "spark-bagel"
+    name := "spark-bagel",
+    previousArtifact := sparkPreviousArtifact("spark-bagel")
   )
 
   def mllibSettings = sharedSettings ++ Seq(
     name := "spark-mllib",
+    previousArtifact := sparkPreviousArtifact("spark-mllib"),
     libraryDependencies ++= Seq(
       "org.jblas" % "jblas" % "1.2.3",
       "org.scalanlp" %% "breeze" % "0.7"
@@ -428,6 +439,7 @@ object SparkBuild extends Build {
 
   def streamingSettings = sharedSettings ++ Seq(
     name := "spark-streaming",
+    previousArtifact := sparkPreviousArtifact("spark-streaming"),
     libraryDependencies ++= Seq(
       "commons-io" % "commons-io" % "2.4"
     )
@@ -503,6 +515,7 @@ object SparkBuild extends Build {
 
   def twitterSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-twitter",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"),
     libraryDependencies ++= Seq(
       "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty)
     )
@@ -510,6 +523,7 @@ object SparkBuild extends Build {
 
   def kafkaSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-kafka",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"),
     libraryDependencies ++= Seq(
       "com.github.sgroschupf"    % "zkclient"   % "0.1"          excludeAll(excludeNetty),
       "org.apache.kafka"        %% "kafka"      % "0.8.0"
@@ -522,6 +536,7 @@ object SparkBuild extends Build {
 
   def flumeSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-flume",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-flume"),
     libraryDependencies ++= Seq(
       "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty)
     )
@@ -529,6 +544,7 @@ object SparkBuild extends Build {
 
   def zeromqSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-zeromq",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"),
     libraryDependencies ++= Seq(
       "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty)
     )
@@ -536,6 +552,7 @@ object SparkBuild extends Build {
 
   def mqttSettings() = streamingSettings ++ Seq(
     name := "spark-streaming-mqtt",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"),
     libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0")
   )
 }
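
The sparkPreviousArtifact helper above defaults to organization "org.apache.spark",
version "0.9.0-incubating", and the "2.10" cross-version suffix, so most projects only
pass the module id. A minimal sketch of both forms (the "0.9.1" baseline is hypothetical):

  // Resolves to org.apache.spark % spark-graphx_2.10 % 0.9.0-incubating
  previousArtifact := sparkPreviousArtifact("spark-graphx")

  // Hypothetical: compare against a different baseline release
  previousArtifact := sparkPreviousArtifact("spark-graphx", version = "0.9.1")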

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/project/plugins.sbt
----------------------------------------------------------------------
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 32bc044..4ff6f67 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -19,4 +19,6 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4")
 
 addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0")
 
+addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6")
+
 addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0")

http://git-wip-us.apache.org/repos/asf/spark/blob/dc126f21/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
new file mode 100644
index 0000000..5547e9f
--- /dev/null
+++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tools
+
+import java.io.File
+import java.util.jar.JarFile
+
+import scala.collection.mutable
+import scala.collection.JavaConversions._
+import scala.reflect.runtime.universe.runtimeMirror
+
+/**
+ * A tool for generating classes to be excluded during binary checking with MIMA. It is expected
+ * that this tool is run with ./spark-class.
+ *
+ * MIMA itself only supports JVM-level visibility and doesn't account for package-private classes.
+ * This tool looks at all currently package-private classes and generates exclusions for them. Note
+ * that this approach is not sound. It can lead to false positives if we move or rename a previously
+ * package-private class. It can lead to false negatives if someone explicitly makes a class
+ * package-private that wasn't before. This exists only to help catch certain classes of changes
+ * which might be difficult to catch during review.
+ */
+object GenerateMIMAIgnore {
+  private val classLoader = Thread.currentThread().getContextClassLoader
+  private val mirror = runtimeMirror(classLoader)
+
+  private def classesPrivateWithin(packageName: String): Set[String] = {
+
+    val classes = getClasses(packageName, classLoader)
+    val privateClasses = mutable.HashSet[String]()
+
+    def isPackagePrivate(className: String) = {
+      try {
+        /* Couldn't figure out if it's possible to determine a-priori whether a given symbol
+           is a module or class. */
+
+        val privateAsClass = mirror
+          .staticClass(className)
+          .privateWithin
+          .fullName
+          .startsWith(packageName)
+
+        val privateAsModule = mirror
+          .staticModule(className)
+          .privateWithin
+          .fullName
+          .startsWith(packageName)
+
+        privateAsClass || privateAsModule
+      } catch {
+        case _: Throwable => {
+          println("Error determining visibility: " + className)
+          false
+        }
+      }
+    }
+
+    for (className <- classes) {
+      val directlyPrivateSpark = isPackagePrivate(className)
+
+      /* Inner classes defined within a private[spark] class or object are effectively
+         invisible, so we account for them as package private. */
+      val indirectlyPrivateSpark = {
+        val maybeOuter = className.toString.takeWhile(_ != '$')
+        if (maybeOuter != className) {
+          isPackagePrivate(maybeOuter)
+        } else {
+          false
+        }
+      }
+      if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className
+    }
+    privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet
+  }
+
+  def main(args: Array[String]) {
+    scala.tools.nsc.io.File(".mima-excludes").
+      writeAll(classesPrivateWithin("org.apache.spark").mkString("\n"))
+    println("Created : .mima-excludes in current directory.")
+  }
+
+
+  private def shouldExclude(name: String) = {
+    // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala
+    name.contains("anon") ||
+    name.endsWith("$class") ||
+    name.contains("$sp")
+  }
+
+  /**
+   * Scans all classes accessible from the context class loader which belong to the given package
+   * and subpackages both from directories and jars present on the classpath.
+   */
+  private def getClasses(packageName: String,
+      classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = {
+    val path = packageName.replace('.', '/')
+    val resources = classLoader.getResources(path)
+
+    val jars = resources.filter(x => x.getProtocol == "jar")
+      .map(_.getFile.split(":")(1).split("!")(0)).toSeq
+
+    jars.flatMap(getClassesFromJar(_, path))
+      .map(_.getName)
+      .filterNot(shouldExclude).toSet
+  }
+
+  /**
+   * Get all classes in a package from a jar file.
+   */
+  private def getClassesFromJar(jarPath: String, packageName: String) = {
+    val jar = new JarFile(new File(jarPath))
+    val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName))
+    val classes = for (entry <- enums if entry.endsWith(".class"))
+      yield Class.forName(entry.replace('/', '.').stripSuffix(".class"))
+    classes
+  }
+}
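
For context, the generated .mima-excludes file is a plain newline-separated list of fully
qualified class names; any name containing a '$' (companion objects, inner classes) is also
emitted with '$' replaced by '#', the spelling MiMa uses when reporting problems. A
hypothetical excerpt (the names are invented for illustration):

  org.apache.spark.scheduler.SomePrivateClass
  org.apache.spark.scheduler.SomeOuter$Inner
  org.apache.spark.scheduler.SomeOuter#Inner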