Posted to commits@spark.apache.org by pw...@apache.org on 2014/04/22 20:23:38 UTC

[1/4] git commit: fix bugs of dot in python

Repository: spark
Updated Branches:
  refs/heads/branch-1.0 3f708f566 -> 898fc3480


fix bugs of dot in python

Without a `transpose()` on `self.theta`, a

*ValueError: matrices are not aligned*

is raised. The previous test case did not cover this situation.
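
For illustration only (not part of the commit), a minimal numpy sketch of the shape mismatch, assuming `theta` is stored with one row per class and `x` is a plain feature vector:

    import numpy as np

    pi = np.log(np.array([0.5, 0.5]))         # log class priors for 2 classes
    theta = np.log(np.full((2, 3), 1.0 / 3))  # log feature likelihoods, one row per class
    x = np.array([1.0, 0.0, 0.0])             # 3-dimensional feature vector

    # np.dot(x, theta) fails because shapes (3,) and (2, 3) are not aligned,
    # which is the error the old predict() hit once numFeatures != numClasses.
    scores = pi + np.dot(x, theta.transpose())  # shape (2,): one score per class
    print(np.argmax(scores))                    # index of the most likely class

With the old two-feature, two-class test data, `theta` happened to be square, so the missing transpose raised no error; the new three-feature data below exposes it.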

Author: Xusen Yin <yi...@gmail.com>

Closes #463 from yinxusen/python-naive-bayes and squashes the following commits:

fcbe3bc [Xusen Yin] fix bugs of dot in python
(cherry picked from commit c919798f0912dc03c8365b9a384d9ee6d5b25c51)

Signed-off-by: Patrick Wendell <pw...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f2f093c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f2f093c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f2f093c

Branch: refs/heads/branch-1.0
Commit: 4f2f093c5b65b74869068d5690a4d2b0e0b5f759
Parents: 3f708f5
Author: Xusen Yin <yi...@gmail.com>
Authored: Tue Apr 22 11:06:18 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Apr 22 11:22:24 2014 -0700

----------------------------------------------------------------------
 python/pyspark/mllib/classification.py | 2 +-
 python/pyspark/mllib/tests.py          | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4f2f093c/python/pyspark/mllib/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 3a23e08..c584459 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -154,7 +154,7 @@ class NaiveBayesModel(object):
 
     def predict(self, x):
         """Return the most likely class for a data vector x"""
-        return self.labels[numpy.argmax(self.pi + _dot(x, self.theta))]
+        return self.labels[numpy.argmax(self.pi + _dot(x, self.theta.transpose()))]
 
 class NaiveBayes(object):
     @classmethod

http://git-wip-us.apache.org/repos/asf/spark/blob/4f2f093c/python/pyspark/mllib/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index d4771d7..1ee96bb 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -104,10 +104,10 @@ class ListTests(PySparkTestCase):
     def test_classification(self):
         from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
         data = [
-            LabeledPoint(0.0, [1, 0]),
-            LabeledPoint(1.0, [0, 1]),
-            LabeledPoint(0.0, [2, 0]),
-            LabeledPoint(1.0, [0, 2])
+            LabeledPoint(0.0, [1, 0, 0]),
+            LabeledPoint(1.0, [0, 1, 1]),
+            LabeledPoint(0.0, [2, 0, 0]),
+            LabeledPoint(1.0, [0, 2, 1])
         ]
         rdd = self.sc.parallelize(data)
         features = [p.features.tolist() for p in data]


[2/4] git commit: [SPARK-1281] Improve partitioning in ALS

Posted by pw...@apache.org.
[SPARK-1281] Improve partitioning in ALS

ALS was using HashPartitioner and explicit uses of `%` together.  Further, the naked use of `%` meant that, if the number of partitions corresponded with the stride of arithmetic progressions appearing in user and product ids, users and products could be mapped into buckets in an unfair or unwise way.
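
A standalone sketch (not part of the patch) of the skew this can cause, using byteswap32 as the bit scrambler and a nonNegativeMod helper in the spirit of Spark's Utils.nonNegativeMod:

    import scala.util.hashing.byteswap32

    object PartitionSkewDemo {
      // Same idea as Spark's Utils.nonNegativeMod: a mod that never returns negatives.
      def nonNegativeMod(x: Int, mod: Int): Int = {
        val rawMod = x % mod
        rawMod + (if (rawMod < 0) mod else 0)
      }

      def main(args: Array[String]): Unit = {
        val numPartitions = 8
        val ids = (0 until 64).map(_ * 8)   // ids in an arithmetic progression of stride 8

        val naive = ids.map(id => nonNegativeMod(id, numPartitions))
        val scrambled = ids.map(id => nonNegativeMod(byteswap32(id), numPartitions))

        println(naive.distinct.sorted)      // Vector(0): every id lands in the same bucket
        println(scrambled.distinct.sorted)  // typically spreads across many buckets
      }
    }

With the naked `%`, all 64 ids above land in partition 0; scrambling the bits first restores a roughly even spread.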

This pull request:
1) Makes the Partitioner an instance variable of ALS.
2) Replaces the direct uses of `%` with calls to a Partitioner.
3) Defines an anonymous Partitioner that scrambles the bits of the object's hashCode before reducing it modulo the number of buckets.

This pull request does not make the partitioner user-configurable.

I'm not all that happy about the way I did (1).  It introduces an icky lifetime issue and dances around it by nulling something.  However, I don't know a better way to make the partitioner visible everywhere it needs to be visible.

Author: Tor Myklebust <tm...@gmail.com>

Closes #407 from tmyklebu/master and squashes the following commits:

dcf583a [Tor Myklebust] Remove the partitioner member variable; instead, thread that needle everywhere it needs to go.
23d6f91 [Tor Myklebust] Stop making the partitioner configurable.
495784f [Tor Myklebust] Merge branch 'master' of https://github.com/apache/spark
674933a [Tor Myklebust] Fix style.
40edc23 [Tor Myklebust] Fix missing space.
f841345 [Tor Myklebust] Fix daft bug creating 'pairs', also for -> foreach.
5ec9e6c [Tor Myklebust] Clean a couple of things up using 'map'.
36a0f43 [Tor Myklebust] Make the partitioner private.
d872b09 [Tor Myklebust] Add negative id ALS test.
df27697 [Tor Myklebust] Support custom partitioners.  Currently we use the same partitioner for users and products.
c90b6d8 [Tor Myklebust] Scramble user and product ids before bucketing.
c774d7d [Tor Myklebust] Make the partitioner a member variable and use it instead of modding directly.
(cherry picked from commit bf9d49b6d1f668b49795c2d380ab7d64ec0029da)

Signed-off-by: Patrick Wendell <pw...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4834adff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4834adff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4834adff

Branch: refs/heads/branch-1.0
Commit: 4834adfff26af56094e6bf18d84ca8fe243d8c20
Parents: 4f2f093
Author: Tor Myklebust <tm...@gmail.com>
Authored: Tue Apr 22 11:07:30 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Apr 22 11:22:30 2014 -0700

----------------------------------------------------------------------
 .../apache/spark/mllib/recommendation/ALS.scala | 47 ++++++++++++--------
 .../spark/mllib/recommendation/ALSSuite.scala   | 30 +++++++++++--
 2 files changed, 54 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4834adff/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 1f5c746..60fb73f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -21,6 +21,7 @@ import scala.collection.mutable.{ArrayBuffer, BitSet}
 import scala.math.{abs, sqrt}
 import scala.util.Random
 import scala.util.Sorting
+import scala.util.hashing.byteswap32
 
 import com.esotericsoftware.kryo.Kryo
 import org.jblas.{DoubleMatrix, SimpleBlas, Solve}
@@ -32,6 +33,7 @@ import org.apache.spark.storage.StorageLevel
 import org.apache.spark.rdd.RDD
 import org.apache.spark.serializer.KryoRegistrator
 import org.apache.spark.SparkContext._
+import org.apache.spark.util.Utils
 
 /**
  * Out-link information for a user or product block. This includes the original user/product IDs
@@ -169,34 +171,39 @@ class ALS private (
       this.numBlocks
     }
 
-    val partitioner = new HashPartitioner(numBlocks)
+    val partitioner = new Partitioner {
+      val numPartitions = numBlocks
 
-    val ratingsByUserBlock = ratings.map{ rating => (rating.user % numBlocks, rating) }
+      def getPartition(x: Any): Int = {
+        Utils.nonNegativeMod(byteswap32(x.asInstanceOf[Int]), numPartitions)
+      }
+    }
+
+    val ratingsByUserBlock = ratings.map{ rating =>
+      (partitioner.getPartition(rating.user), rating)
+    }
     val ratingsByProductBlock = ratings.map{ rating =>
-      (rating.product % numBlocks, Rating(rating.product, rating.user, rating.rating))
+      (partitioner.getPartition(rating.product),
+        Rating(rating.product, rating.user, rating.rating))
     }
 
-    val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock)
-    val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock)
+    val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock, partitioner)
+    val (productInLinks, productOutLinks) =
+        makeLinkRDDs(numBlocks, ratingsByProductBlock, partitioner)
 
     // Initialize user and product factors randomly, but use a deterministic seed for each
     // partition so that fault recovery works
     val seedGen = new Random(seed)
     val seed1 = seedGen.nextInt()
     val seed2 = seedGen.nextInt()
-    // Hash an integer to propagate random bits at all positions, similar to java.util.HashTable
-    def hash(x: Int): Int = {
-      val r = x ^ (x >>> 20) ^ (x >>> 12)
-      r ^ (r >>> 7) ^ (r >>> 4)
-    }
     var users = userOutLinks.mapPartitionsWithIndex { (index, itr) =>
-      val rand = new Random(hash(seed1 ^ index))
+      val rand = new Random(byteswap32(seed1 ^ index))
       itr.map { case (x, y) =>
         (x, y.elementIds.map(_ => randomFactor(rank, rand)))
       }
     }
     var products = productOutLinks.mapPartitionsWithIndex { (index, itr) =>
-      val rand = new Random(hash(seed2 ^ index))
+      val rand = new Random(byteswap32(seed2 ^ index))
       itr.map { case (x, y) =>
         (x, y.elementIds.map(_ => randomFactor(rank, rand)))
       }
@@ -327,13 +334,14 @@ class ALS private (
    * Make the out-links table for a block of the users (or products) dataset given the list of
    * (user, product, rating) values for the users in that block (or the opposite for products).
    */
-  private def makeOutLinkBlock(numBlocks: Int, ratings: Array[Rating]): OutLinkBlock = {
+  private def makeOutLinkBlock(numBlocks: Int, ratings: Array[Rating],
+      partitioner: Partitioner): OutLinkBlock = {
     val userIds = ratings.map(_.user).distinct.sorted
     val numUsers = userIds.length
     val userIdToPos = userIds.zipWithIndex.toMap
     val shouldSend = Array.fill(numUsers)(new BitSet(numBlocks))
     for (r <- ratings) {
-      shouldSend(userIdToPos(r.user))(r.product % numBlocks) = true
+      shouldSend(userIdToPos(r.user))(partitioner.getPartition(r.product)) = true
     }
     OutLinkBlock(userIds, shouldSend)
   }
@@ -342,14 +350,15 @@ class ALS private (
    * Make the in-links table for a block of the users (or products) dataset given a list of
    * (user, product, rating) values for the users in that block (or the opposite for products).
    */
-  private def makeInLinkBlock(numBlocks: Int, ratings: Array[Rating]): InLinkBlock = {
+  private def makeInLinkBlock(numBlocks: Int, ratings: Array[Rating],
+      partitioner: Partitioner): InLinkBlock = {
     val userIds = ratings.map(_.user).distinct.sorted
     val numUsers = userIds.length
     val userIdToPos = userIds.zipWithIndex.toMap
     // Split out our ratings by product block
     val blockRatings = Array.fill(numBlocks)(new ArrayBuffer[Rating])
     for (r <- ratings) {
-      blockRatings(r.product % numBlocks) += r
+      blockRatings(partitioner.getPartition(r.product)) += r
     }
     val ratingsForBlock = new Array[Array[(Array[Int], Array[Double])]](numBlocks)
     for (productBlock <- 0 until numBlocks) {
@@ -374,14 +383,14 @@ class ALS private (
    * the users (or (blockId, (p, u, r)) for the products). We create these simultaneously to avoid
    * having to shuffle the (blockId, (u, p, r)) RDD twice, or to cache it.
    */
-  private def makeLinkRDDs(numBlocks: Int, ratings: RDD[(Int, Rating)])
+  private def makeLinkRDDs(numBlocks: Int, ratings: RDD[(Int, Rating)], partitioner: Partitioner)
     : (RDD[(Int, InLinkBlock)], RDD[(Int, OutLinkBlock)]) =
   {
     val grouped = ratings.partitionBy(new HashPartitioner(numBlocks))
     val links = grouped.mapPartitionsWithIndex((blockId, elements) => {
       val ratings = elements.map{_._2}.toArray
-      val inLinkBlock = makeInLinkBlock(numBlocks, ratings)
-      val outLinkBlock = makeOutLinkBlock(numBlocks, ratings)
+      val inLinkBlock = makeInLinkBlock(numBlocks, ratings, partitioner)
+      val outLinkBlock = makeOutLinkBlock(numBlocks, ratings, partitioner)
       Iterator.single((blockId, (inLinkBlock, outLinkBlock)))
     }, true)
     val inLinks = links.mapValues(_._1)

http://git-wip-us.apache.org/repos/asf/spark/blob/4834adff/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index 5aab9ab..4dfcd4b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -27,6 +27,7 @@ import org.jblas.DoubleMatrix
 
 import org.apache.spark.mllib.util.LocalSparkContext
 import org.apache.spark.SparkContext._
+import org.apache.spark.Partitioner
 
 object ALSSuite {
 
@@ -74,7 +75,6 @@ object ALSSuite {
 
     (sampledRatings, trueRatings, truePrefs)
   }
-
 }
 
 
@@ -128,6 +128,25 @@ class ALSSuite extends FunSuite with LocalSparkContext {
     assert(u11 != u2)
   }
 
+  test("negative ids") {
+    val data = ALSSuite.generateRatings(50, 50, 2, 0.7, false, false)
+    val ratings = sc.parallelize(data._1.map { case Rating(u, p, r) =>
+      Rating(u - 25, p - 25, r)
+    })
+    val correct = data._2
+    val model = ALS.train(ratings, 5, 15)
+
+    val pairs = Array.tabulate(50, 50)((u, p) => (u - 25, p - 25)).flatten
+    val ans = model.predict(sc.parallelize(pairs)).collect()
+    ans.foreach { r =>
+      val u = r.user + 25
+      val p = r.product + 25
+      val v = r.rating
+      val error = v - correct.get(u, p)
+      assert(math.abs(error) < 0.4)
+    }
+  }
+
   /**
    * Test if we can correctly factorize R = U * P where U and P are of known rank.
    *
@@ -140,16 +159,19 @@ class ALSSuite extends FunSuite with LocalSparkContext {
    * @param implicitPrefs  flag to test implicit feedback
    * @param bulkPredict    flag to test bulk prediciton
    * @param negativeWeights whether the generated data can contain negative values
+   * @param numBlocks      number of blocks to partition users and products into
    */
   def testALS(users: Int, products: Int, features: Int, iterations: Int,
     samplingRate: Double, matchThreshold: Double, implicitPrefs: Boolean = false,
-    bulkPredict: Boolean = false, negativeWeights: Boolean = false)
+    bulkPredict: Boolean = false, negativeWeights: Boolean = false, numBlocks: Int = -1)
   {
     val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products,
       features, samplingRate, implicitPrefs, negativeWeights)
     val model = implicitPrefs match {
-      case false => ALS.train(sc.parallelize(sampledRatings), features, iterations)
-      case true => ALS.trainImplicit(sc.parallelize(sampledRatings), features, iterations)
+      case false => ALS.train(sc.parallelize(sampledRatings), features, iterations, 0.01,
+          numBlocks, 0L)
+      case true => ALS.trainImplicit(sc.parallelize(sampledRatings), features, iterations, 0.01,
+          numBlocks, 1.0, 0L)
     }
 
     val predictedU = new DoubleMatrix(users, features)


[3/4] git commit: [HOTFIX] SPARK-1399: remove outdated comments

Posted by pw...@apache.org.
[HOTFIX] SPARK-1399: remove outdated comments

as the original PR was merged before this mistake was found... fixing it here.

Sorry about that @pwendell, @andrewor14; I will be more careful next time

Author: CodingCat <zh...@gmail.com>

Closes #474 from CodingCat/hotfix_1399 and squashes the following commits:

f3a8ba9 [CodingCat] move outdated comments
(cherry picked from commit 87de29084eed2a287ee114d492f45b211c500c6f)

Signed-off-by: Patrick Wendell <pw...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/61d74016
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/61d74016
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/61d74016

Branch: refs/heads/branch-1.0
Commit: 61d74016cb4c9d6c2d349bbc96c273c63e730af5
Parents: 4834adf
Author: CodingCat <zh...@gmail.com>
Authored: Tue Apr 22 09:43:13 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Apr 22 11:23:01 2014 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/61d74016/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
index 2eb8c7a..153434a 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
@@ -37,7 +37,6 @@ private[ui] class StageTableBase(
   protected def isFairScheduler = parent.isFairScheduler
 
   protected def columns: Seq[Node] = {
-    // create dummy element to wrap the columns
     <th>Stage Id</th> ++
     {if (isFairScheduler) {<th>Pool Name</th>} else Seq.empty} ++
     <th>Description</th>


[4/4] git commit: [Fix #274] Document + fix annotation usages

Posted by pw...@apache.org.
[Fix #274] Document + fix annotation usages

... so that we no longer rely on an unspoken set of rules when adding **@AlphaComponent**, **@DeveloperApi**, and **@Experimental** annotations in the code.

In addition, this PR
(1) removes unnecessary `:: * ::` tags,
(2) adds missing `:: * ::` tags, and
(3) removes annotations for internal APIs (see the usage sketch after this list).
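
For illustration only (the class below is hypothetical, not from this PR), the documented convention in a nutshell: the first Scaladoc line is `:: Experimental ::`, with no blank line trailing it, placed immediately before the annotation:

    import org.apache.spark.annotation.Experimental

    /**
     * :: Experimental ::
     * A public but still-evolving helper class, shown only to illustrate the
     * documented placement of the tag relative to the annotation.
     */
    @Experimental
    class ShinyNewHelper {
      def run(): Unit = ()
    }

Scaladoc displays only whichever of the annotation or the comment comes first, which is why the tag has to lead the comment.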

Author: Andrew Or <an...@gmail.com>

Closes #470 from andrewor14/annotations-fix and squashes the following commits:

92a7f42 [Andrew Or] Document + fix annotation usages
(cherry picked from commit b3e5366f696c463f1c2f033b0d5c7365e5d6b0f8)

Signed-off-by: Patrick Wendell <pw...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/898fc348
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/898fc348
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/898fc348

Branch: refs/heads/branch-1.0
Commit: 898fc3480c408a7e88fa95f27ca8f72d660cdff7
Parents: 61d7401
Author: Andrew Or <an...@gmail.com>
Authored: Mon Apr 21 22:24:44 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Apr 22 11:23:18 2014 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/SparkContext.scala     | 1 +
 .../scala/org/apache/spark/annotation/AlphaComponent.java   | 9 ++++++++-
 .../scala/org/apache/spark/annotation/DeveloperApi.java     | 5 +++++
 .../scala/org/apache/spark/annotation/Experimental.java     | 5 +++++
 core/src/main/scala/org/apache/spark/rdd/RDD.scala          | 6 ------
 .../main/scala/org/apache/spark/scheduler/SplitInfo.scala   | 8 ++++++--
 .../scala/org/apache/spark/mllib/clustering/KMeans.scala    | 6 +++---
 .../org/apache/spark/mllib/regression/RegressionModel.scala | 3 ---
 .../src/main/scala/org/apache/spark/sql/SQLContext.scala    | 2 --
 .../src/main/scala/org/apache/spark/sql/SchemaRDD.scala     | 2 +-
 .../main/scala/org/apache/spark/sql/hive/HiveContext.scala  | 1 -
 11 files changed, 29 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 1db14cb..25ca650 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1110,6 +1110,7 @@ class SparkContext(config: SparkConf) extends Logging {
   }
 
   /**
+   * :: Experimental ::
    * Submit a job for execution and return a FutureJob holding the result.
    */
   @Experimental

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/core/src/main/scala/org/apache/spark/annotation/AlphaComponent.java
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/annotation/AlphaComponent.java b/core/src/main/scala/org/apache/spark/annotation/AlphaComponent.java
index af01fb7..db7b25c 100644
--- a/core/src/main/scala/org/apache/spark/annotation/AlphaComponent.java
+++ b/core/src/main/scala/org/apache/spark/annotation/AlphaComponent.java
@@ -19,7 +19,14 @@ package org.apache.spark.annotation;
 
 import java.lang.annotation.*;
 
-/** A new component of Spark which may have unstable API's. */
+/**
+ * A new component of Spark which may have unstable API's.
+ *
+ * NOTE: If there exists a Scaladoc comment that immediately precedes this annotation, the first
+ * line of the comment must be ":: AlphaComponent ::" with no trailing blank line. This is because
+ * of the known issue that Scaladoc displays only either the annotation or the comment, whichever
+ * comes first.
+ */
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,
         ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE})

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/core/src/main/scala/org/apache/spark/annotation/DeveloperApi.java
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/annotation/DeveloperApi.java b/core/src/main/scala/org/apache/spark/annotation/DeveloperApi.java
index 5d546e7..0ecef6d 100644
--- a/core/src/main/scala/org/apache/spark/annotation/DeveloperApi.java
+++ b/core/src/main/scala/org/apache/spark/annotation/DeveloperApi.java
@@ -23,6 +23,11 @@ import java.lang.annotation.*;
  * A lower-level, unstable API intended for developers.
  *
  * Developer API's might change or be removed in minor versions of Spark.
+ *
+ * NOTE: If there exists a Scaladoc comment that immediately precedes this annotation, the first
+ * line of the comment must be ":: DeveloperApi ::" with no trailing blank line. This is because
+ * of the known issue that Scaladoc displays only either the annotation or the comment, whichever
+ * comes first.
  */
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/core/src/main/scala/org/apache/spark/annotation/Experimental.java
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/annotation/Experimental.java b/core/src/main/scala/org/apache/spark/annotation/Experimental.java
index 306b141..ff81202 100644
--- a/core/src/main/scala/org/apache/spark/annotation/Experimental.java
+++ b/core/src/main/scala/org/apache/spark/annotation/Experimental.java
@@ -24,6 +24,11 @@ import java.lang.annotation.*;
  *
  * Experimental API's might change or be removed in minor versions of Spark, or be adopted as
  * first-class Spark API's.
+ *
+ * NOTE: If there exists a Scaladoc comment that immediately precedes this annotation, the first
+ * line of the comment must be ":: Experimental ::" with no trailing blank line. This is because
+ * of the known issue that Scaladoc displays only either the annotation or the comment, whichever
+ * comes first.
  */
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER,

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 891efcc..5d2ed2b 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -94,26 +94,20 @@ abstract class RDD[T: ClassTag](
   def compute(split: Partition, context: TaskContext): Iterator[T]
 
   /**
-   * :: DeveloperApi ::
    * Implemented by subclasses to return the set of partitions in this RDD. This method will only
    * be called once, so it is safe to implement a time-consuming computation in it.
    */
-  @DeveloperApi
   protected def getPartitions: Array[Partition]
 
   /**
-   * :: DeveloperApi ::
    * Implemented by subclasses to return how this RDD depends on parent RDDs. This method will only
    * be called once, so it is safe to implement a time-consuming computation in it.
    */
-  @DeveloperApi
   protected def getDependencies: Seq[Dependency[_]] = deps
 
   /**
-   * :: DeveloperApi ::
    * Optionally overridden by subclasses to specify placement preferences.
    */
-  @DeveloperApi
   protected def getPreferredLocations(split: Partition): Seq[String] = Nil
 
   /** Optionally overridden by subclasses to specify how they are partitioned. */

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
index b85eabd..1ce8348 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
@@ -24,8 +24,12 @@ import org.apache.spark.annotation.DeveloperApi
 // information about a specific split instance : handles both split instances.
 // So that we do not need to worry about the differences.
 @DeveloperApi
-class SplitInfo(val inputFormatClazz: Class[_], val hostLocation: String, val path: String,
-                val length: Long, val underlyingSplit: Any) {
+class SplitInfo(
+    val inputFormatClazz: Class[_],
+    val hostLocation: String,
+    val path: String,
+    val length: Long,
+    val underlyingSplit: Any) {
   override def toString(): String = {
     "SplitInfo " + super.toString + " .. inputFormatClazz " + inputFormatClazz +
       ", hostLocation : " + hostLocation + ", path : " + path +

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 90cf852..dee9ef0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -391,9 +391,9 @@ object KMeans {
    * Returns the squared Euclidean distance between two vectors computed by
    * [[org.apache.spark.mllib.util.MLUtils#fastSquaredDistance]].
    */
-  private[clustering]
-  def fastSquaredDistance(v1: BreezeVectorWithNorm, v2: BreezeVectorWithNorm)
-  : Double = {
+  private[clustering] def fastSquaredDistance(
+      v1: BreezeVectorWithNorm,
+      v2: BreezeVectorWithNorm): Double = {
     MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index 027305a..b27e158 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -21,9 +21,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.annotation.Experimental
 
-/**
- * :: Experimental ::
- */
 @Experimental
 trait RegressionModel extends Serializable {
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 4d216b5..e25201a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -249,11 +249,9 @@ class SQLContext(@transient val sparkContext: SparkContext)
   }
 
   /**
-   * :: DeveloperApi ::
    * The primary workflow for executing relational queries using Spark.  Designed to allow easy
    * access to the intermediate phases of query execution for developers.
    */
-  @DeveloperApi
   protected abstract class QueryExecution {
     def logical: LogicalPlan
 

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index f2ae5b0..6cb0e0f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql
 import net.razorvine.pickle.Pickler
 
 import org.apache.spark.{Dependency, OneToOneDependency, Partition, TaskContext}
-import org.apache.spark.annotation.{AlphaComponent, Experimental, DeveloperApi}
+import org.apache.spark.annotation.{AlphaComponent, Experimental}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._

http://git-wip-us.apache.org/repos/asf/spark/blob/898fc348/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index c0d8adf..b21f24d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -239,7 +239,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     sparkContext.parallelize(Seq(new GenericRow(Array[Any]()): Row), 1)
 
   /** Extends QueryExecution with hive specific features. */
-  @DeveloperApi
   protected[sql] abstract class QueryExecution extends super.QueryExecution {
     // TODO: Create mixin for the analyzer instead of overriding things here.
     override lazy val optimizedPlan =