You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/06/09 03:40:10 UTC

git commit: SPARK-1628 follow up: Improve RangePartitioner's documentation.

Repository: spark
Updated Branches:
  refs/heads/master e9261d086 -> 219dc00b3


SPARK-1628 follow up: Improve RangePartitioner's documentation.

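Adds a paragraph to RangePartitioner's Scaladoc clarifying a surprising behavior: the actual number of partitions it creates may differ from the `partitions` parameter when the number of sampled records is less than `partitions`.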

See also #549.

Author: Reynold Xin <rx...@apache.org>

Closes #1012 from rxin/partitioner-doc and squashes the following commits:

6f0109e [Reynold Xin] SPARK-1628 follow up: Improve RangePartitioner's documentation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/219dc00b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/219dc00b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/219dc00b

Branch: refs/heads/master
Commit: 219dc00b30c8d9c4c0a6ce5d566497a93f21cb57
Parents: e9261d0
Author: Reynold Xin <rx...@apache.org>
Authored: Sun Jun 8 18:39:57 2014 -0700
Committer: Reynold Xin <rx...@apache.org>
Committed: Sun Jun 8 18:39:57 2014 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/Partitioner.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/219dc00b/core/src/main/scala/org/apache/spark/Partitioner.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala
index 01e918f..e7f7548 100644
--- a/core/src/main/scala/org/apache/spark/Partitioner.scala
+++ b/core/src/main/scala/org/apache/spark/Partitioner.scala
@@ -90,6 +90,10 @@ class HashPartitioner(partitions: Int) extends Partitioner {
 /**
  * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly
  * equal ranges. The ranges are determined by sampling the content of the RDD passed in.
+ *
+ * Note that the actual number of partitions created by the RangePartitioner might not be the same
+ * as the `partitions` parameter, in the case where the number of sampled records is less than
+ * the value of `partitions`.
  */
 class RangePartitioner[K : Ordering : ClassTag, V](
     partitions: Int,
@@ -158,7 +162,6 @@ class RangePartitioner[K : Ordering : ClassTag, V](
       false
   }
 
-
   override def hashCode(): Int = {
     val prime = 31
     var result = 1