You are viewing a plain text version of this content. The canonical (HTML) version is available in the mailing list archive.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/04/12 04:48:01 UTC
[2/3] git commit: [FIX] make coalesce test deterministic in RDDSuite
[FIX] make coalesce test deterministic in RDDSuite
Make coalesce test deterministic by setting pre-defined seeds. (Saw random failures in other PRs.)
Author: Xiangrui Meng <me...@databricks.com>
Closes #387 from mengxr/fix-random and squashes the following commits:
59bc16f [Xiangrui Meng] make coalesce test deterministic in RDDSuite
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9afaeed5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9afaeed5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9afaeed5
Branch: refs/heads/branch-1.0
Commit: 9afaeed55916f9eee1f10dc450793c7e2c4e418b
Parents: 79eb276
Author: Xiangrui Meng <me...@databricks.com>
Authored: Fri Apr 11 19:41:40 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Apr 11 19:47:02 2014 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/rdd/RDDSuite.scala | 61 +++++++++++---------
1 file changed, 33 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/9afaeed5/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index 2597334..1901330 100644
--- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -274,37 +274,42 @@ class RDDSuite extends FunSuite with SharedSparkContext {
test("coalesced RDDs with locality, large scale (10K partitions)") {
// large scale experiment
import collection.mutable
- val rnd = scala.util.Random
val partitions = 10000
val numMachines = 50
val machines = mutable.ListBuffer[String]()
- (1 to numMachines).foreach(machines += "m"+_)
-
- val blocks = (1 to partitions).map(i =>
- { (i, Array.fill(3)(machines(rnd.nextInt(machines.size))).toList) } )
-
- val data2 = sc.makeRDD(blocks)
- val coalesced2 = data2.coalesce(numMachines*2)
-
- // test that you get over 90% locality in each group
- val minLocality = coalesced2.partitions
- .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction)
- .foldLeft(1.0)((perc, loc) => math.min(perc,loc))
- assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.0).toInt + "%")
-
- // test that the groups are load balanced with 100 +/- 20 elements in each
- val maxImbalance = coalesced2.partitions
- .map(part => part.asInstanceOf[CoalescedRDDPartition].parents.size)
- .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev))
- assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance)
-
- val data3 = sc.makeRDD(blocks).map(i => i*2) // derived RDD to test *current* pref locs
- val coalesced3 = data3.coalesce(numMachines*2)
- val minLocality2 = coalesced3.partitions
- .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction)
- .foldLeft(1.0)((perc, loc) => math.min(perc,loc))
- assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " +
- (minLocality2*100.0).toInt + "%")
+ (1 to numMachines).foreach(machines += "m" + _)
+ val rnd = scala.util.Random
+ for (seed <- 1 to 5) {
+ rnd.setSeed(seed)
+
+ val blocks = (1 to partitions).map { i =>
+ (i, Array.fill(3)(machines(rnd.nextInt(machines.size))).toList)
+ }
+
+ val data2 = sc.makeRDD(blocks)
+ val coalesced2 = data2.coalesce(numMachines * 2)
+
+ // test that you get over 90% locality in each group
+ val minLocality = coalesced2.partitions
+ .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction)
+ .foldLeft(1.0)((perc, loc) => math.min(perc, loc))
+ assert(minLocality >= 0.90, "Expected 90% locality but got " +
+ (minLocality * 100.0).toInt + "%")
+
+ // test that the groups are load balanced with 100 +/- 20 elements in each
+ val maxImbalance = coalesced2.partitions
+ .map(part => part.asInstanceOf[CoalescedRDDPartition].parents.size)
+ .foldLeft(0)((dev, curr) => math.max(math.abs(100 - curr), dev))
+ assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance)
+
+ val data3 = sc.makeRDD(blocks).map(i => i * 2) // derived RDD to test *current* pref locs
+ val coalesced3 = data3.coalesce(numMachines * 2)
+ val minLocality2 = coalesced3.partitions
+ .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction)
+ .foldLeft(1.0)((perc, loc) => math.min(perc, loc))
+ assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " +
+ (minLocality2 * 100.0).toInt + "%")
+ }
}
test("zipped RDDs") {