You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2015/09/22 12:05:30 UTC
spark git commit: [SPARK-10706] [MLLIB] Add java wrapper for random
vector rdd
Repository: spark
Updated Branches:
refs/heads/master 7278f792a -> 870b8a2ed
[SPARK-10706] [MLLIB] Add java wrapper for random vector rdd
Add a Java wrapper for the random vector RDD.
holdenk srowen
Author: Meihua Wu <me...@umich.edu>
Closes #8841 from rotationsymmetry/SPARK-10706.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/870b8a2e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/870b8a2e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/870b8a2e
Branch: refs/heads/master
Commit: 870b8a2edd44c9274c43ca0db4ef5b0998e16fd8
Parents: 7278f79
Author: Meihua Wu <me...@umich.edu>
Authored: Tue Sep 22 11:05:24 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Tue Sep 22 11:05:24 2015 +0100
----------------------------------------------------------------------
.../apache/spark/mllib/random/RandomRDDs.scala | 42 ++++++++++++++++++++
.../spark/mllib/random/JavaRandomRDDsSuite.java | 17 ++++++++
2 files changed, 59 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/870b8a2e/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
index f8ff26b..41d7c4d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
@@ -856,6 +856,48 @@ object RandomRDDs {
}
/**
+ * Java-friendly version of [[RandomRDDs#randomVectorRDD]]: the vectors are produced through the
+ * Scala API using the context wrapped by `jsc`, and the result is exposed as a `JavaRDD`.
+ *
+ * @param jsc Java Spark context; its underlying `sc` is handed to `randomVectorRDD`.
+ * @param generator source of the random `Double` values, passed through unchanged.
+ * @param numRows row count passed through to `randomVectorRDD`.
+ * @param numCols column count passed through to `randomVectorRDD`.
+ * @param numPartitions partition count passed through to `randomVectorRDD`.
+ * @param seed seed passed through to `randomVectorRDD`.
+ * @return the generated vectors as a `JavaRDD[Vector]`.
+ */
+ @DeveloperApi
+ @Since("1.6.0")
+ def randomJavaVectorRDD(
+ jsc: JavaSparkContext,
+ generator: RandomDataGenerator[Double],
+ numRows: Long,
+ numCols: Int,
+ numPartitions: Int,
+ seed: Long): JavaRDD[Vector] = {
+ // Build the RDD with the Scala API first, then wrap it for Java callers.
+ val vectors = randomVectorRDD(jsc.sc, generator, numRows, numCols, numPartitions, seed)
+ vectors.toJavaRDD()
+ }
+
+ /**
+ * [[RandomRDDs#randomJavaVectorRDD]] with the default seed: no seed is supplied to
+ * `randomVectorRDD`, so whatever default that method defines is used.
+ *
+ * @param jsc Java Spark context; its underlying `sc` is handed to `randomVectorRDD`.
+ * @param generator source of the random `Double` values, passed through unchanged.
+ * @param numRows row count passed through to `randomVectorRDD`.
+ * @param numCols column count passed through to `randomVectorRDD`.
+ * @param numPartitions partition count passed through to `randomVectorRDD`.
+ * @return the generated vectors as a `JavaRDD[Vector]`.
+ */
+ @DeveloperApi
+ @Since("1.6.0")
+ def randomJavaVectorRDD(
+ jsc: JavaSparkContext,
+ generator: RandomDataGenerator[Double],
+ numRows: Long,
+ numCols: Int,
+ numPartitions: Int): JavaRDD[Vector] = {
+ // Build the RDD with the Scala API first, then wrap it for Java callers.
+ val vectors = randomVectorRDD(jsc.sc, generator, numRows, numCols, numPartitions)
+ vectors.toJavaRDD()
+ }
+
+ /**
+ * [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default
+ * seed: neither is supplied to `randomVectorRDD`, so that method's own defaults are used.
+ *
+ * @param jsc Java Spark context; its underlying `sc` is handed to `randomVectorRDD`.
+ * @param generator source of the random `Double` values, passed through unchanged.
+ * @param numRows row count passed through to `randomVectorRDD`.
+ * @param numCols column count passed through to `randomVectorRDD`.
+ * @return the generated vectors as a `JavaRDD[Vector]`.
+ */
+ @DeveloperApi
+ @Since("1.6.0")
+ def randomJavaVectorRDD(
+ jsc: JavaSparkContext,
+ generator: RandomDataGenerator[Double],
+ numRows: Long,
+ numCols: Int): JavaRDD[Vector] = {
+ // Build the RDD with the Scala API first, then wrap it for Java callers.
+ val vectors = randomVectorRDD(jsc.sc, generator, numRows, numCols)
+ vectors.toJavaRDD()
+ }
+
+ /**
* Returns `numPartitions` if it is positive, or `sc.defaultParallelism` otherwise.
*/
private def numPartitionsOrDefault(sc: SparkContext, numPartitions: Int): Int = {
http://git-wip-us.apache.org/repos/asf/spark/blob/870b8a2e/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
index fce5f67..5728df5 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
@@ -246,6 +246,23 @@ public class JavaRandomRDDsSuite {
Assert.assertEquals(2, rdd.first().length());
}
}
+
+ /**
+ * Checks that every overload of randomJavaVectorRDD yields an RDD with the requested
+ * number of vectors, each of the requested size.
+ */
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testRandomVectorRDD() {
+ // Dimensions requested from, and expected of, every generated RDD.
+ final long expectedRows = 100L;
+ final int expectedCols = 10;
+ final int partitions = 2;
+ final long fixedSeed = 1L;
+ UniformGenerator uniform = new UniformGenerator();
+ // One RDD per overload: defaults only, explicit partitions, explicit partitions and seed.
+ JavaRDD<Vector> withDefaults = randomJavaVectorRDD(sc, uniform, expectedRows, expectedCols);
+ JavaRDD<Vector> withPartitions =
+ randomJavaVectorRDD(sc, uniform, expectedRows, expectedCols, partitions);
+ JavaRDD<Vector> withSeed =
+ randomJavaVectorRDD(sc, uniform, expectedRows, expectedCols, partitions, fixedSeed);
+ for (JavaRDD<Vector> generated : Arrays.asList(withDefaults, withPartitions, withSeed)) {
+ Assert.assertEquals(expectedRows, generated.count());
+ Assert.assertEquals(expectedCols, generated.first().size());
+ }
+ }
}
// This is just a test generator, it always returns a string of 42
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org