You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2015/09/22 12:05:30 UTC

spark git commit: [SPARK-10706] [MLLIB] Add java wrapper for random vector rdd

Repository: spark
Updated Branches:
  refs/heads/master 7278f792a -> 870b8a2ed


[SPARK-10706] [MLLIB] Add java wrapper for random vector rdd

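Add Java-friendly wrappers (`randomJavaVectorRDD`) for `RandomRDDs.randomVectorRDD`, with overloads that use the default seed and the default number of partitions, plus a Java test covering all three overloads.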

holdenk srowen

Author: Meihua Wu <me...@umich.edu>

Closes #8841 from rotationsymmetry/SPARK-10706.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/870b8a2e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/870b8a2e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/870b8a2e

Branch: refs/heads/master
Commit: 870b8a2edd44c9274c43ca0db4ef5b0998e16fd8
Parents: 7278f79
Author: Meihua Wu <me...@umich.edu>
Authored: Tue Sep 22 11:05:24 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Tue Sep 22 11:05:24 2015 +0100

----------------------------------------------------------------------
 .../apache/spark/mllib/random/RandomRDDs.scala  | 42 ++++++++++++++++++++
 .../spark/mllib/random/JavaRandomRDDsSuite.java | 17 ++++++++
 2 files changed, 59 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/870b8a2e/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
index f8ff26b..41d7c4d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
@@ -856,6 +856,48 @@ object RandomRDDs {
   }
 
   /**
+   * Java-friendly version of [[RandomRDDs#randomVectorRDD]].
+   */
+  @DeveloperApi
+  @Since("1.6.0")
+  def randomJavaVectorRDD(
+      jsc: JavaSparkContext,
+      generator: RandomDataGenerator[Double],
+      numRows: Long,
+      numCols: Int,
+      numPartitions: Int,
+      seed: Long): JavaRDD[Vector] = {
+    randomVectorRDD(jsc.sc, generator, numRows, numCols, numPartitions, seed).toJavaRDD()
+  }
+
+  /**
+   * [[RandomRDDs#randomJavaVectorRDD]] with the default seed.
+   */
+  @DeveloperApi
+  @Since("1.6.0")
+  def randomJavaVectorRDD(
+      jsc: JavaSparkContext,
+      generator: RandomDataGenerator[Double],
+      numRows: Long,
+      numCols: Int,
+      numPartitions: Int): JavaRDD[Vector] = {
+    randomVectorRDD(jsc.sc, generator, numRows, numCols, numPartitions).toJavaRDD()
+  }
+
+  /**
+   * [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default seed.
+   */
+  @DeveloperApi
+  @Since("1.6.0")
+  def randomJavaVectorRDD(
+      jsc: JavaSparkContext,
+      generator: RandomDataGenerator[Double],
+      numRows: Long,
+      numCols: Int): JavaRDD[Vector] = {
+    randomVectorRDD(jsc.sc, generator, numRows, numCols).toJavaRDD()
+  }
+
+  /**
    * Returns `numPartitions` if it is positive, or `sc.defaultParallelism` otherwise.
    */
   private def numPartitionsOrDefault(sc: SparkContext, numPartitions: Int): Int = {

http://git-wip-us.apache.org/repos/asf/spark/blob/870b8a2e/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
index fce5f67..5728df5 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java
@@ -246,6 +246,23 @@ public class JavaRandomRDDsSuite {
       Assert.assertEquals(2, rdd.first().length());
     }
   }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testRandomVectorRDD() {
+    UniformGenerator generator = new UniformGenerator();
+    long m = 100L;
+    int n = 10;
+    int p = 2;
+    long seed = 1L;
+    JavaRDD<Vector> rdd1 = randomJavaVectorRDD(sc, generator, m, n);
+    JavaRDD<Vector> rdd2 = randomJavaVectorRDD(sc, generator, m, n, p);
+    JavaRDD<Vector> rdd3 = randomJavaVectorRDD(sc, generator, m, n, p, seed);
+    for (JavaRDD<Vector> rdd: Arrays.asList(rdd1, rdd2, rdd3)) {
+      Assert.assertEquals(m, rdd.count());
+      Assert.assertEquals(n, rdd.first().size());
+    }
+  }
 }
 
 // This is just a test generator, it always returns a string of 42


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org