You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/04/23 19:47:56 UTC

spark git commit: [SPARK-14873][CORE] Java sampleByKey methods take ju.Map but with Scala Double values; results in type Object

Repository: spark
Updated Branches:
  refs/heads/master a55fbe2a1 -> be0d5d3bb


[SPARK-14873][CORE] Java sampleByKey methods take ju.Map but with Scala Double values; results in type Object

## What changes were proposed in this pull request?

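The Java `sampleByKey` and `sampleByKeyExact` methods should accept a `java.util.Map` with `java.lang.Double` values. Previously they were declared with Scala `Double` values, which Java callers see as type `Object`.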

## How was this patch tested?

Existing Jenkins tests, updated (in `JavaAPISuite`) to pass a `Map<Integer, Double>` instead of a `Map<Integer, Object>`.

Author: Sean Owen <so...@cloudera.com>

Closes #12637 from srowen/SPARK-14873.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be0d5d3b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be0d5d3b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be0d5d3b

Branch: refs/heads/master
Commit: be0d5d3bbebf0912c27bf41ce27b5ba214e61e19
Parents: a55fbe2
Author: Sean Owen <so...@cloudera.com>
Authored: Sat Apr 23 10:47:50 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Sat Apr 23 10:47:50 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/api/java/JavaPairRDD.scala | 18 +++++++++++------
 .../java/org/apache/spark/JavaAPISuite.java     |  5 ++---
 .../mllib/JavaStratifiedSamplingExample.java    | 21 +++++++++-----------
 3 files changed, 23 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/be0d5d3b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 2897272..1c95bc4 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -139,9 +139,12 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * math.ceil(numItems * samplingRate) over all key values.
    */
   def sampleByKey(withReplacement: Boolean,
-      fractions: java.util.Map[K, Double],
+      fractions: java.util.Map[K, jl.Double],
       seed: Long): JavaPairRDD[K, V] =
-    new JavaPairRDD[K, V](rdd.sampleByKey(withReplacement, fractions.asScala, seed))
+    new JavaPairRDD[K, V](rdd.sampleByKey(
+      withReplacement,
+      fractions.asScala.mapValues(_.toDouble).toMap, // map to Scala Double; toMap to serialize
+      seed))
 
   /**
    * Return a subset of this RDD sampled by key (via stratified sampling).
@@ -154,7 +157,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * Use Utils.random.nextLong as the default seed for the random number generator.
    */
   def sampleByKey(withReplacement: Boolean,
-      fractions: java.util.Map[K, Double]): JavaPairRDD[K, V] =
+      fractions: java.util.Map[K, jl.Double]): JavaPairRDD[K, V] =
     sampleByKey(withReplacement, fractions, Utils.random.nextLong)
 
   /**
@@ -168,9 +171,12 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * two additional passes.
    */
   def sampleByKeyExact(withReplacement: Boolean,
-      fractions: java.util.Map[K, Double],
+      fractions: java.util.Map[K, jl.Double],
       seed: Long): JavaPairRDD[K, V] =
-    new JavaPairRDD[K, V](rdd.sampleByKeyExact(withReplacement, fractions.asScala, seed))
+    new JavaPairRDD[K, V](rdd.sampleByKeyExact(
+      withReplacement,
+      fractions.asScala.mapValues(_.toDouble).toMap, // map to Scala Double; toMap to serialize
+      seed))
 
   /**
    * Return a subset of this RDD sampled by key (via stratified sampling) containing exactly
@@ -186,7 +192,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    */
   def sampleByKeyExact(
       withReplacement: Boolean,
-      fractions: java.util.Map[K, Double]): JavaPairRDD[K, V] =
+      fractions: java.util.Map[K, jl.Double]): JavaPairRDD[K, V] =
     sampleByKeyExact(withReplacement, fractions, Utils.random.nextLong)
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/be0d5d3b/core/src/test/java/org/apache/spark/JavaAPISuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java
index 0f65554..04f92d6 100644
--- a/core/src/test/java/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java
@@ -44,7 +44,6 @@ import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import com.google.common.base.Throwables;
 import com.google.common.io.Files;
 import org.apache.hadoop.io.IntWritable;
@@ -1644,7 +1643,7 @@ public class JavaAPISuite implements Serializable {
           return new Tuple2<>(i % 2, 1);
         }
       });
-    Map<Integer, Object> fractions = Maps.newHashMap();
+    Map<Integer, Double> fractions = new HashMap<>();
     fractions.put(0, 0.5);
     fractions.put(1, 1.0);
     JavaPairRDD<Integer, Integer> wr = rdd2.sampleByKey(true, fractions, 1L);
@@ -1670,7 +1669,7 @@ public class JavaAPISuite implements Serializable {
               return new Tuple2<>(i % 2, 1);
           }
       });
-    Map<Integer, Object> fractions = Maps.newHashMap();
+    Map<Integer, Double> fractions = new HashMap<>();
     fractions.put(0, 0.5);
     fractions.put(1, 1.0);
     JavaPairRDD<Integer, Integer> wrExact = rdd2.sampleByKeyExact(true, fractions, 1L);

http://git-wip-us.apache.org/repos/asf/spark/blob/be0d5d3b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
index 72bbb2a..286b95c 100644
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
@@ -37,22 +37,19 @@ public class JavaStratifiedSamplingExample {
 
     @SuppressWarnings("unchecked")
     // $example on$
-    List<Tuple2<Integer, Character>> list = new ArrayList<>(
-      Arrays.<Tuple2<Integer, Character>>asList(
-        new Tuple2(1, 'a'),
-        new Tuple2(1, 'b'),
-        new Tuple2(2, 'c'),
-        new Tuple2(2, 'd'),
-        new Tuple2(2, 'e'),
-        new Tuple2(3, 'f')
-      )
+    List<Tuple2<Integer, Character>> list = Arrays.asList(
+        new Tuple2<>(1, 'a'),
+        new Tuple2<>(1, 'b'),
+        new Tuple2<>(2, 'c'),
+        new Tuple2<>(2, 'd'),
+        new Tuple2<>(2, 'e'),
+        new Tuple2<>(3, 'f')
     );
 
     JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);
 
-    // specify the exact fraction desired from each key Map<K, Object>
-    ImmutableMap<Integer, Object> fractions =
-      ImmutableMap.of(1, (Object)0.1, 2, (Object) 0.6, 3, (Object) 0.3);
+    // specify the exact fraction desired from each key Map<K, Double>
+    ImmutableMap<Integer, Double> fractions = ImmutableMap.of(1, 0.1, 2, 0.6, 3, 0.3);
 
     // Get an approximate sample from each stratum
     JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org