You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/09/09 05:54:04 UTC

spark git commit: [SPARK-10464] [MLLIB] Add WeibullGenerator for RandomDataGenerator

Repository: spark
Updated Branches:
  refs/heads/master 52fe32f6a -> a1573489a


[SPARK-10464] [MLLIB] Add WeibullGenerator for RandomDataGenerator

Add WeibullGenerator for RandomDataGenerator.
#8611 needs to use WeibullGenerator to generate random data based on the Weibull distribution.

Author: Yanbo Liang <yb...@gmail.com>

Closes #8622 from yanboliang/spark-10464.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1573489
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1573489
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1573489

Branch: refs/heads/master
Commit: a1573489a37def97b7c26b798898ffbbdc4defa8
Parents: 52fe32f
Author: Yanbo Liang <yb...@gmail.com>
Authored: Tue Sep 8 20:54:02 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Sep 8 20:54:02 2015 -0700

----------------------------------------------------------------------
 .../mllib/random/RandomDataGenerator.scala      | 27 ++++++++++++++++++--
 .../mllib/random/RandomDataGeneratorSuite.scala | 16 +++++++++++-
 2 files changed, 40 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a1573489/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
index a2d85a6..9eab7ef 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala
@@ -17,8 +17,7 @@
 
 package org.apache.spark.mllib.random
 
-import org.apache.commons.math3.distribution.{ExponentialDistribution,
-  GammaDistribution, LogNormalDistribution, PoissonDistribution}
+import org.apache.commons.math3.distribution._
 
 import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.util.random.{XORShiftRandom, Pseudorandom}
@@ -195,3 +194,27 @@ class LogNormalGenerator @Since("1.3.0") (
   @Since("1.3.0")
   override def copy(): LogNormalGenerator = new LogNormalGenerator(mean, std)
 }
+
+/**
+ * :: DeveloperApi ::
+ * Generates i.i.d. samples from the Weibull distribution with the
+ * given shape and scale parameters, backed by commons-math3's WeibullDistribution.
+ *
+ * @param alpha shape parameter for the Weibull distribution.
+ * @param beta scale parameter for the Weibull distribution.
+ */
+@DeveloperApi
+class WeibullGenerator(
+    val alpha: Double,
+    val beta: Double) extends RandomDataGenerator[Double] {
+
+  private val rng = new WeibullDistribution(alpha, beta)
+
+  override def nextValue(): Double = rng.sample()
+
+  override def setSeed(seed: Long): Unit = {
+    rng.reseedRandomGenerator(seed)
+  }
+
+  override def copy(): WeibullGenerator = new WeibullGenerator(alpha, beta)
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/a1573489/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala
index a5ca151..8416771 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.random
 
-import scala.math
+import org.apache.commons.math3.special.Gamma
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.util.StatCounter
@@ -136,4 +136,18 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
         distributionChecks(gamma, expectedMean, expectedStd, 0.1)
     }
   }
+
+  test("WeibullGenerator") {
+    List((1.0, 2.0), (2.0, 3.0), (2.5, 3.5), (10.4, 2.222)).map {  // (alpha, beta) pairs
+      case (alpha: Double, beta: Double) =>
+        val weibull = new WeibullGenerator(alpha, beta)
+        apiChecks(weibull)
+
+        val expectedMean = math.exp(Gamma.logGamma(1 + (1 / alpha))) * beta  // Weibull mean
+        val expectedVariance = math.exp(  // beta^2 * Gamma(1 + 2/alpha) - mean^2
+          Gamma.logGamma(1 + (2 / alpha))) * beta * beta - expectedMean * expectedMean
+        val expectedStd = math.sqrt(expectedVariance)
+        distributionChecks(weibull, expectedMean, expectedStd, 0.1)
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org