You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by witgo <gi...@git.apache.org> on 2015/02/26 10:03:56 UTC
[GitHub] spark pull request: [SPARK-4902][CORE] gap-sampling performance op...
Github user witgo commented on the pull request:
https://github.com/apache/spark/pull/3744#issuecomment-76143785
```scala
test("bernoulli sampling benchmark") {
  // Benchmark harness: each run draws a Bernoulli sample with the given
  // fraction from a freshly built iterator and forces full traversal via .size.
  class BernoulliSamplerBenchmark(val fraction: Double, items: () => Iterator[Int])
      extends scala.testing.Benchmark {
    override def run(): Unit = {
      val sampler = new BernoulliSampler[Int](fraction)
      // Result is deliberately discarded; we only care about traversal cost.
      sampler.sample(items()).size
    }
  }

  val context = new org.apache.spark.TaskContextImpl(0, 0, 0, 0)
  val len = 1e6.toInt
  val noTimes = 1000

  // Hand-rolled iterator over 1..len. Because it is NOT an
  // InterruptibleIterator, the sampler takes the general (per-element)
  // sampling path — this is the baseline being compared against.
  def generalIterator(): Iterator[Int] = new Iterator[Int] {
    private var i = 0
    override def hasNext: Boolean = i < len
    override def next(): Int = {
      i += 1
      i
    }
  }

  // Average wall-clock time (ms) over `noTimes` runs for the given source.
  def averageTime(fraction: Double, items: () => Iterator[Int]): Double = {
    val benchmark = new BernoulliSamplerBenchmark(fraction, items)
    benchmark.runBenchmark(noTimes).sum.toDouble / noTimes
  }

  // Compare the general path against the gap-sampling path at several
  // fractions; previously this pair was copy-pasted once per fraction.
  for (fraction <- Seq(0.2, 0.05, 0.01)) {
    val generalTime = averageTime(fraction, () => generalIterator())
    println(s"general sampling fraction=$fraction len=$len use time: $generalTime Ms")

    // Wrapping in InterruptibleIterator triggers the optimized gap-sampling
    // path under review in this pull request.
    val array = (1 to len).toArray
    val gapTime = averageTime(
      fraction,
      () => new org.apache.spark.InterruptibleIterator(context, array.iterator))
    println(s"gap sampling fraction=$fraction len=$len use time: $gapTime Ms")
  }
}
```
=>
```
general sampling fraction=0.2 len=1000000 use time: 14.562 Ms
gap sampling fraction=0.2 len=1000000 use time: 16.352 Ms
general sampling fraction=0.05 len=1000000 use time: 5.408 Ms
gap sampling fraction=0.05 len=1000000 use time: 4.251 Ms
general sampling fraction=0.01 len=1000000 use time: 7.528 Ms
gap sampling fraction=0.01 len=1000000 use time: 1.009 Ms
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org