Posted to commits@spark.apache.org by rx...@apache.org on 2014/01/01 02:48:31 UTC

[01/20] git commit: Added a serializable wrapper for HyperLogLog

Updated Branches:
  refs/heads/master 63b411dd8 -> 8b8e70ebd


Added a serializable wrapper for HyperLogLog


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/843727af
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/843727af
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/843727af

Branch: refs/heads/master
Commit: 843727af99786a45cf29352b4e05df92c6b3b6b9
Parents: fc26e5b
Author: Hossein Falaki <fa...@gmail.com>
Authored: Thu Oct 17 22:17:06 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Thu Oct 17 22:17:06 2013 -0700

----------------------------------------------------------------------
 .../spark/util/SerializableHyperLogLog.scala    | 44 ++++++++++++++++++++
 1 file changed, 44 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/843727af/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
new file mode 100644
index 0000000..28a8acc
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+import java.io.{ObjectOutputStream, ObjectInputStream}
+import com.clearspring.analytics.stream.cardinality.{ICardinality, HyperLogLog}
+
+/**
+ * A wrapper around com.clearspring.analytics.stream.cardinality.HyperLogLog that is serializable.
+ */
+private[spark]
+class SerializableHyperLogLog(@transient var value: ICardinality) extends Serializable {
+
+
+  def merge(other: SerializableHyperLogLog) = new SerializableHyperLogLog(value.merge(other.value))
+
+  private def readObject(in: ObjectInputStream) {
+    val byteLength = in.readInt()
+    val bytes = new Array[Byte](byteLength)
+    in.readFully(bytes)
+    value = HyperLogLog.Builder.build(bytes)
+  }
+
+  private def writeObject(out: ObjectOutputStream) {
+    val bytes = value.getBytes()
+    out.writeInt(bytes.length)
+    out.write(bytes)
+  }
+}
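
A brief usage sketch (not part of the commit) of what the wrapper provides: the stream-lib counter is wrapped so it can travel through Spark's Java serialization (closures and shuffle data must be Serializable), and the wrapper writes only the sketch's byte representation. Elements are fed in through offer(), two sketches are unioned with merge(), and cardinality() reads the estimate back out. The private[spark] visibility is ignored here for illustration, and the HyperLogLog(rsd: Double) constructor is assumed from stream-lib.

    import com.clearspring.analytics.stream.cardinality.HyperLogLog
    import org.apache.spark.util.SerializableHyperLogLog

    val a = new SerializableHyperLogLog(new HyperLogLog(0.05))
    val b = new SerializableHyperLogLog(new HyperLogLog(0.05))
    (1 to 500).foreach(i => a.value.offer(i))      // offer() feeds elements into the sketch
    (400 to 900).foreach(i => b.value.offer(i))

    val merged = a.merge(b)                        // union of the two sketches
    println(merged.value.cardinality())            // roughly 900 distinct values overall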


[02/20] git commit: Added a countDistinct method to RDD that takes an accuracy parameter and returns the (approximate) number of distinct elements in the RDD.

Posted by rx...@apache.org.
Added a countDistinct method to RDD that takes an accuracy parameter and returns the (approximate) number of distinct elements in the RDD.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/1a701358
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/1a701358
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/1a701358

Branch: refs/heads/master
Commit: 1a701358c0811c7f270132291e0646fd806e4984
Parents: 843727a
Author: Hossein Falaki <fa...@gmail.com>
Authored: Thu Oct 17 22:24:48 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Thu Oct 17 22:24:48 2013 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/rdd/RDD.scala   | 26 +++++++++++++++++++-
 .../scala/org/apache/spark/rdd/RDDSuite.scala   | 13 ++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/1a701358/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 0355618..09932db 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -30,6 +30,7 @@ import org.apache.hadoop.io.Text
 import org.apache.hadoop.mapred.TextOutputFormat
 
 import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap}
+import com.clearspring.analytics.stream.cardinality.HyperLogLog
 
 import org.apache.spark.Partitioner._
 import org.apache.spark.api.java.JavaRDD
@@ -38,7 +39,7 @@ import org.apache.spark.partial.CountEvaluator
 import org.apache.spark.partial.GroupedCountEvaluator
 import org.apache.spark.partial.PartialResult
 import org.apache.spark.storage.StorageLevel
-import org.apache.spark.util.{Utils, BoundedPriorityQueue}
+import org.apache.spark.util.{Utils, BoundedPriorityQueue, SerializableHyperLogLog}
 
 import org.apache.spark.SparkContext._
 import org.apache.spark._
@@ -766,6 +767,29 @@ abstract class RDD[T: ClassManifest](
   }
 
   /**
+   * Return approximate number of distinct elements in the RDD.
+   *
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. The default value of
+   * relativeSD is 0.05.
+   */
+  def countDistinct(relativeSD: Double = 0.05): Long = {
+
+    def hllCountPartition(iter: Iterator[T]): Iterator[SerializableHyperLogLog] = {
+      val hllCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
+      while (iter.hasNext) {
+        val v = iter.next()
+        hllCounter.value.offer(v)
+      }
+      Iterator(hllCounter)
+    }
+    def mergeCounters(c1: SerializableHyperLogLog, c2: SerializableHyperLogLog): SerializableHyperLogLog = c1.merge(c2)
+
+    mapPartitions(hllCountPartition).reduce(mergeCounters).value.cardinality()
+  }
+
+  /**
    * Take the first num elements of the RDD. It works by first scanning one partition, and use the
    * results from that partition to estimate the number of additional partitions needed to satisfy
    * the limit.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/1a701358/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index 6d1bc5e..6baf9c7 100644
--- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -63,6 +63,19 @@ class RDDSuite extends FunSuite with SharedSparkContext {
     }
   }
 
+  test("Approximate distinct count") {
+
+    def error(est: Long, size: Long) = math.abs(est - size)/size.toDouble
+
+    val size = 100
+    val uniformDistro = for (i <- 1 to 100000) yield i % size
+    val simpleRdd = sc.makeRDD(uniformDistro)
+    assert( error(simpleRdd.countDistinct(0.2), size) < 0.2)
+    assert( error(simpleRdd.countDistinct(0.05), size) < 0.05)
+    assert( error(simpleRdd.countDistinct(0.01), size) < 0.01)
+    assert( error(simpleRdd.countDistinct(0.001), size) < 0.001)
+  }
+
   test("SparkContext.union") {
     val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
     assert(sc.union(nums).collect().toList === List(1, 2, 3, 4))
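
Taken together, the commit builds one HyperLogLog counter per partition and merges the per-partition counters with reduce. relativeSD controls how many registers each sketch keeps, so halving it roughly quadruples the memory per counter while tightening the estimate. A driver-side usage sketch (sc is a placeholder SparkContext; the numbers are illustrative):

    val data  = sc.parallelize(1 to 100000).map(_ % 1000)   // 1000 distinct values
    val rough = data.countDistinct(0.2)                      // small sketch, loose error bound
    val tight = data.countDistinct(0.01)                     // larger sketch, estimate close to 1000
    println(s"rough = $rough, tight = $tight")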


[12/20] git commit: Using origin version

Posted by rx...@apache.org.
Using origin version


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/d50ccc5c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/d50ccc5c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/d50ccc5c

Branch: refs/heads/master
Commit: d50ccc5ca9f9f0fa6418c88e7fbfb4a87b1a0e68
Parents: 49bf47e d63856c
Author: Hossein Falaki <fa...@gmail.com>
Authored: Mon Dec 30 15:08:34 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Mon Dec 30 15:08:34 2013 -0800

----------------------------------------------------------------------
 .gitignore                                      |    1 +
 README.md                                       |   13 +-
 assembly/pom.xml                                |   16 +-
 bagel/pom.xml                                   |   12 +-
 bin/compute-classpath.cmd                       |    2 +-
 bin/compute-classpath.sh                        |   24 +-
 conf/metrics.properties.template                |    8 +
 core/pom.xml                                    | 1593 ++++++++++++++---
 .../apache/spark/network/netty/FileClient.java  |    2 -
 .../apache/spark/network/netty/FileServer.java  |    1 -
 .../spark/network/netty/FileServerHandler.java  |   19 +-
 .../spark/network/netty/PathResolver.java       |   11 +-
 .../hadoop/mapred/SparkHadoopMapRedUtil.scala   |   17 +-
 .../mapreduce/SparkHadoopMapReduceUtil.scala    |   33 +-
 .../scala/org/apache/spark/FutureAction.scala   |    2 +-
 .../org/apache/spark/MapOutputTracker.scala     |  210 ++-
 .../scala/org/apache/spark/Partitioner.scala    |    8 +-
 .../scala/org/apache/spark/SparkContext.scala   |  401 +++--
 .../main/scala/org/apache/spark/SparkEnv.scala  |   38 +-
 .../org/apache/spark/SparkHadoopWriter.scala    |   16 +-
 .../main/scala/org/apache/spark/TaskState.scala |    3 +-
 .../apache/spark/api/java/JavaDoubleRDD.scala   |   73 +-
 .../org/apache/spark/api/java/JavaPairRDD.scala |  104 +-
 .../org/apache/spark/api/java/JavaRDD.scala     |   28 +-
 .../org/apache/spark/api/java/JavaRDDLike.scala |   43 +-
 .../spark/api/java/JavaSparkContext.scala       |   60 +-
 .../java/JavaSparkContextVarargsWorkaround.java |    1 -
 .../java/function/DoubleFlatMapFunction.java    |   10 +-
 .../spark/api/java/function/DoubleFunction.java |    5 +-
 .../api/java/function/FlatMapFunction.scala     |    7 +-
 .../api/java/function/FlatMapFunction2.scala    |    7 +-
 .../spark/api/java/function/Function.java       |   11 +-
 .../spark/api/java/function/Function2.java      |   11 +-
 .../spark/api/java/function/Function3.java      |   36 +
 .../api/java/function/PairFlatMapFunction.java  |   15 +-
 .../spark/api/java/function/PairFunction.java   |   18 +-
 .../api/java/function/WrappedFunction3.scala    |   34 +
 .../org/apache/spark/api/python/PythonRDD.scala |  163 +-
 .../spark/api/python/PythonWorkerFactory.scala  |    4 +-
 .../spark/broadcast/BitTorrentBroadcast.scala   | 1060 -----------
 .../apache/spark/broadcast/HttpBroadcast.scala  |   10 +-
 .../apache/spark/broadcast/MultiTracker.scala   |  410 -----
 .../org/apache/spark/broadcast/SourceInfo.scala |   54 -
 .../spark/broadcast/TorrentBroadcast.scala      |  247 +++
 .../apache/spark/broadcast/TreeBroadcast.scala  |  601 -------
 .../org/apache/spark/deploy/ExecutorState.scala |    3 +-
 .../spark/deploy/FaultToleranceTest.scala       |   28 +-
 .../apache/spark/deploy/LocalSparkCluster.scala |   19 +-
 .../apache/spark/deploy/SparkHadoopUtil.scala   |   60 +-
 .../org/apache/spark/deploy/client/Client.scala |   50 +-
 .../spark/deploy/master/ApplicationState.scala  |    3 +-
 .../master/FileSystemPersistenceEngine.scala    |    6 +-
 .../org/apache/spark/deploy/master/Master.scala |   78 +-
 .../spark/deploy/master/RecoveryState.scala     |    4 +-
 .../deploy/master/SparkZooKeeperSession.scala   |   11 +-
 .../spark/deploy/master/WorkerState.scala       |    4 +-
 .../master/ZooKeeperLeaderElectionAgent.scala   |    4 +-
 .../master/ZooKeeperPersistenceEngine.scala     |    6 +-
 .../deploy/master/ui/ApplicationPage.scala      |   13 +-
 .../spark/deploy/master/ui/IndexPage.scala      |   16 +-
 .../spark/deploy/master/ui/MasterWebUI.scala    |   10 +-
 .../spark/deploy/worker/ExecutorRunner.scala    |    2 +-
 .../org/apache/spark/deploy/worker/Worker.scala |   48 +-
 .../spark/deploy/worker/ui/IndexPage.scala      |    9 +-
 .../spark/deploy/worker/ui/WorkerWebUI.scala    |   15 +-
 .../executor/CoarseGrainedExecutorBackend.scala |   32 +-
 .../org/apache/spark/executor/Executor.scala    |   64 +-
 .../apache/spark/executor/ExecutorSource.scala  |    2 -
 .../org/apache/spark/executor/TaskMetrics.scala |   31 +-
 .../spark/metrics/sink/GraphiteSink.scala       |   82 +
 .../spark/network/ConnectionManager.scala       |    8 +-
 .../spark/network/ConnectionManagerTest.scala   |    4 +-
 .../spark/network/netty/ShuffleCopier.scala     |    2 +-
 .../spark/network/netty/ShuffleSender.scala     |    7 +-
 .../main/scala/org/apache/spark/package.scala   |    2 +
 .../org/apache/spark/rdd/AsyncRDDActions.scala  |    3 +-
 .../scala/org/apache/spark/rdd/BlockRDD.scala   |    4 +-
 .../org/apache/spark/rdd/CartesianRDD.scala     |    5 +-
 .../org/apache/spark/rdd/CheckpointRDD.scala    |   21 +-
 .../org/apache/spark/rdd/CoalescedRDD.scala     |    3 +-
 .../apache/spark/rdd/DoubleRDDFunctions.scala   |  127 ++
 .../scala/org/apache/spark/rdd/EmptyRDD.scala   |    5 +-
 .../org/apache/spark/rdd/FilteredRDD.scala      |    3 +-
 .../org/apache/spark/rdd/FlatMappedRDD.scala    |    3 +-
 .../scala/org/apache/spark/rdd/GlommedRDD.scala |    3 +-
 .../scala/org/apache/spark/rdd/HadoopRDD.scala  |   11 +-
 .../scala/org/apache/spark/rdd/JdbcRDD.scala    |    4 +-
 .../org/apache/spark/rdd/MapPartitionsRDD.scala |   12 +-
 .../spark/rdd/MapPartitionsWithContextRDD.scala |   41 -
 .../scala/org/apache/spark/rdd/MappedRDD.scala  |    4 +-
 .../apache/spark/rdd/OrderedRDDFunctions.scala  |   10 +-
 .../org/apache/spark/rdd/PairRDDFunctions.scala |   33 +-
 .../spark/rdd/ParallelCollectionRDD.scala       |    8 +-
 .../apache/spark/rdd/PartitionPruningRDD.scala  |   14 +-
 .../scala/org/apache/spark/rdd/PipedRDD.scala   |    3 +-
 .../main/scala/org/apache/spark/rdd/RDD.scala   |  133 +-
 .../apache/spark/rdd/RDDCheckpointData.scala    |    4 +-
 .../scala/org/apache/spark/rdd/SampledRDD.scala |    5 +-
 .../spark/rdd/SequenceFileRDDFunctions.scala    |   11 +-
 .../org/apache/spark/rdd/ShuffledRDD.scala      |    6 +-
 .../org/apache/spark/rdd/SubtractedRDD.scala    |    5 +-
 .../scala/org/apache/spark/rdd/UnionRDD.scala   |    7 +-
 .../apache/spark/rdd/ZippedPartitionsRDD.scala  |   57 +-
 .../scala/org/apache/spark/rdd/ZippedRDD.scala  |    6 +-
 .../apache/spark/scheduler/DAGScheduler.scala   |  494 ++++--
 .../spark/scheduler/DAGSchedulerEvent.scala     |   10 +-
 .../spark/scheduler/InputFormatInfo.scala       |    7 +-
 .../org/apache/spark/scheduler/JobLogger.scala  |  676 ++++---
 .../org/apache/spark/scheduler/JobWaiter.scala  |    1 +
 .../apache/spark/scheduler/SchedulingMode.scala |    2 +-
 .../apache/spark/scheduler/ShuffleMapTask.scala |   29 +-
 .../apache/spark/scheduler/SparkListener.scala  |   26 +-
 .../spark/scheduler/SparkListenerBus.scala      |    3 +-
 .../org/apache/spark/scheduler/Stage.scala      |    6 +-
 .../org/apache/spark/scheduler/StageInfo.scala  |   14 +-
 .../scala/org/apache/spark/scheduler/Task.scala |    2 +-
 .../org/apache/spark/scheduler/TaskInfo.scala   |   22 +
 .../apache/spark/scheduler/TaskLocality.scala   |    4 +-
 .../org/apache/spark/scheduler/TaskResult.scala |   20 +-
 .../scheduler/cluster/ClusterScheduler.scala    |   47 +-
 .../cluster/ClusterTaskSetManager.scala         |   24 +-
 .../cluster/CoarseGrainedClusterMessage.scala   |    4 +
 .../cluster/CoarseGrainedSchedulerBackend.scala |   64 +-
 .../cluster/SimrSchedulerBackend.scala          |   67 +
 .../cluster/SparkDeploySchedulerBackend.scala   |    2 +-
 .../scheduler/cluster/TaskResultGetter.scala    |    5 +-
 .../mesos/CoarseMesosSchedulerBackend.scala     |    3 +-
 .../spark/scheduler/local/LocalScheduler.scala  |   29 +-
 .../scheduler/local/LocalTaskSetManager.scala   |    2 +-
 .../spark/serializer/KryoSerializer.scala       |   52 +-
 .../org/apache/spark/storage/BlockId.scala      |    9 +-
 .../org/apache/spark/storage/BlockInfo.scala    |   81 +
 .../org/apache/spark/storage/BlockManager.scala |  565 ++----
 .../spark/storage/BlockManagerMaster.scala      |   19 +-
 .../spark/storage/BlockManagerMasterActor.scala |   16 +-
 .../spark/storage/BlockManagerSlaveActor.scala  |    1 +
 .../spark/storage/BlockObjectWriter.scala       |  142 +-
 .../apache/spark/storage/DiskBlockManager.scala |  151 ++
 .../org/apache/spark/storage/DiskStore.scala    |  263 +--
 .../org/apache/spark/storage/FileSegment.scala  |   28 +
 .../spark/storage/ShuffleBlockManager.scala     |  196 +-
 .../org/apache/spark/storage/StorageLevel.scala |    2 +-
 .../spark/storage/StoragePerfTester.scala       |  103 ++
 .../apache/spark/storage/ThreadingTest.scala    |    2 +-
 .../org/apache/spark/ui/exec/ExecutorsUI.scala  |   54 +-
 .../apache/spark/ui/jobs/ExecutorSummary.scala  |   27 +
 .../apache/spark/ui/jobs/ExecutorTable.scala    |   90 +
 .../org/apache/spark/ui/jobs/IndexPage.scala    |    2 +-
 .../spark/ui/jobs/JobProgressListener.scala     |  139 +-
 .../apache/spark/ui/jobs/JobProgressUI.scala    |    2 +-
 .../org/apache/spark/ui/jobs/PoolTable.scala    |    8 +-
 .../org/apache/spark/ui/jobs/StagePage.scala    |   96 +-
 .../org/apache/spark/ui/jobs/StageTable.scala   |   35 +-
 .../spark/ui/storage/BlockManagerUI.scala       |    5 +-
 .../scala/org/apache/spark/util/AkkaUtils.scala |   85 +-
 .../org/apache/spark/util/AppendOnlyMap.scala   |   93 +-
 .../spark/util/BoundedPriorityQueue.scala       |    2 +
 .../spark/util/IndestructibleActorSystem.scala  |   68 +
 .../org/apache/spark/util/MetadataCleaner.scala |    6 +-
 .../apache/spark/util/TimeStampedHashMap.scala  |    2 +-
 .../scala/org/apache/spark/util/Utils.scala     |   49 +-
 .../org/apache/spark/util/XORShiftRandom.scala  |   94 +
 .../apache/spark/util/collection/BitSet.scala   |  103 ++
 .../spark/util/collection/OpenHashMap.scala     |  153 ++
 .../spark/util/collection/OpenHashSet.scala     |  279 +++
 .../collection/PrimitiveKeyOpenHashMap.scala    |  128 ++
 .../spark/util/collection/PrimitiveVector.scala |   69 +
 .../org/apache/spark/AccumulatorSuite.scala     |   32 +-
 .../scala/org/apache/spark/BroadcastSuite.scala |   52 +-
 .../org/apache/spark/CheckpointSuite.scala      |    7 +-
 .../org/apache/spark/DistributedSuite.scala     |    5 +-
 .../scala/org/apache/spark/DriverSuite.scala    |    2 +-
 .../org/apache/spark/FileServerSuite.scala      |   16 +
 .../scala/org/apache/spark/JavaAPISuite.java    |   68 +
 .../org/apache/spark/JobCancellationSuite.scala |   34 +-
 .../org/apache/spark/LocalSparkContext.scala    |    2 +-
 .../apache/spark/MapOutputTrackerSuite.scala    |   26 +-
 .../apache/spark/PartitionPruningRDDSuite.scala |   45 -
 .../org/apache/spark/PartitioningSuite.scala    |   10 +-
 .../SparkContextSchedulerCreationSuite.scala    |  140 ++
 .../scala/org/apache/spark/UnpersistSuite.scala |    2 +-
 .../deploy/worker/ExecutorRunnerTest.scala      |   36 +
 .../apache/spark/rdd/AsyncRDDActionsSuite.scala |   26 +
 .../org/apache/spark/rdd/DoubleRDDSuite.scala   |  271 +++
 .../spark/rdd/PartitionPruningRDDSuite.scala    |   86 +
 .../scala/org/apache/spark/rdd/RDDSuite.scala   |   28 +-
 .../spark/scheduler/DAGSchedulerSuite.scala     |   51 +-
 .../apache/spark/scheduler/JobLoggerSuite.scala |   24 +-
 .../spark/scheduler/SparkListenerSuite.scala    |  132 +-
 .../cluster/ClusterTaskSetManagerSuite.scala    |    5 +-
 .../cluster/TaskResultGetterSuite.scala         |    4 +-
 .../org/apache/spark/storage/BlockIdSuite.scala |    2 +-
 .../spark/storage/BlockManagerSuite.scala       |    2 +-
 .../spark/storage/DiskBlockManagerSuite.scala   |  111 ++
 .../scala/org/apache/spark/ui/UISuite.scala     |    1 -
 .../ui/jobs/JobProgressListenerSuite.scala      |   73 +
 .../apache/spark/util/SizeEstimatorSuite.scala  |   72 +-
 .../apache/spark/util/XORShiftRandomSuite.scala |   76 +
 .../spark/util/collection/BitSetSuite.scala     |   73 +
 .../util/collection/OpenHashMapSuite.scala      |  177 ++
 .../util/collection/OpenHashSetSuite.scala      |  180 ++
 .../PrimitiveKeyOpenHashMapSuite.scala          |  119 ++
 .../util/collection/PrimitiveVectorSuite.scala  |  117 ++
 docker/spark-test/README.md                     |    7 +-
 docs/_config.yml                                |    2 +-
 docs/_layouts/global.html                       |    8 +-
 docs/_plugins/copy_api_dirs.rb                  |    2 +-
 docs/bagel-programming-guide.md                 |    2 +-
 docs/building-with-maven.md                     |    6 +
 docs/cluster-overview.md                        |   16 +-
 docs/configuration.md                           |   69 +-
 docs/ec2-scripts.md                             |    2 +-
 docs/hadoop-third-party-distributions.md        |    7 +-
 docs/index.md                                   |    8 +-
 docs/job-scheduling.md                          |    2 +-
 docs/monitoring.md                              |    1 +
 docs/python-programming-guide.md                |   11 +
 docs/running-on-yarn.md                         |   46 +-
 docs/scala-programming-guide.md                 |    8 +-
 docs/spark-standalone.md                        |    4 +-
 docs/streaming-programming-guide.md             |   12 +-
 docs/tuning.md                                  |    5 +-
 ec2/spark_ec2.py                                |   72 +-
 examples/pom.xml                                |   60 +-
 .../org/apache/spark/examples/JavaLogQuery.java |    2 +-
 .../org/apache/spark/examples/JavaPageRank.java |    3 +-
 .../apache/spark/examples/JavaWordCount.java    |    2 +-
 .../apache/spark/mllib/examples/JavaALS.java    |    1 -
 .../streaming/examples/JavaKafkaWordCount.java  |   98 +
 .../apache/spark/examples/BroadcastTest.scala   |   21 +-
 .../org/apache/spark/examples/LocalALS.scala    |    2 +-
 .../spark/examples/MultiBroadcastTest.scala     |   15 +-
 .../org/apache/spark/examples/SparkHdfsLR.scala |    3 +-
 .../org/apache/spark/examples/SparkPi.scala     |    2 +-
 .../org/apache/spark/examples/SparkTC.scala     |    2 +-
 .../streaming/examples/ActorWordCount.scala     |    9 +-
 .../streaming/examples/KafkaWordCount.scala     |   28 +-
 .../streaming/examples/MQTTWordCount.scala      |  107 ++
 .../streaming/examples/ZeroMQWordCount.scala    |    8 +-
 mllib/pom.xml                                   |   12 +-
 .../apache/spark/mllib/clustering/KMeans.scala  |   11 +-
 .../spark/mllib/util/MFDataGenerator.scala      |    2 +-
 .../spark/mllib/clustering/JavaKMeansSuite.java |    4 +-
 .../mllib/recommendation/JavaALSSuite.java      |    2 -
 new-yarn/pom.xml                                |  161 ++
 .../spark/deploy/yarn/ApplicationMaster.scala   |  446 +++++
 .../yarn/ApplicationMasterArguments.scala       |   94 +
 .../org/apache/spark/deploy/yarn/Client.scala   |  519 ++++++
 .../spark/deploy/yarn/ClientArguments.scala     |  149 ++
 .../yarn/ClientDistributedCacheManager.scala    |  228 +++
 .../spark/deploy/yarn/WorkerLauncher.scala      |  222 +++
 .../spark/deploy/yarn/WorkerRunnable.scala      |  209 +++
 .../deploy/yarn/YarnAllocationHandler.scala     |  687 +++++++
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   43 +
 .../cluster/YarnClientClusterScheduler.scala    |   47 +
 .../cluster/YarnClientSchedulerBackend.scala    |  109 ++
 .../cluster/YarnClusterScheduler.scala          |   55 +
 .../ClientDistributedCacheManagerSuite.scala    |  220 +++
 pom.xml                                         |  256 +--
 project/SparkBuild.scala                        |  172 +-
 project/plugins.sbt                             |    2 +-
 pyspark                                         |   10 +-
 pyspark2.cmd                                    |    2 +-
 python/epydoc.conf                              |    2 +-
 python/pyspark/accumulators.py                  |   19 +-
 python/pyspark/context.py                       |  116 +-
 python/pyspark/rdd.py                           |  114 +-
 python/pyspark/serializers.py                   |  301 +++-
 python/pyspark/tests.py                         |   18 +-
 python/pyspark/worker.py                        |   44 +-
 python/run-tests                                |    1 +
 python/test_support/userlibrary.py              |   17 +
 repl-bin/pom.xml                                |    8 +-
 repl-bin/src/deb/bin/run                        |    2 +-
 repl-bin/src/deb/bin/spark-executor             |    2 +-
 repl-bin/src/deb/bin/spark-shell                |    2 +-
 repl/lib/scala-jline.jar                        |  Bin 158463 -> 0 bytes
 repl/pom.xml                                    |   18 +-
 .../main/scala/org/apache/spark/repl/Main.scala |    8 +-
 .../org/apache/spark/repl/SparkExprTyper.scala  |  109 ++
 .../org/apache/spark/repl/SparkILoop.scala      |  949 +++++-----
 .../org/apache/spark/repl/SparkILoopInit.scala  |  143 ++
 .../org/apache/spark/repl/SparkIMain.scala      | 1674 ++++++++++--------
 .../org/apache/spark/repl/SparkISettings.scala  |   63 -
 .../org/apache/spark/repl/SparkImports.scala    |  108 +-
 .../spark/repl/SparkJLineCompletion.scala       |  206 ++-
 .../apache/spark/repl/SparkJLineReader.scala    |   65 +-
 .../apache/spark/repl/SparkMemberHandlers.scala |  109 +-
 .../scala/org/apache/spark/repl/ReplSuite.scala |  213 ++-
 run-example                                     |   12 +-
 run-example2.cmd                                |    2 +-
 sbt/sbt                                         |   21 +-
 spark-class                                     |   44 +-
 spark-class2.cmd                                |    7 +
 spark-shell                                     |   19 +-
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.jar     |  Bin 1358063 -> 0 bytes
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.jar.md5 |    1 -
 .../0.7.2-spark/kafka-0.7.2-spark.jar.sha1      |    1 -
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.pom     |    9 -
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.pom.md5 |    1 -
 .../0.7.2-spark/kafka-0.7.2-spark.pom.sha1      |    1 -
 .../apache/kafka/kafka/maven-metadata-local.xml |   12 -
 .../kafka/kafka/maven-metadata-local.xml.md5    |    1 -
 .../kafka/kafka/maven-metadata-local.xml.sha1   |    1 -
 streaming/pom.xml                               |   74 +-
 .../org/apache/spark/streaming/Checkpoint.scala |    8 +-
 .../org/apache/spark/streaming/DStream.scala    |   97 +-
 .../spark/streaming/DStreamCheckpointData.scala |    6 +-
 .../apache/spark/streaming/DStreamGraph.scala   |    1 +
 .../scala/org/apache/spark/streaming/Job.scala  |   41 -
 .../org/apache/spark/streaming/JobManager.scala |   88 -
 .../spark/streaming/NetworkInputTracker.scala   |  174 --
 .../spark/streaming/PairDStreamFunctions.scala  |  197 ++-
 .../org/apache/spark/streaming/Scheduler.scala  |  131 --
 .../spark/streaming/StreamingContext.scala      |  112 +-
 .../spark/streaming/api/java/JavaDStream.scala  |   14 +-
 .../streaming/api/java/JavaDStreamLike.scala    |  128 +-
 .../streaming/api/java/JavaPairDStream.scala    |  233 ++-
 .../api/java/JavaStreamingContext.scala         |  201 ++-
 .../streaming/dstream/CoGroupedDStream.scala    |   58 -
 .../dstream/ConstantInputDStream.scala          |    3 +-
 .../streaming/dstream/FileInputDStream.scala    |   12 +-
 .../streaming/dstream/FilteredDStream.scala     |    3 +-
 .../dstream/FlatMapValuedDStream.scala          |    3 +-
 .../streaming/dstream/FlatMappedDStream.scala   |    3 +-
 .../streaming/dstream/FlumeInputDStream.scala   |    7 +-
 .../streaming/dstream/ForEachDStream.scala      |    6 +-
 .../streaming/dstream/GlommedDStream.scala      |    3 +-
 .../spark/streaming/dstream/InputDStream.scala  |    4 +-
 .../streaming/dstream/KafkaInputDStream.scala   |   68 +-
 .../streaming/dstream/MQTTInputDStream.scala    |  110 ++
 .../dstream/MapPartitionedDStream.scala         |    3 +-
 .../streaming/dstream/MapValuedDStream.scala    |    3 +-
 .../spark/streaming/dstream/MappedDStream.scala |    3 +-
 .../streaming/dstream/NetworkInputDStream.scala |   18 +-
 .../dstream/PluggableInputDStream.scala         |    3 +-
 .../streaming/dstream/QueueInputDStream.scala   |    4 +-
 .../streaming/dstream/RawInputDStream.scala     |    4 +-
 .../dstream/ReducedWindowedDStream.scala        |    9 +-
 .../streaming/dstream/ShuffledDStream.scala     |    3 +-
 .../streaming/dstream/SocketInputDStream.scala  |    6 +-
 .../spark/streaming/dstream/StateDStream.scala  |    4 +-
 .../streaming/dstream/TransformedDStream.scala  |   21 +-
 .../spark/streaming/dstream/UnionDStream.scala  |    5 +-
 .../streaming/dstream/WindowedDStream.scala     |    7 +-
 .../streaming/receivers/ActorReceiver.scala     |   35 +-
 .../streaming/receivers/ZeroMQReceiver.scala    |   13 +-
 .../spark/streaming/scheduler/BatchInfo.scala   |   55 +
 .../apache/spark/streaming/scheduler/Job.scala  |   41 +
 .../streaming/scheduler/JobGenerator.scala      |  131 ++
 .../streaming/scheduler/JobScheduler.scala      |  108 ++
 .../spark/streaming/scheduler/JobSet.scala      |   68 +
 .../scheduler/NetworkInputTracker.scala         |  175 ++
 .../streaming/scheduler/StreamingListener.scala |   75 +
 .../scheduler/StreamingListenerBus.scala        |   81 +
 .../streaming/util/MasterFailureTest.scala      |   45 +-
 .../apache/spark/streaming/JavaAPISuite.java    |  513 +++++-
 .../apache/spark/streaming/JavaTestUtils.scala  |   54 +-
 .../spark/streaming/BasicOperationsSuite.scala  |  153 +-
 .../spark/streaming/CheckpointSuite.scala       |   66 +-
 .../apache/spark/streaming/FailureSuite.scala   |   13 +-
 .../spark/streaming/InputStreamsSuite.scala     |  107 +-
 .../streaming/StreamingListenerSuite.scala      |   71 +
 .../apache/spark/streaming/TestSuiteBase.scala  |  118 +-
 .../spark/streaming/WindowOperationsSuite.scala |   14 +-
 tools/pom.xml                                   |   12 +-
 .../tools/JavaAPICompletenessChecker.scala      |    4 +-
 yarn/pom.xml                                    |   58 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  199 ++-
 .../org/apache/spark/deploy/yarn/Client.scala   |  435 +++--
 .../spark/deploy/yarn/ClientArguments.scala     |   41 +-
 .../yarn/ClientDistributedCacheManager.scala    |  228 +++
 .../spark/deploy/yarn/WorkerLauncher.scala      |  243 +++
 .../spark/deploy/yarn/WorkerRunnable.scala      |  145 +-
 .../deploy/yarn/YarnAllocationHandler.scala     |  358 ++--
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |    5 +-
 .../cluster/YarnClientClusterScheduler.scala    |   47 +
 .../cluster/YarnClientSchedulerBackend.scala    |  109 ++
 .../ClientDistributedCacheManagerSuite.scala    |  220 +++
 379 files changed, 19404 insertions(+), 8777 deletions(-)
----------------------------------------------------------------------



[19/20] git commit: Made the code more compact and readable

Posted by rx...@apache.org.
Made the code more compact and readable


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/bee445c9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/bee445c9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/bee445c9

Branch: refs/heads/master
Commit: bee445c927586136673f39259f23642a5a6e8efe
Parents: acb0323
Author: Hossein Falaki <fa...@gmail.com>
Authored: Tue Dec 31 16:58:18 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Tue Dec 31 16:58:18 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/rdd/PairRDDFunctions.scala | 12 ++----------
 core/src/main/scala/org/apache/spark/rdd/RDD.scala    | 14 +-------------
 .../apache/spark/util/SerializableHyperLogLog.scala   |  5 +++++
 3 files changed, 8 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bee445c9/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 088b298..04a8d05 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -218,19 +218,11 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * Partitioner to partition the output RDD.
    */
   def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = {
-    val createHLL = (v: V) => {
-      val hll = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
-      hll.value.offer(v)
-      hll
-    }
-    val mergeValueHLL = (hll: SerializableHyperLogLog, v: V) => {
-      hll.value.offer(v)
-      hll
-    }
+    val createHLL = (v: V) => new SerializableHyperLogLog(new HyperLogLog(relativeSD)).add(v)
+    val mergeValueHLL = (hll: SerializableHyperLogLog, v: V) => hll.add(v)
     val mergeHLL = (h1: SerializableHyperLogLog, h2: SerializableHyperLogLog) => h1.merge(h2)
 
     combineByKey(createHLL, mergeValueHLL, mergeHLL, partitioner).mapValues(_.value.cardinality())
-
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bee445c9/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 161fd06..4960e6e 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -798,20 +798,8 @@ abstract class RDD[T: ClassTag](
    * relativeSD is 0.05.
    */
   def countApproxDistinct(relativeSD: Double = 0.05): Long = {
-
-    def hllCountPartition(iter: Iterator[T]): Iterator[SerializableHyperLogLog] = {
-      val hllCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
-      while (iter.hasNext) {
-        val v = iter.next()
-        hllCounter.value.offer(v)
-      }
-      Iterator(hllCounter)
-    }
-    def mergeCounters(c1: SerializableHyperLogLog, c2: SerializableHyperLogLog) = c1.merge(c2)
-
     val zeroCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
-    mapPartitions(hllCountPartition).aggregate(zeroCounter)(mergeCounters, mergeCounters)
-      .value.cardinality()
+    aggregate(zeroCounter)(_.add(_), _.merge(_)).value.cardinality()
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bee445c9/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
index 9cfd414..8b4e7c1 100644
--- a/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
+++ b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
@@ -30,6 +30,11 @@ class SerializableHyperLogLog(var value: ICardinality) extends Externalizable {
 
   def merge(other: SerializableHyperLogLog) = new SerializableHyperLogLog(value.merge(other.value))
 
+  def add[T](elem: T) = {
+    this.value.offer(elem)
+    this
+  }
+
   def readExternal(in: ObjectInput) {
     val byteLength = in.readInt()
     val bytes = new Array[Byte](byteLength)
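
The compaction leans on RDD.aggregate: the zero value is a fresh counter, the sequence operator folds each element of a partition into that partition's copy of the counter with the new add method, and the combine operator merges counters across partitions. Spelled out without the underscore shorthand, the one-liner in countApproxDistinct is roughly this sketch (relativeSD stands in for the method parameter):

    val zeroCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
    val combined = aggregate(zeroCounter)(
      (counter, elem) => counter.add(elem),   // seqOp: applied to every element within a partition
      (c1, c2) => c1.merge(c2)                // combOp: merges the per-partition counters
    )
    combined.value.cardinality()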


[15/20] git commit: Added Java API for countApproxDistinctByKey

Posted by rx...@apache.org.
Added Java API for countApproxDistinctByKey


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/ed06500d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/ed06500d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/ed06500d

Branch: refs/heads/master
Commit: ed06500d300e93ae3129a035a364117adcb7d361
Parents: b75d7c9
Author: Hossein Falaki <fa...@gmail.com>
Authored: Mon Dec 30 19:30:42 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Mon Dec 30 19:30:42 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/api/java/JavaPairRDD.scala | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/ed06500d/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 363667f..55c8745 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -611,6 +611,42 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kClassTag: ClassTag[K
    * Return an RDD with the values of each tuple.
    */
   def values(): JavaRDD[V] = JavaRDD.fromRDD[V](rdd.map(_._2))
+
+  /**
+   * Return approximate number of distinct values for each key in this RDD.
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. Uses the provided
+   * Partitioner to partition the output RDD.
+   */
+  def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): JavaRDD[(K, Long)] = {
+    rdd.countApproxDistinctByKey(relativeSD, partitioner)
+  }
+
+  /**
+   * Return approximate number of distinct values for each key in this RDD.
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. The default value of
+   * relativeSD is 0.05. Hash-partitions the output RDD using the existing partitioner/parallelism
+   * level.
+   */
+  def countApproxDistinctByKey(relativeSD: Double = 0.05): JavaRDD[(K, Long)] = {
+    rdd.countApproxDistinctByKey(relativeSD)
+  }
+
+
+  /**
+   * Return approximate number of distinct values for each key in this RDD.
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. Hash-partitions the
+   * output RDD into numPartitions.
+   *
+   */
+  def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): JavaRDD[(K, Long)] = {
+    rdd.countApproxDistinctByKey(relativeSD, numPartitions)
+  }
 }
 
 object JavaPairRDD {


[03/20] git commit: Added countDistinctByKey to PairRDDFunctions that counts the approximate number of unique values for each key in the RDD.

Posted by rx...@apache.org.
Added countDistinctByKey to PairRDDFunctions that counts the approximate number of unique values for each key in the RDD.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/ec5df800
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/ec5df800
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/ec5df800

Branch: refs/heads/master
Commit: ec5df800fdb0109314c0d5cd6dcac2ecbb9433d6
Parents: 1a70135
Author: Hossein Falaki <fa...@gmail.com>
Authored: Thu Oct 17 22:26:00 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Thu Oct 17 22:26:00 2013 -0700

----------------------------------------------------------------------
 .../org/apache/spark/rdd/PairRDDFunctions.scala | 51 ++++++++++++++++++++
 .../spark/rdd/PairRDDFunctionsSuite.scala       | 30 ++++++++++++
 2 files changed, 81 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/ec5df800/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 93b78e1..f34593f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -39,12 +39,15 @@ import org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil
 import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob}
 import org.apache.hadoop.mapreduce.{RecordWriter => NewRecordWriter}
 
+import com.clearspring.analytics.stream.cardinality.HyperLogLog
+
 import org.apache.spark._
 import org.apache.spark.SparkContext._
 import org.apache.spark.partial.{BoundedDouble, PartialResult}
 import org.apache.spark.Aggregator
 import org.apache.spark.Partitioner
 import org.apache.spark.Partitioner.defaultPartitioner
+import org.apache.spark.util.SerializableHyperLogLog
 
 /**
  * Extra functions available on RDDs of (key, value) pairs through an implicit conversion.
@@ -207,6 +210,54 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)])
   }
 
   /**
+   * Return approximate number of distinct values for each key in this RDD.
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. Uses the provided
+   * Partitioner to partition the output RDD.
+   */
+  def countDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = {
+    val createHLL = (v: V) => {
+      val hll = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
+      val bres = hll.value.offer(v)
+      hll
+    }
+    val mergeValueHLL = (hll: SerializableHyperLogLog, v: V) => {
+      hll.value.offer(v)
+      hll
+    }
+    val mergeHLL = (h1: SerializableHyperLogLog, h2: SerializableHyperLogLog) => h1.merge(h2)
+
+    combineByKey(createHLL, mergeValueHLL, mergeHLL, partitioner).map {
+      case (k, v) => (k, v.value.cardinality())
+    }
+  }
+
+  /**
+   * Return approximate number of distinct values for each key in this RDD. 
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. Hash-partitions the
+   * output RDD into numPartitions.
+   *
+   */
+  def countDistinctByKey(relativeSD: Double, numPartitions: Int): RDD[(K, Long)] = {
+    countDistinctByKey(relativeSD, new HashPartitioner(numPartitions))
+  }
+
+  /**
+   * Return approximate number of distinct values for each key in this RDD.
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+   * more accurate counts but increase the memory footprint and vice versa. The default value of
+   * relativeSD is 0.05. Hash-partitions the output RDD using the existing partitioner/parallelism
+   * level.
+   */
+  def countDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = {
+    countDistinctByKey(relativeSD, defaultPartitioner(self))
+  }
+
+  /**
    * Merge the values for each key using an associative reduce function. This will also perform
    * the merging locally on each mapper before sending results to a reducer, similarly to a
    * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/ec5df800/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index 57d3382..d81bc8c 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -109,6 +109,36 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
     assert(deps.size === 2) // ShuffledRDD, ParallelCollection.
   }
 
+  test("countDistinctByKey") {
+    def error(est: Long, size: Long) = math.abs(est - size)/size.toDouble
+
+    /* Since HyperLogLog unique counting is approximate, and the relative standard deviation is
+    only a statistical bound, the tests can fail for large values of relativeSD. We will be using
+     relatively tight error bounds to check correctness of functionality rather than checking
+     whether the approximation conforms with the requested bound.
+     */
+    val relativeSD = 0.001
+
+    val stacked = (1 to 100).flatMap(i => (1 to i).map(j => (i, j)))
+    val rdd1 = sc.parallelize(stacked)
+    val counted1 = rdd1.countDistinctByKey(relativeSD).collect()
+    counted1.foreach{
+      case(k, count) => assert(math.abs(error(count, k)) < relativeSD)
+    }
+
+    import scala.util.Random
+    val rnd = new Random()
+    val randStacked = (1 to 100).flatMap{i =>
+      val num = rnd.nextInt%500
+      (1 to num).map(j => (num, j))
+    }
+    val rdd2 = sc.parallelize(randStacked)
+    val counted2 = rdd2.countDistinctByKey(relativeSD, 4).collect()
+    counted2.foreach{
+      case(k, count) => assert(math.abs(error(count, k)) < relativeSD)
+    }
+  }
+
   test("join") {
     val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1)))
     val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w')))


[17/20] git commit: Added Java unit tests for countApproxDistinct and countApproxDistinctByKey

Posted by rx...@apache.org.
Added Java unit tests for countApproxDistinct and countApproxDistinctByKey


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/d6cded71
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/d6cded71
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/d6cded71

Branch: refs/heads/master
Commit: d6cded7155b36880f81544bdf6fc6c20dd52ad7d
Parents: c3073b6
Author: Hossein Falaki <fa...@gmail.com>
Authored: Mon Dec 30 19:32:05 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Mon Dec 30 19:32:05 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/JavaAPISuite.java    | 32 ++++++++++++++++++++
 1 file changed, 32 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d6cded71/core/src/test/scala/org/apache/spark/JavaAPISuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/JavaAPISuite.java b/core/src/test/scala/org/apache/spark/JavaAPISuite.java
index 79913dc..6398feb 100644
--- a/core/src/test/scala/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/scala/org/apache/spark/JavaAPISuite.java
@@ -930,4 +930,36 @@ public class JavaAPISuite implements Serializable {
                         parts[1]);
   }
 
+  @Test
+  public void countApproxDistinct() {
+    List<Integer> arrayData = new ArrayList<Integer>();
+    int size = 100;
+    for (int i = 0; i < 100000; i++) {
+      arrayData.add(i % size);
+    }
+    JavaRDD<Integer> simpleRdd = sc.parallelize(arrayData, 10);
+    Assert.assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.2) - size) / (size * 1.0)) < 0.2);
+    Assert.assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.05) - size) / (size * 1.0)) <= 0.05);
+    Assert.assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.01) - size) / (size * 1.0)) <= 0.01);
+  }
+
+  @Test
+  public void countApproxDistinctByKey() {
+    double relativeSD = 0.001;
+
+    List<Tuple2<Integer, Integer>> arrayData = new ArrayList<Tuple2<Integer, Integer>>();
+    for (int i = 10; i < 100; i++)
+      for (int j = 0; j < i; j++)
+        arrayData.add(new Tuple2<Integer, Integer>(i, j));
+
+    JavaPairRDD<Integer, Integer> pairRdd = sc.parallelizePairs(arrayData);
+    List<Tuple2<Integer, Object>> res =  pairRdd.countApproxDistinctByKey(relativeSD).collect();
+    for (Tuple2<Integer, Object> resItem : res) {
+      double count = (double)resItem._1();
+      Long resCount = (Long)resItem._2();
+      Double error = Math.abs((resCount - count) / count);
+      Assert.assertTrue(error < relativeSD);
+    }
+
+  }
 }


[14/20] git commit: Added stream 2.5.1 jar dependency

Posted by rx...@apache.org.
Added stream 2.5.1 jar dependency


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b75d7c98
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b75d7c98
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b75d7c98

Branch: refs/heads/master
Commit: b75d7c98bc94d42f11522162e30ae4fc546d5bf9
Parents: a7de8e9
Author: Hossein Falaki <fa...@gmail.com>
Authored: Mon Dec 30 19:29:17 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Mon Dec 30 19:29:17 2013 -0800

----------------------------------------------------------------------
 project/SparkBuild.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b75d7c98/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 7bcbd90..442db97 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -247,7 +247,8 @@ object SparkBuild extends Build {
         "com.codahale.metrics"     % "metrics-ganglia"  % "3.0.0",
         "com.codahale.metrics"     % "metrics-graphite" % "3.0.0",
         "com.twitter"             %% "chill"            % "0.3.1",
-        "com.twitter"              % "chill-java"       % "0.3.1"
+        "com.twitter"              % "chill-java"       % "0.3.1",
+        "com.clearspring.analytics" % "stream"          % "2.5.1"
       )
   )
 


[04/20] git commit: Added dependency on stream-lib version 2.4.0 for approximate distinct count support.

Posted by rx...@apache.org.
Added dependency on stream-lib version 2.4.0 for approximate distinct count support.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/654d60b6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/654d60b6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/654d60b6

Branch: refs/heads/master
Commit: 654d60b6ee65d27545773d3cec9b0ed6ed20ff3a
Parents: ec5df80
Author: Hossein Falaki <fa...@gmail.com>
Authored: Thu Oct 17 22:29:10 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Thu Oct 17 22:29:10 2013 -0700

----------------------------------------------------------------------
 project/SparkBuild.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/654d60b6/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index f2bbe53..b332485 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -222,7 +222,8 @@ object SparkBuild extends Build {
       "com.codahale.metrics" % "metrics-json" % "3.0.0",
       "com.codahale.metrics" % "metrics-ganglia" % "3.0.0",
       "com.twitter" % "chill_2.9.3" % "0.3.1",
-      "com.twitter" % "chill-java" % "0.3.1"
+      "com.twitter" % "chill-java" % "0.3.1",
+      "com.clearspring.analytics" % "stream" % "2.4.0"
     )
   )
 


[08/20] git commit: Made SerializableHyperLogLog Externalizable and added Kryo tests

Posted by rx...@apache.org.
Made SerializableHyperLogLog Externalizable and added Kryo tests


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/2d511ab3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/2d511ab3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/2d511ab3

Branch: refs/heads/master
Commit: 2d511ab320a85eccafbb9e51a2183b07114bbaa1
Parents: 13227aa
Author: Hossein Falaki <fa...@gmail.com>
Authored: Fri Oct 18 15:30:45 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Fri Oct 18 15:30:45 2013 -0700

----------------------------------------------------------------------
 .../org/apache/spark/util/SerializableHyperLogLog.scala  | 11 ++++++-----
 .../apache/spark/serializer/KryoSerializerSuite.scala    |  4 ++++
 2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/2d511ab3/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
index 28a8acc..9cfd414 100644
--- a/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
+++ b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala
@@ -17,26 +17,27 @@
 
 package org.apache.spark.util
 
-import java.io.{ObjectOutputStream, ObjectInputStream}
+import java.io.{Externalizable, ObjectOutput, ObjectInput}
 import com.clearspring.analytics.stream.cardinality.{ICardinality, HyperLogLog}
 
 /**
- * A wrapper around com.clearspring.analytics.stream.cardinality.HyperLogLog that is serializable.
+ * A wrapper around [[com.clearspring.analytics.stream.cardinality.HyperLogLog]] that is serializable.
  */
 private[spark]
-class SerializableHyperLogLog(@transient var value: ICardinality) extends Serializable {
+class SerializableHyperLogLog(var value: ICardinality) extends Externalizable {
 
+  def this() = this(null)  // For deserialization
 
   def merge(other: SerializableHyperLogLog) = new SerializableHyperLogLog(value.merge(other.value))
 
-  private def readObject(in: ObjectInputStream) {
+  def readExternal(in: ObjectInput) {
     val byteLength = in.readInt()
     val bytes = new Array[Byte](byteLength)
     in.readFully(bytes)
     value = HyperLogLog.Builder.build(bytes)
   }
 
-  private def writeObject(out: ObjectOutputStream) {
+  def writeExternal(out: ObjectOutput) {
     val bytes = value.getBytes()
     out.writeInt(bytes.length)
     out.write(bytes)

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/2d511ab3/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index c016c51..1852971 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -172,6 +172,10 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext {
     assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).collect().head === (1, 11))
   }
 
+  test("kryo with SerializableHyperLogLog") {
+    assert(sc.parallelize( Array(1, 2, 3, 2, 3, 3, 2, 3, 1) ).countDistinct(0.01) === 3)
+  }
+
   test("kryo with reduce") {
     val control = 1 :: 2 :: Nil
     val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_))
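
For reference (editorial note, not part of the diff): in a user application of that era the
Kryo path covered by the new test is selected through the spark.serializer system property;
the master URL, app name, and data below are illustrative assumptions, and countDistinct(0.01)
mirrors the assertion above.

System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
val sc = new org.apache.spark.SparkContext("local", "hll-kryo-demo")
assert(sc.parallelize(Seq(1, 2, 3, 2, 3, 3, 2, 3, 1)).countDistinct(0.01) == 3)
sc.stop()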


[20/20] git commit: Merge pull request #73 from falaki/ApproximateDistinctCount

Posted by rx...@apache.org.
Merge pull request #73 from falaki/ApproximateDistinctCount

Approximate distinct count

Added countApproxDistinct() to RDD and countApproxDistinctByKey() to PairRDDFunctions to approximately count the number of distinct elements in an RDD and the number of distinct values per key, respectively. Both functions use HyperLogLog from stream-lib for counting, and both take a parameter that controls the trade-off between accuracy and memory consumption. Also added Scala docs and test suites for both methods.
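
A minimal usage sketch (assuming a live SparkContext named sc; the data and the relativeSD
value 0.05 are illustrative, not taken from the pull request):

val words = sc.parallelize(Seq("a", "b", "a", "c", "b", "a"))
println(words.countApproxDistinct(0.05))                         // roughly 3

val pairs = sc.parallelize(Seq(("x", 1), ("x", 2), ("x", 2), ("y", 7)))
pairs.countApproxDistinctByKey(0.05).collect().foreach(println)  // roughly (x,2) and (y,1)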


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/8b8e70eb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/8b8e70eb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/8b8e70eb

Branch: refs/heads/master
Commit: 8b8e70ebde880d08ebb3816b2f4003247559c7f8
Parents: 63b411d bee445c
Author: Reynold Xin <rx...@apache.org>
Authored: Tue Dec 31 17:48:24 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Tue Dec 31 17:48:24 2013 -0800

----------------------------------------------------------------------
 core/pom.xml                                    | 1582 +++++++++++++++---
 .../org/apache/spark/api/java/JavaPairRDD.scala |   36 +
 .../org/apache/spark/api/java/JavaRDDLike.scala |   11 +
 .../org/apache/spark/rdd/PairRDDFunctions.scala |   42 +
 .../main/scala/org/apache/spark/rdd/RDD.scala   |   16 +-
 .../spark/util/SerializableHyperLogLog.scala    |   50 +
 .../scala/org/apache/spark/JavaAPISuite.java    |   32 +
 .../spark/rdd/PairRDDFunctionsSuite.scala       |   34 +
 .../scala/org/apache/spark/rdd/RDDSuite.scala   |   13 +
 .../spark/serializer/KryoSerializerSuite.scala  |    4 +
 pom.xml                                         |    5 +
 project/SparkBuild.scala                        |    3 +-
 12 files changed, 1595 insertions(+), 233 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8b8e70eb/core/src/test/scala/org/apache/spark/JavaAPISuite.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8b8e70eb/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8b8e70eb/project/SparkBuild.scala
----------------------------------------------------------------------


[16/20] git commit: Added Java API for countApproxDistinct

Posted by rx...@apache.org.
Added Java API for countApproxDistinct


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c3073b6c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c3073b6c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c3073b6c

Branch: refs/heads/master
Commit: c3073b6cf2a647451441e8dfc18fe4334497113c
Parents: ed06500
Author: Hossein Falaki <fa...@gmail.com>
Authored: Mon Dec 30 19:31:06 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Mon Dec 30 19:31:06 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/api/java/JavaRDDLike.scala    | 11 +++++++++++
 1 file changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c3073b6c/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index f344804..924d8af 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -444,4 +444,15 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
     val comp = com.google.common.collect.Ordering.natural().asInstanceOf[Comparator[T]]
     takeOrdered(num, comp)
   }
+
+  /**
+   * Return approximate number of distinct elements in the RDD.
+   *
+   * The accuracy of approximation can be controlled through the relative standard deviation
+   * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
+ * more accurate counts but increase the memory footprint and vice versa. The default value of
+   * relativeSD is 0.05.
+   */
+  def countApproxDistinct(relativeSD: Double = 0.05): Long = rdd.countApproxDistinct(relativeSD)
+
 }
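
A rough illustration of the trade-off described in the doc comment (editorial sketch, assuming
a SparkContext sc; the dataset and relativeSD values are illustrative, and the Java method
simply delegates to the Scala RDD, so a Scala sketch shows the same behavior). Smaller
relativeSD makes the underlying HyperLogLog allocate more registers, so memory grows as the
requested error shrinks:

val data = sc.parallelize(1 to 100000).map(_ % 1000)   // exactly 1000 distinct values
for (rsd <- Seq(0.2, 0.05, 0.01)) {
  val est = data.countApproxDistinct(rsd)
  println(f"relativeSD=$rsd%.2f  estimate=$est  relative error=${math.abs(est - 1000) / 1000.0}%.4f")
}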


[18/20] git commit: minor improvements

Posted by rx...@apache.org.
minor improvements


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/acb03230
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/acb03230
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/acb03230

Branch: refs/heads/master
Commit: acb0323053d270a377e497e975b2dfe59e2f997c
Parents: d6cded7
Author: Hossein Falaki <fa...@gmail.com>
Authored: Tue Dec 31 15:34:26 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Tue Dec 31 15:34:26 2013 -0800

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala | 5 ++---
 core/src/main/scala/org/apache/spark/rdd/RDD.scala              | 4 +++-
 2 files changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/acb03230/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 1dc5f8d..088b298 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -229,9 +229,8 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
     }
     val mergeHLL = (h1: SerializableHyperLogLog, h2: SerializableHyperLogLog) => h1.merge(h2)
 
-    combineByKey(createHLL, mergeValueHLL, mergeHLL, partitioner).map {
-      case (k, v) => (k, v.value.cardinality())
-    }
+    combineByKey(createHLL, mergeValueHLL, mergeHLL, partitioner).mapValues(_.value.cardinality())
+
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/acb03230/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 74fab48..161fd06 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -809,7 +809,9 @@ abstract class RDD[T: ClassTag](
     }
     def mergeCounters(c1: SerializableHyperLogLog, c2: SerializableHyperLogLog) = c1.merge(c2)
 
-    mapPartitions(hllCountPartition).reduce(mergeCounters).value.cardinality()
+    val zeroCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
+    mapPartitions(hllCountPartition).aggregate(zeroCounter)(mergeCounters, mergeCounters)
+      .value.cardinality()
   }
 
   /**
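
Editorial note on the RDD.scala change above: aggregate is seeded with an explicit zero
counter, so the computation has an identity element and no longer relies on reduce finding at
least one partition counter; reduce throws on an empty collection, while aggregate falls back
to its zeroValue. A toy illustration with plain integers (assuming a SparkContext sc):

val empty = sc.parallelize(Seq.empty[Int])
// empty.reduce(_ + _)                     // UnsupportedOperationException: empty collection
println(empty.aggregate(0)(_ + _, _ + _))  // prints 0, the zero value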


[06/20] git commit: Improved code style.

Posted by rx...@apache.org.
Improved code style.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/79868fe7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/79868fe7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/79868fe7

Branch: refs/heads/master
Commit: 79868fe7246d8e6d57e0a376b2593fabea9a9d83
Parents: b611d9a
Author: Hossein Falaki <fa...@gmail.com>
Authored: Thu Oct 17 23:39:20 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Thu Oct 17 23:39:20 2013 -0700

----------------------------------------------------------------------
 .../org/apache/spark/rdd/PairRDDFunctions.scala   |  2 +-
 .../src/main/scala/org/apache/spark/rdd/RDD.scala |  2 +-
 .../apache/spark/rdd/PairRDDFunctionsSuite.scala  | 18 +++++++++++-------
 .../scala/org/apache/spark/rdd/RDDSuite.scala     | 12 ++++++------
 4 files changed, 19 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/79868fe7/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index d778692..322b519 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -219,7 +219,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)])
   def countDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = {
     val createHLL = (v: V) => {
       val hll = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
-      val bres = hll.value.offer(v)
+      hll.value.offer(v)
       hll
     }
     val mergeValueHLL = (hll: SerializableHyperLogLog, v: V) => {

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/79868fe7/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 38fa96f..e23e7a6 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -784,7 +784,7 @@ abstract class RDD[T: ClassManifest](
       }
       Iterator(hllCounter)
     }
-    def mergeCounters(c1: SerializableHyperLogLog, c2: SerializableHyperLogLog): SerializableHyperLogLog = c1.merge(c2)
+    def mergeCounters(c1: SerializableHyperLogLog, c2: SerializableHyperLogLog) = c1.merge(c2)
 
     mapPartitions(hllCountPartition).reduce(mergeCounters).value.cardinality()
   }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/79868fe7/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index d81bc8c..5683ada 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.rdd
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashSet
+import scala.util.Random
 
 import org.scalatest.FunSuite
 
@@ -110,15 +111,17 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
   }
 
   test("countDistinctByKey") {
-    def error(est: Long, size: Long) = math.abs(est - size)/size.toDouble
+    def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
 
     /* Since HyperLogLog unique counting is approximate, and the relative standard deviation is
-    only a statistical bound, the tests can fail for large values of relativeSD. We will be using
-     relatively tight error bounds to check correctness of functionality rather than checking
-     whether the approximation conforms with the requested bound.
+     * only a statistical bound, the tests can fail for large values of relativeSD. We will be using
+     * relatively tight error bounds to check correctness of functionality rather than checking
+     * whether the approximation conforms with the requested bound.
      */
     val relativeSD = 0.001
 
+    // For each value i, there are i tuples with first element equal to i.
+    // Therefore, the expected count for key i would be i.
     val stacked = (1 to 100).flatMap(i => (1 to i).map(j => (i, j)))
     val rdd1 = sc.parallelize(stacked)
     val counted1 = rdd1.countDistinctByKey(relativeSD).collect()
@@ -126,10 +129,11 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
       case(k, count) => assert(math.abs(error(count, k)) < relativeSD)
     }
 
-    import scala.util.Random
     val rnd = new Random()
-    val randStacked = (1 to 100).flatMap{i =>
-      val num = rnd.nextInt%500
+
+    // The expected count for key num would be num
+    val randStacked = (1 to 100).flatMap { i =>
+      val num = rnd.nextInt % 500
       (1 to num).map(j => (num, j))
     }
     val rdd2 = sc.parallelize(randStacked)
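
To make the construction in the test concrete (illustration only, using 3 in place of 100):

// stacked for (1 to 3): Vector((1,1), (2,1), (2,2), (3,1), (3,2), (3,3))
// so key 1 has 1 distinct value, key 2 has 2, key 3 has 3, which is what error(count, k) checks.
val stacked = (1 to 3).flatMap(i => (1 to i).map(j => (i, j)))
println(stacked)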

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/79868fe7/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index 6baf9c7..413ea85 100644
--- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -63,17 +63,17 @@ class RDDSuite extends FunSuite with SharedSparkContext {
     }
   }
 
-  test("Approximate distinct count") {
+  test("countDistinct") {
 
-    def error(est: Long, size: Long) = math.abs(est - size)/size.toDouble
+    def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
 
     val size = 100
     val uniformDistro = for (i <- 1 to 100000) yield i % size
     val simpleRdd = sc.makeRDD(uniformDistro)
-    assert( error(simpleRdd.countDistinct(0.2), size) < 0.2)
-    assert( error(simpleRdd.countDistinct(0.05), size) < 0.05)
-    assert( error(simpleRdd.countDistinct(0.01), size) < 0.01)
-    assert( error(simpleRdd.countDistinct(0.001), size) < 0.001)
+    assert(error(simpleRdd.countDistinct(0.2), size) < 0.2)
+    assert(error(simpleRdd.countDistinct(0.05), size) < 0.05)
+    assert(error(simpleRdd.countDistinct(0.01), size) < 0.01)
+    assert(error(simpleRdd.countDistinct(0.001), size) < 0.001)
   }
 
   test("SparkContext.union") {


[10/20] Using origin version

Posted by rx...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d50ccc5c/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d50ccc5c/pom.xml
----------------------------------------------------------------------
diff --cc pom.xml
index 3349583,57e8435..8c87e43
--- a/pom.xml
+++ b/pom.xml
@@@ -231,15 -200,15 +200,20 @@@
          <artifactId>asm</artifactId>
          <version>4.0</version>
        </dependency>
+       <!-- In theory we need not directly depend on protobuf since Spark does not directly
+            use it. However, when building with Hadoop/YARN 2.2 Maven doesn't correctly bump
+            the protobuf version up from the one Mesos gives. For now we include this variable 
+            to explicitly bump the version when building with YARN. It would be nice to figure
+            out why Maven can't resolve this correctly (like SBT does). -->
        <dependency>
 +        <groupId>com.clearspring.analytics</groupId>
 +        <artifactId>stream</artifactId>
 +        <version>2.4.0</version>
 +      </dependency>
 +      <dependency>
          <groupId>com.google.protobuf</groupId>
          <artifactId>protobuf-java</artifactId>
-         <version>2.4.1</version>
+         <version>${protobuf.version}</version>
        </dependency>
        <dependency>
          <groupId>com.twitter</groupId>


[11/20] Using origin version

Posted by rx...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d50ccc5c/core/pom.xml
----------------------------------------------------------------------
diff --cc core/pom.xml
index e53875c,043f6cf..f0248bc
--- a/core/pom.xml
+++ b/core/pom.xml
@@@ -1,242 -1,231 +1,1351 @@@
--<?xml version="1.0" encoding="UTF-8"?>
--<!--
--  ~ Licensed to the Apache Software Foundation (ASF) under one or more
--  ~ contributor license agreements.  See the NOTICE file distributed with
--  ~ this work for additional information regarding copyright ownership.
--  ~ The ASF licenses this file to You under the Apache License, Version 2.0
--  ~ (the "License"); you may not use this file except in compliance with
--  ~ the License.  You may obtain a copy of the License at
--  ~
--  ~    http://www.apache.org/licenses/LICENSE-2.0
--  ~
--  ~ Unless required by applicable law or agreed to in writing, software
--  ~ distributed under the License is distributed on an "AS IS" BASIS,
--  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--  ~ See the License for the specific language governing permissions and
--  ~ limitations under the License.
--  -->
--
--<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
--  <modelVersion>4.0.0</modelVersion>
--  <parent>
--    <groupId>org.apache.spark</groupId>
--    <artifactId>spark-parent</artifactId>
--    <version>0.9.0-incubating-SNAPSHOT</version>
--    <relativePath>../pom.xml</relativePath>
--  </parent>
--
--  <groupId>org.apache.spark</groupId>
-   <artifactId>spark-core_2.9.3</artifactId>
-   <packaging>jar</packaging>
-   <name>Spark Project Core</name>
-   <url>http://spark.incubator.apache.org/</url>
- 
-   <dependencies>
-     <dependency>
-       <groupId>org.apache.hadoop</groupId>
-       <artifactId>hadoop-client</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>net.java.dev.jets3t</groupId>
-       <artifactId>jets3t</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.apache.avro</groupId>
-       <artifactId>avro</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.apache.avro</groupId>
-       <artifactId>avro-ipc</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.apache.zookeeper</groupId>
-       <artifactId>zookeeper</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.eclipse.jetty</groupId>
-       <artifactId>jetty-server</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.google.guava</groupId>
-       <artifactId>guava</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.google.code.findbugs</groupId>
-       <artifactId>jsr305</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.slf4j</groupId>
-       <artifactId>slf4j-api</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.ning</groupId>
-       <artifactId>compress-lzf</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.xerial.snappy</groupId>
-       <artifactId>snappy-java</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.ow2.asm</groupId>
-       <artifactId>asm</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.google.protobuf</groupId>
-       <artifactId>protobuf-java</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.clearspring.analytics</groupId>
-       <artifactId>stream</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.twitter</groupId>
-       <artifactId>chill_2.9.3</artifactId>
-       <version>0.3.1</version>
-     </dependency>
-     <dependency>
-       <groupId>com.twitter</groupId>
-       <artifactId>chill-java</artifactId>
-       <version>0.3.1</version>
-     </dependency>
-     <dependency>
-       <groupId>com.typesafe.akka</groupId>
-       <artifactId>akka-actor</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.typesafe.akka</groupId>
-       <artifactId>akka-remote</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.typesafe.akka</groupId>
-       <artifactId>akka-slf4j</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.scala-lang</groupId>
-       <artifactId>scalap</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.scala-lang</groupId>
-       <artifactId>scala-library</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>net.liftweb</groupId>
-       <artifactId>lift-json_2.9.2</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>it.unimi.dsi</groupId>
-       <artifactId>fastutil</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>colt</groupId>
-       <artifactId>colt</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.github.scala-incubator.io</groupId>
-       <artifactId>scala-io-file_2.9.2</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.apache.mesos</groupId>
-       <artifactId>mesos</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>io.netty</groupId>
-       <artifactId>netty-all</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>log4j</groupId>
-       <artifactId>log4j</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.codahale.metrics</groupId>
-       <artifactId>metrics-core</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.codahale.metrics</groupId>
-       <artifactId>metrics-jvm</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.codahale.metrics</groupId>
-       <artifactId>metrics-json</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>com.codahale.metrics</groupId>
-       <artifactId>metrics-ganglia</artifactId>
-     </dependency>
-     <dependency>
-       <groupId>org.apache.derby</groupId>
-       <artifactId>derby</artifactId>
-       <scope>test</scope>
-     </dependency>
-     <dependency>
-       <groupId>org.scalatest</groupId>
-       <artifactId>scalatest_2.9.3</artifactId>
-       <scope>test</scope>
-     </dependency>
-     <dependency>
-       <groupId>org.scalacheck</groupId>
-       <artifactId>scalacheck_2.9.3</artifactId>
-       <scope>test</scope>
-     </dependency>
-     <dependency>
-       <groupId>org.easymock</groupId>
-       <artifactId>easymock</artifactId>
-       <scope>test</scope>
-     </dependency>
-     <dependency>
-       <groupId>com.novocode</groupId>
-       <artifactId>junit-interface</artifactId>
-       <scope>test</scope>
-     </dependency>
-     <dependency>
-       <groupId>org.slf4j</groupId>
-       <artifactId>slf4j-log4j12</artifactId>
-       <scope>test</scope>
-     </dependency>
-   </dependencies>
-   <build>
-     <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
-     <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
-     <plugins>
-       <plugin>
-         <groupId>org.apache.maven.plugins</groupId>
-         <artifactId>maven-antrun-plugin</artifactId>
-         <executions>
-           <execution>
-             <phase>test</phase>
-             <goals>
-               <goal>run</goal>
-             </goals>
-             <configuration>
-               <exportAntProperties>true</exportAntProperties>
-               <tasks>
-                 <property name="spark.classpath" refid="maven.test.classpath" />
-                 <property environment="env" />
-                 <fail message="Please set the SCALA_HOME (or SCALA_LIBRARY_PATH if scala is on the path) environment variables and retry.">
-                   <condition>
-                     <not>
-                       <or>
-                         <isset property="env.SCALA_HOME" />
-                         <isset property="env.SCALA_LIBRARY_PATH" />
-                       </or>
-                     </not>
-                   </condition>
-                 </fail>
-               </tasks>
-             </configuration>
-           </execution>
-         </executions>
-       </plugin>
-       <plugin>
-         <groupId>org.scalatest</groupId>
-         <artifactId>scalatest-maven-plugin</artifactId>
-         <configuration>
-           <environmentVariables>
-             <SPARK_HOME>${basedir}/..</SPARK_HOME>
-             <SPARK_TESTING>1</SPARK_TESTING>
-             <SPARK_CLASSPATH>${spark.classpath}</SPARK_CLASSPATH>
-           </environmentVariables>
-         </configuration>
-       </plugin>
-     </plugins>
-   </build>
- </project>
 -  <artifactId>spark-core_2.10</artifactId>
 -  <packaging>jar</packaging>
 -  <name>Spark Project Core</name>
 -  <url>http://spark.incubator.apache.org/</url>
 -
 -  <dependencies>
 -    <dependency>
 -      <groupId>org.apache.hadoop</groupId>
 -      <artifactId>hadoop-client</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>net.java.dev.jets3t</groupId>
 -      <artifactId>jets3t</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.avro</groupId>
 -      <artifactId>avro</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.avro</groupId>
 -      <artifactId>avro-ipc</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.zookeeper</groupId>
 -      <artifactId>zookeeper</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.eclipse.jetty</groupId>
 -      <artifactId>jetty-server</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.google.guava</groupId>
 -      <artifactId>guava</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.google.code.findbugs</groupId>
 -      <artifactId>jsr305</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.slf4j</groupId>
 -      <artifactId>slf4j-api</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.ning</groupId>
 -      <artifactId>compress-lzf</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.xerial.snappy</groupId>
 -      <artifactId>snappy-java</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.ow2.asm</groupId>
 -      <artifactId>asm</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.twitter</groupId>
 -      <artifactId>chill_${scala.binary.version}</artifactId>
 -      <version>0.3.1</version>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.twitter</groupId>
 -      <artifactId>chill-java</artifactId>
 -      <version>0.3.1</version>
 -    </dependency>
 -    <dependency>
 -      <groupId>${akka.group}</groupId>
 -      <artifactId>akka-remote_${scala.binary.version}</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>${akka.group}</groupId>
 -      <artifactId>akka-slf4j_${scala.binary.version}</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.scala-lang</groupId>
 -      <artifactId>scala-library</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>net.liftweb</groupId>
 -      <artifactId>lift-json_${scala.binary.version}</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>it.unimi.dsi</groupId>
 -      <artifactId>fastutil</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>colt</groupId>
 -      <artifactId>colt</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.mesos</groupId>
 -      <artifactId>mesos</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>io.netty</groupId>
 -      <artifactId>netty-all</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>log4j</groupId>
 -      <artifactId>log4j</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.codahale.metrics</groupId>
 -      <artifactId>metrics-core</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.codahale.metrics</groupId>
 -      <artifactId>metrics-jvm</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.codahale.metrics</groupId>
 -      <artifactId>metrics-json</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.codahale.metrics</groupId>
 -      <artifactId>metrics-ganglia</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.codahale.metrics</groupId>
 -      <artifactId>metrics-graphite</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.apache.derby</groupId>
 -      <artifactId>derby</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>commons-io</groupId>
 -      <artifactId>commons-io</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.scalatest</groupId>
 -      <artifactId>scalatest_${scala.binary.version}</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.scalacheck</groupId>
 -      <artifactId>scalacheck_${scala.binary.version}</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.easymock</groupId>
 -      <artifactId>easymock</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>com.novocode</groupId>
 -      <artifactId>junit-interface</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.slf4j</groupId>
 -      <artifactId>slf4j-log4j12</artifactId>
 -      <scope>test</scope>
 -    </dependency>
 -  </dependencies>
 -  <build>
 -    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
 -    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
 -    <plugins>
 -      <plugin>
 -        <groupId>org.apache.maven.plugins</groupId>
 -        <artifactId>maven-antrun-plugin</artifactId>
 -        <executions>
 -          <execution>
 -            <phase>test</phase>
 -            <goals>
 -              <goal>run</goal>
 -            </goals>
 -            <configuration>
 -              <exportAntProperties>true</exportAntProperties>
 -              <tasks>
 -                <property name="spark.classpath" refid="maven.test.classpath" />
 -                <property environment="env" />
 -                <fail message="Please set the SCALA_HOME (or SCALA_LIBRARY_PATH if scala is on the path) environment variables and retry.">
 -                  <condition>
 -                    <not>
 -                      <or>
 -                        <isset property="env.SCALA_HOME" />
 -                        <isset property="env.SCALA_LIBRARY_PATH" />
 -                      </or>
 -                    </not>
 -                  </condition>
 -                </fail>
 -              </tasks>
 -            </configuration>
 -          </execution>
 -        </executions>
 -      </plugin>
 -      <plugin>
 -        <groupId>org.scalatest</groupId>
 -        <artifactId>scalatest-maven-plugin</artifactId>
 -        <configuration>
 -          <environmentVariables>
 -            <SPARK_HOME>${basedir}/..</SPARK_HOME>
 -            <SPARK_TESTING>1</SPARK_TESTING>
 -            <SPARK_CLASSPATH>${spark.classpath}</SPARK_CLASSPATH>
 -          </environmentVariables>
 -        </configuration>
 -      </plugin>
 -    </plugins>
 -  </build>
 -</project>
++
++
++
++<!DOCTYPE html>
++<html>
++  <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# githubog: http://ogp.me/ns/fb/githubog#">
++    <meta charset='utf-8'>
++    <meta http-equiv="X-UA-Compatible" content="IE=edge">
++        <title>incubator-spark/core/pom.xml at master · apache/incubator-spark · GitHub</title>
++    <link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="GitHub" />
++    <link rel="fluid-icon" href="https://github.com/fluidicon.png" title="GitHub" />
++    <link rel="apple-touch-icon" sizes="57x57" href="/apple-touch-icon-114.png" />
++    <link rel="apple-touch-icon" sizes="114x114" href="/apple-touch-icon-114.png" />
++    <link rel="apple-touch-icon" sizes="72x72" href="/apple-touch-icon-144.png" />
++    <link rel="apple-touch-icon" sizes="144x144" href="/apple-touch-icon-144.png" />
++    <link rel="logo" type="image/svg" href="https://github-media-downloads.s3.amazonaws.com/github-logo.svg" />
++    <meta property="og:image" content="https://github.global.ssl.fastly.net/images/modules/logos_page/Octocat.png">
++    <meta name="hostname" content="github-fe126-cp1-prd.iad.github.net">
++    <meta name="ruby" content="ruby 1.9.3p194-tcs-github-tcmalloc (e1c0c3f392) [x86_64-linux]">
++    <link rel="assets" href="https://github.global.ssl.fastly.net/">
++    <link rel="conduit-xhr" href="https://ghconduit.com:25035/">
++    <link rel="xhr-socket" href="/_sockets" />
++    
++
++
++    <meta name="msapplication-TileImage" content="/windows-tile.png" />
++    <meta name="msapplication-TileColor" content="#ffffff" />
++    <meta name="selected-link" value="repo_source" data-pjax-transient />
++    <meta content="collector.githubapp.com" name="octolytics-host" /><meta content="collector-cdn.github.com" name="octolytics-script-host" /><meta content="github" name="octolytics-app-id" /><meta content="43A45EED:3C36:63F899:52C1FB77" name="octolytics-dimension-request_id" />
++    
++
++    
++    
++    <link rel="icon" type="image/x-icon" href="/favicon.ico" />
++
++    <meta content="authenticity_token" name="csrf-param" />
++<meta content="XL0k65gLgVyHEQhufVTLBNEWwGhZf67623b0mJZcY2A=" name="csrf-token" />
++
++    <link href="https://github.global.ssl.fastly.net/assets/github-3944f96c1c19f752fe766b332fb7716555c8296e.css" media="all" rel="stylesheet" type="text/css" />
++    <link href="https://github.global.ssl.fastly.net/assets/github2-b64d0ad5fa62a30a166145ae08b8c0a6d2f7dea7.css" media="all" rel="stylesheet" type="text/css" />
++    
++
++    
++
++      <script src="https://github.global.ssl.fastly.net/assets/frameworks-29a3fb0547e33bd8d4530bbad9bae3ef00d83293.js" type="text/javascript"></script>
++      <script src="https://github.global.ssl.fastly.net/assets/github-3fbe2841590c916eeba07af3fc626dd593d2f5ba.js" type="text/javascript"></script>
++      
++      <meta http-equiv="x-pjax-version" content="8983adfc0294e4e53e92b27093d9e927">
++
++        <link data-pjax-transient rel='permalink' href='/apache/incubator-spark/blob/50e3b8ec4c8150f1cfc6b92f8871f520adf2cfda/core/pom.xml'>
++  <meta property="og:title" content="incubator-spark"/>
++  <meta property="og:type" content="githubog:gitrepository"/>
++  <meta property="og:url" content="https://github.com/apache/incubator-spark"/>
++  <meta property="og:image" content="https://github.global.ssl.fastly.net/images/gravatars/gravatar-user-420.png"/>
++  <meta property="og:site_name" content="GitHub"/>
++  <meta property="og:description" content="incubator-spark - Mirror of Apache Spark"/>
++
++  <meta name="description" content="incubator-spark - Mirror of Apache Spark" />
++
++  <meta content="47359" name="octolytics-dimension-user_id" /><meta content="apache" name="octolytics-dimension-user_login" /><meta content="10960835" name="octolytics-dimension-repository_id" /><meta content="apache/incubator-spark" name="octolytics-dimension-repository_nwo" /><meta content="true" name="octolytics-dimension-repository_public" /><meta content="false" name="octolytics-dimension-repository_is_fork" /><meta content="10960835" name="octolytics-dimension-repository_network_root_id" /><meta content="apache/incubator-spark" name="octolytics-dimension-repository_network_root_nwo" />
++  <link href="https://github.com/apache/incubator-spark/commits/master.atom" rel="alternate" title="Recent Commits to incubator-spark:master" type="application/atom+xml" />
++
++  </head>
++
++
++  <body class="logged_out  env-production  vis-public mirror  page-blob">
++    <div class="wrapper">
++      
++      
++      
++      
++
++
++      
++      <div class="header header-logged-out">
++  <div class="container clearfix">
++
++    <a class="header-logo-wordmark" href="https://github.com/">
++      <span class="mega-octicon octicon-logo-github"></span>
++    </a>
++
++    <div class="header-actions">
++        <a class="button primary" href="/join">Sign up</a>
++      <a class="button signin" href="/login?return_to=%2Fapache%2Fincubator-spark%2Fblob%2Fmaster%2Fcore%2Fpom.xml">Sign in</a>
++    </div>
++
++    <div class="command-bar js-command-bar  in-repository">
++
++      <ul class="top-nav">
++          <li class="explore"><a href="/explore">Explore</a></li>
++        <li class="features"><a href="/features">Features</a></li>
++          <li class="enterprise"><a href="https://enterprise.github.com/">Enterprise</a></li>
++          <li class="blog"><a href="/blog">Blog</a></li>
++      </ul>
++        <form accept-charset="UTF-8" action="/search" class="command-bar-form" id="top_search_form" method="get">
++
++<input type="text" data-hotkey="/ s" name="q" id="js-command-bar-field" placeholder="Search or type a command" tabindex="1" autocapitalize="off"
++    
++    
++      data-repo="apache/incubator-spark"
++      data-branch="master"
++      data-sha="ede8c631c6d7a940c1ab1629574ec1003eb0861e"
++  >
++
++    <input type="hidden" name="nwo" value="apache/incubator-spark" />
++
++    <div class="select-menu js-menu-container js-select-menu search-context-select-menu">
++      <span class="minibutton select-menu-button js-menu-target">
++        <span class="js-select-button">This repository</span>
++      </span>
++
++      <div class="select-menu-modal-holder js-menu-content js-navigation-container">
++        <div class="select-menu-modal">
++
++          <div class="select-menu-item js-navigation-item js-this-repository-navigation-item selected">
++            <span class="select-menu-item-icon octicon octicon-check"></span>
++            <input type="radio" class="js-search-this-repository" name="search_target" value="repository" checked="checked" />
++            <div class="select-menu-item-text js-select-button-text">This repository</div>
++          </div> <!-- /.select-menu-item -->
++
++          <div class="select-menu-item js-navigation-item js-all-repositories-navigation-item">
++            <span class="select-menu-item-icon octicon octicon-check"></span>
++            <input type="radio" name="search_target" value="global" />
++            <div class="select-menu-item-text js-select-button-text">All repositories</div>
++          </div> <!-- /.select-menu-item -->
++
++        </div>
++      </div>
++    </div>
++
++  <span class="octicon help tooltipped downwards" title="Show command bar help">
++    <span class="octicon octicon-question"></span>
++  </span>
++
++
++  <input type="hidden" name="ref" value="cmdform">
++
++</form>
++    </div>
++
++  </div>
++</div>
++
++
++      
++
++
++          <div class="site" itemscope itemtype="http://schema.org/WebPage">
++    
++    <div class="pagehead repohead instapaper_ignore readability-menu">
++      <div class="container">
++        
++
++<ul class="pagehead-actions">
++
++
++  <li>
++    <a href="/login?return_to=%2Fapache%2Fincubator-spark"
++    class="minibutton with-count js-toggler-target star-button tooltipped upwards"
++    title="You must be signed in to use this feature" rel="nofollow">
++    <span class="octicon octicon-star"></span>Star
++  </a>
++
++    <a class="social-count js-social-count" href="/apache/incubator-spark/stargazers">
++      322
++    </a>
++
++  </li>
++
++    <li>
++      <a href="/login?return_to=%2Fapache%2Fincubator-spark"
++        class="minibutton with-count js-toggler-target fork-button tooltipped upwards"
++        title="You must be signed in to fork a repository" rel="nofollow">
++        <span class="octicon octicon-git-branch"></span>Fork
++      </a>
++      <a href="/apache/incubator-spark/network" class="social-count">
++        273
++      </a>
++    </li>
++</ul>
++
++        <h1 itemscope itemtype="http://data-vocabulary.org/Breadcrumb" class="entry-title public">
++          <span class="repo-label"><span>public</span></span>
++          <span class="mega-octicon octicon-repo"></span>
++          <span class="author">
++            <a href="/apache" class="url fn" itemprop="url" rel="author"><span itemprop="title">apache</span></a>
++          </span>
++          <span class="repohead-name-divider">/</span>
++          <strong><a href="/apache/incubator-spark" class="js-current-repository js-repo-home-link">incubator-spark</a></strong>
++
++          <span class="page-context-loader">
++            <img alt="Octocat-spinner-32" height="16" src="https://github.global.ssl.fastly.net/images/spinners/octocat-spinner-32.gif" width="16" />
++          </span>
++
++            <span class="mirror-flag">
++              <span class="text">mirrored from <a href="git://git.apache.org/incubator-spark.git">git://git.apache.org/incubator-spark.git</a></span>
++            </span>
++        </h1>
++      </div><!-- /.container -->
++    </div><!-- /.repohead -->
++
++    <div class="container">
++      
++
++      <div class="repository-with-sidebar repo-container  ">
++
++        <div class="repository-sidebar">
++            
++
++<div class="sunken-menu vertical-right repo-nav js-repo-nav js-repository-container-pjax js-octicon-loaders">
++  <div class="sunken-menu-contents">
++    <ul class="sunken-menu-group">
++      <li class="tooltipped leftwards" title="Code">
++        <a href="/apache/incubator-spark" aria-label="Code" class="selected js-selected-navigation-item sunken-menu-item" data-gotokey="c" data-pjax="true" data-selected-links="repo_source repo_downloads repo_commits repo_tags repo_branches /apache/incubator-spark">
++          <span class="octicon octicon-code"></span> <span class="full-word">Code</span>
++          <img alt="Octocat-spinner-32" class="mini-loader" height="16" src="https://github.global.ssl.fastly.net/images/spinners/octocat-spinner-32.gif" width="16" />
++</a>      </li>
++
++
++      <li class="tooltipped leftwards" title="Pull Requests">
++        <a href="/apache/incubator-spark/pulls" aria-label="Pull Requests" class="js-selected-navigation-item sunken-menu-item js-disable-pjax" data-gotokey="p" data-selected-links="repo_pulls /apache/incubator-spark/pulls">
++            <span class="octicon octicon-git-pull-request"></span> <span class="full-word">Pull Requests</span>
++            <span class='counter'>45</span>
++            <img alt="Octocat-spinner-32" class="mini-loader" height="16" src="https://github.global.ssl.fastly.net/images/spinners/octocat-spinner-32.gif" width="16" />
++</a>      </li>
++
++
++    </ul>
++    <div class="sunken-menu-separator"></div>
++    <ul class="sunken-menu-group">
++
++      <li class="tooltipped leftwards" title="Pulse">
++        <a href="/apache/incubator-spark/pulse" aria-label="Pulse" class="js-selected-navigation-item sunken-menu-item" data-pjax="true" data-selected-links="pulse /apache/incubator-spark/pulse">
++          <span class="octicon octicon-pulse"></span> <span class="full-word">Pulse</span>
++          <img alt="Octocat-spinner-32" class="mini-loader" height="16" src="https://github.global.ssl.fastly.net/images/spinners/octocat-spinner-32.gif" width="16" />
++</a>      </li>
++
++      <li class="tooltipped leftwards" title="Graphs">
++        <a href="/apache/incubator-spark/graphs" aria-label="Graphs" class="js-selected-navigation-item sunken-menu-item" data-pjax="true" data-selected-links="repo_graphs repo_contributors /apache/incubator-spark/graphs">
++          <span class="octicon octicon-graph"></span> <span class="full-word">Graphs</span>
++          <img alt="Octocat-spinner-32" class="mini-loader" height="16" src="https://github.global.ssl.fastly.net/images/spinners/octocat-spinner-32.gif" width="16" />
++</a>      </li>
++
++      <li class="tooltipped leftwards" title="Network">
++        <a href="/apache/incubator-spark/network" aria-label="Network" class="js-selected-navigation-item sunken-menu-item js-disable-pjax" data-selected-links="repo_network /apache/incubator-spark/network">
++          <span class="octicon octicon-git-branch"></span> <span class="full-word">Network</span>
++          <img alt="Octocat-spinner-32" class="mini-loader" height="16" src="https://github.global.ssl.fastly.net/images/spinners/octocat-spinner-32.gif" width="16" />
++</a>      </li>
++    </ul>
++
++
++  </div>
++</div>
++
++            <div class="only-with-full-nav">
++              
++
++  
++
++<div class="clone-url open"
++  data-protocol-type="http"
++  data-url="/users/set_protocol?protocol_selector=http&amp;protocol_type=clone">
++  <h3><strong>HTTPS</strong> clone URL</h3>
++  <div class="clone-url-box">
++    <input type="text" class="clone js-url-field"
++           value="https://github.com/apache/incubator-spark.git" readonly="readonly">
++
++    <span class="js-zeroclipboard url-box-clippy minibutton zeroclipboard-button" data-clipboard-text="https://github.com/apache/incubator-spark.git" data-copied-hint="copied!" title="copy to clipboard"><span class="octicon octicon-clippy"></span></span>
++  </div>
++</div>
++
++  
++
++<div class="clone-url "
++  data-protocol-type="subversion"
++  data-url="/users/set_protocol?protocol_selector=subversion&amp;protocol_type=clone">
++  <h3><strong>Subversion</strong> checkout URL</h3>
++  <div class="clone-url-box">
++    <input type="text" class="clone js-url-field"
++           value="https://github.com/apache/incubator-spark" readonly="readonly">
++
++    <span class="js-zeroclipboard url-box-clippy minibutton zeroclipboard-button" data-clipboard-text="https://github.com/apache/incubator-spark" data-copied-hint="copied!" title="copy to clipboard"><span class="octicon octicon-clippy"></span></span>
++  </div>
++</div>
++
++
++<p class="clone-options">You can clone with
++      <a href="#" class="js-clone-selector" data-protocol="http">HTTPS</a>,
++      or <a href="#" class="js-clone-selector" data-protocol="subversion">Subversion</a>.
++  <span class="octicon help tooltipped upwards" title="Get help on which URL is right for you.">
++    <a href="https://help.github.com/articles/which-remote-url-should-i-use">
++    <span class="octicon octicon-question"></span>
++    </a>
++  </span>
++</p>
++
++
++
++              <a href="/apache/incubator-spark/archive/master.zip"
++                 class="minibutton sidebar-button"
++                 title="Download this repository as a zip file"
++                 rel="nofollow">
++                <span class="octicon octicon-cloud-download"></span>
++                Download ZIP
++              </a>
++            </div>
++        </div><!-- /.repository-sidebar -->
++
++        <div id="js-repo-pjax-container" class="repository-content context-loader-container" data-pjax-container>
++          
++
++
++<!-- blob contrib key: blob_contributors:v21:ca1fa3336589a56eafe4ec1105b40975 -->
++
++<p title="This is a placeholder element" class="js-history-link-replace hidden"></p>
++
++<a href="/apache/incubator-spark/find/master" data-pjax data-hotkey="t" class="js-show-file-finder" style="display:none">Show File Finder</a>
++
++<div class="file-navigation">
++  
++
++<div class="select-menu js-menu-container js-select-menu" >
++  <span class="minibutton select-menu-button js-menu-target" data-hotkey="w"
++    data-master-branch="master"
++    data-ref="master"
++    role="button" aria-label="Switch branches or tags" tabindex="0">
++    <span class="octicon octicon-git-branch"></span>
++    <i>branch:</i>
++    <span class="js-select-button">master</span>
++  </span>
++
++  <div class="select-menu-modal-holder js-menu-content js-navigation-container" data-pjax>
++
++    <div class="select-menu-modal">
++      <div class="select-menu-header">
++        <span class="select-menu-title">Switch branches/tags</span>
++        <span class="octicon octicon-remove-close js-menu-close"></span>
++      </div> <!-- /.select-menu-header -->
++
++      <div class="select-menu-filters">
++        <div class="select-menu-text-filter">
++          <input type="text" aria-label="Filter branches/tags" id="context-commitish-filter-field" class="js-filterable-field js-navigation-enable" placeholder="Filter branches/tags">
++        </div>
++        <div class="select-menu-tabs">
++          <ul>
++            <li class="select-menu-tab">
++              <a href="#" data-tab-filter="branches" class="js-select-menu-tab">Branches</a>
++            </li>
++            <li class="select-menu-tab">
++              <a href="#" data-tab-filter="tags" class="js-select-menu-tab">Tags</a>
++            </li>
++          </ul>
++        </div><!-- /.select-menu-tabs -->
++      </div><!-- /.select-menu-filters -->
++
++      <div class="select-menu-list select-menu-tab-bucket js-select-menu-tab-bucket" data-tab-filter="branches">
++
++        <div data-filterable-for="context-commitish-filter-field" data-filterable-type="substring">
++
++
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/akka-actors/core/pom.xml"
++                 data-name="akka-actors"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="akka-actors">akka-actors</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/arthur/core/pom.xml"
++                 data-name="arthur"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="arthur">arthur</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/branch-0.5/core/pom.xml"
++                 data-name="branch-0.5"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="branch-0.5">branch-0.5</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/branch-0.6/core/pom.xml"
++                 data-name="branch-0.6"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="branch-0.6">branch-0.6</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/branch-0.7/core/pom.xml"
++                 data-name="branch-0.7"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="branch-0.7">branch-0.7</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/branch-0.8/core/pom.xml"
++                 data-name="branch-0.8"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="branch-0.8">branch-0.8</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/charles-newhadoop/core/pom.xml"
++                 data-name="charles-newhadoop"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="charles-newhadoop">charles-newhadoop</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/dev/core/pom.xml"
++                 data-name="dev"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="dev">dev</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/formatting/core/pom.xml"
++                 data-name="formatting"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="formatting">formatting</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/hive/core/pom.xml"
++                 data-name="hive"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="hive">hive</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/java-api/core/pom.xml"
++                 data-name="java-api"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="java-api">java-api</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item selected">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/master/core/pom.xml"
++                 data-name="master"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="master">master</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mesos-0.9/core/pom.xml"
++                 data-name="mesos-0.9"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mesos-0.9">mesos-0.9</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mos-bt/core/pom.xml"
++                 data-name="mos-bt"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mos-bt">mos-bt</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mos-bt-dev/core/pom.xml"
++                 data-name="mos-bt-dev"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mos-bt-dev">mos-bt-dev</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mos-bt-topo/core/pom.xml"
++                 data-name="mos-bt-topo"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mos-bt-topo">mos-bt-topo</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mos-shuffle/core/pom.xml"
++                 data-name="mos-shuffle"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mos-shuffle">mos-shuffle</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mos-shuffle-supertracked/core/pom.xml"
++                 data-name="mos-shuffle-supertracked"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mos-shuffle-supertracked">mos-shuffle-supertracked</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/mos-shuffle-tracked/core/pom.xml"
++                 data-name="mos-shuffle-tracked"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="mos-shuffle-tracked">mos-shuffle-tracked</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/multi-tracker/core/pom.xml"
++                 data-name="multi-tracker"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="multi-tracker">multi-tracker</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/new-rdds-protobuf/core/pom.xml"
++                 data-name="new-rdds-protobuf"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="new-rdds-protobuf">new-rdds-protobuf</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/object-file-fix/core/pom.xml"
++                 data-name="object-file-fix"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="object-file-fix">object-file-fix</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/old-mesos/core/pom.xml"
++                 data-name="old-mesos"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="old-mesos">old-mesos</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/old-rdds/core/pom.xml"
++                 data-name="old-rdds"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="old-rdds">old-rdds</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/perf/core/pom.xml"
++                 data-name="perf"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="perf">perf</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/reduce-logging/core/pom.xml"
++                 data-name="reduce-logging"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="reduce-logging">reduce-logging</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/rxin/core/pom.xml"
++                 data-name="rxin"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="rxin">rxin</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/scala-2.8/core/pom.xml"
++                 data-name="scala-2.8"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="scala-2.8">scala-2.8</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/scala-2.9/core/pom.xml"
++                 data-name="scala-2.9"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="scala-2.9">scala-2.9</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/scala-2.10/core/pom.xml"
++                 data-name="scala-2.10"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="scala-2.10">scala-2.10</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/sched-refactoring/core/pom.xml"
++                 data-name="sched-refactoring"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="sched-refactoring">sched-refactoring</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/shuffle-fix/core/pom.xml"
++                 data-name="shuffle-fix"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="shuffle-fix">shuffle-fix</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/sparkplug/core/pom.xml"
++                 data-name="sparkplug"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="sparkplug">sparkplug</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/streaming/core/pom.xml"
++                 data-name="streaming"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="streaming">streaming</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/td-checksum/core/pom.xml"
++                 data-name="td-checksum"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="td-checksum">td-checksum</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/td-rdd-save/core/pom.xml"
++                 data-name="td-rdd-save"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="td-rdd-save">td-rdd-save</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/tjhunter-branch/core/pom.xml"
++                 data-name="tjhunter-branch"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="tjhunter-branch">tjhunter-branch</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/twitter-mesos/core/pom.xml"
++                 data-name="twitter-mesos"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="twitter-mesos">twitter-mesos</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/blob/yarn/core/pom.xml"
++                 data-name="yarn"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="yarn">yarn</a>
++            </div> <!-- /.select-menu-item -->
++        </div>
++
++          <div class="select-menu-no-results">Nothing to show</div>
++      </div> <!-- /.select-menu-list -->
++
++      <div class="select-menu-list select-menu-tab-bucket js-select-menu-tab-bucket" data-tab-filter="tags">
++        <div data-filterable-for="context-commitish-filter-field" data-filterable-type="substring">
++
++
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.8.1-incubating/core/pom.xml"
++                 data-name="v0.8.1-incubating"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.8.1-incubating">v0.8.1-incubating</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.8.0-incubating-rc3/core/pom.xml"
++                 data-name="v0.8.0-incubating-rc3"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.8.0-incubating-rc3">v0.8.0-incubating-rc3</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.8.0-incubating/core/pom.xml"
++                 data-name="v0.8.0-incubating"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.8.0-incubating">v0.8.0-incubating</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.7.2/core/pom.xml"
++                 data-name="v0.7.2"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.7.2">v0.7.2</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.7.1/core/pom.xml"
++                 data-name="v0.7.1"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.7.1">v0.7.1</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.7.0-bizo-1/core/pom.xml"
++                 data-name="v0.7.0-bizo-1"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.7.0-bizo-1">v0.7.0-bizo-1</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.7.0/core/pom.xml"
++                 data-name="v0.7.0"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.7.0">v0.7.0</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.6.2/core/pom.xml"
++                 data-name="v0.6.2"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.6.2">v0.6.2</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.6.1/core/pom.xml"
++                 data-name="v0.6.1"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.6.1">v0.6.1</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.6.0-yarn/core/pom.xml"
++                 data-name="v0.6.0-yarn"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.6.0-yarn">v0.6.0-yarn</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.6.0/core/pom.xml"
++                 data-name="v0.6.0"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.6.0">v0.6.0</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.5.2/core/pom.xml"
++                 data-name="v0.5.2"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.5.2">v0.5.2</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.5.1/core/pom.xml"
++                 data-name="v0.5.1"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.5.1">v0.5.1</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/v0.5.0/core/pom.xml"
++                 data-name="v0.5.0"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="v0.5.0">v0.5.0</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/spark-parent-0.8.1-incubating/core/pom.xml"
++                 data-name="spark-parent-0.8.1-incubating"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="spark-parent-0.8.1-incubating">spark-parent-0.8.1-incubating</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/alpha-0.2/core/pom.xml"
++                 data-name="alpha-0.2"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="alpha-0.2">alpha-0.2</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/alpha-0.1/core/pom.xml"
++                 data-name="alpha-0.1"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="alpha-0.1">alpha-0.1</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/0.3-scala-2.9/core/pom.xml"
++                 data-name="0.3-scala-2.9"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="0.3-scala-2.9">0.3-scala-2.9</a>
++            </div> <!-- /.select-menu-item -->
++            <div class="select-menu-item js-navigation-item ">
++              <span class="select-menu-item-icon octicon octicon-check"></span>
++              <a href="/apache/incubator-spark/tree/0.3-scala-2.8/core/pom.xml"
++                 data-name="0.3-scala-2.8"
++                 data-skip-pjax="true"
++                 rel="nofollow"
++                 class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
++                 title="0.3-scala-2.8">0.3-scala-2.8</a>
++            </div> <!-- /.select-menu-item -->
++        </div>
++
++        <div class="select-menu-no-results">Nothing to show</div>
++      </div> <!-- /.select-menu-list -->
++
++    </div> <!-- /.select-menu-modal -->
++  </div> <!-- /.select-menu-modal-holder -->
++</div> <!-- /.select-menu -->
++
++  <div class="breadcrumb">
++    <span class='repo-root js-repo-root'><span itemscope="" itemtype="http://data-vocabulary.org/Breadcrumb"><a href="/apache/incubator-spark" data-branch="master" data-direction="back" data-pjax="true" itemscope="url"><span itemprop="title">incubator-spark</span></a></span></span><span class="separator"> / </span><span itemscope="" itemtype="http://data-vocabulary.org/Breadcrumb"><a href="/apache/incubator-spark/tree/master/core" data-branch="master" data-direction="back" data-pjax="true" itemscope="url"><span itemprop="title">core</span></a></span><span class="separator"> / </span><strong class="final-path">pom.xml</strong> <span class="js-zeroclipboard minibutton zeroclipboard-button" data-clipboard-text="core/pom.xml" data-copied-hint="copied!" title="copy to clipboard"><span class="octicon octicon-clippy"></span></span>
++  </div>
++</div>
++
++
++
++  <div class="commit file-history-tease">
++    <img class="main-avatar" height="24" src="https://2.gravatar.com/avatar/def597cee3897ba290da26972ff628d2?d=https%3A%2F%2Fidenticons.github.com%2F0ec10d5e3ed3720a2d578417a894cf49.png&amp;r=x&amp;s=140" width="24" />
++    <span class="author"><a href="/pwendell" rel="author">pwendell</a></span>
++    <time class="js-relative-date" datetime="2013-12-16T21:56:21-08:00" title="2013-12-16 21:56:21">December 16, 2013</time>
++    <div class="commit-title">
++        <a href="/apache/incubator-spark/commit/c1fec89895f03dbdbb6f445ea3cdcd2d050555c4" class="message" data-pjax="true" title="Cleanup">Cleanup</a>
++    </div>
++
++    <div class="participation">
++      <p class="quickstat"><a href="#blob_contributors_box" rel="facebox"><strong>15</strong> contributors</a></p>
++          <a class="avatar tooltipped downwards" title="ScrapCodes" href="/apache/incubator-spark/commits/master/core/pom.xml?author=ScrapCodes"><img height="20" src="https://1.gravatar.com/avatar/e9813bbbab2caa993bf7e2b2d60de894?d=https%3A%2F%2Fidenticons.github.com%2F38c660c74f82a216b75167debab770ed.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="mateiz" href="/apache/incubator-spark/commits/master/core/pom.xml?author=mateiz"><img height="20" src="https://2.gravatar.com/avatar/17d3d634fc898bf3ae19444450af6805?d=https%3A%2F%2Fidenticons.github.com%2F627fcfc1b5e8e4535cc26ecfa133743e.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="jey" href="/apache/incubator-spark/commits/master/core/pom.xml?author=jey"><img height="20" src="https://1.gravatar.com/avatar/85a3f16c85ee6e38527e82e035ed4bf9?d=https%3A%2F%2Fidenticons.github.com%2F2ebb6c06bdc16ef37ec965c6b325b5c6.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="markhamstra" href="/apache/incubator-spark/commits/master/core/pom.xml?author=markhamstra"><img height="20" src="https://2.gravatar.com/avatar/aaa2e907159f6919c4a4c8039d46752f?d=https%3A%2F%2Fidenticons.github.com%2F80aa657180765c73a93528281452d8dc.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="pwendell" href="/apache/incubator-spark/commits/master/core/pom.xml?author=pwendell"><img height="20" src="https://2.gravatar.com/avatar/def597cee3897ba290da26972ff628d2?d=https%3A%2F%2Fidenticons.github.com%2F0ec10d5e3ed3720a2d578417a894cf49.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="rxin" href="/apache/incubator-spark/commits/master/core/pom.xml?author=rxin"><img height="20" src="https://0.gravatar.com/avatar/73bfac91877fce35c6d20e16e9e53677?d=https%3A%2F%2Fidenticons.github.com%2F017ef876ba320362d11baf01bb584eee.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="tomdz" href="/apache/incubator-spark/commits/master/core/pom.xml?author=tomdz"><img height="20" src="https://2.gravatar.com/avatar/18039bff98071ad398b4301cfb0522b4?d=https%3A%2F%2Fidenticons.github.com%2F95b22f6d3c85b4945466aee72a712a01.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="shivaram" href="/apache/incubator-spark/commits/master/core/pom.xml?author=shivaram"><img height="20" src="https://0.gravatar.com/avatar/9d61244268a5be0ab361dc6e4af65ee4?d=https%3A%2F%2Fidenticons.github.com%2F020255c0f2993d01f44c91e0b508fa84.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="mridulm" href="/apache/incubator-spark/commits/master/core/pom.xml?author=mridulm"><img height="20" src="https://1.gravatar.com/avatar/0c6f3ad3eb45b1c42ca026f9d6ff3794?d=https%3A%2F%2Fidenticons.github.com%2Feed4d497b06cb847301ab49f40608dd4.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="woggle" href="/apache/incubator-spark/commits/master/core/pom.xml?author=woggle"><img height="20" src="https://1.gravatar.com/avatar/86f3647dccd886de735435991f55848e?d=https%3A%2F%2Fidenticons.github.com%2Ff23fdb10552dbc5ec1a5c9ba8e6f6be3.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="colorant" href="/apache/incubator-spark/commits/master/core/pom.xml?author=colorant"><img height="20" src="https://0.gravatar.com/avatar/1e54a90ff3671ca36be8a1163038ca56?d=https%3A%2F%2Fidenticons.github.com%2F90d602b3c6ec06eda12e485e109415fe.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="jerryshao" href="/apache/incubator-spark/commits/master/core/pom.xml?author=jerryshao"><img height="20" src="https://2.gravatar.com/avatar/24fb58dab7ef7c3202c4c8061ee51a12?d=https%3A%2F%2Fidenticons.github.com%2Ff8737f93e1f940e20f1259ad5403be32.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="russellcardullo" href="/apache/incubator-spark/commits/master/core/pom.xml?author=russellcardullo"><img height="20" src="https://0.gravatar.com/avatar/4173fab43d50a19e42950a8f7c7d96f8?d=https%3A%2F%2Fidenticons.github.com%2Fcae42ebaab97a0f8a1feb2f3fc97ee91.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="folone" href="/apache/incubator-spark/commits/master/core/pom.xml?author=folone"><img height="20" src="https://2.gravatar.com/avatar/50e7e3f60b3507383d2b327857b66a62?d=https%3A%2F%2Fidenticons.github.com%2F6c2bcbccd23191b40f4932e2b8450681.png&amp;r=x&amp;s=140" width="20" /></a>
++    <a class="avatar tooltipped downwards" title="witgo" href="/apache/incubator-spark/commits/master/core/pom.xml?author=witgo"><img height="20" src="https://2.gravatar.com/avatar/82f14c57ba7522241b0e3c67d759609e?d=https%3A%2F%2Fidenticons.github.com%2F878f3103ff85a3edb2d415930bbdbd5a.png&amp;r=x&amp;s=140" width="20" /></a>
++
++
++    </div>
++    <div id="blob_contributors_box" style="display:none">
++      <h2 class="facebox-header">Users who have contributed to this file</h2>
++      <ul class="facebox-user-list">
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://1.gravatar.com/avatar/e9813bbbab2caa993bf7e2b2d60de894?d=https%3A%2F%2Fidenticons.github.com%2F38c660c74f82a216b75167debab770ed.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/ScrapCodes">ScrapCodes</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/17d3d634fc898bf3ae19444450af6805?d=https%3A%2F%2Fidenticons.github.com%2F627fcfc1b5e8e4535cc26ecfa133743e.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/mateiz">mateiz</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://1.gravatar.com/avatar/85a3f16c85ee6e38527e82e035ed4bf9?d=https%3A%2F%2Fidenticons.github.com%2F2ebb6c06bdc16ef37ec965c6b325b5c6.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/jey">jey</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/aaa2e907159f6919c4a4c8039d46752f?d=https%3A%2F%2Fidenticons.github.com%2F80aa657180765c73a93528281452d8dc.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/markhamstra">markhamstra</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/def597cee3897ba290da26972ff628d2?d=https%3A%2F%2Fidenticons.github.com%2F0ec10d5e3ed3720a2d578417a894cf49.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/pwendell">pwendell</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://0.gravatar.com/avatar/73bfac91877fce35c6d20e16e9e53677?d=https%3A%2F%2Fidenticons.github.com%2F017ef876ba320362d11baf01bb584eee.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/rxin">rxin</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/18039bff98071ad398b4301cfb0522b4?d=https%3A%2F%2Fidenticons.github.com%2F95b22f6d3c85b4945466aee72a712a01.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/tomdz">tomdz</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://0.gravatar.com/avatar/9d61244268a5be0ab361dc6e4af65ee4?d=https%3A%2F%2Fidenticons.github.com%2F020255c0f2993d01f44c91e0b508fa84.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/shivaram">shivaram</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://1.gravatar.com/avatar/0c6f3ad3eb45b1c42ca026f9d6ff3794?d=https%3A%2F%2Fidenticons.github.com%2Feed4d497b06cb847301ab49f40608dd4.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/mridulm">mridulm</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://1.gravatar.com/avatar/86f3647dccd886de735435991f55848e?d=https%3A%2F%2Fidenticons.github.com%2Ff23fdb10552dbc5ec1a5c9ba8e6f6be3.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/woggle">woggle</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://0.gravatar.com/avatar/1e54a90ff3671ca36be8a1163038ca56?d=https%3A%2F%2Fidenticons.github.com%2F90d602b3c6ec06eda12e485e109415fe.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/colorant">colorant</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/24fb58dab7ef7c3202c4c8061ee51a12?d=https%3A%2F%2Fidenticons.github.com%2Ff8737f93e1f940e20f1259ad5403be32.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/jerryshao">jerryshao</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://0.gravatar.com/avatar/4173fab43d50a19e42950a8f7c7d96f8?d=https%3A%2F%2Fidenticons.github.com%2Fcae42ebaab97a0f8a1feb2f3fc97ee91.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/russellcardullo">russellcardullo</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/50e7e3f60b3507383d2b327857b66a62?d=https%3A%2F%2Fidenticons.github.com%2F6c2bcbccd23191b40f4932e2b8450681.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/folone">folone</a>
++          </li>
++          <li class="facebox-user-list-item">
++            <img height="24" src="https://2.gravatar.com/avatar/82f14c57ba7522241b0e3c67d759609e?d=https%3A%2F%2Fidenticons.github.com%2F878f3103ff85a3edb2d415930bbdbd5a.png&amp;r=x&amp;s=140" width="24" />
++            <a href="/witgo">witgo</a>
++          </li>
++      </ul>
++    </div>
++  </div>
++
++<div id="files" class="bubble">
++  <div class="file">
++    <div class="meta">
++      <div class="info">
++        <span class="icon"><b class="octicon octicon-file-text"></b></span>
++        <span class="mode" title="File Mode">file</span>
++          <span>232 lines (228 sloc)</span>
++        <span>7.715 kb</span>
++      </div>
++      <div class="actions">
++        <div class="button-group">
++              <a class="minibutton disabled tooltipped leftwards" href="#"
++                 title="You must be signed in to make or propose changes">Edit</a>
++          <a href="/apache/incubator-spark/raw/master/core/pom.xml" class="button minibutton " id="raw-url">Raw</a>
++            <a href="/apache/incubator-spark/blame/master/core/pom.xml" class="button minibutton ">Blame</a>
++          <a href="/apache/incubator-spark/commits/master/core/pom.xml" class="button minibutton " rel="nofollow">History</a>
++        </div><!-- /.button-group -->
++          <a class="minibutton danger disabled empty-icon tooltipped leftwards" href="#"
++             title="You must be signed in to make or propose changes">
++          Delete
++        </a>
++      </div><!-- /.actions -->
++
++    </div>
++        <div class="blob-wrapper data type-xml js-blob-data">
++        <table class="file-code file-diff">
++          <tr class="file-code-line">
++            <td class="blob-line-nums">
++              <span id="L1" rel="#L1">1</span>
++<span id="L2" rel="#L2">2</span>
++<span id="L3" rel="#L3">3</span>
++<span id="L4" rel="#L4">4</span>
++<span id="L5" rel="#L5">5</span>
++<span id="L6" rel="#L6">6</span>
++<span id="L7" rel="#L7">7</span>
++<span id="L8" rel="#L8">8</span>
++<span id="L9" rel="#L9">9</span>
++<span id="L10" rel="#L10">10</span>
++<span id="L11" rel="#L11">11</span>
++<span id="L12" rel="#L12">12</span>
++<span id="L13" rel="#L13">13</span>
++<span id="L14" rel="#L14">14</span>
++<span id="L15" rel="#L15">15</span>
++<span id="L16" rel="#L16">16</span>
++<span id="L17" rel="#L17">17</span>
++<span id="L18" rel="#L18">18</span>
++<span id="L19" rel="#L19">19</span>
++<span id="L20" rel="#L20">20</span>
++<span id="L21" rel="#L21">21</span>
++<span id="L22" rel="#L22">22</span>
++<span id="L23" rel="#L23">23</span>
++<span id="L24" rel="#L24">24</span>
++<span id="L25" rel="#L25">25</span>
++<span id="L26" rel="#L26">26</span>
++<span id="L27" rel="#L27">27</span>
++<span id="L28" rel="#L28">28</span>
++<span id="L29" rel="#L29">29</span>
++<span id="L30" rel="#L30">30</span>
++<span id="L31" rel="#L31">31</span>
++<span id="L32" rel="#L32">32</span>
++<span id="L33" rel="#L33">33</span>
++<span id="L34" rel="#L34">34</span>
++<span id="L35" rel="#L35">35</span>
++<span id="L36" rel="#L36">36</span>
++<span id="L37" rel="#L37">37</span>
++<span id="L38" rel="#L38">38</span>
++<span id="L39" rel="#L39">39</span>
++<span id="L40" rel="#L40">40</span>
++<span id="L41" rel="#L41">41</span>
++<span id="L42" rel="#L42">42</span>
++<span id="L43" rel="#L43">43</span>
++<span id="L44" rel="#L44">44</span>
++<span id="L45" rel="#L45">45</span>
++<span id="L46" rel="#L46">46</span>
++<span id="L47" rel="#L47">47</span>
++<span id="L48" rel="#L48">48</span>
++<span id="L49" rel="#L49">49</span>
++<span id="L50" rel="#L50">50</span>
++<span id="L51" rel="#L51">51</span>
++<span id="L52" rel="#L52">52</span>
++<span id="L53" rel="#L53">53</span>
++<span id="L54" rel="#L54">54</span>
++<span id="L55" rel="#L55">55</span>
++<span id="L56" rel="#L56">56</span>
++<span id="L57" rel="#L57">57</span>
++<span id="L58" rel="#L58">58</span>
++<span id="L59" rel="#L59">59</span>
++<span id="L60" rel="#L60">60</span>
++<span id="L61" rel="#L61">61</span>
++<span id="L62" rel="#L62">62</span>
++<span id="L63" rel="#L63">63</span>
++<span id="L64" rel="#L64">64</span>
++<span id="L65" rel="#L65">65</span>
++<span id="L66" rel="#L66">66</span>
++<span id="L67" rel="#L67">67</span>
++<span id="L68" rel="#L68">68</span>
++<span id="L69" rel="#L69">69</span>
++<span id="L70" rel="#L70">70</span>
++<span id="L71" rel="#L71">71</span>
++<span id="L72" rel="#L72">72</span>
++<span id="L73" rel="#L73">73</span>
++<span id="L74" rel="#L74">74</span>
++<span id="L75" rel="#L75">75</span>
++<span id="L76" rel="#L76">76</span>
++<span id="L77" rel="#L77">77</span>
++<span id="L78" rel="#L78">78</span>
++<span id="L79" rel="#L79">79</span>
++<span id="L80" rel="#L80">80</span>
++<span id="L81" rel="#L81">81</span>
++<span id="L82" rel="#L82">82</span>
++<span id="L83" rel="#L83">83</span>
++<span id="L84" rel="#L84">84</span>
++<span id="L85" rel="#L85">85</span>
++<span id="L86" rel="#L86">86</span>
++<span id="L87" rel="#L87">87</span>
++<span id="L88" rel="#L88">88</span>
++<span id="L89" rel="#L89">89</span>
++<span id="L90" rel="#L90">90</span>
++<span id="L91" rel="#L91">91</span>
++<span id="L92" rel="#L92">92</span>
++<span id="L93" rel="#L93">93</span>
++<span id="L94" rel="#L94">94</span>
++<span id="L95" rel="#L95">95</span>
++<span id="L96" rel="#L96">96</span>
++<span id="L97" rel="#L97">97</span>
++<span id="L98" rel="#L98">98</span>
++<span id="L99" rel="#L99">99</span>
++<span id="L100" rel="#L100">100</span>
++<span id="L101" rel="#L101">101</span>
++<span id="L102" rel="#L102">102</span>
++<span id="L103" rel="#L103">103</span>
++<span id="L104" rel="#L104">104</span>
++<span id="L105" rel="#L105">105</span>
++<span id="L106" rel="#L106">106</span>
++<span id="L107" rel="#L107">107</span>
++<span id="L108" rel="#L108">108</span>
++<span id="L109" rel="#L109">109</span>
++<span id="L110" rel="#L110">110</span>
++<span id="L111" rel="#L111">111</span>
++<span id="L112" rel="#L112">112</span>
++<span id="L113" rel="#L113">113</span>
++<span id="L114" rel="#L114">114</span>
++<span id="L115" rel="#L115">115</span>
++<span id="L116" rel="#L116">116</span>
++<span id="L117" rel="#L117">117</span>
++<span id="L118" rel="#L118">118</span>
++<span id="L119" rel="#L119">119</span>
++<span id="L120" rel="#L120">120</span>
++<span id="L121" rel="#L121">121</span>
++<span id="L122" rel="#L122">122</span>
++<span id="L123" rel="#L123">123</span>
++<span id="L124" rel="#L124">124</span>
++<span id="L125" rel="#L125">125</span>
++<span id="L126" rel="#L126">126</span>
++<span id="L127" rel="#L127">127</span>
++<span id="L128" rel="#L128">128</span>
++<span id="L129" rel="#L129">129</span>
++<span id="L130" rel="#L130">130</span>
++<span id="L131" rel="#L131">131</span>
++<span id="L132" rel="#L132">132</span>
++<span id="L133" rel="#L133">133</span>
++<span id="L134" rel="#L134">134</span>
++<span id="L135" rel="#L135">135</span>
++<span id="L136" rel="#L136">136</span>
++<span id="L137" rel="#L137">137</span>
++<span id="L138" rel="#L138">138</span>
++<span id="L139" rel="#L139">139</span>
++<span id="L140" rel="#L140">140</span>
++<span id="L141" rel="#L141">141</span>
++<span id="L142" rel="#L142">142</span>
++<span id="L143" rel="#L143">143</span>
++<span id="L144" rel="#L144">144</span>
++<span id="L145" rel="#L145">145</span>
++<span id="L146" rel="#L146">146</span>
++<span id="L147" rel="#L147">147</span>
++<span id="L148" rel="#L148">148</span>
++<span id="L149" rel="#L149">149</span>
++<span id="L150" rel="#L150">150</span>
++<span id="L151" rel="#L151">151</span>
++<span id="L152" rel="#L152">152</span>
++<span id="L153" rel="#L153">153</span>
++<span id="L154" rel="#L154">154</span>
++<span id="L155" rel="#L155">155</span>
++<span id="L156" rel="#L156">156</span>
++<span id="L157" rel="#L157">157</span>
++<span id="L158" rel="#L158">158</span>
++<span id="L159" rel="#L159">159</span>
++<span id="L160" rel="#L160">160</span>
++<span id="L161" rel="#L161">161</span>
++<span id="L162" rel="#L162">162</span>
++<span id="L163" rel="#L163">163</span>
++<span id="L164" rel="#L164">164</span>
++<span id="L165" rel="#L165">165</span>
++<span id="L166" rel="#L166">166</span>
++<span id="L167" rel="#L167">167</span>
++<span id="L168" rel="#L168">168</span>
++<span id="L169" rel="#L169">169</span>
++<span id="L170" rel="#L170">170</span>
++<span id="L171" rel="#L171">171</span>
++<span id="L172" rel="#L172">172</span>
++<span id="L173" rel="#L173">173</span>
++<span id="L174" rel="#L174">174</span>
++<span id="L175" rel="#L175">175</span>
++<span id="L176" rel="#L176">176</span>
++<span id="L177" rel="#L177">177</span>
++<span id="L178" rel="#L178">178</span>
++<span id="L179" rel="#L179">179</span>
++<span id="L180" rel="#L180">180</span>
++<span id="L181" rel="#L181">181</span>
++<span id="L182" rel="#L182">182</span>
++<span id="L183" rel="#L183">183</span>
++<span id="L184" rel="#L184">184</span>
++<span id="L185" rel="#L185">185</span>
++<span id="L186" rel="#L186">186</span>
++<span id="L187" rel="#L187">187</span>
++<span id="L188" rel="#L188">188</span>
++<span id="L189" rel="#L189">189</span>
++<span id="L190" rel="#L190">190</span>
++<span id="L191" rel="#L191">191</span>
++<span id="L192" rel="#L192">192</span>
++<span id="L193" rel="#L193">193</span>
++<span id="L194" rel="#L194">194</span>
++<span id="L195" rel="#L195">195</span>
++<span id="L196" rel="#L196">196</span>
++<span id="L197" rel="#L197">197</span>
++<span id="L198" rel="#L198">198</span>
++<span id="L199" rel="#L199">199</span>
++<span id="L200" rel="#L200">200</span>
++<span id="L201" rel="#L201">201</span>
++<span id="L202" rel="#L202">202</span>
++<span id="L203" rel="#L203">203</span>
++<span id="L204" rel="#L204">204</span>
++<span id="L205" rel="#L205">205</span>
++<span id="L206" rel="#L206">206</span>
++<span id="L207" rel="#L207">207</span>
++<span id="L208" rel="#L208">208</span>
++<span id="L209" rel="#L209">209</span>
++<span id="L210" rel="#L210">210</span>
++<span id="L211" rel="#L211">211</span>
++<span id="L212" rel="#L212">212</span>
++<span id="L213" rel="#L213">213</span>
++<span id="L214" rel="#L214">214</span>
++<span id="L215" rel="#L215">215</span>
++<span id="L216" rel="#L216">216</span>
++<span id="L217" rel="#L217">217</span>
++<span id="L218" rel="#L218">218</span>
++<span id="L219" rel="#L219">219</span>
++<span id="L220" rel="#L220">220</span>
++<span id="L221" rel="#L221">221</span>
++<span id="L222" rel="#L222">222</span>
++<span id="L223" rel="#L223">223</span>
++<span id="L224" rel="#L224">224</span>
++<span id="L225" rel="#L225">225</span>
++<span id="L226" rel="#L226">226</span>
++<span id="L227" rel="#L227">227</span>
++<span id="L228" rel="#L228">228</span>
++<span id="L229" rel="#L229">229</span>
++<span id="L230" rel="#L230">230</span>
++<span id="L231" rel="#L231">231</span>
++
++            </td>
++            <td class="blob-line-code">
++                    <div class="code-body highlight"><pre><div class='line' id='LC1'><span class="cp">&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;</span></div><div class='line' id='LC2'><span class="c">&lt;!--</span></div><div class='line' id='LC3'><span class="c">  ~ Licensed to the Apache Software Foundation (ASF) under one or more</span></div><div class='line' id='LC4'><span class="c">  ~ contributor license agreements.  See the NOTICE file distributed with</span></div><div class='line' id='LC5'><span class="c">  ~ this work for additional information regarding copyright ownership.</span></div><div class='line' id='LC6'><span class="c">  ~ The ASF licenses this file to You under the Apache License, Version 2.0</span></div><div class='line' id='LC7'><span class="c">  ~ (the &quot;License&quot;); you may not use this file except in compliance with</span></div><div class='line

<TRUNCATED>
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d50ccc5c/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d50ccc5c/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------


[07/20] git commit: Added stream-lib dependency to Maven build

Posted by rx...@apache.org.
Added stream-lib dependency to Maven build


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/13227aaa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/13227aaa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/13227aaa

Branch: refs/heads/master
Commit: 13227aaa28ba7bb29b94a598b6efd45c7264d78b
Parents: 79868fe
Author: Hossein Falaki <fa...@gmail.com>
Authored: Fri Oct 18 14:10:24 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Fri Oct 18 14:10:24 2013 -0700

----------------------------------------------------------------------
 core/pom.xml | 4 ++++
 pom.xml      | 5 +++++
 2 files changed, 9 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/13227aaa/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 8621d25..e53875c 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -85,6 +85,10 @@
       <artifactId>protobuf-java</artifactId>
     </dependency>
     <dependency>
+      <groupId>com.clearspring.analytics</groupId>
+      <artifactId>stream</artifactId>
+    </dependency>
+    <dependency>
       <groupId>com.twitter</groupId>
       <artifactId>chill_2.9.3</artifactId>
       <version>0.3.1</version>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/13227aaa/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5ad7b1b..3349583 100644
--- a/pom.xml
+++ b/pom.xml
@@ -232,6 +232,11 @@
         <version>4.0</version>
       </dependency>
       <dependency>
+        <groupId>com.clearspring.analytics</groupId>
+        <artifactId>stream</artifactId>
+        <version>2.4.0</version>
+      </dependency>
+      <dependency>
         <groupId>com.google.protobuf</groupId>
         <artifactId>protobuf-java</artifactId>
         <version>2.4.1</version>


[13/20] git commit: Renamed countDistinct and countDistinctByKey methods to include Approx

Posted by rx...@apache.org.
Renamed countDistinct and countDistinctByKey methods to include Approx


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/a7de8e9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/a7de8e9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/a7de8e9b

Branch: refs/heads/master
Commit: a7de8e9b1c9859f45db4a620dd62a62d472d8396
Parents: d50ccc5
Author: Hossein Falaki <fa...@gmail.com>
Authored: Mon Dec 30 19:28:03 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Mon Dec 30 19:28:03 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/rdd/PairRDDFunctions.scala     | 10 +++++-----
 core/src/main/scala/org/apache/spark/rdd/RDD.scala        |  2 +-
 .../org/apache/spark/rdd/PairRDDFunctionsSuite.scala      |  6 +++---
 core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala   | 10 +++++-----
 .../org/apache/spark/serializer/KryoSerializerSuite.scala |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a7de8e9b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 4e4f860..1dc5f8d 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -217,7 +217,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
   * more accurate counts but increase the memory footprint and vice versa. Uses the provided
    * Partitioner to partition the output RDD.
    */
-  def countDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = {
+  def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = {
     val createHLL = (v: V) => {
       val hll = new SerializableHyperLogLog(new HyperLogLog(relativeSD))
       hll.value.offer(v)
@@ -242,8 +242,8 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * output RDD into numPartitions.
    *
    */
-  def countDistinctByKey(relativeSD: Double, numPartitions: Int): RDD[(K, Long)] = {
-    countDistinctByKey(relativeSD, new HashPartitioner(numPartitions))
+  def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): RDD[(K, Long)] = {
+    countApproxDistinctByKey(relativeSD, new HashPartitioner(numPartitions))
   }
 
   /**
@@ -254,8 +254,8 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * relativeSD is 0.05. Hash-partitions the output RDD using the existing partitioner/parallelism
    * level.
    */
-  def countDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = {
-    countDistinctByKey(relativeSD, defaultPartitioner(self))
+  def countApproxDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = {
+    countApproxDistinctByKey(relativeSD, defaultPartitioner(self))
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a7de8e9b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 136fa45..74fab48 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -797,7 +797,7 @@ abstract class RDD[T: ClassTag](
   * more accurate counts but increase the memory footprint and vice versa. The default value of
    * relativeSD is 0.05.
    */
-  def countDistinct(relativeSD: Double = 0.05): Long = {
+  def countApproxDistinct(relativeSD: Double = 0.05): Long = {
 
     def hllCountPartition(iter: Iterator[T]): Iterator[SerializableHyperLogLog] = {
       val hllCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD))

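Taken together with the PairRDDFunctions changes above, a minimal driver-side usage sketch of the renamed API. This assumes an existing SparkContext `sc`; the relativeSD values and the sample data are illustrative only.

  // Brings in the implicit conversion to PairRDDFunctions.
  import org.apache.spark.SparkContext._

  // Whole-RDD approximate distinct count (HyperLogLog under the hood).
  val values = sc.parallelize(1 to 100000).map(_ % 100)
  val distinctEstimate = values.countApproxDistinct(relativeSD = 0.01)   // close to 100

  // Per-key approximate distinct counts; key i has i distinct values here.
  val pairs = sc.parallelize(for (i <- 1 to 100; j <- 1 to i) yield (i, j))
  val perKey = pairs.countApproxDistinctByKey(relativeSD = 0.05).collect()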
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a7de8e9b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index 6ad58b8..5da538a 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -110,7 +110,7 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
     assert(deps.size === 2) // ShuffledRDD, ParallelCollection.
   }
 
-  test("countDistinctByKey") {
+  test("countApproxDistinctByKey") {
     def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
 
     /* Since HyperLogLog unique counting is approximate, and the relative standard deviation is
@@ -124,7 +124,7 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
     // Therefore, the expected count for key i would be i.
     val stacked = (1 to 100).flatMap(i => (1 to i).map(j => (i, j)))
     val rdd1 = sc.parallelize(stacked)
-    val counted1 = rdd1.countDistinctByKey(relativeSD).collect()
+    val counted1 = rdd1.countApproxDistinctByKey(relativeSD).collect()
     counted1.foreach{
       case(k, count) => assert(error(count, k) < relativeSD)
     }
@@ -137,7 +137,7 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
       (1 to num).map(j => (num, j))
     }
     val rdd2 = sc.parallelize(randStacked)
-    val counted2 = rdd2.countDistinctByKey(relativeSD, 4).collect()
+    val counted2 = rdd2.countApproxDistinctByKey(relativeSD, 4).collect()
     counted2.foreach{
       case(k, count) => assert(error(count, k) < relativeSD)
     }

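For readers following the test, expanding the stacked data for a small upper bound makes the expected counts obvious (a sketch, not part of the suite):

    // For i up to 3, key i gets exactly i distinct values:
    (1 to 3).flatMap(i => (1 to i).map(j => (i, j)))
    // => Vector((1,1), (2,1), (2,2), (3,1), (3,2), (3,3))
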
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a7de8e9b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index 2f81b81..1383359 100644
--- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -63,17 +63,17 @@ class RDDSuite extends FunSuite with SharedSparkContext {
     }
   }
 
-  test("countDistinct") {
+  test("countApproxDistinct") {
 
     def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
 
     val size = 100
     val uniformDistro = for (i <- 1 to 100000) yield i % size
     val simpleRdd = sc.makeRDD(uniformDistro)
-    assert(error(simpleRdd.countDistinct(0.2), size) < 0.2)
-    assert(error(simpleRdd.countDistinct(0.05), size) < 0.05)
-    assert(error(simpleRdd.countDistinct(0.01), size) < 0.01)
-    assert(error(simpleRdd.countDistinct(0.001), size) < 0.001)
+    assert(error(simpleRdd.countApproxDistinct(0.2), size) < 0.2)
+    assert(error(simpleRdd.countApproxDistinct(0.05), size) < 0.05)
+    assert(error(simpleRdd.countApproxDistinct(0.01), size) < 0.01)
+    assert(error(simpleRdd.countApproxDistinct(0.001), size) < 0.001)
   }
 
   test("SparkContext.union") {

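As a sanity check on the test data (a sketch, not part of the suite): the uniform distribution cycles through the residues 0 to 99, so the true cardinality is exactly size, the value each approximate count is compared against.

    val size = 100
    val uniformDistro = for (i <- 1 to 100000) yield i % size
    uniformDistro.distinct.size  // 100
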
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a7de8e9b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index 1852971..636e3ab 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -173,7 +173,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext {
   }
 
   test("kryo with SerializableHyperLogLog") {
-    assert(sc.parallelize( Array(1, 2, 3, 2, 3, 3, 2, 3, 1) ).countDistinct(0.01) === 3)
+    assert(sc.parallelize( Array(1, 2, 3, 2, 3, 3, 2, 3, 1) ).countApproxDistinct(0.01) === 3)
   }
 
   test("kryo with reduce") {

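A sketch of exercising the renamed method under Kryo outside the suite; the property name and constructor shown are the standard Spark setup of this era, but treat the exact configuration as an assumption rather than part of the commit:

    import org.apache.spark.SparkContext

    // Select Kryo before the SparkContext is created.
    System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext("local", "hll-kryo-sketch")
    // Nine elements, three distinct values; with relativeSD = 0.01 the estimate should be exactly 3.
    val approx = sc.parallelize(Seq(1, 2, 3, 2, 3, 3, 2, 3, 1)).countApproxDistinct(0.01)
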

[05/20] git commit: Fixed document typo

Posted by rx...@apache.org.
Fixed document typo


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b611d9a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b611d9a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b611d9a6

Branch: refs/heads/master
Commit: b611d9a65c0eda8ca7ceb015773ea4a4e26f2640
Parents: 654d60b
Author: Hossein Falaki <fa...@gmail.com>
Authored: Thu Oct 17 23:05:22 2013 -0700
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Thu Oct 17 23:05:22 2013 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala | 6 +++---
 core/src/main/scala/org/apache/spark/rdd/RDD.scala             | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b611d9a6/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index f34593f..d778692 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -211,7 +211,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)])
 
   /**
    * Return approximate number of distinct values for each key in this RDD.
-   * The accuracy of approximation can be controlled through the relative standard diviation
+   * The accuracy of approximation can be controlled through the relative standard deviation
    * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
    * more accurate counts but increase the memory footprint and vise versa. Uses the provided
    * Partitioner to partition the output RDD.
@@ -235,7 +235,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)])
 
   /**
    * Return approximate number of distinct values for each key in this RDD. 
-   * The accuracy of approximation can be controlled through the relative standard diviation
+   * The accuracy of approximation can be controlled through the relative standard deviation
    * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
    * more accurate counts but increase the memory footprint and vise versa. HashPartitions the
    * output RDD into numPartitions.
@@ -247,7 +247,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)])
 
   /**
    * Return approximate number of distinct values for each key this RDD.
-   * The accuracy of approximation can be controlled through the relative standard diviation
+   * The accuracy of approximation can be controlled through the relative standard deviation
    * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
    * more accurate counts but increase the memory footprint and vise versa. The default value of
    * relativeSD is 0.05. Hash-partitions the output RDD using the existing partitioner/parallelism

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b611d9a6/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 09932db..38fa96f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -769,7 +769,7 @@ abstract class RDD[T: ClassManifest](
   /**
    * Return approximate number of distinct elements in the RDD.
    *
-   * The accuracy of approximation can be controlled through the relative standard diviation
+   * The accuracy of approximation can be controlled through the relative standard deviation
    * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in
    * more accurate counts but increase the memory footprint and vise versa. The default value of
    * relativeSD is 0.05.

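The doc comments above tie relativeSD to memory use; one rough way to see that trade-off directly is to compare serialized sizes of the underlying stream-lib counters (a sketch; the exact byte counts are an assumption and will vary):

    import com.clearspring.analytics.stream.cardinality.HyperLogLog

    val coarse = new HyperLogLog(0.05)   // fewer registers, cheaper, less accurate
    val fine   = new HyperLogLog(0.01)   // more registers, costlier, more accurate
    coarse.getBytes().length < fine.getBytes().length  // expected to be true
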

[09/20] git commit: Removed superfluous abs call from test cases.

Posted by rx...@apache.org.
Removed superfluous abs call from test cases.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/49bf47e1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/49bf47e1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/49bf47e1

Branch: refs/heads/master
Commit: 49bf47e1b792b82561b164f4f8006ddd4dd350ee
Parents: 2d511ab
Author: Hossein Falaki <fa...@gmail.com>
Authored: Tue Dec 10 19:50:50 2013 -0800
Committer: Hossein Falaki <fa...@gmail.com>
Committed: Tue Dec 10 19:50:50 2013 -0800

----------------------------------------------------------------------
 .../test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/49bf47e1/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index 5683ada..6ad58b8 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -126,7 +126,7 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
     val rdd1 = sc.parallelize(stacked)
     val counted1 = rdd1.countDistinctByKey(relativeSD).collect()
     counted1.foreach{
-      case(k, count) => assert(math.abs(error(count, k)) < relativeSD)
+      case(k, count) => assert(error(count, k) < relativeSD)
     }
 
     val rnd = new Random()
@@ -139,7 +139,7 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
     val rdd2 = sc.parallelize(randStacked)
     val counted2 = rdd2.countDistinctByKey(relativeSD, 4).collect()
     counted2.foreach{
-      case(k, count) => assert(math.abs(error(count, k)) < relativeSD)
+      case(k, count) => assert(error(count, k) < relativeSD)
     }
   }
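
For context on why the outer math.abs was superfluous: the error helper defined at the top of these tests already applies math.abs, so its result can never be negative. A minimal check (a sketch, not part of the commit):

    def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
    error(95L, 100L)   // 0.05
    error(105L, 100L)  // 0.05 as well; wrapping this in math.abs again changes nothing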