Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/13 06:31:33 UTC
[3/6] git commit: Merge branch 'master' into remove_simpleredundantreturn_scala
Merge branch 'master' into remove_simpleredundantreturn_scala
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/91a56360
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/91a56360
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/91a56360
Branch: refs/heads/master
Commit: 91a563608e301bb243fca3765d569bde65ad747c
Parents: 93a65e5 288a878
Author: Henry Saputra <hs...@apache.org>
Authored: Sun Jan 12 10:34:13 2014 -0800
Committer: Henry Saputra <hs...@apache.org>
Committed: Sun Jan 12 10:34:13 2014 -0800
----------------------------------------------------------------------
bin/pyspark | 7 +-
conf/log4j.properties.template | 4 +-
core/pom.xml | 10 +
.../org/apache/spark/log4j-defaults.properties | 4 +-
.../scala/org/apache/spark/Aggregator.scala | 61 ++--
.../scala/org/apache/spark/SparkContext.scala | 122 ++++---
.../main/scala/org/apache/spark/SparkEnv.scala | 6 +-
.../apache/spark/api/java/JavaDoubleRDD.scala | 12 +-
.../org/apache/spark/api/java/JavaPairRDD.scala | 6 +
.../org/apache/spark/api/java/JavaRDD.scala | 6 +
.../org/apache/spark/api/java/JavaRDDLike.scala | 6 +
.../spark/api/java/JavaSparkContext.scala | 54 ++-
.../scala/org/apache/spark/deploy/Client.scala | 151 ++++++++
.../apache/spark/deploy/ClientArguments.scala | 117 +++++++
.../org/apache/spark/deploy/DeployMessage.scala | 52 ++-
.../apache/spark/deploy/DriverDescription.scala | 29 ++
.../apache/spark/deploy/client/AppClient.scala | 201 +++++++++++
.../spark/deploy/client/AppClientListener.scala | 39 +++
.../org/apache/spark/deploy/client/Client.scala | 200 -----------
.../spark/deploy/client/ClientListener.scala | 39 ---
.../apache/spark/deploy/client/TestClient.scala | 4 +-
.../apache/spark/deploy/master/DriverInfo.scala | 36 ++
.../spark/deploy/master/DriverState.scala | 33 ++
.../master/FileSystemPersistenceEngine.scala | 17 +-
.../org/apache/spark/deploy/master/Master.scala | 189 +++++++++-
.../spark/deploy/master/PersistenceEngine.scala | 11 +-
.../apache/spark/deploy/master/WorkerInfo.scala | 20 +-
.../master/ZooKeeperPersistenceEngine.scala | 14 +-
.../deploy/master/ui/ApplicationPage.scala | 4 +-
.../spark/deploy/master/ui/IndexPage.scala | 56 ++-
.../spark/deploy/worker/CommandUtils.scala | 63 ++++
.../spark/deploy/worker/DriverRunner.scala | 234 +++++++++++++
.../spark/deploy/worker/DriverWrapper.scala | 31 ++
.../spark/deploy/worker/ExecutorRunner.scala | 67 +---
.../org/apache/spark/deploy/worker/Worker.scala | 63 +++-
.../spark/deploy/worker/WorkerWatcher.scala | 55 +++
.../spark/deploy/worker/ui/IndexPage.scala | 65 +++-
.../spark/deploy/worker/ui/WorkerWebUI.scala | 43 ++-
.../executor/CoarseGrainedExecutorBackend.scala | 27 +-
.../org/apache/spark/executor/Executor.scala | 5 +
.../org/apache/spark/rdd/CoGroupedRDD.scala | 88 +++--
.../scala/org/apache/spark/rdd/HadoopRDD.scala | 37 +-
.../org/apache/spark/rdd/NewHadoopRDD.scala | 36 +-
.../org/apache/spark/rdd/PairRDDFunctions.scala | 7 +-
.../apache/spark/scheduler/DAGScheduler.scala | 4 +-
.../cluster/SparkDeploySchedulerBackend.scala | 10 +-
.../org/apache/spark/storage/BlockId.scala | 12 +-
.../org/apache/spark/storage/BlockManager.scala | 4 +-
.../spark/storage/BlockManagerMasterActor.scala | 7 +-
.../spark/storage/BlockObjectWriter.scala | 4 +
.../apache/spark/storage/DiskBlockManager.scala | 11 +-
.../spark/storage/ShuffleBlockManager.scala | 2 +-
.../org/apache/spark/ui/jobs/StagePage.scala | 2 +-
.../org/apache/spark/ui/jobs/StageTable.scala | 2 +-
.../org/apache/spark/util/AppendOnlyMap.scala | 237 -------------
.../apache/spark/util/TimeStampedHashMap.scala | 17 +-
.../scala/org/apache/spark/util/Utils.scala | 28 +-
.../scala/org/apache/spark/util/Vector.scala | 8 +
.../spark/util/collection/AppendOnlyMap.scala | 297 ++++++++++++++++
.../util/collection/ExternalAppendOnlyMap.scala | 350 +++++++++++++++++++
.../collection/SizeTrackingAppendOnlyMap.scala | 101 ++++++
.../apache/spark/deploy/JsonProtocolSuite.scala | 40 ++-
.../spark/deploy/worker/DriverRunnerTest.scala | 131 +++++++
.../deploy/worker/ExecutorRunnerTest.scala | 4 +-
.../deploy/worker/WorkerWatcherSuite.scala | 32 ++
.../apache/spark/util/AppendOnlyMapSuite.scala | 154 --------
.../util/SizeTrackingAppendOnlyMapSuite.scala | 120 +++++++
.../org/apache/spark/util/VectorSuite.scala | 44 +++
.../util/collection/AppendOnlyMapSuite.scala | 198 +++++++++++
.../collection/ExternalAppendOnlyMapSuite.scala | 230 ++++++++++++
docs/configuration.md | 25 +-
docs/python-programming-guide.md | 5 +-
docs/running-on-yarn.md | 15 +-
docs/spark-standalone.md | 38 +-
ec2/spark_ec2.py | 12 +-
.../streaming/examples/JavaFlumeEventCount.java | 2 +
.../streaming/examples/JavaKafkaWordCount.java | 2 +
.../examples/JavaNetworkWordCount.java | 11 +-
.../streaming/examples/JavaQueueStream.java | 2 +
.../spark/examples/DriverSubmissionTest.scala | 46 +++
.../streaming/examples/ActorWordCount.scala | 12 +-
.../streaming/examples/FlumeEventCount.scala | 4 +-
.../streaming/examples/HdfsWordCount.scala | 3 +-
.../streaming/examples/KafkaWordCount.scala | 5 +-
.../streaming/examples/MQTTWordCount.scala | 8 +-
.../streaming/examples/NetworkWordCount.scala | 7 +-
.../spark/streaming/examples/QueueStream.scala | 8 +-
.../streaming/examples/RawNetworkGrep.scala | 5 +-
.../examples/RecoverableNetworkWordCount.scala | 118 +++++++
.../examples/StatefulNetworkWordCount.scala | 2 +
.../streaming/examples/StreamingExamples.scala | 21 ++
.../streaming/examples/TwitterAlgebirdCMS.scala | 10 +-
.../streaming/examples/TwitterAlgebirdHLL.scala | 9 +-
.../streaming/examples/TwitterPopularTags.scala | 2 +
.../streaming/examples/ZeroMQWordCount.scala | 1 +
.../examples/clickstream/PageViewStream.scala | 4 +-
external/kafka/pom.xml | 4 +-
.../spark/streaming/mqtt/MQTTInputDStream.scala | 2 +-
.../apache/spark/mllib/recommendation/ALS.scala | 15 +-
pom.xml | 17 +
project/SparkBuild.scala | 19 +-
.../org/apache/spark/streaming/Checkpoint.scala | 188 ++++++----
.../org/apache/spark/streaming/DStream.scala | 17 +-
.../spark/streaming/DStreamCheckpointData.scala | 106 +++---
.../apache/spark/streaming/DStreamGraph.scala | 38 +-
.../spark/streaming/StreamingContext.scala | 75 +++-
.../api/java/JavaStreamingContext.scala | 96 ++++-
.../streaming/dstream/FileInputDStream.scala | 42 ++-
.../streaming/scheduler/JobGenerator.scala | 31 +-
.../streaming/util/MasterFailureTest.scala | 55 +--
.../apache/spark/streaming/JavaAPISuite.java | 29 +-
.../spark/streaming/CheckpointSuite.scala | 10 +-
.../spark/deploy/yarn/ApplicationMaster.scala | 13 +-
.../spark/deploy/yarn/WorkerLauncher.scala | 28 +-
.../cluster/YarnClientSchedulerBackend.scala | 50 +--
.../spark/deploy/yarn/ApplicationMaster.scala | 13 +-
.../org/apache/spark/deploy/yarn/Client.scala | 1 +
.../spark/deploy/yarn/WorkerLauncher.scala | 28 +-
118 files changed, 4403 insertions(+), 1231 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/util/Vector.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
index 0000000,d98c7aa..b8c852b
mode 000000,100644..100644
--- a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
@@@ -1,0 -1,297 +1,297 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements. See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License. You may obtain a copy of the License at
+  *
+  *    http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ package org.apache.spark.util.collection
+
+ import java.util.{Arrays, Comparator}
+
+ /**
+  * A simple open hash table optimized for the append-only use case, where keys
+  * are never removed, but the value for each key may be changed.
+  *
+  * This implementation uses quadratic probing with a power-of-2 hash table
+  * size, which is guaranteed to explore all spaces for each key (see
+  * http://en.wikipedia.org/wiki/Quadratic_probing).
+  *
+  * TODO: Cache the hash values of each key? java.util.HashMap does that.
+  */
+ private[spark]
+ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K,
+   V)] with Serializable {
+   require(initialCapacity <= (1 << 29), "Can't make capacity bigger than 2^29 elements")
+   require(initialCapacity >= 1, "Invalid initial capacity")
+
+   private var capacity = nextPowerOf2(initialCapacity)
+   private var mask = capacity - 1
+   private var curSize = 0
+   private var growThreshold = (LOAD_FACTOR * capacity).toInt
+
+   // Holds keys and values in the same array for memory locality; specifically, the order of
+   // elements is key0, value0, key1, value1, key2, value2, etc.
+   private var data = new Array[AnyRef](2 * capacity)
+
+   // Treat the null key differently so we can use nulls in "data" to represent empty items.
+   private var haveNullValue = false
+   private var nullValue: V = null.asInstanceOf[V]
+
+   // Triggered by destructiveSortedIterator; the underlying data array may no longer be used
+   private var destroyed = false
+   private val destructionMessage = "Map state is invalid from destructive sorting!"
+
+   private val LOAD_FACTOR = 0.7
+
+   /** Get the value for a given key */
+   def apply(key: K): V = {
+     assert(!destroyed, destructionMessage)
+     val k = key.asInstanceOf[AnyRef]
+     if (k.eq(null)) {
+       return nullValue
+     }
+     var pos = rehash(k.hashCode) & mask
+     var i = 1
+     while (true) {
+       val curKey = data(2 * pos)
+       if (k.eq(curKey) || k.equals(curKey)) {
+         return data(2 * pos + 1).asInstanceOf[V]
+       } else if (curKey.eq(null)) {
+         return null.asInstanceOf[V]
+       } else {
+         val delta = i
+         pos = (pos + delta) & mask
+         i += 1
+       }
+     }
 -    return null.asInstanceOf[V]
++    null.asInstanceOf[V]
+   }
+
+   /** Set the value for a key */
+   def update(key: K, value: V): Unit = {
+     assert(!destroyed, destructionMessage)
+     val k = key.asInstanceOf[AnyRef]
+     if (k.eq(null)) {
+       if (!haveNullValue) {
+         incrementSize()
+       }
+       nullValue = value
+       haveNullValue = true
+       return
+     }
+     var pos = rehash(key.hashCode) & mask
+     var i = 1
+     while (true) {
+       val curKey = data(2 * pos)
+       if (curKey.eq(null)) {
+         data(2 * pos) = k
+         data(2 * pos + 1) = value.asInstanceOf[AnyRef]
+         incrementSize()  // Since we added a new key
+         return
+       } else if (k.eq(curKey) || k.equals(curKey)) {
+         data(2 * pos + 1) = value.asInstanceOf[AnyRef]
+         return
+       } else {
+         val delta = i
+         pos = (pos + delta) & mask
+         i += 1
+       }
+     }
+   }
+
+   /**
+    * Set the value for key to updateFunc(hadValue, oldValue), where oldValue will be the old value
+    * for key, if any, or null otherwise. Returns the newly updated value.
+    */
+   def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
+     assert(!destroyed, destructionMessage)
+     val k = key.asInstanceOf[AnyRef]
+     if (k.eq(null)) {
+       if (!haveNullValue) {
+         incrementSize()
+       }
+       nullValue = updateFunc(haveNullValue, nullValue)
+       haveNullValue = true
+       return nullValue
+     }
+     var pos = rehash(k.hashCode) & mask
+     var i = 1
+     while (true) {
+       val curKey = data(2 * pos)
+       if (k.eq(curKey) || k.equals(curKey)) {
+         val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V])
+         data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
+         return newValue
+       } else if (curKey.eq(null)) {
+         val newValue = updateFunc(false, null.asInstanceOf[V])
+         data(2 * pos) = k
+         data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
+         incrementSize()
+         return newValue
+       } else {
+         val delta = i
+         pos = (pos + delta) & mask
+         i += 1
+       }
+     }
+     null.asInstanceOf[V] // Never reached but needed to keep compiler happy
+   }
+
+   /** Iterator method from Iterable */
+   override def iterator: Iterator[(K, V)] = {
+     assert(!destroyed, destructionMessage)
+     new Iterator[(K, V)] {
+       var pos = -1
+
+       /** Get the next value we should return from next(), or null if we're finished iterating */
+       def nextValue(): (K, V) = {
+         if (pos == -1) { // Treat position -1 as looking at the null value
+           if (haveNullValue) {
+             return (null.asInstanceOf[K], nullValue)
+           }
+           pos += 1
+         }
+         while (pos < capacity) {
+           if (!data(2 * pos).eq(null)) {
+             return (data(2 * pos).asInstanceOf[K], data(2 * pos + 1).asInstanceOf[V])
+           }
+           pos += 1
+         }
+         null
+       }
+
+       override def hasNext: Boolean = nextValue() != null
+
+       override def next(): (K, V) = {
+         val value = nextValue()
+         if (value == null) {
+           throw new NoSuchElementException("End of iterator")
+         }
+         pos += 1
+         value
+       }
+     }
+   }
+
+   override def size: Int = curSize
+
+   /** Increase table size by 1, rehashing if necessary */
+   private def incrementSize() {
+     curSize += 1
+     if (curSize > growThreshold) {
+       growTable()
+     }
+   }
+
+   /**
+    * Re-hash a value to deal better with hash functions that don't differ in the lower bits.
+    * We use the Murmur Hash 3 finalization step that's also used in fastutil.
+    */
+   private def rehash(h: Int): Int = {
+     it.unimi.dsi.fastutil.HashCommon.murmurHash3(h)
+   }
+
+   /** Double the table's size and re-hash everything */
+   protected def growTable() {
+     val newCapacity = capacity * 2
+     if (newCapacity >= (1 << 30)) {
+       // We can't make the table this big because we want an array of 2x
+       // that size for our data, but array sizes are at most Int.MaxValue
+       throw new Exception("Can't make capacity bigger than 2^29 elements")
+     }
+     val newData = new Array[AnyRef](2 * newCapacity)
+     val newMask = newCapacity - 1
+     // Insert all our old values into the new array. Note that because our old keys are
+     // unique, there's no need to check for equality here when we insert.
+     var oldPos = 0
+     while (oldPos < capacity) {
+       if (!data(2 * oldPos).eq(null)) {
+         val key = data(2 * oldPos)
+         val value = data(2 * oldPos + 1)
+         var newPos = rehash(key.hashCode) & newMask
+         var i = 1
+         var keepGoing = true
+         while (keepGoing) {
+           val curKey = newData(2 * newPos)
+           if (curKey.eq(null)) {
+             newData(2 * newPos) = key
+             newData(2 * newPos + 1) = value
+             keepGoing = false
+           } else {
+             val delta = i
+             newPos = (newPos + delta) & newMask
+             i += 1
+           }
+         }
+       }
+       oldPos += 1
+     }
+     data = newData
+     capacity = newCapacity
+     mask = newMask
+     growThreshold = (LOAD_FACTOR * newCapacity).toInt
+   }
+
+   private def nextPowerOf2(n: Int): Int = {
+     val highBit = Integer.highestOneBit(n)
+     if (highBit == n) n else highBit << 1
+   }
+
+   /**
+    * Return an iterator of the map in sorted order. This provides a way to sort the map without
+    * using additional memory, at the expense of destroying the validity of the map.
+    */
+   def destructiveSortedIterator(cmp: Comparator[(K, V)]): Iterator[(K, V)] = {
+     destroyed = true
+     // Pack KV pairs into the front of the underlying array
+     var keyIndex, newIndex = 0
+     while (keyIndex < capacity) {
+       if (data(2 * keyIndex) != null) {
+         data(newIndex) = (data(2 * keyIndex), data(2 * keyIndex + 1))
+         newIndex += 1
+       }
+       keyIndex += 1
+     }
+     assert(curSize == newIndex + (if (haveNullValue) 1 else 0))
+
+     // Sort by the given ordering
+     val rawOrdering = new Comparator[AnyRef] {
+       def compare(x: AnyRef, y: AnyRef): Int = {
+         cmp.compare(x.asInstanceOf[(K, V)], y.asInstanceOf[(K, V)])
+       }
+     }
+     Arrays.sort(data, 0, newIndex, rawOrdering)
+
+     new Iterator[(K, V)] {
+       var i = 0
+       var nullValueReady = haveNullValue
+       def hasNext: Boolean = (i < newIndex || nullValueReady)
+       def next(): (K, V) = {
+         if (nullValueReady) {
+           nullValueReady = false
+           (null.asInstanceOf[K], nullValue)
+         } else {
+           val item = data(i).asInstanceOf[(K, V)]
+           i += 1
+           item
+         }
+       }
+     }
+   }
+
+   /**
+    * Return whether the next insert will cause the map to grow
+    */
+   def atGrowThreshold: Boolean = curSize == growThreshold
+ }
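----------------------------------------------------------------------
The class comment above relies on quadratic probing over a power-of-2
table always being able to reach a free slot. A minimal, self-contained
sketch of why that holds (illustrative only, not part of this commit;
QuadraticProbingDemo is a made-up name): the collision step
pos = (pos + i) & mask; i += 1 walks triangular-number offsets, and for
a power-of-2 capacity the first "capacity" of those offsets are all
distinct, so every slot is visited exactly once before any repeats.

object QuadraticProbingDemo {
  def main(args: Array[String]): Unit = {
    val capacity = 16                 // power of 2, as AppendOnlyMap requires
    val mask = capacity - 1
    var pos = 11 & mask               // stand-in for rehash(key.hashCode) & mask
    var i = 1
    val visited = new scala.collection.mutable.LinkedHashSet[Int]
    var probes = 0
    while (probes < capacity) {
      visited += pos
      pos = (pos + i) & mask          // same collision step as in the map above
      i += 1
      probes += 1
    }
    assert(visited.size == capacity)  // all 16 slots reached, none repeated
    println(visited.mkString(" -> "))
  }
}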
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
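For context on the changeValue API in the AppendOnlyMap diff above: the map
passes updateFunc a flag saying whether the key was already present plus its
old value, then stores and returns whatever updateFunc computes, which is
what lets callers such as Spark's Aggregator build per-key state in a single
pass. A hedged sketch of that calling pattern against a stand-in map, since
AppendOnlyMap itself is private[spark] (SimpleMap and ChangeValueDemo are
illustrative names, not code from this commit):

object ChangeValueDemo {
  // Stand-in with the same changeValue signature as AppendOnlyMap.
  class SimpleMap[K, V] {
    private val underlying = scala.collection.mutable.HashMap[K, V]()
    def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
      val newValue = underlying.get(key) match {
        case Some(old) => updateFunc(true, old)                   // key seen before
        case None      => updateFunc(false, null.asInstanceOf[V]) // first sighting
      }
      underlying(key) = newValue
      newValue
    }
    override def toString: String = underlying.mkString(", ")
  }

  def main(args: Array[String]): Unit = {
    val counts = new SimpleMap[String, Int]
    // Word count in the Aggregator style: create on first sight, merge after.
    for (word <- Seq("a", "b", "a", "c", "a")) {
      counts.changeValue(word, (hadValue, old) => if (hadValue) old + 1 else 1)
    }
    println(counts) // (a,3), (b,1), (c,1) in some order
  }
}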