Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/13 06:31:33 UTC

[3/6] git commit: Merge branch 'master' into remove_simpleredundantreturn_scala

Merge branch 'master' into remove_simpleredundantreturn_scala


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/91a56360
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/91a56360
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/91a56360

Branch: refs/heads/master
Commit: 91a563608e301bb243fca3765d569bde65ad747c
Parents: 93a65e5 288a878
Author: Henry Saputra <hs...@apache.org>
Authored: Sun Jan 12 10:34:13 2014 -0800
Committer: Henry Saputra <hs...@apache.org>
Committed: Sun Jan 12 10:34:13 2014 -0800

----------------------------------------------------------------------
 bin/pyspark                                     |   7 +-
 conf/log4j.properties.template                  |   4 +-
 core/pom.xml                                    |  10 +
 .../org/apache/spark/log4j-defaults.properties  |   4 +-
 .../scala/org/apache/spark/Aggregator.scala     |  61 ++--
 .../scala/org/apache/spark/SparkContext.scala   | 122 ++++---
 .../main/scala/org/apache/spark/SparkEnv.scala  |   6 +-
 .../apache/spark/api/java/JavaDoubleRDD.scala   |  12 +-
 .../org/apache/spark/api/java/JavaPairRDD.scala |   6 +
 .../org/apache/spark/api/java/JavaRDD.scala     |   6 +
 .../org/apache/spark/api/java/JavaRDDLike.scala |   6 +
 .../spark/api/java/JavaSparkContext.scala       |  54 ++-
 .../scala/org/apache/spark/deploy/Client.scala  | 151 ++++++++
 .../apache/spark/deploy/ClientArguments.scala   | 117 +++++++
 .../org/apache/spark/deploy/DeployMessage.scala |  52 ++-
 .../apache/spark/deploy/DriverDescription.scala |  29 ++
 .../apache/spark/deploy/client/AppClient.scala  | 201 +++++++++++
 .../spark/deploy/client/AppClientListener.scala |  39 +++
 .../org/apache/spark/deploy/client/Client.scala | 200 -----------
 .../spark/deploy/client/ClientListener.scala    |  39 ---
 .../apache/spark/deploy/client/TestClient.scala |   4 +-
 .../apache/spark/deploy/master/DriverInfo.scala |  36 ++
 .../spark/deploy/master/DriverState.scala       |  33 ++
 .../master/FileSystemPersistenceEngine.scala    |  17 +-
 .../org/apache/spark/deploy/master/Master.scala | 189 +++++++++-
 .../spark/deploy/master/PersistenceEngine.scala |  11 +-
 .../apache/spark/deploy/master/WorkerInfo.scala |  20 +-
 .../master/ZooKeeperPersistenceEngine.scala     |  14 +-
 .../deploy/master/ui/ApplicationPage.scala      |   4 +-
 .../spark/deploy/master/ui/IndexPage.scala      |  56 ++-
 .../spark/deploy/worker/CommandUtils.scala      |  63 ++++
 .../spark/deploy/worker/DriverRunner.scala      | 234 +++++++++++++
 .../spark/deploy/worker/DriverWrapper.scala     |  31 ++
 .../spark/deploy/worker/ExecutorRunner.scala    |  67 +---
 .../org/apache/spark/deploy/worker/Worker.scala |  63 +++-
 .../spark/deploy/worker/WorkerWatcher.scala     |  55 +++
 .../spark/deploy/worker/ui/IndexPage.scala      |  65 +++-
 .../spark/deploy/worker/ui/WorkerWebUI.scala    |  43 ++-
 .../executor/CoarseGrainedExecutorBackend.scala |  27 +-
 .../org/apache/spark/executor/Executor.scala    |   5 +
 .../org/apache/spark/rdd/CoGroupedRDD.scala     |  88 +++--
 .../scala/org/apache/spark/rdd/HadoopRDD.scala  |  37 +-
 .../org/apache/spark/rdd/NewHadoopRDD.scala     |  36 +-
 .../org/apache/spark/rdd/PairRDDFunctions.scala |   7 +-
 .../apache/spark/scheduler/DAGScheduler.scala   |   4 +-
 .../cluster/SparkDeploySchedulerBackend.scala   |  10 +-
 .../org/apache/spark/storage/BlockId.scala      |  12 +-
 .../org/apache/spark/storage/BlockManager.scala |   4 +-
 .../spark/storage/BlockManagerMasterActor.scala |   7 +-
 .../spark/storage/BlockObjectWriter.scala       |   4 +
 .../apache/spark/storage/DiskBlockManager.scala |  11 +-
 .../spark/storage/ShuffleBlockManager.scala     |   2 +-
 .../org/apache/spark/ui/jobs/StagePage.scala    |   2 +-
 .../org/apache/spark/ui/jobs/StageTable.scala   |   2 +-
 .../org/apache/spark/util/AppendOnlyMap.scala   | 237 -------------
 .../apache/spark/util/TimeStampedHashMap.scala  |  17 +-
 .../scala/org/apache/spark/util/Utils.scala     |  28 +-
 .../scala/org/apache/spark/util/Vector.scala    |   8 +
 .../spark/util/collection/AppendOnlyMap.scala   | 297 ++++++++++++++++
 .../util/collection/ExternalAppendOnlyMap.scala | 350 +++++++++++++++++++
 .../collection/SizeTrackingAppendOnlyMap.scala  | 101 ++++++
 .../apache/spark/deploy/JsonProtocolSuite.scala |  40 ++-
 .../spark/deploy/worker/DriverRunnerTest.scala  | 131 +++++++
 .../deploy/worker/ExecutorRunnerTest.scala      |   4 +-
 .../deploy/worker/WorkerWatcherSuite.scala      |  32 ++
 .../apache/spark/util/AppendOnlyMapSuite.scala  | 154 --------
 .../util/SizeTrackingAppendOnlyMapSuite.scala   | 120 +++++++
 .../org/apache/spark/util/VectorSuite.scala     |  44 +++
 .../util/collection/AppendOnlyMapSuite.scala    | 198 +++++++++++
 .../collection/ExternalAppendOnlyMapSuite.scala | 230 ++++++++++++
 docs/configuration.md                           |  25 +-
 docs/python-programming-guide.md                |   5 +-
 docs/running-on-yarn.md                         |  15 +-
 docs/spark-standalone.md                        |  38 +-
 ec2/spark_ec2.py                                |  12 +-
 .../streaming/examples/JavaFlumeEventCount.java |   2 +
 .../streaming/examples/JavaKafkaWordCount.java  |   2 +
 .../examples/JavaNetworkWordCount.java          |  11 +-
 .../streaming/examples/JavaQueueStream.java     |   2 +
 .../spark/examples/DriverSubmissionTest.scala   |  46 +++
 .../streaming/examples/ActorWordCount.scala     |  12 +-
 .../streaming/examples/FlumeEventCount.scala    |   4 +-
 .../streaming/examples/HdfsWordCount.scala      |   3 +-
 .../streaming/examples/KafkaWordCount.scala     |   5 +-
 .../streaming/examples/MQTTWordCount.scala      |   8 +-
 .../streaming/examples/NetworkWordCount.scala   |   7 +-
 .../spark/streaming/examples/QueueStream.scala  |   8 +-
 .../streaming/examples/RawNetworkGrep.scala     |   5 +-
 .../examples/RecoverableNetworkWordCount.scala  | 118 +++++++
 .../examples/StatefulNetworkWordCount.scala     |   2 +
 .../streaming/examples/StreamingExamples.scala  |  21 ++
 .../streaming/examples/TwitterAlgebirdCMS.scala |  10 +-
 .../streaming/examples/TwitterAlgebirdHLL.scala |   9 +-
 .../streaming/examples/TwitterPopularTags.scala |   2 +
 .../streaming/examples/ZeroMQWordCount.scala    |   1 +
 .../examples/clickstream/PageViewStream.scala   |   4 +-
 external/kafka/pom.xml                          |   4 +-
 .../spark/streaming/mqtt/MQTTInputDStream.scala |   2 +-
 .../apache/spark/mllib/recommendation/ALS.scala |  15 +-
 pom.xml                                         |  17 +
 project/SparkBuild.scala                        |  19 +-
 .../org/apache/spark/streaming/Checkpoint.scala | 188 ++++++----
 .../org/apache/spark/streaming/DStream.scala    |  17 +-
 .../spark/streaming/DStreamCheckpointData.scala | 106 +++---
 .../apache/spark/streaming/DStreamGraph.scala   |  38 +-
 .../spark/streaming/StreamingContext.scala      |  75 +++-
 .../api/java/JavaStreamingContext.scala         |  96 ++++-
 .../streaming/dstream/FileInputDStream.scala    |  42 ++-
 .../streaming/scheduler/JobGenerator.scala      |  31 +-
 .../streaming/util/MasterFailureTest.scala      |  55 +--
 .../apache/spark/streaming/JavaAPISuite.java    |  29 +-
 .../spark/streaming/CheckpointSuite.scala       |  10 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  13 +-
 .../spark/deploy/yarn/WorkerLauncher.scala      |  28 +-
 .../cluster/YarnClientSchedulerBackend.scala    |  50 +--
 .../spark/deploy/yarn/ApplicationMaster.scala   |  13 +-
 .../org/apache/spark/deploy/yarn/Client.scala   |   1 +
 .../spark/deploy/yarn/WorkerLauncher.scala      |  28 +-
 118 files changed, 4403 insertions(+), 1231 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/util/Vector.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
index 0000000,d98c7aa..b8c852b
mode 000000,100644..100644
--- a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala
@@@ -1,0 -1,297 +1,297 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  *    http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+ 
+ package org.apache.spark.util.collection
+ 
+ import java.util.{Arrays, Comparator}
+ 
+ /**
+  * A simple open hash table optimized for the append-only use case, where keys
+  * are never removed, but the value for each key may be changed.
+  *
+  * This implementation uses quadratic probing with a power-of-2 hash table
+  * size, which is guaranteed to explore all spaces for each key (see
+  * http://en.wikipedia.org/wiki/Quadratic_probing).
+  *
+  * TODO: Cache the hash values of each key? java.util.HashMap does that.
+  */
+ private[spark]
+ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K,
+   V)] with Serializable {
+   require(initialCapacity <= (1 << 29), "Can't make capacity bigger than 2^29 elements")
+   require(initialCapacity >= 1, "Invalid initial capacity")
+ 
+   private var capacity = nextPowerOf2(initialCapacity)
+   private var mask = capacity - 1
+   private var curSize = 0
+   private var growThreshold = (LOAD_FACTOR * capacity).toInt
+ 
+   // Holds keys and values in the same array for memory locality; specifically, the order of
+   // elements is key0, value0, key1, value1, key2, value2, etc.
+   private var data = new Array[AnyRef](2 * capacity)
+ 
+   // Treat the null key differently so we can use nulls in "data" to represent empty items.
+   private var haveNullValue = false
+   private var nullValue: V = null.asInstanceOf[V]
+ 
+   // Triggered by destructiveSortedIterator; the underlying data array may no longer be used
+   private var destroyed = false
+   private val destructionMessage = "Map state is invalid from destructive sorting!"
+ 
+   private val LOAD_FACTOR = 0.7
+ 
+   /** Get the value for a given key */
+   def apply(key: K): V = {
+     assert(!destroyed, destructionMessage)
+     val k = key.asInstanceOf[AnyRef]
+     if (k.eq(null)) {
+       return nullValue
+     }
+     var pos = rehash(k.hashCode) & mask
+     var i = 1
+     while (true) {
+       val curKey = data(2 * pos)
+       if (k.eq(curKey) || k.equals(curKey)) {
+         return data(2 * pos + 1).asInstanceOf[V]
+       } else if (curKey.eq(null)) {
+         return null.asInstanceOf[V]
+       } else {
+         val delta = i
+         pos = (pos + delta) & mask
+         i += 1
+       }
+     }
 -    return null.asInstanceOf[V]
++    null.asInstanceOf[V]
+   }
+ 
+   /** Set the value for a key */
+   def update(key: K, value: V): Unit = {
+     assert(!destroyed, destructionMessage)
+     val k = key.asInstanceOf[AnyRef]
+     if (k.eq(null)) {
+       if (!haveNullValue) {
+         incrementSize()
+       }
+       nullValue = value
+       haveNullValue = true
+       return
+     }
+     var pos = rehash(key.hashCode) & mask
+     var i = 1
+     while (true) {
+       val curKey = data(2 * pos)
+       if (curKey.eq(null)) {
+         data(2 * pos) = k
+         data(2 * pos + 1) = value.asInstanceOf[AnyRef]
+         incrementSize()  // Since we added a new key
+         return
+       } else if (k.eq(curKey) || k.equals(curKey)) {
+         data(2 * pos + 1) = value.asInstanceOf[AnyRef]
+         return
+       } else {
+         val delta = i
+         pos = (pos + delta) & mask
+         i += 1
+       }
+     }
+   }
+ 
+   /**
+    * Set the value for key to updateFunc(hadValue, oldValue), where oldValue will be the old value
+    * for key, if any, or null otherwise. Returns the newly updated value.
+    */
+   def changeValue(key: K, updateFunc: (Boolean, V) => V): V = {
+     assert(!destroyed, destructionMessage)
+     val k = key.asInstanceOf[AnyRef]
+     if (k.eq(null)) {
+       if (!haveNullValue) {
+         incrementSize()
+       }
+       nullValue = updateFunc(haveNullValue, nullValue)
+       haveNullValue = true
+       return nullValue
+     }
+     var pos = rehash(k.hashCode) & mask
+     var i = 1
+     while (true) {
+       val curKey = data(2 * pos)
+       if (k.eq(curKey) || k.equals(curKey)) {
+         val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V])
+         data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
+         return newValue
+       } else if (curKey.eq(null)) {
+         val newValue = updateFunc(false, null.asInstanceOf[V])
+         data(2 * pos) = k
+         data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
+         incrementSize()
+         return newValue
+       } else {
+         val delta = i
+         pos = (pos + delta) & mask
+         i += 1
+       }
+     }
+     null.asInstanceOf[V] // Never reached but needed to keep compiler happy
+   }
+ 
+   /** Iterator method from Iterable */
+   override def iterator: Iterator[(K, V)] = {
+     assert(!destroyed, destructionMessage)
+     new Iterator[(K, V)] {
+       var pos = -1
+ 
+       /** Get the next value we should return from next(), or null if we're finished iterating */
+       def nextValue(): (K, V) = {
+         if (pos == -1) {    // Treat position -1 as looking at the null value
+           if (haveNullValue) {
+             return (null.asInstanceOf[K], nullValue)
+           }
+           pos += 1
+         }
+         while (pos < capacity) {
+           if (!data(2 * pos).eq(null)) {
+             return (data(2 * pos).asInstanceOf[K], data(2 * pos + 1).asInstanceOf[V])
+           }
+           pos += 1
+         }
+         null
+       }
+ 
+       override def hasNext: Boolean = nextValue() != null
+ 
+       override def next(): (K, V) = {
+         val value = nextValue()
+         if (value == null) {
+           throw new NoSuchElementException("End of iterator")
+         }
+         pos += 1
+         value
+       }
+     }
+   }
+ 
+   override def size: Int = curSize
+ 
+   /** Increase table size by 1, rehashing if necessary */
+   private def incrementSize() {
+     curSize += 1
+     if (curSize > growThreshold) {
+       growTable()
+     }
+   }
+ 
+   /**
+    * Re-hash a value to deal better with hash functions that don't differ in the lower bits.
+    * We use the Murmur Hash 3 finalization step that's also used in fastutil.
+    */
+   private def rehash(h: Int): Int = {
+     it.unimi.dsi.fastutil.HashCommon.murmurHash3(h)
+   }
+ 
+   /** Double the table's size and re-hash everything */
+   protected def growTable() {
+     val newCapacity = capacity * 2
+     if (newCapacity >= (1 << 30)) {
+       // We can't make the table this big because we want an array of 2x
+       // that size for our data, but array sizes are at most Int.MaxValue
+       throw new Exception("Can't make capacity bigger than 2^29 elements")
+     }
+     val newData = new Array[AnyRef](2 * newCapacity)
+     val newMask = newCapacity - 1
+     // Insert all our old values into the new array. Note that because our old keys are
+     // unique, there's no need to check for equality here when we insert.
+     var oldPos = 0
+     while (oldPos < capacity) {
+       if (!data(2 * oldPos).eq(null)) {
+         val key = data(2 * oldPos)
+         val value = data(2 * oldPos + 1)
+         var newPos = rehash(key.hashCode) & newMask
+         var i = 1
+         var keepGoing = true
+         while (keepGoing) {
+           val curKey = newData(2 * newPos)
+           if (curKey.eq(null)) {
+             newData(2 * newPos) = key
+             newData(2 * newPos + 1) = value
+             keepGoing = false
+           } else {
+             val delta = i
+             newPos = (newPos + delta) & newMask
+             i += 1
+           }
+         }
+       }
+       oldPos += 1
+     }
+     data = newData
+     capacity = newCapacity
+     mask = newMask
+     growThreshold = (LOAD_FACTOR * newCapacity).toInt
+   }
+ 
+   private def nextPowerOf2(n: Int): Int = {
+     val highBit = Integer.highestOneBit(n)
+     if (highBit == n) n else highBit << 1
+   }
+ 
+   /**
+    * Return an iterator of the map in sorted order. This provides a way to sort the map without
+    * using additional memory, at the expense of destroying the validity of the map.
+    */
+   def destructiveSortedIterator(cmp: Comparator[(K, V)]): Iterator[(K, V)] = {
+     destroyed = true
+     // Pack KV pairs into the front of the underlying array
+     var keyIndex, newIndex = 0
+     while (keyIndex < capacity) {
+       if (data(2 * keyIndex) != null) {
+         data(newIndex) = (data(2 * keyIndex), data(2 * keyIndex + 1))
+         newIndex += 1
+       }
+       keyIndex += 1
+     }
+     assert(curSize == newIndex + (if (haveNullValue) 1 else 0))
+ 
+     // Sort by the given ordering
+     val rawOrdering = new Comparator[AnyRef] {
+       def compare(x: AnyRef, y: AnyRef): Int = {
+         cmp.compare(x.asInstanceOf[(K, V)], y.asInstanceOf[(K, V)])
+       }
+     }
+     Arrays.sort(data, 0, newIndex, rawOrdering)
+ 
+     new Iterator[(K, V)] {
+       var i = 0
+       var nullValueReady = haveNullValue
+       def hasNext: Boolean = (i < newIndex || nullValueReady)
+       def next(): (K, V) = {
+         if (nullValueReady) {
+           nullValueReady = false
+           (null.asInstanceOf[K], nullValue)
+         } else {
+           val item = data(i).asInstanceOf[(K, V)]
+           i += 1
+           item
+         }
+       }
+     }
+   }
+ 
+   /**
+    * Return whether the next insert will cause the map to grow
+    */
+   def atGrowThreshold: Boolean = curSize == growThreshold
+ }
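
The new collection.AppendOnlyMap above is driven mainly through changeValue,
which passes (hadValue, oldValue) to the update function and stores the result
back into the table. A minimal usage sketch follows (not part of this commit;
the object name and sample data are illustrative, and it assumes compilation
inside the org.apache.spark.util.collection package since the class is
private[spark]):

package org.apache.spark.util.collection

import java.util.Comparator

// Sketch only: word-count-style aggregation with AppendOnlyMap.
object AppendOnlyMapSketch {
  def main(args: Array[String]): Unit = {
    val counts = new AppendOnlyMap[String, Int]()
    val words = Seq("spark", "streaming", "spark", "shuffle", "spark")

    // changeValue receives (hadValue, oldValue) and returns the new value for the key.
    words.foreach { w =>
      counts.changeValue(w, (hadValue, old) => if (hadValue) old + 1 else 1)
    }

    println(counts("spark"))         // 3
    counts.iterator.foreach(println) // unordered (key, value) pairs

    // Sorts in place by reusing the underlying array; the map is invalid afterwards.
    val byKey = new Comparator[(String, Int)] {
      def compare(a: (String, Int), b: (String, Int)): Int = a._1.compareTo(b._1)
    }
    counts.destructiveSortedIterator(byKey).foreach(println)
  }
}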

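The class doc above claims that quadratic probing over a power-of-2 table
"is guaranteed to explore all spaces for each key". Below is a standalone
sketch (not part of the commit; the object and method names are made up)
that checks this property for the same probe step used in apply() and
update(): advance by 1, then 2, then 3, with the result masked by capacity - 1.

// Sketch only: verifies that the triangular-number probe sequence covers
// every slot of a power-of-2 table before any slot repeats.
object ProbeCoverageCheck {
  def visitsAllSlots(capacity: Int, start: Int): Boolean = {
    require(Integer.bitCount(capacity) == 1, "capacity must be a power of 2")
    val mask = capacity - 1
    val seen = new Array[Boolean](capacity)
    var pos = start & mask
    var i = 1
    var visited = 0
    while (visited < capacity && !seen(pos)) {
      seen(pos) = true
      visited += 1
      pos = (pos + i) & mask // same step as the probing loops above
      i += 1
    }
    visited == capacity
  }

  def main(args: Array[String]): Unit = {
    // Every power-of-2 capacity up to 1024 and every start position should be covered.
    val ok = (0 to 10).map(1 << _).forall(cap => (0 until cap).forall(visitsAllSlots(cap, _)))
    println(s"all slots visited for all tested capacities: $ok") // expected: true
  }
}
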
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/91a56360/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------