Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/10 03:38:30 UTC
[25/37] git commit: Merge remote-tracking branch 'apache-github/master' into standalone-driver
Merge remote-tracking branch 'apache-github/master' into standalone-driver
Conflicts:
core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
core/src/main/scala/org/apache/spark/deploy/master/Master.scala
core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c0498f92
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c0498f92
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c0498f92
Branch: refs/heads/master
Commit: c0498f9265e32ba82bcf48bf0df0f29c6cfea587
Parents: f236ddd e4d6057
Author: Patrick Wendell <pw...@gmail.com>
Authored: Mon Jan 6 17:29:21 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Mon Jan 6 17:29:21 2014 -0800
----------------------------------------------------------------------
.gitignore | 2 +
README.md | 28 +-
assembly/lib/PY4J_LICENSE.txt | 27 -
assembly/lib/PY4J_VERSION.txt | 1 -
assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar | Bin 103286 -> 0 bytes
assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom | 9 -
.../net/sf/py4j/py4j/maven-metadata-local.xml | 12 -
assembly/pom.xml | 14 +-
assembly/src/main/assembly/assembly.xml | 11 +-
bin/compute-classpath.cmd | 2 +-
bin/compute-classpath.sh | 2 +-
bin/pyspark | 70 ++
bin/pyspark.cmd | 23 +
bin/pyspark2.cmd | 55 +
bin/run-example | 91 ++
bin/run-example.cmd | 23 +
bin/run-example2.cmd | 61 ++
bin/slaves.sh | 91 --
bin/spark-class | 154 +++
bin/spark-class.cmd | 23 +
bin/spark-class2.cmd | 85 ++
bin/spark-config.sh | 36 -
bin/spark-daemon.sh | 183 ----
bin/spark-daemons.sh | 35 -
bin/spark-shell | 102 ++
bin/spark-shell.cmd | 23 +
bin/start-all.sh | 34 -
bin/start-master.sh | 52 -
bin/start-slave.sh | 35 -
bin/start-slaves.sh | 48 -
bin/stop-all.sh | 32 -
bin/stop-master.sh | 27 -
bin/stop-slaves.sh | 35 -
core/pom.xml | 422 ++++----
.../apache/spark/network/netty/FileClient.java | 32 +-
.../netty/FileClientChannelInitializer.java | 6 +-
.../spark/network/netty/FileClientHandler.java | 12 +-
.../apache/spark/network/netty/FileServer.java | 29 +-
.../netty/FileServerChannelInitializer.java | 3 +-
.../spark/network/netty/FileServerHandler.java | 18 +-
.../org/apache/spark/default-log4j.properties | 8 +
.../scala/org/apache/spark/Accumulators.scala | 8 +-
.../scala/org/apache/spark/HttpServer.scala | 1 +
.../main/scala/org/apache/spark/Logging.scala | 41 +-
.../org/apache/spark/MapOutputTracker.scala | 11 +-
.../scala/org/apache/spark/Partitioner.scala | 4 +-
.../main/scala/org/apache/spark/SparkConf.scala | 190 ++++
.../scala/org/apache/spark/SparkContext.scala | 297 ++++--
.../main/scala/org/apache/spark/SparkEnv.scala | 54 +-
.../scala/org/apache/spark/TaskEndReason.scala | 2 -
.../org/apache/spark/api/java/JavaPairRDD.scala | 36 +
.../org/apache/spark/api/java/JavaRDDLike.scala | 11 +
.../spark/api/java/JavaSparkContext.scala | 56 +-
.../org/apache/spark/api/python/PythonRDD.scala | 4 +-
.../org/apache/spark/broadcast/Broadcast.scala | 8 +-
.../spark/broadcast/BroadcastFactory.scala | 4 +-
.../apache/spark/broadcast/HttpBroadcast.scala | 43 +-
.../spark/broadcast/TorrentBroadcast.scala | 45 +-
.../spark/deploy/FaultToleranceTest.scala | 4 +-
.../apache/spark/deploy/LocalSparkCluster.scala | 7 +-
.../apache/spark/deploy/SparkHadoopUtil.scala | 14 +-
.../apache/spark/deploy/client/AppClient.scala | 21 +-
.../apache/spark/deploy/client/TestClient.scala | 10 +-
.../org/apache/spark/deploy/master/Master.scala | 42 +-
.../spark/deploy/master/MasterArguments.scala | 11 +-
.../deploy/master/SparkZooKeeperSession.scala | 7 +-
.../master/ZooKeeperLeaderElectionAgent.scala | 9 +-
.../master/ZooKeeperPersistenceEngine.scala | 8 +-
.../spark/deploy/master/ui/MasterWebUI.scala | 2 +-
.../org/apache/spark/deploy/worker/Worker.scala | 21 +-
.../spark/deploy/worker/ui/WorkerWebUI.scala | 12 +-
.../executor/CoarseGrainedExecutorBackend.scala | 6 +-
.../org/apache/spark/executor/Executor.scala | 47 +-
.../org/apache/spark/io/CompressionCodec.scala | 19 +-
.../apache/spark/metrics/MetricsConfig.scala | 1 -
.../apache/spark/metrics/MetricsSystem.scala | 11 +-
.../spark/network/ConnectionManager.scala | 22 +-
.../org/apache/spark/network/ReceiverTest.scala | 12 +-
.../org/apache/spark/network/SenderTest.scala | 16 +-
.../spark/network/netty/ShuffleCopier.scala | 10 +-
.../org/apache/spark/rdd/CheckpointRDD.scala | 37 +-
.../org/apache/spark/rdd/CoGroupedRDD.scala | 2 +-
.../org/apache/spark/rdd/PairRDDFunctions.scala | 42 +
.../spark/rdd/PartitionerAwareUnionRDD.scala | 110 ++
.../main/scala/org/apache/spark/rdd/RDD.scala | 19 +-
.../apache/spark/rdd/RDDCheckpointData.scala | 17 +-
.../org/apache/spark/rdd/ShuffledRDD.scala | 2 +-
.../org/apache/spark/rdd/SubtractedRDD.scala | 2 +-
.../apache/spark/scheduler/DAGScheduler.scala | 47 +-
.../spark/scheduler/InputFormatInfo.scala | 14 +-
.../org/apache/spark/scheduler/JobLogger.scala | 6 +-
.../scala/org/apache/spark/scheduler/Pool.scala | 4 -
.../org/apache/spark/scheduler/ResultTask.scala | 4 +-
.../apache/spark/scheduler/Schedulable.scala | 1 -
.../spark/scheduler/SchedulableBuilder.scala | 6 +-
.../spark/scheduler/SchedulerBackend.scala | 3 -
.../apache/spark/scheduler/ShuffleMapTask.scala | 6 +-
.../apache/spark/scheduler/SparkListener.scala | 20 +-
.../spark/scheduler/SparkListenerBus.scala | 2 +-
.../spark/scheduler/TaskResultGetter.scala | 3 +-
.../spark/scheduler/TaskSchedulerImpl.scala | 32 +-
.../apache/spark/scheduler/TaskSetManager.scala | 33 +-
.../cluster/CoarseGrainedSchedulerBackend.scala | 20 +-
.../cluster/SimrSchedulerBackend.scala | 4 +-
.../cluster/SparkDeploySchedulerBackend.scala | 8 +-
.../mesos/CoarseMesosSchedulerBackend.scala | 18 +-
.../cluster/mesos/MesosSchedulerBackend.scala | 12 +-
.../spark/scheduler/local/LocalBackend.scala | 3 +-
.../spark/serializer/JavaSerializer.scala | 3 +-
.../spark/serializer/KryoSerializer.scala | 14 +-
.../apache/spark/serializer/Serializer.scala | 3 +
.../spark/serializer/SerializerManager.scala | 23 +-
.../spark/storage/BlockFetcherIterator.scala | 4 +-
.../org/apache/spark/storage/BlockManager.scala | 58 +-
.../spark/storage/BlockManagerMaster.scala | 11 +-
.../spark/storage/BlockManagerMasterActor.scala | 16 +-
.../spark/storage/BlockManagerWorker.scala | 3 -
.../spark/storage/BlockMessageArray.scala | 2 -
.../spark/storage/BlockObjectWriter.scala | 5 +-
.../apache/spark/storage/DiskBlockManager.scala | 2 +-
.../spark/storage/ShuffleBlockManager.scala | 10 +-
.../spark/storage/StoragePerfTester.scala | 2 +-
.../apache/spark/storage/ThreadingTest.scala | 8 +-
.../scala/org/apache/spark/ui/SparkUI.scala | 4 +-
.../apache/spark/ui/UIWorkloadGenerator.scala | 21 +-
.../org/apache/spark/ui/env/EnvironmentUI.scala | 15 +-
.../spark/ui/jobs/JobProgressListener.scala | 15 +-
.../scala/org/apache/spark/util/AkkaUtils.scala | 39 +-
.../org/apache/spark/util/MetadataCleaner.scala | 35 +-
.../spark/util/SerializableHyperLogLog.scala | 50 +
.../org/apache/spark/util/SizeEstimator.scala | 14 +-
.../scala/org/apache/spark/util/Utils.scala | 25 +-
core/src/test/resources/spark.conf | 8 +
.../test/resources/uncommons-maths-1.2.2.jar | Bin 49019 -> 0 bytes
.../org/apache/spark/CheckpointSuite.scala | 363 ++++---
.../scala/org/apache/spark/DriverSuite.scala | 8 +-
.../org/apache/spark/FileServerSuite.scala | 108 +-
.../scala/org/apache/spark/JavaAPISuite.java | 36 +-
.../apache/spark/MapOutputTrackerSuite.scala | 16 +-
.../org/apache/spark/SharedSparkContext.scala | 4 +-
.../scala/org/apache/spark/SparkConfSuite.scala | 110 ++
.../deploy/worker/ExecutorRunnerTest.scala | 4 +-
.../apache/spark/io/CompressionCodecSuite.scala | 8 +-
.../spark/metrics/MetricsSystemSuite.scala | 8 +-
.../spark/rdd/PairRDDFunctionsSuite.scala | 34 +
.../scala/org/apache/spark/rdd/RDDSuite.scala | 40 +
.../spark/scheduler/ClusterSchedulerSuite.scala | 2 +-
.../spark/scheduler/DAGSchedulerSuite.scala | 23 +-
.../apache/spark/scheduler/JobLoggerSuite.scala | 4 +-
.../spark/scheduler/SparkListenerSuite.scala | 2 +-
.../spark/scheduler/TaskResultGetterSuite.scala | 6 +-
.../spark/scheduler/TaskSetManagerSuite.scala | 4 +-
.../spark/serializer/KryoSerializerSuite.scala | 33 +-
.../spark/storage/BlockManagerSuite.scala | 97 +-
.../spark/storage/DiskBlockManagerSuite.scala | 18 +-
.../apache/spark/util/SizeEstimatorSuite.scala | 2 +-
data/kmeans_data.txt | 6 +
data/lr_data.txt | 1000 ++++++++++++++++++
data/pagerank_data.txt | 6 +
docs/README.md | 4 +-
docs/_config.yml | 2 +-
docs/_plugins/copy_api_dirs.rb | 4 +-
docs/api.md | 2 +-
docs/bagel-programming-guide.md | 4 +-
docs/building-with-maven.md | 14 +-
docs/configuration.md | 74 +-
docs/css/bootstrap.min.css | 2 +-
docs/hadoop-third-party-distributions.md | 2 +-
docs/index.md | 16 +-
docs/java-programming-guide.md | 4 +-
docs/job-scheduling.md | 21 +-
docs/mllib-guide.md | 2 +-
docs/monitoring.md | 3 +-
docs/python-programming-guide.md | 45 +-
docs/quick-start.md | 62 +-
docs/running-on-mesos.md | 19 +-
docs/running-on-yarn.md | 17 +-
docs/scala-programming-guide.md | 20 +-
docs/spark-debugger.md | 2 +-
docs/spark-standalone.md | 35 +-
docs/streaming-programming-guide.md | 8 +-
docs/tuning.md | 21 +-
ec2/spark_ec2.py | 2 +-
.../org/apache/spark/examples/JavaHdfsLR.java | 2 +-
.../org/apache/spark/examples/JavaKMeans.java | 2 +-
.../org/apache/spark/examples/JavaLogQuery.java | 2 +-
.../org/apache/spark/examples/JavaPageRank.java | 3 +-
.../org/apache/spark/examples/JavaSparkPi.java | 2 +-
.../java/org/apache/spark/examples/JavaTC.java | 2 +-
.../apache/spark/examples/JavaWordCount.java | 2 +-
.../apache/spark/mllib/examples/JavaALS.java | 2 +-
.../apache/spark/mllib/examples/JavaKMeans.java | 2 +-
.../org/apache/spark/mllib/examples/JavaLR.java | 2 +-
.../streaming/examples/JavaFlumeEventCount.java | 3 +-
.../streaming/examples/JavaKafkaWordCount.java | 7 +-
.../examples/JavaNetworkWordCount.java | 3 +-
.../streaming/examples/JavaQueueStream.java | 2 +-
.../apache/spark/examples/BroadcastTest.scala | 2 +-
.../spark/examples/ExceptionHandlingTest.scala | 2 +-
.../org/apache/spark/examples/GroupByTest.scala | 2 +-
.../org/apache/spark/examples/HBaseTest.scala | 2 +-
.../org/apache/spark/examples/HdfsTest.scala | 2 +-
.../org/apache/spark/examples/LogQuery.scala | 2 +-
.../spark/examples/MultiBroadcastTest.scala | 2 +-
.../examples/SimpleSkewedGroupByTest.scala | 2 +-
.../spark/examples/SkewedGroupByTest.scala | 2 +-
.../org/apache/spark/examples/SparkALS.scala | 2 +-
.../org/apache/spark/examples/SparkHdfsLR.scala | 2 +-
.../org/apache/spark/examples/SparkKMeans.scala | 2 +-
.../org/apache/spark/examples/SparkLR.scala | 2 +-
.../apache/spark/examples/SparkPageRank.scala | 2 +-
.../org/apache/spark/examples/SparkPi.scala | 2 +-
.../org/apache/spark/examples/SparkTC.scala | 2 +-
.../examples/bagel/WikipediaPageRank.scala | 10 +-
.../bagel/WikipediaPageRankStandalone.scala | 8 +-
.../streaming/examples/ActorWordCount.scala | 9 +-
.../streaming/examples/FlumeEventCount.scala | 2 +-
.../streaming/examples/HdfsWordCount.scala | 4 +-
.../streaming/examples/KafkaWordCount.scala | 4 +-
.../streaming/examples/MQTTWordCount.scala | 6 +-
.../streaming/examples/NetworkWordCount.scala | 4 +-
.../spark/streaming/examples/QueueStream.scala | 2 +-
.../streaming/examples/RawNetworkGrep.scala | 2 +-
.../examples/StatefulNetworkWordCount.scala | 4 +-
.../streaming/examples/TwitterAlgebirdCMS.scala | 2 +-
.../streaming/examples/TwitterAlgebirdHLL.scala | 2 +-
.../streaming/examples/TwitterPopularTags.scala | 2 +-
.../streaming/examples/ZeroMQWordCount.scala | 6 +-
.../clickstream/PageViewGenerator.scala | 4 +-
.../examples/clickstream/PageViewStream.scala | 6 +-
kmeans_data.txt | 6 -
lr_data.txt | 1000 ------------------
make-distribution.sh | 24 +-
.../spark/mllib/api/python/PythonMLLibAPI.scala | 232 ++++
.../spark/mllib/classification/NaiveBayes.scala | 119 +++
.../apache/spark/mllib/recommendation/ALS.scala | 13 +-
.../mllib/classification/NaiveBayesSuite.scala | 108 ++
new-yarn/pom.xml | 161 ---
.../spark/deploy/yarn/ApplicationMaster.scala | 446 --------
.../yarn/ApplicationMasterArguments.scala | 94 --
.../org/apache/spark/deploy/yarn/Client.scala | 519 ---------
.../spark/deploy/yarn/ClientArguments.scala | 149 ---
.../yarn/ClientDistributedCacheManager.scala | 228 ----
.../spark/deploy/yarn/WorkerLauncher.scala | 222 ----
.../spark/deploy/yarn/WorkerRunnable.scala | 209 ----
.../deploy/yarn/YarnAllocationHandler.scala | 687 ------------
.../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 43 -
.../cluster/YarnClientClusterScheduler.scala | 47 -
.../cluster/YarnClientSchedulerBackend.scala | 109 --
.../cluster/YarnClusterScheduler.scala | 55 -
.../ClientDistributedCacheManagerSuite.scala | 220 ----
pagerank_data.txt | 6 -
pom.xml | 69 +-
project/SparkBuild.scala | 52 +-
pyspark | 70 --
pyspark.cmd | 23 -
pyspark2.cmd | 55 -
python/epydoc.conf | 2 +-
python/lib/py4j-0.8.1-src.zip | Bin 0 -> 37662 bytes
python/lib/py4j0.7.egg | Bin 191756 -> 0 bytes
python/pyspark/__init__.py | 34 +-
python/pyspark/broadcast.py | 11 +
python/pyspark/conf.py | 171 +++
python/pyspark/context.py | 68 +-
python/pyspark/java_gateway.py | 4 +-
python/pyspark/mllib/__init__.py | 20 +
python/pyspark/mllib/_common.py | 227 ++++
python/pyspark/mllib/classification.py | 86 ++
python/pyspark/mllib/clustering.py | 79 ++
python/pyspark/mllib/recommendation.py | 74 ++
python/pyspark/mllib/regression.py | 110 ++
python/pyspark/rdd.py | 66 +-
python/pyspark/serializers.py | 2 +-
python/pyspark/shell.py | 4 +-
python/pyspark/tests.py | 4 +-
python/run-tests | 3 +-
repl-bin/src/deb/bin/run | 3 +-
repl/pom.xml | 1 -
.../org/apache/spark/repl/SparkILoop.scala | 19 +-
.../org/apache/spark/repl/SparkIMain.scala | 7 +-
run-example | 91 --
run-example.cmd | 23 -
run-example2.cmd | 61 --
sbin/slaves.sh | 91 ++
sbin/spark-config.sh | 36 +
sbin/spark-daemon.sh | 183 ++++
sbin/spark-daemons.sh | 35 +
sbin/spark-executor | 23 +
sbin/start-all.sh | 34 +
sbin/start-master.sh | 52 +
sbin/start-slave.sh | 35 +
sbin/start-slaves.sh | 48 +
sbin/stop-all.sh | 32 +
sbin/stop-master.sh | 27 +
sbin/stop-slaves.sh | 35 +
sbt/sbt | 43 -
sbt/sbt-launch-0.11.3-2.jar | Bin 1096763 -> 0 bytes
sbt/sbt.cmd | 25 -
spark-class | 149 ---
spark-class.cmd | 23 -
spark-class2.cmd | 85 --
spark-executor | 22 -
spark-shell | 102 --
spark-shell.cmd | 22 -
.../org/apache/spark/streaming/Checkpoint.scala | 66 +-
.../org/apache/spark/streaming/DStream.scala | 4 +-
.../apache/spark/streaming/DStreamGraph.scala | 1 -
.../spark/streaming/PairDStreamFunctions.scala | 13 +-
.../spark/streaming/StreamingContext.scala | 95 +-
.../streaming/api/java/JavaPairDStream.scala | 18 +-
.../api/java/JavaStreamingContext.scala | 45 +-
.../streaming/dstream/FileInputDStream.scala | 153 +--
.../streaming/dstream/NetworkInputDStream.scala | 8 +-
.../streaming/dstream/ShuffledDStream.scala | 9 +-
.../streaming/dstream/WindowedDStream.scala | 16 +-
.../streaming/scheduler/JobGenerator.scala | 71 +-
.../streaming/scheduler/JobScheduler.scala | 6 +-
.../streaming/util/MasterFailureTest.scala | 3 -
.../spark/streaming/util/RawTextSender.scala | 4 +-
.../apache/spark/streaming/JavaAPISuite.java | 10 +-
.../spark/streaming/BasicOperationsSuite.scala | 8 +-
.../spark/streaming/CheckpointSuite.scala | 59 +-
.../spark/streaming/InputStreamsSuite.scala | 20 +-
.../apache/spark/streaming/TestSuiteBase.scala | 34 +-
.../spark/streaming/WindowOperationsSuite.scala | 5 +-
yarn/README.md | 12 +
yarn/alpha/pom.xml | 32 +
.../spark/deploy/yarn/ApplicationMaster.scala | 464 ++++++++
.../org/apache/spark/deploy/yarn/Client.scala | 509 +++++++++
.../spark/deploy/yarn/WorkerLauncher.scala | 250 +++++
.../spark/deploy/yarn/WorkerRunnable.scala | 236 +++++
.../deploy/yarn/YarnAllocationHandler.scala | 680 ++++++++++++
.../yarn/ApplicationMasterArguments.scala | 94 ++
.../spark/deploy/yarn/ClientArguments.scala | 150 +++
.../yarn/ClientDistributedCacheManager.scala | 228 ++++
.../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 43 +
.../cluster/YarnClientClusterScheduler.scala | 48 +
.../cluster/YarnClientSchedulerBackend.scala | 110 ++
.../cluster/YarnClusterScheduler.scala | 56 +
.../ClientDistributedCacheManagerSuite.scala | 220 ++++
yarn/pom.xml | 84 +-
.../spark/deploy/yarn/ApplicationMaster.scala | 477 ---------
.../yarn/ApplicationMasterArguments.scala | 94 --
.../org/apache/spark/deploy/yarn/Client.scala | 503 ---------
.../spark/deploy/yarn/ClientArguments.scala | 146 ---
.../yarn/ClientDistributedCacheManager.scala | 228 ----
.../spark/deploy/yarn/WorkerLauncher.scala | 243 -----
.../spark/deploy/yarn/WorkerRunnable.scala | 235 ----
.../deploy/yarn/YarnAllocationHandler.scala | 673 ------------
.../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 43 -
.../cluster/YarnClientClusterScheduler.scala | 47 -
.../cluster/YarnClientSchedulerBackend.scala | 109 --
.../cluster/YarnClusterScheduler.scala | 59 --
.../ClientDistributedCacheManagerSuite.scala | 220 ----
yarn/stable/pom.xml | 32 +
.../spark/deploy/yarn/ApplicationMaster.scala | 432 ++++++++
.../org/apache/spark/deploy/yarn/Client.scala | 525 +++++++++
.../spark/deploy/yarn/WorkerLauncher.scala | 230 ++++
.../spark/deploy/yarn/WorkerRunnable.scala | 210 ++++
.../deploy/yarn/YarnAllocationHandler.scala | 695 ++++++++++++
360 files changed, 12080 insertions(+), 10645 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
index 4779c75,0000000..1415e2f
mode 100644,000000..100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
@@@ -1,192 -1,0 +1,201 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.client
+
+import java.util.concurrent.TimeoutException
+
- import scala.concurrent.duration._
+import scala.concurrent.Await
++import scala.concurrent.duration._
+
+import akka.actor._
+import akka.pattern.ask
- import akka.remote.{RemotingLifecycleEvent, DisassociatedEvent}
++import akka.remote.{AssociationErrorEvent, DisassociatedEvent, RemotingLifecycleEvent}
+
- import org.apache.spark.{SparkException, Logging}
++import org.apache.spark.{Logging, SparkConf, SparkException}
+import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
+import org.apache.spark.deploy.DeployMessages._
+import org.apache.spark.deploy.master.Master
+import org.apache.spark.util.AkkaUtils
+
-
+/**
+ * Interface allowing applications to speak with a Spark deploy cluster. Takes a master URL,
+ * an app description, and a listener for cluster events, and calls back the listener when various
+ * events occur.
+ *
+ * @param masterUrls Each url should look like spark://host:port.
+ */
+private[spark] class AppClient(
+ actorSystem: ActorSystem,
+ masterUrls: Array[String],
+ appDescription: ApplicationDescription,
- listener: AppClientListener)
++ listener: AppClientListener,
++ conf: SparkConf)
+ extends Logging {
+
+ val REGISTRATION_TIMEOUT = 20.seconds
+ val REGISTRATION_RETRIES = 3
+
+ var masterAddress: Address = null
+ var actor: ActorRef = null
+ var appId: String = null
+ var registered = false
+ var activeMasterUrl: String = null
+
+ class ClientActor extends Actor with Logging {
+ var master: ActorSelection = null
+ var alreadyDisconnected = false // To avoid calling listener.disconnected() multiple times
+ var alreadyDead = false // To avoid calling listener.dead() multiple times
+
+ override def preStart() {
+ context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
+ try {
+ registerWithMaster()
+ } catch {
+ case e: Exception =>
+ logWarning("Failed to connect to master", e)
+ markDisconnected()
+ context.stop(self)
+ }
+ }
+
+ def tryRegisterAllMasters() {
+ for (masterUrl <- masterUrls) {
+ logInfo("Connecting to master " + masterUrl + "...")
+ val actor = context.actorSelection(Master.toAkkaUrl(masterUrl))
+ actor ! RegisterApplication(appDescription)
+ }
+ }
+
+ def registerWithMaster() {
+ tryRegisterAllMasters()
+
+ import context.dispatcher
+ var retries = 0
+ lazy val retryTimer: Cancellable =
+ context.system.scheduler.schedule(REGISTRATION_TIMEOUT, REGISTRATION_TIMEOUT) {
+ retries += 1
+ if (registered) {
+ retryTimer.cancel()
+ } else if (retries >= REGISTRATION_RETRIES) {
+ logError("All masters are unresponsive! Giving up.")
+ markDead()
+ } else {
+ tryRegisterAllMasters()
+ }
+ }
+ retryTimer // start timer
+ }
+
+ def changeMaster(url: String) {
+ activeMasterUrl = url
+ master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl))
+ masterAddress = activeMasterUrl match {
+ case Master.sparkUrlRegex(host, port) =>
+ Address("akka.tcp", Master.systemName, host, port.toInt)
+ case x =>
+ throw new SparkException("Invalid spark URL: " + x)
+ }
+ }
+
++ private def isPossibleMaster(remoteUrl: Address) = {
++ masterUrls.map(s => Master.toAkkaUrl(s))
++ .map(u => AddressFromURIString(u).hostPort)
++ .contains(remoteUrl.hostPort)
++ }
++
+ override def receive = {
+ case RegisteredApplication(appId_, masterUrl) =>
+ appId = appId_
+ registered = true
+ changeMaster(masterUrl)
+ listener.connected(appId)
+
+ case ApplicationRemoved(message) =>
+ logError("Master removed our application: %s; stopping client".format(message))
+ markDisconnected()
+ context.stop(self)
+
+ case ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) =>
+ val fullId = appId + "/" + id
+ logInfo("Executor added: %s on %s (%s) with %d cores".format(fullId, workerId, hostPort, cores))
+ listener.executorAdded(fullId, workerId, hostPort, cores, memory)
+
+ case ExecutorUpdated(id, state, message, exitStatus) =>
+ val fullId = appId + "/" + id
+ val messageText = message.map(s => " (" + s + ")").getOrElse("")
+ logInfo("Executor updated: %s is now %s%s".format(fullId, state, messageText))
+ if (ExecutorState.isFinished(state)) {
+ listener.executorRemoved(fullId, message.getOrElse(""), exitStatus)
+ }
+
+ case MasterChanged(masterUrl, masterWebUiUrl) =>
+ logInfo("Master has changed, new master is at " + masterUrl)
+ changeMaster(masterUrl)
+ alreadyDisconnected = false
+ sender ! MasterChangeAcknowledged(appId)
+
+ case DisassociatedEvent(_, address, _) if address == masterAddress =>
+ logWarning(s"Connection to $address failed; waiting for master to reconnect...")
+ markDisconnected()
+
++ case AssociationErrorEvent(cause, _, address, _) if isPossibleMaster(address) =>
++ logWarning(s"Could not connect to $address: $cause")
++
+ case StopAppClient =>
+ markDead()
+ sender ! true
+ context.stop(self)
+ }
+
+ /**
+ * Notify the listener that we disconnected, if we hadn't already done so before.
+ */
+ def markDisconnected() {
+ if (!alreadyDisconnected) {
+ listener.disconnected()
+ alreadyDisconnected = true
+ }
+ }
+
+ def markDead() {
+ if (!alreadyDead) {
+ listener.dead()
+ alreadyDead = true
+ }
+ }
+ }
+
+ def start() {
+ // Just launch an actor; it will call back into the listener.
+ actor = actorSystem.actorOf(Props(new ClientActor))
+ }
+
+ def stop() {
+ if (actor != null) {
+ try {
- val timeout = AkkaUtils.askTimeout
++ val timeout = AkkaUtils.askTimeout(conf)
+ val future = actor.ask(StopAppClient)(timeout)
+ Await.result(future, timeout)
+ } catch {
+ case e: TimeoutException =>
+ logInfo("Stop request to Master timed out; it may already be shut down.")
+ }
+ actor = null
+ }
+ }
+}
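
For context on the merged result above: AppClient now takes a SparkConf as a fifth constructor argument, which stop() passes to AkkaUtils.askTimeout(conf). A minimal usage sketch follows, assuming a stub listener whose method signatures are inferred from the call sites in the hunk; the master URL and names are illustrative, not from this commit:

import akka.actor.ActorSystem
import org.apache.spark.SparkConf
import org.apache.spark.deploy.{ApplicationDescription, Command}
import org.apache.spark.deploy.client.{AppClient, AppClientListener}
import org.apache.spark.util.AkkaUtils

object AppClientSketch {
  def main(args: Array[String]) {
    val conf = new SparkConf
    // Client-side actor system, created the same way TestClient does below.
    val (actorSystem, _) = AkkaUtils.createActorSystem("sketch", "localhost", 0, conf = conf)

    // Stub listener; the method set mirrors the callbacks AppClient invokes above.
    val listener = new AppClientListener {
      def connected(appId: String) { println("connected as " + appId) }
      def disconnected() { println("disconnected from master") }
      def dead() { println("all masters unresponsive") }
      def executorAdded(id: String, workerId: String, hostPort: String, cores: Int, memory: Int) {}
      def executorRemoved(id: String, message: String, exitStatus: Option[Int]) {}
    }

    val desc = new ApplicationDescription("Sketch", 1, 512,
      Command("spark.deploy.client.TestExecutor", Seq(), Map()), "dummy-spark-home", "ignored")

    // The new fifth argument is the SparkConf read back in stop().
    val client = new AppClient(actorSystem, Array("spark://localhost:7077"), desc, listener, conf)
    client.start()
  }
}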
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
index 9359bf1,ef649fd..62567a2
--- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
@@@ -45,11 -45,13 +45,13 @@@ private[spark] object TestClient
def main(args: Array[String]) {
val url = args(0)
- val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0)
+ val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0,
+ conf = new SparkConf)
val desc = new ApplicationDescription(
- "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()), "dummy-spark-home", "ignored")
+ "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()),
+ "dummy-spark-home", "ignored")
val listener = new TestListener
- val client = new AppClient(actorSystem, Array(url), desc, listener)
- val client = new Client(actorSystem, Array(url), desc, listener, new SparkConf)
++ val client = new AppClient(actorSystem, Array(url), desc, listener, new SparkConf)
client.start()
actorSystem.awaitTermination()
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index 29f20da,7b696cf..93fa700
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@@ -30,25 -29,27 +30,28 @@@ import akka.pattern.ask
import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
import akka.serialization.SerializationExtension
- import org.apache.spark.{Logging, SparkException}
-import org.apache.spark.{SparkConf, SparkContext, Logging, SparkException}
-import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
++
++import org.apache.spark.{SparkConf, Logging, SparkException}
+import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState}
import org.apache.spark.deploy.DeployMessages._
import org.apache.spark.deploy.master.MasterMessages._
import org.apache.spark.deploy.master.ui.MasterWebUI
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.util.{AkkaUtils, Utils}
+import org.apache.spark.deploy.master.DriverState.DriverState
private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Actor with Logging {
- import context.dispatcher
+ import context.dispatcher // to use Akka's scheduler.schedule()
+
+ val conf = new SparkConf
val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
- val WORKER_TIMEOUT = System.getProperty("spark.worker.timeout", "60").toLong * 1000
- val RETAINED_APPLICATIONS = System.getProperty("spark.deploy.retainedApplications", "200").toInt
- val REAPER_ITERATIONS = System.getProperty("spark.dead.worker.persistence", "15").toInt
- val RECOVERY_DIR = System.getProperty("spark.deploy.recoveryDirectory", "")
- val RECOVERY_MODE = System.getProperty("spark.deploy.recoveryMode", "NONE")
+ val WORKER_TIMEOUT = conf.get("spark.worker.timeout", "60").toLong * 1000
+ val RETAINED_APPLICATIONS = conf.get("spark.deploy.retainedApplications", "200").toInt
+ val REAPER_ITERATIONS = conf.get("spark.dead.worker.persistence", "15").toInt
+ val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "")
+ val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE")
- var nextAppNumber = 0
val workers = new HashSet[WorkerInfo]
val idToWorker = new HashMap[String, WorkerInfo]
val actorToWorker = new HashMap[ActorRef, WorkerInfo]
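
The Master hunk above shows a pattern repeated throughout this merge: configuration reads move from raw JVM system properties to the new SparkConf. A before/after sketch of the pattern, using a key and default taken from the hunk (the surrounding object is illustrative):

import org.apache.spark.SparkConf

object ConfMigrationSketch {
  def main(args: Array[String]) {
    // Before the merge: a global JVM property with an inline default.
    val oldTimeout = System.getProperty("spark.worker.timeout", "60").toLong * 1000

    // After the merge: the same key and default, read through a SparkConf
    // instance (which also picks up spark.* system properties on creation).
    val conf = new SparkConf
    val newTimeout = conf.get("spark.worker.timeout", "60").toLong * 1000

    println(oldTimeout == newTimeout) // true unless set differently elsewhere
  }
}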
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 2947ed1,fcaf4e9..2072f00
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@@ -26,11 -26,10 +26,11 @@@ import scala.concurrent.duration._
import akka.actor._
import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
+
- import org.apache.spark.{Logging, SparkException}
+ import org.apache.spark.{Logging, SparkConf, SparkException}
import org.apache.spark.deploy.{ExecutorDescription, ExecutorState}
import org.apache.spark.deploy.DeployMessages._
-import org.apache.spark.deploy.master.Master
+import org.apache.spark.deploy.master.{DriverState, Master}
import org.apache.spark.deploy.worker.ui.WorkerWebUI
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.util.{AkkaUtils, Utils}
@@@ -45,9 -44,8 +45,10 @@@ private[spark] class Worker
cores: Int,
memory: Int,
masterUrls: Array[String],
+ actorSystemName: String,
+ actorName: String,
- workDirPath: String = null)
+ workDirPath: String = null,
+ val conf: SparkConf)
extends Actor with Logging {
import context.dispatcher
@@@ -323,14 -276,16 +325,17 @@@ private[spark] object Worker
}
def startSystemAndActor(host: String, port: Int, webUiPort: Int, cores: Int, memory: Int,
- masterUrls: Array[String], workDir: String, workerNumber: Option[Int] = None)
- : (ActorSystem, Int) = {
+ masterUrls: Array[String], workDir: String, workerNumber: Option[Int] = None)
+ : (ActorSystem, Int) =
+ {
// The LocalSparkCluster runs multiple local sparkWorkerX actor systems
+ val conf = new SparkConf
val systemName = "sparkWorker" + workerNumber.map(_.toString).getOrElse("")
+ val actorName = "Worker"
- val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port)
+ val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port,
+ conf = conf)
actorSystem.actorOf(Props(classOf[Worker], host, boundPort, webUiPort, cores, memory,
- masterUrls, systemName, actorName, workDir), name = actorName)
- masterUrls, workDir, conf), name = "Worker")
++ masterUrls, workDir, conf), name = actorName)
(actorSystem, boundPort)
}
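
The Worker hunks thread actorSystemName and actorName into the constructor and register the actor under that name instead of a hardcoded "Worker". A plausible reading, given the WorkerWatcher import in the next file and the {{WORKER_URL}} argument further below, is that the worker must be able to state its own Akka URL. A sketch of that composition, with the format borrowed from the driverUrl in the SparkDeploySchedulerBackend hunk (not code from this commit):

object WorkerUrlSketch {
  // Illustrative: assemble an akka.tcp actor URL from the pieces the Worker
  // now receives (actor system name, host, bound port, actor name).
  def workerUrl(systemName: String, host: String, port: Int, actorName: String): String =
    "akka.tcp://%s@%s:%d/user/%s".format(systemName, host, port, actorName)

  def main(args: Array[String]) {
    // Prints: akka.tcp://sparkWorker1@host1:42042/user/Worker
    println(workerUrl("sparkWorker1", "host1", 42042, "Worker"))
  }
}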
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index eb1199e,53a2b94..f9e43e0
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@@ -22,11 -22,10 +22,11 @@@ import java.nio.ByteBuffer
import akka.actor._
import akka.remote._
- import org.apache.spark.Logging
+ import org.apache.spark.{SparkConf, SparkContext, Logging}
import org.apache.spark.TaskState.TaskState
+import org.apache.spark.deploy.worker.WorkerWatcher
import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._
-import org.apache.spark.util.{Utils, AkkaUtils}
+import org.apache.spark.util.{AkkaUtils, Utils}
private[spark] class CoarseGrainedExecutorBackend(
driverUrl: String,
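
Only this file's imports appear in the conflict, but the new WorkerWatcher import, together with the {{WORKER_URL}} template argument below, suggests the executor backend now watches its parent worker and shuts down if the worker goes away. A hedged sketch of what such wiring could look like (not the resolved hunk itself):

import akka.actor.{ActorSystem, Props}
import org.apache.spark.deploy.worker.WorkerWatcher

object WorkerWatcherSketch {
  // Illustrative: if a worker URL was passed on the command line, start a
  // WorkerWatcher actor bound to it in the backend's actor system.
  def watchWorker(actorSystem: ActorSystem, workerUrl: Option[String]) {
    workerUrl.foreach { url =>
      actorSystem.actorOf(Props(classOf[WorkerWatcher], url), name = "WorkerWatcher")
    }
  }
}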
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 0615f7b,9858717..c8c77e9
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@@ -45,16 -45,16 +45,16 @@@ private[spark] class SparkDeploySchedulerBackend
// The endpoint for executors to talk to us
val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
- System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
+ conf.get("spark.driver.host"), conf.get("spark.driver.port"),
CoarseGrainedSchedulerBackend.ACTOR_NAME)
- val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}")
+ val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}", "{{WORKER_URL}}")
val command = Command(
"org.apache.spark.executor.CoarseGrainedExecutorBackend", args, sc.executorEnvs)
val sparkHome = sc.getSparkHome().getOrElse(null)
- val appDesc = new ApplicationDescription(appName, maxCores, executorMemory, command, sparkHome,
+ val appDesc = new ApplicationDescription(appName, maxCores, sc.executorMemory, command, sparkHome,
"http://" + sc.ui.appUIAddress)
- client = new AppClient(sc.env.actorSystem, masters, appDesc, this)
- client = new Client(sc.env.actorSystem, masters, appDesc, this, conf)
++ client = new AppClient(sc.env.actorSystem, masters, appDesc, this, conf)
client.start()
}
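
The executor launch args gain a {{WORKER_URL}} placeholder alongside the existing templates. The {{...}} tokens are not expanded here; the worker substitutes concrete values when it actually launches the executor process. A minimal sketch of that substitution step, with illustrative values (the real substitution lives on the worker side, not in this file):

object TemplateSketch {
  def main(args: Array[String]) {
    val template = Seq(
      "akka.tcp://spark@driver-host:7077/user/CoarseGrainedScheduler",
      "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}", "{{WORKER_URL}}")

    // Replace each placeholder with the value known at launch time.
    def substitute(arg: String): String = arg match {
      case "{{EXECUTOR_ID}}" => "0"
      case "{{HOSTNAME}}"    => "host1"
      case "{{CORES}}"       => "4"
      case "{{WORKER_URL}}"  => "akka.tcp://sparkWorker1@host1:42042/user/Worker"
      case other             => other
    }

    println(template.map(substitute).mkString(" "))
  }
}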
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c0498f92/docs/spark-standalone.md
----------------------------------------------------------------------