Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/08 07:22:11 UTC
[09/13] git commit: Merge remote-tracking branch 'apache/master' into project-refactor
Merge remote-tracking branch 'apache/master' into project-refactor
Conflicts:
examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/3b4c4c7f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/3b4c4c7f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/3b4c4c7f
Branch: refs/heads/master
Commit: 3b4c4c7f4d0d6e45a1acb0baf0d9416a8997b686
Parents: d0fd3b9 a2e7e04
Author: Tathagata Das <ta...@gmail.com>
Authored: Mon Jan 6 03:05:52 2014 -0800
Committer: Tathagata Das <ta...@gmail.com>
Committed: Mon Jan 6 03:05:52 2014 -0800
----------------------------------------------------------------------
.gitignore | 2 +
README.md | 28 +-
assembly/lib/PY4J_LICENSE.txt | 27 -
assembly/lib/PY4J_VERSION.txt | 1 -
assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar | Bin 103286 -> 0 bytes
assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom | 9 -
.../net/sf/py4j/py4j/maven-metadata-local.xml | 12 -
assembly/pom.xml | 14 +-
assembly/src/main/assembly/assembly.xml | 11 +-
bin/compute-classpath.cmd | 2 +-
bin/compute-classpath.sh | 2 +-
bin/pyspark | 70 ++
bin/pyspark.cmd | 23 +
bin/pyspark2.cmd | 55 +
bin/run-example | 91 ++
bin/run-example.cmd | 23 +
bin/run-example2.cmd | 61 ++
bin/slaves.sh | 91 --
bin/spark-class | 154 +++
bin/spark-class.cmd | 23 +
bin/spark-class2.cmd | 85 ++
bin/spark-config.sh | 36 -
bin/spark-daemon.sh | 183 ----
bin/spark-daemons.sh | 35 -
bin/spark-shell | 102 ++
bin/spark-shell.cmd | 23 +
bin/start-all.sh | 34 -
bin/start-master.sh | 52 -
bin/start-slave.sh | 35 -
bin/start-slaves.sh | 48 -
bin/stop-all.sh | 32 -
bin/stop-master.sh | 27 -
bin/stop-slaves.sh | 35 -
core/pom.xml | 422 ++++----
.../apache/spark/network/netty/FileClient.java | 32 +-
.../netty/FileClientChannelInitializer.java | 6 +-
.../spark/network/netty/FileClientHandler.java | 12 +-
.../apache/spark/network/netty/FileServer.java | 29 +-
.../netty/FileServerChannelInitializer.java | 3 +-
.../spark/network/netty/FileServerHandler.java | 18 +-
.../org/apache/spark/default-log4j.properties | 8 +
.../scala/org/apache/spark/Accumulators.scala | 8 +-
.../scala/org/apache/spark/HttpServer.scala | 1 +
.../main/scala/org/apache/spark/Logging.scala | 41 +-
.../org/apache/spark/MapOutputTracker.scala | 11 +-
.../scala/org/apache/spark/Partitioner.scala | 4 +-
.../main/scala/org/apache/spark/SparkConf.scala | 189 ++++
.../scala/org/apache/spark/SparkContext.scala | 297 ++++--
.../main/scala/org/apache/spark/SparkEnv.scala | 54 +-
.../org/apache/spark/api/java/JavaPairRDD.scala | 36 +
.../org/apache/spark/api/java/JavaRDDLike.scala | 11 +
.../spark/api/java/JavaSparkContext.scala | 56 +-
.../org/apache/spark/api/python/PythonRDD.scala | 4 +-
.../org/apache/spark/broadcast/Broadcast.scala | 8 +-
.../spark/broadcast/BroadcastFactory.scala | 4 +-
.../apache/spark/broadcast/HttpBroadcast.scala | 43 +-
.../spark/broadcast/TorrentBroadcast.scala | 45 +-
.../spark/deploy/FaultToleranceTest.scala | 4 +-
.../apache/spark/deploy/LocalSparkCluster.scala | 7 +-
.../apache/spark/deploy/SparkHadoopUtil.scala | 14 +-
.../org/apache/spark/deploy/client/Client.scala | 21 +-
.../apache/spark/deploy/client/TestClient.scala | 10 +-
.../org/apache/spark/deploy/master/Master.scala | 41 +-
.../spark/deploy/master/MasterArguments.scala | 11 +-
.../deploy/master/SparkZooKeeperSession.scala | 7 +-
.../master/ZooKeeperLeaderElectionAgent.scala | 9 +-
.../master/ZooKeeperPersistenceEngine.scala | 8 +-
.../spark/deploy/master/ui/MasterWebUI.scala | 2 +-
.../org/apache/spark/deploy/worker/Worker.scala | 34 +-
.../spark/deploy/worker/ui/WorkerWebUI.scala | 6 +-
.../executor/CoarseGrainedExecutorBackend.scala | 6 +-
.../org/apache/spark/executor/Executor.scala | 47 +-
.../org/apache/spark/io/CompressionCodec.scala | 19 +-
.../apache/spark/metrics/MetricsConfig.scala | 1 -
.../apache/spark/metrics/MetricsSystem.scala | 11 +-
.../spark/network/ConnectionManager.scala | 22 +-
.../org/apache/spark/network/ReceiverTest.scala | 12 +-
.../org/apache/spark/network/SenderTest.scala | 16 +-
.../spark/network/netty/ShuffleCopier.scala | 10 +-
.../org/apache/spark/rdd/CheckpointRDD.scala | 37 +-
.../org/apache/spark/rdd/CoGroupedRDD.scala | 2 +-
.../org/apache/spark/rdd/PairRDDFunctions.scala | 42 +
.../spark/rdd/PartitionerAwareUnionRDD.scala | 110 ++
.../main/scala/org/apache/spark/rdd/RDD.scala | 19 +-
.../apache/spark/rdd/RDDCheckpointData.scala | 17 +-
.../org/apache/spark/rdd/ShuffledRDD.scala | 2 +-
.../org/apache/spark/rdd/SubtractedRDD.scala | 2 +-
.../apache/spark/scheduler/DAGScheduler.scala | 3 +-
.../spark/scheduler/InputFormatInfo.scala | 14 +-
.../org/apache/spark/scheduler/ResultTask.scala | 4 +-
.../spark/scheduler/SchedulableBuilder.scala | 6 +-
.../spark/scheduler/SchedulerBackend.scala | 3 -
.../apache/spark/scheduler/ShuffleMapTask.scala | 6 +-
.../spark/scheduler/TaskResultGetter.scala | 3 +-
.../spark/scheduler/TaskSchedulerImpl.scala | 25 +-
.../apache/spark/scheduler/TaskSetManager.scala | 23 +-
.../cluster/CoarseGrainedSchedulerBackend.scala | 20 +-
.../cluster/SimrSchedulerBackend.scala | 4 +-
.../cluster/SparkDeploySchedulerBackend.scala | 8 +-
.../mesos/CoarseMesosSchedulerBackend.scala | 18 +-
.../cluster/mesos/MesosSchedulerBackend.scala | 12 +-
.../spark/scheduler/local/LocalBackend.scala | 3 +-
.../spark/serializer/JavaSerializer.scala | 3 +-
.../spark/serializer/KryoSerializer.scala | 14 +-
.../apache/spark/serializer/Serializer.scala | 3 +
.../spark/serializer/SerializerManager.scala | 23 +-
.../spark/storage/BlockFetcherIterator.scala | 4 +-
.../org/apache/spark/storage/BlockManager.scala | 58 +-
.../spark/storage/BlockManagerMaster.scala | 11 +-
.../spark/storage/BlockManagerMasterActor.scala | 16 +-
.../spark/storage/BlockManagerWorker.scala | 3 -
.../spark/storage/BlockMessageArray.scala | 2 -
.../spark/storage/BlockObjectWriter.scala | 5 +-
.../apache/spark/storage/DiskBlockManager.scala | 2 +-
.../spark/storage/ShuffleBlockManager.scala | 10 +-
.../spark/storage/StoragePerfTester.scala | 2 +-
.../apache/spark/storage/ThreadingTest.scala | 8 +-
.../scala/org/apache/spark/ui/SparkUI.scala | 4 +-
.../apache/spark/ui/UIWorkloadGenerator.scala | 21 +-
.../org/apache/spark/ui/env/EnvironmentUI.scala | 15 +-
.../spark/ui/jobs/JobProgressListener.scala | 4 +-
.../scala/org/apache/spark/util/AkkaUtils.scala | 39 +-
.../org/apache/spark/util/MetadataCleaner.scala | 35 +-
.../spark/util/SerializableHyperLogLog.scala | 50 +
.../org/apache/spark/util/SizeEstimator.scala | 14 +-
.../scala/org/apache/spark/util/Utils.scala | 25 +-
core/src/test/resources/spark.conf | 8 +
.../test/resources/uncommons-maths-1.2.2.jar | Bin 49019 -> 0 bytes
.../org/apache/spark/CheckpointSuite.scala | 363 ++++---
.../scala/org/apache/spark/DriverSuite.scala | 8 +-
.../org/apache/spark/FileServerSuite.scala | 108 +-
.../scala/org/apache/spark/JavaAPISuite.java | 36 +-
.../apache/spark/MapOutputTrackerSuite.scala | 16 +-
.../org/apache/spark/SharedSparkContext.scala | 4 +-
.../scala/org/apache/spark/SparkConfSuite.scala | 110 ++
.../deploy/worker/ExecutorRunnerTest.scala | 4 +-
.../apache/spark/io/CompressionCodecSuite.scala | 8 +-
.../spark/metrics/MetricsSystemSuite.scala | 8 +-
.../spark/rdd/PairRDDFunctionsSuite.scala | 34 +
.../scala/org/apache/spark/rdd/RDDSuite.scala | 40 +
.../spark/scheduler/ClusterSchedulerSuite.scala | 2 +-
.../spark/scheduler/DAGSchedulerSuite.scala | 23 +-
.../apache/spark/scheduler/JobLoggerSuite.scala | 2 +-
.../spark/scheduler/TaskResultGetterSuite.scala | 6 +-
.../spark/scheduler/TaskSetManagerSuite.scala | 4 +-
.../spark/serializer/KryoSerializerSuite.scala | 33 +-
.../spark/storage/BlockManagerSuite.scala | 97 +-
.../spark/storage/DiskBlockManagerSuite.scala | 18 +-
.../apache/spark/util/SizeEstimatorSuite.scala | 2 +-
data/kmeans_data.txt | 6 +
data/lr_data.txt | 1000 ++++++++++++++++++
data/pagerank_data.txt | 6 +
docs/README.md | 4 +-
docs/_config.yml | 2 +-
docs/_plugins/copy_api_dirs.rb | 4 +-
docs/api.md | 2 +-
docs/bagel-programming-guide.md | 4 +-
docs/building-with-maven.md | 14 +-
docs/configuration.md | 71 +-
docs/css/bootstrap.min.css | 2 +-
docs/hadoop-third-party-distributions.md | 2 +-
docs/index.md | 16 +-
docs/java-programming-guide.md | 4 +-
docs/job-scheduling.md | 21 +-
docs/mllib-guide.md | 2 +-
docs/monitoring.md | 3 +-
docs/python-programming-guide.md | 45 +-
docs/quick-start.md | 62 +-
docs/running-on-mesos.md | 19 +-
docs/running-on-yarn.md | 17 +-
docs/scala-programming-guide.md | 20 +-
docs/spark-debugger.md | 2 +-
docs/spark-standalone.md | 35 +-
docs/streaming-programming-guide.md | 8 +-
docs/tuning.md | 21 +-
ec2/spark_ec2.py | 2 +-
.../org/apache/spark/examples/JavaHdfsLR.java | 2 +-
.../org/apache/spark/examples/JavaKMeans.java | 2 +-
.../org/apache/spark/examples/JavaLogQuery.java | 2 +-
.../org/apache/spark/examples/JavaPageRank.java | 3 +-
.../org/apache/spark/examples/JavaSparkPi.java | 2 +-
.../java/org/apache/spark/examples/JavaTC.java | 2 +-
.../apache/spark/examples/JavaWordCount.java | 2 +-
.../apache/spark/mllib/examples/JavaALS.java | 2 +-
.../apache/spark/mllib/examples/JavaKMeans.java | 2 +-
.../org/apache/spark/mllib/examples/JavaLR.java | 2 +-
.../streaming/examples/JavaFlumeEventCount.java | 3 +-
.../streaming/examples/JavaKafkaWordCount.java | 5 +-
.../examples/JavaNetworkWordCount.java | 3 +-
.../streaming/examples/JavaQueueStream.java | 2 +-
.../apache/spark/examples/BroadcastTest.scala | 2 +-
.../spark/examples/ExceptionHandlingTest.scala | 2 +-
.../org/apache/spark/examples/GroupByTest.scala | 2 +-
.../org/apache/spark/examples/HBaseTest.scala | 2 +-
.../org/apache/spark/examples/HdfsTest.scala | 2 +-
.../org/apache/spark/examples/LogQuery.scala | 2 +-
.../spark/examples/MultiBroadcastTest.scala | 2 +-
.../examples/SimpleSkewedGroupByTest.scala | 2 +-
.../spark/examples/SkewedGroupByTest.scala | 2 +-
.../org/apache/spark/examples/SparkALS.scala | 2 +-
.../org/apache/spark/examples/SparkHdfsLR.scala | 2 +-
.../org/apache/spark/examples/SparkKMeans.scala | 2 +-
.../org/apache/spark/examples/SparkLR.scala | 2 +-
.../apache/spark/examples/SparkPageRank.scala | 2 +-
.../org/apache/spark/examples/SparkPi.scala | 2 +-
.../org/apache/spark/examples/SparkTC.scala | 2 +-
.../examples/bagel/WikipediaPageRank.scala | 10 +-
.../bagel/WikipediaPageRankStandalone.scala | 8 +-
.../streaming/examples/ActorWordCount.scala | 9 +-
.../streaming/examples/FlumeEventCount.scala | 2 +-
.../streaming/examples/HdfsWordCount.scala | 4 +-
.../streaming/examples/KafkaWordCount.scala | 4 +-
.../streaming/examples/MQTTWordCount.scala | 6 +-
.../streaming/examples/NetworkWordCount.scala | 4 +-
.../spark/streaming/examples/QueueStream.scala | 2 +-
.../streaming/examples/RawNetworkGrep.scala | 2 +-
.../examples/StatefulNetworkWordCount.scala | 4 +-
.../streaming/examples/TwitterAlgebirdCMS.scala | 2 +-
.../streaming/examples/TwitterAlgebirdHLL.scala | 2 +-
.../streaming/examples/TwitterPopularTags.scala | 2 +-
.../streaming/examples/ZeroMQWordCount.scala | 6 +-
.../clickstream/PageViewGenerator.scala | 4 +-
.../examples/clickstream/PageViewStream.scala | 6 +-
.../streaming/flume/FlumeStreamSuite.scala | 2 +-
kmeans_data.txt | 6 -
lr_data.txt | 1000 ------------------
make-distribution.sh | 24 +-
.../spark/mllib/classification/NaiveBayes.scala | 119 +++
.../apache/spark/mllib/recommendation/ALS.scala | 13 +-
.../mllib/classification/NaiveBayesSuite.scala | 108 ++
new-yarn/pom.xml | 161 ---
.../spark/deploy/yarn/ApplicationMaster.scala | 446 --------
.../yarn/ApplicationMasterArguments.scala | 94 --
.../org/apache/spark/deploy/yarn/Client.scala | 521 ---------
.../spark/deploy/yarn/ClientArguments.scala | 149 ---
.../yarn/ClientDistributedCacheManager.scala | 228 ----
.../spark/deploy/yarn/WorkerLauncher.scala | 222 ----
.../spark/deploy/yarn/WorkerRunnable.scala | 209 ----
.../deploy/yarn/YarnAllocationHandler.scala | 687 ------------
.../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 43 -
.../cluster/YarnClientClusterScheduler.scala | 48 -
.../cluster/YarnClientSchedulerBackend.scala | 110 --
.../cluster/YarnClusterScheduler.scala | 56 -
.../ClientDistributedCacheManagerSuite.scala | 220 ----
pagerank_data.txt | 6 -
pom.xml | 66 +-
project/SparkBuild.scala | 54 +-
pyspark | 70 --
pyspark.cmd | 23 -
pyspark2.cmd | 55 -
python/epydoc.conf | 2 +-
python/lib/py4j-0.8.1-src.zip | Bin 0 -> 37662 bytes
python/lib/py4j0.7.egg | Bin 191756 -> 0 bytes
python/pyspark/__init__.py | 34 +-
python/pyspark/broadcast.py | 11 +
python/pyspark/conf.py | 171 +++
python/pyspark/context.py | 68 +-
python/pyspark/java_gateway.py | 3 +-
python/pyspark/rdd.py | 66 +-
python/pyspark/shell.py | 2 +-
python/pyspark/tests.py | 4 +-
python/run-tests | 3 +-
repl-bin/src/deb/bin/run | 3 +-
repl/pom.xml | 1 -
.../org/apache/spark/repl/SparkILoop.scala | 19 +-
.../org/apache/spark/repl/SparkIMain.scala | 7 +-
run-example | 91 --
run-example.cmd | 23 -
run-example2.cmd | 61 --
sbin/slaves.sh | 91 ++
sbin/spark-config.sh | 36 +
sbin/spark-daemon.sh | 183 ++++
sbin/spark-daemons.sh | 35 +
sbin/spark-executor | 23 +
sbin/start-all.sh | 34 +
sbin/start-master.sh | 52 +
sbin/start-slave.sh | 35 +
sbin/start-slaves.sh | 48 +
sbin/stop-all.sh | 32 +
sbin/stop-master.sh | 27 +
sbin/stop-slaves.sh | 35 +
sbt/sbt | 43 -
sbt/sbt-launch-0.11.3-2.jar | Bin 1096763 -> 0 bytes
sbt/sbt.cmd | 25 -
spark-class | 149 ---
spark-class.cmd | 23 -
spark-class2.cmd | 85 --
spark-executor | 22 -
spark-shell | 102 --
spark-shell.cmd | 22 -
.../org/apache/spark/streaming/Checkpoint.scala | 66 +-
.../org/apache/spark/streaming/DStream.scala | 4 +-
.../apache/spark/streaming/DStreamGraph.scala | 1 -
.../spark/streaming/PairDStreamFunctions.scala | 13 +-
.../spark/streaming/StreamingContext.scala | 89 +-
.../streaming/api/java/JavaPairDStream.scala | 18 +-
.../api/java/JavaStreamingContext.scala | 38 +-
.../streaming/dstream/FileInputDStream.scala | 153 +--
.../streaming/dstream/NetworkInputDStream.scala | 8 +-
.../streaming/dstream/ShuffledDStream.scala | 9 +-
.../streaming/dstream/WindowedDStream.scala | 16 +-
.../streaming/scheduler/JobGenerator.scala | 71 +-
.../streaming/scheduler/JobScheduler.scala | 6 +-
.../streaming/util/MasterFailureTest.scala | 3 -
.../spark/streaming/util/RawTextSender.scala | 4 +-
.../apache/spark/streaming/JavaAPISuite.java | 4 +-
.../spark/streaming/BasicOperationsSuite.scala | 8 +-
.../spark/streaming/CheckpointSuite.scala | 59 +-
.../spark/streaming/InputStreamsSuite.scala | 15 +-
.../apache/spark/streaming/TestSuiteBase.scala | 21 +-
.../spark/streaming/WindowOperationsSuite.scala | 5 +-
yarn/README.md | 12 +
yarn/alpha/pom.xml | 32 +
.../spark/deploy/yarn/ApplicationMaster.scala | 464 ++++++++
.../org/apache/spark/deploy/yarn/Client.scala | 509 +++++++++
.../spark/deploy/yarn/WorkerLauncher.scala | 250 +++++
.../spark/deploy/yarn/WorkerRunnable.scala | 236 +++++
.../deploy/yarn/YarnAllocationHandler.scala | 680 ++++++++++++
.../yarn/ApplicationMasterArguments.scala | 94 ++
.../spark/deploy/yarn/ClientArguments.scala | 150 +++
.../yarn/ClientDistributedCacheManager.scala | 228 ++++
.../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 43 +
.../cluster/YarnClientClusterScheduler.scala | 48 +
.../cluster/YarnClientSchedulerBackend.scala | 110 ++
.../cluster/YarnClusterScheduler.scala | 56 +
.../ClientDistributedCacheManagerSuite.scala | 220 ++++
yarn/pom.xml | 84 +-
.../spark/deploy/yarn/ApplicationMaster.scala | 477 ---------
.../yarn/ApplicationMasterArguments.scala | 94 --
.../org/apache/spark/deploy/yarn/Client.scala | 503 ---------
.../spark/deploy/yarn/ClientArguments.scala | 146 ---
.../yarn/ClientDistributedCacheManager.scala | 228 ----
.../spark/deploy/yarn/WorkerLauncher.scala | 243 -----
.../spark/deploy/yarn/WorkerRunnable.scala | 235 ----
.../deploy/yarn/YarnAllocationHandler.scala | 673 ------------
.../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 43 -
.../cluster/YarnClientClusterScheduler.scala | 48 -
.../cluster/YarnClientSchedulerBackend.scala | 110 --
.../cluster/YarnClusterScheduler.scala | 59 --
.../ClientDistributedCacheManagerSuite.scala | 220 ----
yarn/stable/pom.xml | 32 +
.../spark/deploy/yarn/ApplicationMaster.scala | 432 ++++++++
.../org/apache/spark/deploy/yarn/Client.scala | 525 +++++++++
.../spark/deploy/yarn/WorkerLauncher.scala | 230 ++++
.../spark/deploy/yarn/WorkerRunnable.scala | 210 ++++
.../deploy/yarn/YarnAllocationHandler.scala | 695 ++++++++++++
346 files changed, 11176 insertions(+), 10576 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
----------------------------------------------------------------------
diff --cc examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
index 64832a9,64ac724..83900a1
--- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
@@@ -50,10 -49,11 +50,11 @@@ public class JavaFlumeEventCount
Duration batchInterval = new Duration(2000);
- JavaStreamingContext sc = new JavaStreamingContext(master, "FlumeEventCount", batchInterval,
+ JavaStreamingContext ssc = new JavaStreamingContext(master, "FlumeEventCount", batchInterval,
- System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ System.getenv("SPARK_HOME"),
+ JavaStreamingContext.jarOfClass(JavaFlumeEventCount.class));
-
- JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream("localhost", port);
+ FlumeFunctions flumeFunc = new FlumeFunctions(ssc);
+ JavaDStream<SparkFlumeEvent> flumeStream = flumeFunc.flumeStream("localhost", port);
flumeStream.count();
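The resolved Java example above shows the two API changes this refactor brings together: the streaming context now locates the examples jar via JavaStreamingContext.jarOfClass instead of the SPARK_EXAMPLES_JAR environment variable, and the Flume stream is created through the external module's FlumeFunctions wrapper rather than a method on the context itself. A minimal Scala sketch of the same pattern, assuming the flumeStream enrichment that the new FlumeStreamSuite (further down) calls on the context; the master URL and port are placeholders:

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
// Assumed import: the external-flume module is expected to add flumeStream
// to StreamingContext, mirroring its use in FlumeStreamSuite below.
import org.apache.spark.streaming.flume._

val ssc = new StreamingContext("local[2]", "FlumeEventCount", Seconds(2))
val flumeStream = ssc.flumeStream("localhost", 9999, StorageLevel.MEMORY_AND_DISK)
flumeStream.count().map(cnt => "Received " + cnt + " flume events.").print()
ssc.start()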
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/MQTTWordCount.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala
----------------------------------------------------------------------
diff --cc external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala
index ba33320,0000000..74840f6
mode 100644,000000..100644
--- a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala
+++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala
@@@ -1,86 -1,0 +1,86 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.flume
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}
+
+import java.net.InetSocketAddress
+import java.nio.ByteBuffer
+import java.nio.charset.Charset
+
+import org.apache.avro.ipc.NettyTransceiver
+import org.apache.avro.ipc.specific.SpecificRequestor
+import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol}
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{TestOutputStream, StreamingContext, TestSuiteBase}
+import org.apache.spark.streaming.util.ManualClock
+
+class FlumeStreamSuite extends TestSuiteBase {
+
+ val testPort = 9999
+
+ test("flume input stream") {
+ // Set up the streaming context and input streams
- val ssc = new StreamingContext(master, framework, batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
+ val flumeStream = ssc.flumeStream("localhost", testPort, StorageLevel.MEMORY_AND_DISK)
+ val outputBuffer = new ArrayBuffer[Seq[SparkFlumeEvent]]
+ with SynchronizedBuffer[Seq[SparkFlumeEvent]]
+ val outputStream = new TestOutputStream(flumeStream, outputBuffer)
+ ssc.registerOutputStream(outputStream)
+ ssc.start()
+
+ val clock = ssc.scheduler.clock.asInstanceOf[ManualClock]
+ val input = Seq(1, 2, 3, 4, 5)
+ Thread.sleep(1000)
+ val transceiver = new NettyTransceiver(new InetSocketAddress("localhost", testPort))
+ val client = SpecificRequestor.getClient(
+ classOf[AvroSourceProtocol], transceiver)
+
+ for (i <- 0 until input.size) {
+ val event = new AvroFlumeEvent
+ event.setBody(ByteBuffer.wrap(input(i).toString.getBytes()))
+ event.setHeaders(Map[CharSequence, CharSequence]("test" -> "header"))
+ client.append(event)
+ Thread.sleep(500)
+ clock.addToTime(batchDuration.milliseconds)
+ }
+
+ val startTime = System.currentTimeMillis()
+ while (outputBuffer.size < input.size && System.currentTimeMillis() - startTime < maxWaitTimeMillis) {
+ logInfo("output.size = " + outputBuffer.size + ", input.size = " + input.size)
+ Thread.sleep(100)
+ }
+ Thread.sleep(1000)
+ val timeTaken = System.currentTimeMillis() - startTime
+ assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms")
+ logInfo("Stopping context")
+ ssc.stop()
+
+ val decoder = Charset.forName("UTF-8").newDecoder()
+
+ assert(outputBuffer.size === input.length)
+ for (i <- 0 until outputBuffer.size) {
+ assert(outputBuffer(i).size === 1)
+ val str = decoder.decode(outputBuffer(i).head.event.getBody)
+ assert(str.toString === input(i).toString)
+ assert(outputBuffer(i).head.event.getHeaders.get("test") === "header")
+ }
+ }
+}
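Apart from the move into external/flume, the substantive change in this suite is its first line: the StreamingContext is now built from the suite's SparkConf, which carries the manual-clock setting, instead of the old (master, framework, batchDuration) form. A minimal sketch of that construction, with placeholder master, app name, and batch interval standing in for the fields TestSuiteBase provides:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Placeholder conf; in the suite, TestSuiteBase supplies the real one.
val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("FlumeStreamSketch")
  .set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
val ssc = new StreamingContext(conf, Seconds(1))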
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/pom.xml
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/project/SparkBuild.scala
----------------------------------------------------------------------
diff --cc project/SparkBuild.scala
index b7aa732,a6c560d..84becf6
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@@ -82,34 -89,12 +86,33 @@@ object SparkBuild extends Build
}
// Conditionally include the yarn sub-project
- lazy val yarn = Project("yarn", file(if (isNewHadoop) "new-yarn" else "yarn"), settings = yarnSettings) dependsOn(core)
+ lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
+ lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
- //lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core)
-
- lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]()
- lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]()
+ lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](if (isNewHadoop) yarn else yarnAlpha) else Seq[ClasspathDependency]()
+ lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](if (isNewHadoop) yarn else yarnAlpha) else Seq[ProjectReference]()
+ lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
+ .dependsOn(streaming % "compile->compile;test->test")
+
+ lazy val externalKafka = Project("external-kafka", file("external/kafka"), settings = kafkaSettings)
+ .dependsOn(streaming % "compile->compile;test->test")
+
+ lazy val externalFlume = Project("external-flume", file("external/flume"), settings = flumeSettings)
+ .dependsOn(streaming % "compile->compile;test->test")
+
+ lazy val externalZeromq = Project("external-zeromq", file("external/zeromq"), settings = zeromqSettings)
+ .dependsOn(streaming % "compile->compile;test->test")
+
+ lazy val externalMqtt = Project("external-mqtt", file("external/mqtt"), settings = mqttSettings)
+ .dependsOn(streaming % "compile->compile;test->test")
+
+ lazy val allExternal = Seq[ClasspathDependency](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt)
+ lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt)
+
+ lazy val examples = Project("examples", file("examples"), settings = examplesSettings)
+ .dependsOn(core, mllib, bagel, streaming, externalTwitter) dependsOn(allExternal: _*)
+
// Everything except assembly, tools and examples belong to packageProjects
lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib) ++ maybeYarnRef
@@@ -192,10 -178,11 +196,10 @@@
Some("sonatype-staging" at nexus + "service/local/staging/deploy/maven2")
},
-*/
-
+ */
libraryDependencies ++= Seq(
- "io.netty" % "netty-all" % "4.0.0.CR1",
+ "io.netty" % "netty-all" % "4.0.13.Final",
"org.eclipse.jetty" % "jetty-server" % "7.6.8.v20121106",
/** Workaround for SPARK-959. Dependency used by org.eclipse.jetty. Fixed in ivy 2.3.0. */
"org.eclipse.jetty.orbit" % "javax.servlet" % "2.5.0.v201103041518" artifacts Artifact("javax.servlet", "jar", "jar"),
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
----------------------------------------------------------------------
diff --cc streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 96f57cb,b3a7cf0..693cb7f
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@@ -23,25 -40,16 +23,23 @@@ import scala.reflect.ClassTa
import java.io.InputStream
import java.util.concurrent.atomic.AtomicInteger
- import java.util.UUID
+import akka.actor.Props
+import akka.actor.SupervisorStrategy
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.hadoop.fs.Path
-import twitter4j.Status
-import twitter4j.auth.Authorization
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.MetadataCleaner
+import org.apache.spark.streaming.dstream._
+import org.apache.spark.streaming.receivers._
+import org.apache.spark.streaming.scheduler._
-
/**
* A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic
* information (such as, cluster URL and job name) to internally create a SparkContext, it provides
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
----------------------------------------------------------------------
diff --cc streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index ea4a0fe,7dec4b3..7068f32
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@@ -17,21 -17,27 +17,23 @@@
package org.apache.spark.streaming.api.java
-import java.io.InputStream
-import java.lang.{Integer => JInt}
-import java.util.{List => JList, Map => JMap}
+
import scala.collection.JavaConversions._
import scala.reflect.ClassTag
+import java.io.InputStream
- import java.util.{Map => JMap, List => JList}
++import java.lang.{Integer => JInt}
++import java.util.{List => JList, Map => JMap}
+
+ import akka.actor.{Props, SupervisorStrategy}
-import akka.util.ByteString
-import akka.zeromq.Subscribe
import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
- import akka.actor.Props
- import akka.actor.SupervisorStrategy
-import twitter4j.Status
-import twitter4j.auth.Authorization
+ import org.apache.spark.{SparkConf, SparkContext}
+ import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaSparkContext}
+ import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2}
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
- import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2}
- import org.apache.spark.api.java.{JavaPairRDD, JavaSparkContext, JavaRDD}
import org.apache.spark.streaming._
-import org.apache.spark.streaming.dstream._
import org.apache.spark.streaming.scheduler.StreamingListener
/**
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
----------------------------------------------------------------------
diff --cc streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
index 6218795,d53d433..0d2145d
--- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
+++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
@@@ -17,17 -17,24 +17,19 @@@
package org.apache.spark.streaming;
-import com.google.common.base.Optional;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Files;
-
-import kafka.serializer.StringDecoder;
+import scala.Tuple2;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.spark.SparkConf;
-import org.apache.spark.streaming.api.java.JavaDStreamLike;
+ import org.junit.After;
import org.junit.Assert;
-import org.junit.Before;
import org.junit.Test;
+import java.io.*;
+import java.util.*;
-import scala.Tuple2;
-import twitter4j.Status;
+import com.google.common.base.Optional;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
++import org.apache.spark.SparkConf;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
----------------------------------------------------------------------
diff --cc streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
index 0cffed6,5185954..a8e0532
--- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
@@@ -49,9 -56,9 +49,9 @@@ class InputStreamsSuite extends TestSui
testServer.start()
// Set up the streaming context and input streams
- val ssc = new StreamingContext(master, framework, batchDuration)
- val ssc = new StreamingContext(new SparkContext(conf), batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
val networkStream = ssc.socketTextStream("localhost", testServer.port, StorageLevel.MEMORY_AND_DISK)
- val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String ]]
+ val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]]
val outputStream = new TestOutputStream(networkStream, outputBuffer)
def output = outputBuffer.flatMap(x => x)
ssc.registerOutputStream(outputStream)
@@@ -92,13 -99,62 +92,13 @@@
}
- test("flume input stream") {
- // Set up the streaming context and input streams
- val ssc = new StreamingContext(new SparkContext(conf), batchDuration)
- val flumeStream = ssc.flumeStream("localhost", testPort, StorageLevel.MEMORY_AND_DISK)
- val outputBuffer = new ArrayBuffer[Seq[SparkFlumeEvent]]
- with SynchronizedBuffer[Seq[SparkFlumeEvent]]
- val outputStream = new TestOutputStream(flumeStream, outputBuffer)
- ssc.registerOutputStream(outputStream)
- ssc.start()
-
- val clock = ssc.scheduler.clock.asInstanceOf[ManualClock]
- val input = Seq(1, 2, 3, 4, 5)
- Thread.sleep(1000)
- val transceiver = new NettyTransceiver(new InetSocketAddress("localhost", testPort))
- val client = SpecificRequestor.getClient(
- classOf[AvroSourceProtocol], transceiver)
-
- for (i <- 0 until input.size) {
- val event = new AvroFlumeEvent
- event.setBody(ByteBuffer.wrap(input(i).toString.getBytes()))
- event.setHeaders(Map[CharSequence, CharSequence]("test" -> "header"))
- client.append(event)
- Thread.sleep(500)
- clock.addToTime(batchDuration.milliseconds)
- }
-
- val startTime = System.currentTimeMillis()
- while (outputBuffer.size < input.size && System.currentTimeMillis() - startTime < maxWaitTimeMillis) {
- logInfo("output.size = " + outputBuffer.size + ", input.size = " + input.size)
- Thread.sleep(100)
- }
- Thread.sleep(1000)
- val timeTaken = System.currentTimeMillis() - startTime
- assert(timeTaken < maxWaitTimeMillis, "Operation timed out after " + timeTaken + " ms")
- logInfo("Stopping context")
- ssc.stop()
-
- val decoder = Charset.forName("UTF-8").newDecoder()
-
- assert(outputBuffer.size === input.length)
- for (i <- 0 until outputBuffer.size) {
- assert(outputBuffer(i).size === 1)
- val str = decoder.decode(outputBuffer(i).head.event.getBody)
- assert(str.toString === input(i).toString)
- assert(outputBuffer(i).head.event.getHeaders.get("test") === "header")
- }
- }
-
-
test("file input stream") {
// Disable manual clock as FileInputDStream does not work with manual clock
- System.clearProperty("spark.streaming.clock")
+ conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock")
// Set up the streaming context and input streams
val testDir = Files.createTempDir()
- val ssc = new StreamingContext(master, framework, batchDuration)
- val ssc = new StreamingContext(new SparkContext(conf), batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
val fileStream = ssc.textFileStream(testDir.toString)
val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]]
def output = outputBuffer.flatMap(x => x)
@@@ -150,7 -206,7 +150,7 @@@
testServer.start()
// Set up the streaming context and input streams
- val ssc = new StreamingContext(master, framework, batchDuration)
- val ssc = new StreamingContext(new SparkContext(conf), batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
val networkStream = ssc.actorStream[String](Props(new TestActor(port)), "TestActor",
StorageLevel.MEMORY_AND_DISK) //Had to pass the local value of port to prevent from closing over entire scope
val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]]
@@@ -193,6 -249,21 +193,7 @@@
}
}
- test("kafka input stream") {
- val ssc = new StreamingContext(new SparkContext(conf), batchDuration)
- val topics = Map("my-topic" -> 1)
- val test1 = ssc.kafkaStream("localhost:12345", "group", topics)
- val test2 = ssc.kafkaStream("localhost:12345", "group", topics, StorageLevel.MEMORY_AND_DISK)
-
- // Test specifying decoder
- val kafkaParams = Map("zookeeper.connect"->"localhost:12345","group.id"->"consumer-group")
- val test3 = ssc.kafkaStream[
- String,
- String,
- kafka.serializer.StringDecoder,
- kafka.serializer.StringDecoder](kafkaParams, topics, StorageLevel.MEMORY_AND_DISK)
- }
+
test("multi-thread receiver") {
// set up the test receiver
val numThreads = 10
@@@ -202,7 -273,7 +203,7 @@@
MultiThreadTestReceiver.haveAllThreadsFinished = false
// set up the network stream using the test receiver
- val ssc = new StreamingContext(master, framework, batchDuration)
- val ssc = new StreamingContext(new SparkContext(conf), batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
val networkStream = ssc.networkStream[Int](testReceiver)
val countStream = networkStream.count
val outputBuffer = new ArrayBuffer[Seq[Long]] with SynchronizedBuffer[Seq[Long]]
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3b4c4c7f/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
----------------------------------------------------------------------
diff --cc streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
index f56c046,33464bc..b20d02f
--- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
@@@ -136,16 -142,17 +142,13 @@@ trait TestSuiteBase extends FunSuite wi
// Default before function for any streaming test suite. Override this
// if you want to add your stuff to "before" (i.e., don't call before { } )
def beforeFunction() {
- //if (useManualClock) {
- // System.setProperty(
- // "spark.streaming.clock",
- // "org.apache.spark.streaming.util.ManualClock"
- // )
- //} else {
- // System.clearProperty("spark.streaming.clock")
- //}
if (useManualClock) {
+ logInfo("Using manual clock")
- System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
+ conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock")
+ } else {
+ logInfo("Using real clock")
- System.clearProperty("spark.streaming.clock")
++ conf.set("spark.streaming.clock", "org.apache.spark.streaming.util.SystemClock")
}
- // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
- System.clearProperty("spark.driver.port")
- System.clearProperty("spark.hostPort")
}
// Default after function for any streaming test suite. Override this
@@@ -168,9 -175,9 +171,8 @@@
operation: DStream[U] => DStream[V],
numPartitions: Int = numInputPartitions
): StreamingContext = {
-
- val sc = new SparkContext(conf)
// Create StreamingContext
- val ssc = new StreamingContext(master, framework, batchDuration)
- val ssc = new StreamingContext(sc, batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
if (checkpointDir != null) {
ssc.checkpoint(checkpointDir)
}
@@@ -194,9 -201,9 +196,8 @@@
input2: Seq[Seq[V]],
operation: (DStream[U], DStream[V]) => DStream[W]
): StreamingContext = {
-
- val sc = new SparkContext(conf)
// Create StreamingContext
- val ssc = new StreamingContext(master, framework, batchDuration)
- val ssc = new StreamingContext(sc, batchDuration)
++ val ssc = new StreamingContext(conf, batchDuration)
if (checkpointDir != null) {
ssc.checkpoint(checkpointDir)
}
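The recurring change across these test hunks is the same: clock selection and context construction now go through a SparkConf rather than JVM system properties, so settings no longer leak between suites and the explicit System.clearProperty bookkeeping for spark.driver.port and spark.hostPort can be dropped. A minimal sketch of the merged beforeFunction pattern, with the conf and flag as stand-ins for the suite's own fields:

import org.apache.spark.SparkConf

val useManualClock = true // each suite sets this; stand-in for TestSuiteBase's flag
val conf = new SparkConf().setMaster("local[2]").setAppName("TestSuiteSketch")
conf.set("spark.streaming.clock",
  if (useManualClock) "org.apache.spark.streaming.util.ManualClock"
  else "org.apache.spark.streaming.util.SystemClock")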