Posted to commits@bahir.apache.org by lr...@apache.org on 2016/06/10 15:23:40 UTC

[01/50] [abbrv] bahir git commit: [SPARK-10227] fatal warnings with sbt on Scala 2.11

Repository: bahir
Updated Branches:
  refs/heads/master [created] d32542483


[SPARK-10227] fatal warnings with sbt on Scala 2.11

The bulk of the changes concern the `transient` annotation on class parameters. Often the compiler doesn't generate a field for these parameters, so the transient annotation would be unnecessary.
But if the class parameters are used in methods, then fields are created, so it is safer to keep the annotations.

The remaining changes address some potential bugs and deprecated syntax.
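
For illustration, a minimal Scala sketch (hypothetical classes, not from this patch) of the compiler behavior described above:

    class NoField(param: String) {
      // `param` is only read while the constructor runs, so scalac generates
      // no backing field for it; a @transient annotation here targets nothing
      // and only triggers the warning this patch silences.
      val greeting: String = "hello, " + param
    }

    class WithField(@transient param: String) extends Serializable {
      // `param` is referenced from a method, so scalac must retain it as a
      // field; if that field must not be serialized, keeping @transient on
      // the parameter is the safe choice.
      def shout: String = param.toUpperCase
    }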

Author: Luc Bourlier <lu...@typesafe.com>

Closes #8433 from skyluc/issue/sbt-2.11.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/0092cb93
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/0092cb93
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/0092cb93

Branch: refs/heads/master
Commit: 0092cb936dfa03d789666c35fdbcfb440141a44f
Parents: 9c8e4c2
Author: Luc Bourlier <lu...@typesafe.com>
Authored: Wed Sep 9 09:57:58 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Sep 9 09:57:58 2015 +0100

----------------------------------------------------------------------
 .../scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala   | 2 +-
 .../org/apache/spark/streaming/twitter/TwitterInputDStream.scala   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/0092cb93/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
index 7c2f18c..116c170 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
@@ -38,7 +38,7 @@ import org.apache.spark.streaming.receiver.Receiver
 
 private[streaming]
 class MQTTInputDStream(
-    @transient ssc_ : StreamingContext,
+    ssc_ : StreamingContext,
     brokerUrl: String,
     topic: String,
     storageLevel: StorageLevel

http://git-wip-us.apache.org/repos/asf/bahir/blob/0092cb93/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index 7cf02d8..d7de74b 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -39,7 +39,7 @@ import org.apache.spark.streaming.receiver.Receiver
 */
 private[streaming]
 class TwitterInputDStream(
-    @transient ssc_ : StreamingContext,
+    ssc_ : StreamingContext,
     twitterAuth: Option[Authorization],
     filters: Seq[String],
     storageLevel: StorageLevel


[23/50] [abbrv] bahir git commit: [SPARK-12510][STREAMING] Refactor ActorReceiver to support Java

Posted by lr...@apache.org.
[SPARK-12510][STREAMING] Refactor ActorReceiver to support Java

This PR includes the following changes (a usage sketch follows the list):

1. Rename `ActorReceiver` to `ActorReceiverSupervisor`
2. Remove `ActorHelper`
3. Add a new `ActorReceiver` for Scala and `JavaActorReceiver` for Java
4. Add `JavaActorWordCount` example
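
For illustration, a minimal Scala sketch (hypothetical receiver; API names as introduced in the diffs below) of the new base class in use:

    import akka.actor.Props
    import org.apache.spark.streaming.receiver.ActorReceiver

    // Extends the new ActorReceiver base class instead of mixing the
    // removed ActorHelper trait into a plain Actor.
    class EchoReceiver extends ActorReceiver {
      def receive = {
        case s: String => store(s)  // push received data into Spark Streaming
      }
    }

    // Plugged in as before (assumes a StreamingContext named `ssc`):
    // val lines = ssc.actorStream[String](Props(new EchoReceiver), "EchoReceiver")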

Author: Shixiong Zhu <sh...@databricks.com>

Closes #10457 from zsxwing/java-actor-stream.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/cca0efef
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/cca0efef
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/cca0efef

Branch: refs/heads/master
Commit: cca0efef68e91fd6d30cbae0a73fe1c737cd28a1
Parents: 67f0c90
Author: Shixiong Zhu <sh...@databricks.com>
Authored: Thu Jan 7 15:26:55 2016 -0800
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Thu Jan 7 15:26:55 2016 -0800

----------------------------------------------------------------------
 .../streaming/akka/JavaActorWordCount.java      | 137 +++++++++++++++++++
 .../streaming/akka/ActorWordCount.scala         |   9 +-
 .../spark/streaming/akka/ActorReceiver.scala    |  64 +++++++--
 .../spark/streaming/zeromq/ZeroMQReceiver.scala |   5 +-
 4 files changed, 197 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/cca0efef/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
new file mode 100644
index 0000000..2377207
--- /dev/null
+++ b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.Arrays;
+
+import scala.Tuple2;
+
+import akka.actor.ActorSelection;
+import akka.actor.Props;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.streaming.receiver.JavaActorReceiver;
+
+/**
+ * A sample actor as receiver, is also simplest. This receiver actor
+ * goes and subscribe to a typical publisher/feeder actor and receives
+ * data.
+ *
+ * @see [[org.apache.spark.examples.streaming.FeederActor]]
+ */
+class JavaSampleActorReceiver<T> extends JavaActorReceiver {
+
+  private final String urlOfPublisher;
+
+  public JavaSampleActorReceiver(String urlOfPublisher) {
+    this.urlOfPublisher = urlOfPublisher;
+  }
+
+  private ActorSelection remotePublisher;
+
+  @Override
+  public void preStart() {
+    remotePublisher = getContext().actorSelection(urlOfPublisher);
+    remotePublisher.tell(new SubscribeReceiver(getSelf()), getSelf());
+  }
+
+  public void onReceive(Object msg) throws Exception {
+    store((T) msg);
+  }
+
+  @Override
+  public void postStop() {
+    remotePublisher.tell(new UnsubscribeReceiver(getSelf()), getSelf());
+  }
+}
+
+/**
+ * A sample word count program demonstrating the use of plugging in
+ * Actor as Receiver
+ * Usage: JavaActorWordCount <hostname> <port>
+ *   <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on.
+ *
+ * To run this example locally, you may run Feeder Actor as
+ * <code><pre>
+ *     $ bin/run-example org.apache.spark.examples.streaming.FeederActor localhost 9999
+ * </pre></code>
+ * and then run the example
+ * <code><pre>
+ *     $ bin/run-example org.apache.spark.examples.streaming.JavaActorWordCount localhost 9999
+ * </pre></code>
+ */
+public class JavaActorWordCount {
+
+  public static void main(String[] args) {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaActorWordCount <hostname> <port>");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    final String host = args[0];
+    final String port = args[1];
+    SparkConf sparkConf = new SparkConf().setAppName("JavaActorWordCount");
+    // Create the context and set the batch size
+    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
+
+    String feederActorURI = "akka.tcp://test@" + host + ":" + port + "/user/FeederActor";
+
+    /*
+     * Following is the use of actorStream to plug in custom actor as receiver
+     *
+     * An important point to note:
+     * Since Actor may exist outside the spark framework, It is thus user's responsibility
+     * to ensure the type safety, i.e type of data received and InputDstream
+     * should be same.
+     *
+     * For example: Both actorStream and JavaSampleActorReceiver are parameterized
+     * to same type to ensure type safety.
+     */
+    JavaDStream<String> lines = jssc.actorStream(
+        Props.create(JavaSampleActorReceiver.class, feederActorURI), "SampleReceiver");
+
+    // compute wordcount
+    lines.flatMap(new FlatMapFunction<String, String>() {
+      @Override
+      public Iterable<String> call(String s) {
+        return Arrays.asList(s.split("\\s+"));
+      }
+    }).mapToPair(new PairFunction<String, String, Integer>() {
+      @Override
+      public Tuple2<String, Integer> call(String s) {
+        return new Tuple2<String, Integer>(s, 1);
+      }
+    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
+      @Override
+      public Integer call(Integer i1, Integer i2) {
+        return i1 + i2;
+      }
+    }).print();
+
+    jssc.start();
+    jssc.awaitTermination();
+  }
+}

http://git-wip-us.apache.org/repos/asf/bahir/blob/cca0efef/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index a47fb7b..88cdc6b 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -26,8 +26,7 @@ import akka.actor.{actorRef2Scala, Actor, ActorRef, Props}
 
 import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.streaming.{Seconds, StreamingContext}
-import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions
-import org.apache.spark.streaming.receiver.ActorHelper
+import org.apache.spark.streaming.receiver.ActorReceiver
 import org.apache.spark.util.AkkaUtils
 
 case class SubscribeReceiver(receiverActor: ActorRef)
@@ -80,7 +79,7 @@ class FeederActor extends Actor {
  * @see [[org.apache.spark.examples.streaming.FeederActor]]
  */
 class SampleActorReceiver[T: ClassTag](urlOfPublisher: String)
-extends Actor with ActorHelper {
+extends ActorReceiver {
 
   lazy private val remotePublisher = context.actorSelection(urlOfPublisher)
 
@@ -127,9 +126,9 @@ object FeederActor {
  *   <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on.
  *
  * To run this example locally, you may run Feeder Actor as
- *    `$ bin/run-example org.apache.spark.examples.streaming.FeederActor 127.0.0.1 9999`
+ *    `$ bin/run-example org.apache.spark.examples.streaming.FeederActor localhost 9999`
  * and then run the example
- *    `$ bin/run-example org.apache.spark.examples.streaming.ActorWordCount 127.0.0.1 9999`
+ *    `$ bin/run-example org.apache.spark.examples.streaming.ActorWordCount localhost 9999`
  */
 object ActorWordCount {
   def main(args: Array[String]) {

http://git-wip-us.apache.org/repos/asf/bahir/blob/cca0efef/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
index 7ec7401..0eabf3d 100644
--- a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
+++ b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
@@ -47,13 +47,12 @@ object ActorSupervisorStrategy {
 
 /**
  * :: DeveloperApi ::
- * A receiver trait to be mixed in with your Actor to gain access to
- * the API for pushing received data into Spark Streaming for being processed.
+ * A base Actor that provides APIs for pushing received data into Spark Streaming for processing.
  *
  * Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html
  *
  * @example {{{
- *  class MyActor extends Actor with ActorHelper{
+ *  class MyActor extends ActorReceiver {
  *      def receive {
  *          case anything: String => store(anything)
  *      }
@@ -69,13 +68,60 @@ object ActorSupervisorStrategy {
  *       should be same.
  */
 @DeveloperApi
-trait ActorHelper extends Logging{
+abstract class ActorReceiver extends Actor {
 
-  self: Actor => // to ensure that this can be added to Actor classes only
+  /** Store an iterator of received data as a data block into Spark's memory. */
+  def store[T](iter: Iterator[T]) {
+    context.parent ! IteratorData(iter)
+  }
+
+  /**
+   * Store the bytes of received data as a data block into Spark's memory. Note
+   * that the data in the ByteBuffer must be serialized using the same serializer
+   * that Spark is configured to use.
+   */
+  def store(bytes: ByteBuffer) {
+    context.parent ! ByteBufferData(bytes)
+  }
+
+  /**
+   * Store a single item of received data to Spark's memory.
+   * These single items will be aggregated together into data blocks before
+   * being pushed into Spark's memory.
+   */
+  def store[T](item: T) {
+    context.parent ! SingleItemData(item)
+  }
+}
+
+/**
+ * :: DeveloperApi ::
+ * A Java UntypedActor that provides APIs for pushing received data into Spark Streaming for
+ * processing.
+ *
+ * Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html
+ *
+ * @example {{{
+ *  class MyActor extends JavaActorReceiver {
+ *      def receive {
+ *          case anything: String => store(anything)
+ *      }
+ *  }
+ *
+ *  // Can be used with an actorStream as follows
+ *  ssc.actorStream[String](Props(new MyActor),"MyActorReceiver")
+ *
+ * }}}
+ *
+ * @note Since Actor may exist outside the spark framework, It is thus user's responsibility
+ *       to ensure the type safety, i.e parametrized type of push block and InputDStream
+ *       should be same.
+ */
+@DeveloperApi
+abstract class JavaActorReceiver extends UntypedActor {
 
   /** Store an iterator of received data as a data block into Spark's memory. */
   def store[T](iter: Iterator[T]) {
-    logDebug("Storing iterator")
     context.parent ! IteratorData(iter)
   }
 
@@ -85,7 +131,6 @@ trait ActorHelper extends Logging{
    * that Spark is configured to use.
    */
   def store(bytes: ByteBuffer) {
-    logDebug("Storing Bytes")
     context.parent ! ByteBufferData(bytes)
   }
 
@@ -95,7 +140,6 @@ trait ActorHelper extends Logging{
    * being pushed into Spark's memory.
    */
   def store[T](item: T) {
-    logDebug("Storing item")
     context.parent ! SingleItemData(item)
   }
 }
@@ -104,7 +148,7 @@ trait ActorHelper extends Logging{
  * :: DeveloperApi ::
  * Statistics for querying the supervisor about state of workers. Used in
  * conjunction with `StreamingContext.actorStream` and
- * [[org.apache.spark.streaming.receiver.ActorHelper]].
+ * [[org.apache.spark.streaming.receiver.ActorReceiver]].
  */
 @DeveloperApi
 case class Statistics(numberOfMsgs: Int,
@@ -137,7 +181,7 @@ private[streaming] case class ByteBufferData(bytes: ByteBuffer) extends ActorRec
  *  context.parent ! Props(new Worker, "Worker")
  * }}}
  */
-private[streaming] class ActorReceiver[T: ClassTag](
+private[streaming] class ActorReceiverSupervisor[T: ClassTag](
     props: Props,
     name: String,
     storageLevel: StorageLevel,

http://git-wip-us.apache.org/repos/asf/bahir/blob/cca0efef/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
index 588e6ba..506ba87 100644
--- a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
+++ b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
@@ -19,12 +19,11 @@ package org.apache.spark.streaming.zeromq
 
 import scala.reflect.ClassTag
 
-import akka.actor.Actor
 import akka.util.ByteString
 import akka.zeromq._
 
 import org.apache.spark.Logging
-import org.apache.spark.streaming.receiver.ActorHelper
+import org.apache.spark.streaming.receiver.ActorReceiver
 
 /**
  * A receiver to subscribe to ZeroMQ stream.
@@ -33,7 +32,7 @@ private[streaming] class ZeroMQReceiver[T: ClassTag](
     publisherUrl: String,
     subscribe: Subscribe,
     bytesToObjects: Seq[ByteString] => Iterator[T])
-  extends Actor with ActorHelper with Logging {
+  extends ActorReceiver with Logging {
 
   override def preStart(): Unit = {
     ZeroMQExtension(context.system)


[22/50] [abbrv] bahir git commit: [STREAMING][DOCS][EXAMPLES] Minor fixes

Posted by lr...@apache.org.
[STREAMING][DOCS][EXAMPLES] Minor fixes

Author: Jacek Laskowski <ja...@japila.pl>

Closes #10603 from jaceklaskowski/streaming-actor-custom-receiver.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/67f0c90c
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/67f0c90c
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/67f0c90c

Branch: refs/heads/master
Commit: 67f0c90ca46fdec7dfbd84b02f153bf45ed1b225
Parents: 289c340
Author: Jacek Laskowski <ja...@japila.pl>
Authored: Thu Jan 7 00:27:13 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Thu Jan 7 00:27:13 2016 -0800

----------------------------------------------------------------------
 .../spark/examples/streaming/akka/ActorWordCount.scala    | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/67f0c90c/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index 8b8dae0..a47fb7b 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -62,15 +62,13 @@ class FeederActor extends Actor {
   }.start()
 
   def receive: Receive = {
-
     case SubscribeReceiver(receiverActor: ActorRef) =>
       println("received subscribe from %s".format(receiverActor.toString))
-    receivers = LinkedList(receiverActor) ++ receivers
+      receivers = LinkedList(receiverActor) ++ receivers
 
     case UnsubscribeReceiver(receiverActor: ActorRef) =>
       println("received unsubscribe from %s".format(receiverActor.toString))
-    receivers = receivers.dropWhile(x => x eq receiverActor)
-
+      receivers = receivers.dropWhile(x => x eq receiverActor)
   }
 }
 
@@ -129,9 +127,9 @@ object FeederActor {
  *   <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on.
  *
  * To run this example locally, you may run Feeder Actor as
- *    `$ bin/run-example org.apache.spark.examples.streaming.FeederActor 127.0.1.1 9999`
+ *    `$ bin/run-example org.apache.spark.examples.streaming.FeederActor 127.0.0.1 9999`
  * and then run the example
- *    `$ bin/run-example org.apache.spark.examples.streaming.ActorWordCount 127.0.1.1 9999`
+ *    `$ bin/run-example org.apache.spark.examples.streaming.ActorWordCount 127.0.0.1 9999`
  */
 object ActorWordCount {
   def main(args: Array[String]) {


[30/50] [abbrv] bahir git commit: [SPARK-6363][BUILD] Make Scala 2.11 the default Scala version

Posted by lr...@apache.org.
[SPARK-6363][BUILD] Make Scala 2.11 the default Scala version

This patch changes Spark's build to make Scala 2.11 the default Scala version. To be clear, this does not mean that Spark will stop supporting Scala 2.10: users will still be able to compile Spark for Scala 2.10 by following the instructions on the "Building Spark" page; however, it does mean that Scala 2.11 will be the default Scala version used by our CI builds (including pull request builds).

The Scala 2.11 compiler is faster than 2.10, so I think we'll be able to look forward to a slight speedup in our CI builds (it looks like it's about 2X faster for the Maven compile-only builds, for instance).

After this patch is merged, I'll update Jenkins to add new compile-only jobs to ensure that Scala 2.10 compilation doesn't break.

Author: Josh Rosen <jo...@databricks.com>

Closes #10608 from JoshRosen/SPARK-6363.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/42d6c06e
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/42d6c06e
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/42d6c06e

Branch: refs/heads/master
Commit: 42d6c06e7fc0409a0c211416171151d3deade997
Parents: 881d1af
Author: Josh Rosen <jo...@databricks.com>
Authored: Sat Jan 30 00:20:28 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Sat Jan 30 00:20:28 2016 -0800

----------------------------------------------------------------------
 streaming-akka/pom.xml    | 4 ++--
 streaming-mqtt/pom.xml    | 4 ++--
 streaming-twitter/pom.xml | 4 ++--
 streaming-zeromq/pom.xml  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/42d6c06e/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 06c8e8a..bbe644e 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -20,13 +20,13 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.10</artifactId>
+    <artifactId>spark-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-streaming-akka_2.10</artifactId>
+  <artifactId>spark-streaming-akka_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-akka</sbt.project.name>
   </properties>

http://git-wip-us.apache.org/repos/asf/bahir/blob/42d6c06e/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index d3a2bf5..d0d9687 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -20,13 +20,13 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.10</artifactId>
+    <artifactId>spark-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-streaming-mqtt_2.10</artifactId>
+  <artifactId>spark-streaming-mqtt_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-mqtt</sbt.project.name>
   </properties>

http://git-wip-us.apache.org/repos/asf/bahir/blob/42d6c06e/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 7b628b0..5d4053a 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -20,13 +20,13 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.10</artifactId>
+    <artifactId>spark-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-streaming-twitter_2.10</artifactId>
+  <artifactId>spark-streaming-twitter_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-twitter</sbt.project.name>
   </properties>

http://git-wip-us.apache.org/repos/asf/bahir/blob/42d6c06e/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 7781aae..f16bc0f 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -20,13 +20,13 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.10</artifactId>
+    <artifactId>spark-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-streaming-zeromq_2.10</artifactId>
+  <artifactId>spark-streaming-zeromq_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-zeromq</sbt.project.name>
   </properties>


[32/50] [abbrv] bahir git commit: [SPARK-13339][DOCS] Clarify commutative / associative operator requirements for reduce, fold

Posted by lr...@apache.org.
[SPARK-13339][DOCS] Clarify commutative / associative operator requirements for reduce, fold

Clarify that reduce functions need to be commutative, and fold functions do not

See https://github.com/apache/spark/pull/11091
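
For illustration, a minimal Scala sketch (assumes a live SparkContext `sc`; not part of the patch) of why `reduce` needs both properties:

    // reduce combines per-partition results in whatever order they arrive,
    // so a merely associative function can produce run-dependent output:
    val words = sc.parallelize(Seq("a", "b", "c", "d"), 4)
    words.reduce(_ + _)  // concatenation is associative but not commutative:
                         // may yield "abcd" on one run and "cdab" on another

    sc.parallelize(1 to 100, 4).reduce(_ + _)  // + is associative and
                                               // commutative: always 5050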

Author: Sean Owen <so...@cloudera.com>

Closes #11217 from srowen/SPARK-13339.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/5d450505
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/5d450505
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/5d450505

Branch: refs/heads/master
Commit: 5d450505cd58c0e831a1a3932408748906853cf6
Parents: 898980d
Author: Sean Owen <so...@cloudera.com>
Authored: Fri Feb 19 10:26:38 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Feb 19 10:26:38 2016 +0000

----------------------------------------------------------------------
 streaming-mqtt/python/dstream.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/5d450505/streaming-mqtt/python/dstream.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/dstream.py b/streaming-mqtt/python/dstream.py
index 86447f5..2056663 100644
--- a/streaming-mqtt/python/dstream.py
+++ b/streaming-mqtt/python/dstream.py
@@ -453,7 +453,7 @@ class DStream(object):
         2. "inverse reduce" the old values that left the window (e.g., subtracting old counts)
         This is more efficient than `invReduceFunc` is None.
 
-        @param reduceFunc:     associative reduce function
+        @param reduceFunc:     associative and commutative reduce function
         @param invReduceFunc:  inverse reduce function of `reduceFunc`
         @param windowDuration: width of the window; must be a multiple of this DStream's
                                batching interval
@@ -524,7 +524,7 @@ class DStream(object):
         `invFunc` can be None, then it will reduce all the RDDs in window, could be slower
         than having `invFunc`.
 
-        @param func:           associative reduce function
+        @param func:           associative and commutative reduce function
         @param invFunc:        inverse function of `reduceFunc`
         @param windowDuration: width of the window; must be a multiple of this DStream's
                               batching interval


[36/50] [abbrv] bahir git commit: [SPARK-13583][CORE][STREAMING] Remove unused imports and add checkstyle rule

Posted by lr...@apache.org.
[SPARK-13583][CORE][STREAMING] Remove unused imports and add checkstyle rule

## What changes were proposed in this pull request?

After SPARK-6990, `dev/lint-java` keeps Java code healthy and helps PR review by saving much time.
This issue aims to remove unused imports from Java/Scala code and adds an `UnusedImports` checkstyle rule to help developers.

## How was this patch tested?
```
./dev/lint-java
./build/sbt compile
```

Author: Dongjoon Hyun <do...@apache.org>

Closes #11438 from dongjoon-hyun/SPARK-13583.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/1d9e8910
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/1d9e8910
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/1d9e8910

Branch: refs/heads/master
Commit: 1d9e89108b86e96429a7f1d835a823a74d299830
Parents: a784af6
Author: Dongjoon Hyun <do...@apache.org>
Authored: Thu Mar 3 10:12:32 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu Mar 3 10:12:32 2016 +0000

----------------------------------------------------------------------
 .../apache/spark/examples/streaming/akka/ActorWordCount.scala    | 1 -
 .../scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala  | 2 +-
 .../spark/examples/streaming/twitter/TwitterAlgebirdCMS.scala    | 1 -
 .../spark/examples/streaming/twitter/TwitterPopularTags.scala    | 1 -
 .../scala/org/apache/spark/streaming/twitter/TwitterUtils.scala  | 4 ++--
 .../apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala | 3 +--
 6 files changed, 4 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/1d9e8910/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index 9f7c7d5..2770b8a 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -19,7 +19,6 @@
 package org.apache.spark.examples.streaming
 
 import scala.collection.mutable.LinkedHashSet
-import scala.reflect.ClassTag
 import scala.util.Random
 
 import akka.actor._

http://git-wip-us.apache.org/repos/asf/bahir/blob/1d9e8910/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
index a6a9249..fdcd18c 100644
--- a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
+++ b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
@@ -23,7 +23,7 @@ import scala.language.postfixOps
 import org.scalatest.BeforeAndAfter
 import org.scalatest.concurrent.Eventually
 
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.SparkFunSuite
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.{Milliseconds, StreamingContext}
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/1d9e8910/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdCMS.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdCMS.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdCMS.scala
index 825c671..5af82e1 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdCMS.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdCMS.scala
@@ -22,7 +22,6 @@ import com.twitter.algebird._
 import com.twitter.algebird.CMSHasherImplicits._
 
 import org.apache.spark.SparkConf
-import org.apache.spark.SparkContext._
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.twitter._

http://git-wip-us.apache.org/repos/asf/bahir/blob/1d9e8910/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
index 49cee1b..c386e39 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
@@ -19,7 +19,6 @@
 package org.apache.spark.examples.streaming
 
 import org.apache.spark.streaming.{Seconds, StreamingContext}
-import org.apache.spark.SparkContext._
 import org.apache.spark.streaming.twitter._
 import org.apache.spark.SparkConf
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/1d9e8910/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
index 3e843e9..9cb0106 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
@@ -22,8 +22,8 @@ import twitter4j.auth.Authorization
 
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.StreamingContext
-import org.apache.spark.streaming.api.java.{JavaDStream, JavaReceiverInputDStream, JavaStreamingContext}
-import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
+import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext}
+import org.apache.spark.streaming.dstream.ReceiverInputDStream
 
 object TwitterUtils {
   /**

http://git-wip-us.apache.org/repos/asf/bahir/blob/1d9e8910/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
index f612e50..99b5617 100644
--- a/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
+++ b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
@@ -25,9 +25,8 @@ import akka.actor.actorRef2Scala
 import akka.util.ByteString
 import akka.zeromq._
 import akka.zeromq.Subscribe
-import com.typesafe.config.ConfigFactory
 
-import org.apache.spark.{SparkConf, TaskContext}
+import org.apache.spark.SparkConf
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.zeromq._
 


[45/50] [abbrv] bahir git commit: Update README.md

Posted by lr...@apache.org.
Update README.md

Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/73a768a8
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/73a768a8
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/73a768a8

Branch: refs/heads/master
Commit: 73a768a837935423ab710ba7d8eef505982d14c5
Parents: e1dfd0e
Author: Christian Kadner <ck...@users.noreply.github.com>
Authored: Mon Jun 6 22:36:46 2016 -0700
Committer: Christian Kadner <ck...@users.noreply.github.com>
Committed: Mon Jun 6 22:36:46 2016 -0700

----------------------------------------------------------------------
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/73a768a8/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index a809a78..000b3fe 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Bahir Repository with single "src" folder
+# Bahir Repository
 
 Initial Bahir repository containing the source for the Spark streaming connectors for akka, mqtt, twitter, zeromq 
 extracted from [Apache Spark revision 8301fad](https://github.com/apache/spark/tree/8301fadd8d269da11e72870b7a889596e3337839)


[13/50] [abbrv] bahir git commit: [SPARK-11713] [PYSPARK] [STREAMING] Initial RDD updateStateByKey for PySpark

Posted by lr...@apache.org.
[SPARK-11713] [PYSPARK] [STREAMING] Initial RDD updateStateByKey for PySpark

Adds the ability to define an initial state RDD for use with updateStateByKey in PySpark. Adds a unit test and changes the stateful_network_wordcount example to use an initial RDD.
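
For comparison, the Scala pair-DStream API already exposes an initial-state overload; a minimal sketch (assumes a StreamingContext `ssc` and a DStream[(String, Int)] named `wordCounts`; values are illustrative):

    import org.apache.spark.HashPartitioner

    // Seed the state so counts continue from a previous run.
    val initialRDD = ssc.sparkContext.parallelize(Seq(("hello", 1), ("world", 1)))

    val updateFunc = (newValues: Seq[Int], state: Option[Int]) =>
      Some(newValues.sum + state.getOrElse(0))

    val stateDStream = wordCounts.updateStateByKey[Int](
      updateFunc,
      new HashPartitioner(ssc.sparkContext.defaultParallelism),
      initialRDD)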

Author: Bryan Cutler <bj...@us.ibm.com>

Closes #10082 from BryanCutler/initial-rdd-updateStateByKey-SPARK-11713.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e30f0c2d
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e30f0c2d
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e30f0c2d

Branch: refs/heads/master
Commit: e30f0c2d3893814ef43a6d81a0e39c1910ce1f2b
Parents: be8b358
Author: Bryan Cutler <bj...@us.ibm.com>
Authored: Thu Dec 10 14:21:15 2015 -0800
Committer: Davies Liu <da...@gmail.com>
Committed: Thu Dec 10 14:21:15 2015 -0800

----------------------------------------------------------------------
 streaming-mqtt/python/dstream.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e30f0c2d/streaming-mqtt/python/dstream.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/dstream.py b/streaming-mqtt/python/dstream.py
index acec850..f61137c 100644
--- a/streaming-mqtt/python/dstream.py
+++ b/streaming-mqtt/python/dstream.py
@@ -568,7 +568,7 @@ class DStream(object):
                                                              self._ssc._jduration(slideDuration))
         return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)
 
-    def updateStateByKey(self, updateFunc, numPartitions=None):
+    def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None):
         """
         Return a new "state" DStream where the state for each key is updated by applying
         the given function on the previous state of the key and the new values of the key.
@@ -579,6 +579,9 @@ class DStream(object):
         if numPartitions is None:
             numPartitions = self._sc.defaultParallelism
 
+        if initialRDD and not isinstance(initialRDD, RDD):
+            initialRDD = self._sc.parallelize(initialRDD)
+
         def reduceFunc(t, a, b):
             if a is None:
                 g = b.groupByKey(numPartitions).mapValues(lambda vs: (list(vs), None))
@@ -590,7 +593,13 @@ class DStream(object):
 
         jreduceFunc = TransformFunction(self._sc, reduceFunc,
                                         self._sc.serializer, self._jrdd_deserializer)
-        dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc)
+        if initialRDD:
+            initialRDD = initialRDD._reserialize(self._jrdd_deserializer)
+            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc,
+                                                       initialRDD._jrdd)
+        else:
+            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc)
+
         return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)
 
 


[50/50] [abbrv] bahir git commit: [BAHIR-2] Initial maven build for Bahir spark extensions

Posted by lr...@apache.org.
[BAHIR-2] Initial maven build for Bahir spark extensions


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/d3254248
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/d3254248
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/d3254248

Branch: refs/heads/master
Commit: d32542483d08d74cfc898454ca3b4f68e3d155bc
Parents: ab21f44
Author: Luciano Resende <lr...@apache.org>
Authored: Tue Jun 7 23:35:17 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 8 00:14:24 2016 -0700

----------------------------------------------------------------------
 dev/checkstyle-suppressions.xml                 |   45 +
 dev/checkstyle.xml                              |  172 ++
 pom.xml                                         | 2047 ++++++++++++++++++
 scalastyle-config.xml                           |  321 +++
 streaming-akka/pom.xml                          |   16 +-
 .../spark/streaming/akka/ActorReceiver.scala    |    3 +-
 streaming-mqtt/pom.xml                          |   16 +-
 .../spark/streaming/mqtt/MQTTTestUtils.scala    |    3 +-
 streaming-twitter/pom.xml                       |   16 +-
 .../streaming/twitter/TwitterInputDStream.scala |    2 +-
 .../streaming/twitter/TwitterStreamSuite.scala  |    3 +-
 streaming-zeromq/pom.xml                        |   18 +-
 .../spark/streaming/zeromq/ZeroMQReceiver.scala |    2 +-
 13 files changed, 2626 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/dev/checkstyle-suppressions.xml
----------------------------------------------------------------------
diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml
new file mode 100644
index 0000000..bfc2e73
--- /dev/null
+++ b/dev/checkstyle-suppressions.xml
@@ -0,0 +1,45 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<!DOCTYPE suppressions PUBLIC
+"-//Puppy Crawl//DTD Suppressions 1.1//EN"
+"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">
+
+<!--
+
+    This file contains suppression rules for Checkstyle checks.
+    Ideally only files that cannot be modified (e.g. third-party code)
+    should be added here. All other violations should be fixed.
+
+-->
+
+<suppressions>
+    <suppress checks=".*"
+              files="core/src/main/java/org/apache/spark/util/collection/TimSort.java"/>
+    <suppress checks=".*"
+              files="sql/core/src/main/java/org/apache/spark/sql/api.java/*"/>
+    <suppress checks="LineLength"
+              files="src/test/java/org/apache/spark/sql/hive/test/Complex.java"/>
+    <suppress checks="LineLength"
+              files="src/main/java/org/apache/spark/examples/JavaLogQuery.java"/>
+    <suppress checks="LineLength"
+              files="src/main/java/org/apache/hive/service/*"/>
+    <suppress checks="MethodName"
+              files="src/main/java/org/apache/hive/service/auth/PasswdAuthenticationProvider.java"/>
+    <suppress checks="NoFinalizer"
+              files="src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java"/>
+</suppressions>

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/dev/checkstyle.xml
----------------------------------------------------------------------
diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml
new file mode 100644
index 0000000..3de6aa9
--- /dev/null
+++ b/dev/checkstyle.xml
@@ -0,0 +1,172 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<!DOCTYPE module PUBLIC
+          "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
+          "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
+
+<!--
+
+    Checkstyle configuration based on the Google coding conventions from:
+
+    -  Google Java Style
+       https://google-styleguide.googlecode.com/svn-history/r130/trunk/javaguide.html
+
+    with Spark-specific changes from:
+
+    https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide
+
+    Checkstyle is very configurable. Be sure to read the documentation at
+    http://checkstyle.sf.net (or in your downloaded distribution).
+
+    Most Checks are configurable, be sure to consult the documentation.
+
+    To completely disable a check, just comment it out or delete it from the file.
+
+    Authors: Max Vetrenko, Ruslan Diachenko, Roman Ivanov.
+
+ -->
+
+<module name = "Checker">
+    <property name="charset" value="UTF-8"/>
+
+    <property name="severity" value="error"/>
+
+    <property name="fileExtensions" value="java, properties, xml"/>
+
+    <module name="SuppressionFilter">
+      <property name="file" value="dev/checkstyle-suppressions.xml"/>
+    </module>
+
+    <!-- Checks for whitespace                               -->
+    <!-- See http://checkstyle.sf.net/config_whitespace.html -->
+    <module name="FileTabCharacter">
+        <property name="eachLine" value="true"/>
+    </module>
+
+    <module name="RegexpSingleline">
+        <!-- \s matches whitespace character, $ matches end of line. -->
+        <property name="format" value="\s+$"/>
+        <property name="message" value="No trailing whitespace allowed."/>
+    </module>
+
+    <module name="NewlineAtEndOfFile"/>
+
+    <module name="TreeWalker">
+        <module name="OuterTypeFilename"/>
+        <module name="IllegalTokenText">
+            <property name="tokens" value="STRING_LITERAL, CHAR_LITERAL"/>
+            <property name="format" value="\\u00(08|09|0(a|A)|0(c|C)|0(d|D)|22|27|5(C|c))|\\(0(10|11|12|14|15|42|47)|134)"/>
+            <property name="message" value="Avoid using corresponding octal or Unicode escape."/>
+        </module>
+        <module name="AvoidEscapedUnicodeCharacters">
+            <property name="allowEscapesForControlCharacters" value="true"/>
+            <property name="allowByTailComment" value="true"/>
+            <property name="allowNonPrintableEscapes" value="true"/>
+        </module>
+        <module name="LineLength">
+            <property name="max" value="100"/>
+            <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
+        </module>
+        <module name="NoLineWrap"/>
+        <module name="EmptyBlock">
+            <property name="option" value="TEXT"/>
+            <property name="tokens" value="LITERAL_TRY, LITERAL_FINALLY, LITERAL_IF, LITERAL_ELSE, LITERAL_SWITCH"/>
+        </module>
+        <module name="NeedBraces">
+            <property name="allowSingleLineStatement" value="true"/>
+        </module>
+        <module name="OneStatementPerLine"/>
+        <module name="ArrayTypeStyle"/>
+        <module name="FallThrough"/>
+        <module name="UpperEll"/>
+        <module name="ModifierOrder"/>
+        <module name="SeparatorWrap">
+            <property name="tokens" value="DOT"/>
+            <property name="option" value="nl"/>
+        </module>
+        <module name="SeparatorWrap">
+            <property name="tokens" value="COMMA"/>
+            <property name="option" value="EOL"/>
+        </module>
+        <module name="PackageName">
+            <property name="format" value="^[a-z]+(\.[a-z][a-z0-9]*)*$"/>
+            <message key="name.invalidPattern"
+             value="Package name ''{0}'' must match pattern ''{1}''."/>
+        </module>
+        <module name="ClassTypeParameterName">
+            <property name="format" value="([A-Z][a-zA-Z0-9]*$)"/>
+            <message key="name.invalidPattern"
+             value="Class type name ''{0}'' must match pattern ''{1}''."/>
+        </module>
+        <module name="MethodTypeParameterName">
+            <property name="format" value="([A-Z][a-zA-Z0-9]*)"/>
+            <message key="name.invalidPattern"
+             value="Method type name ''{0}'' must match pattern ''{1}''."/>
+        </module>
+        <module name="NoFinalizer"/>
+        <module name="GenericWhitespace">
+            <message key="ws.followed"
+             value="GenericWhitespace ''{0}'' is followed by whitespace."/>
+             <message key="ws.preceded"
+             value="GenericWhitespace ''{0}'' is preceded with whitespace."/>
+             <message key="ws.illegalFollow"
+             value="GenericWhitespace ''{0}'' should followed by whitespace."/>
+             <message key="ws.notPreceded"
+             value="GenericWhitespace ''{0}'' is not preceded with whitespace."/>
+        </module>
+        <!-- TODO: 11/09/15 disabled - indentation is currently inconsistent -->
+        <!--
+        <module name="Indentation">
+            <property name="basicOffset" value="4"/>
+            <property name="braceAdjustment" value="0"/>
+            <property name="caseIndent" value="4"/>
+            <property name="throwsIndent" value="4"/>
+            <property name="lineWrappingIndentation" value="4"/>
+            <property name="arrayInitIndent" value="4"/>
+        </module>
+        -->
+        <!-- TODO: 11/09/15 disabled - order is currently wrong in many places -->
+        <!--
+        <module name="ImportOrder">
+            <property name="separated" value="true"/>
+            <property name="ordered" value="true"/>
+            <property name="groups" value="/^javax?\./,scala,*,org.apache.spark"/>
+        </module>
+        -->
+        <module name="MethodParamPad"/>
+        <module name="AnnotationLocation">
+            <property name="tokens" value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF"/>
+        </module>
+        <module name="AnnotationLocation">
+            <property name="tokens" value="VARIABLE_DEF"/>
+            <property name="allowSamelineMultipleAnnotations" value="true"/>
+        </module>
+        <module name="MethodName">
+            <property name="format" value="^[a-z][a-z0-9][a-zA-Z0-9_]*$"/>
+            <message key="name.invalidPattern"
+             value="Method name ''{0}'' must match pattern ''{1}''."/>
+        </module>
+        <module name="EmptyCatchBlock">
+            <property name="exceptionVariableName" value="expected"/>
+        </module>
+        <module name="CommentsIndentation"/>
+        <module name="UnusedImports"/>
+        <module name="RedundantImport"/>
+        <module name="RedundantModifier"/>
+    </module>
+</module>

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..7196f96
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,2047 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>18</version>
+  </parent>
+  <groupId>org.apache.bahir</groupId>
+  <artifactId>bahir-parent_2.11</artifactId>
+  <version>2.0.0-SNAPSHOT</version>
+  <packaging>pom</packaging>
+  <name>Apache Bahir - Parent POM</name>
+  <url>http://bahir.apache.org/</url>
+  <licenses>
+    <license>
+      <name>Apache 2.0 License</name>
+      <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+  <scm>
+    <connection>scm:git:git@github.com:apache/bahir.git</connection>
+    <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
+    <url>scm:git:git@github.com:apache/bahir.git</url>
+    <tag>HEAD</tag>
+  </scm>
+  <issueManagement>
+    <system>JIRA</system>
+    <url>https://issues.apache.org/jira/browse/BAHIR</url>
+  </issueManagement>
+
+  <prerequisites>
+    <maven>${maven.version}</maven>
+  </prerequisites>
+
+  <mailingLists>
+    <mailingList>
+      <name>Dev Mailing List</name>
+      <post>dev@bahir.apache.org</post>
+      <subscribe>dev-subscribe@bahir.apache.org</subscribe>
+      <unsubscribe>dev-unsubscribe@bahir.apache.org</unsubscribe>
+    </mailingList>
+
+    <mailingList>
+      <name>User Mailing List</name>
+      <post>user@bahir.apache.org</post>
+      <subscribe>user-subscribe@bahir.apache.org</subscribe>
+      <unsubscribe>user-unsubscribe@bahir.apache.org</unsubscribe>
+    </mailingList>
+
+    <mailingList>
+      <name>Commits Mailing List</name>
+      <post>commits@bahir.apache.org</post>
+      <subscribe>commits-subscribe@bahir.apache.org</subscribe>
+      <unsubscribe>commits-unsubscribe@bahir.apache.org</unsubscribe>
+    </mailingList>
+  </mailingLists>
+
+  <modules>
+    <module>streaming-akka</module>
+    <module>streaming-mqtt</module>
+    <module>streaming-twitter</module>
+    <module>streaming-zeromq</module>
+  </modules>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+    <java.version>1.7</java.version>
+    <maven.version>3.3.9</maven.version>
+    <spark.version>2.0.0-SNAPSHOT</spark.version>
+    <slf4j.version>1.7.16</slf4j.version>
+    <log4j.version>1.2.17</log4j.version>
+
+    <!-- Streaming Akka connector -->
+    <akka.group>com.typesafe.akka</akka.group>
+    <akka.version>2.3.11</akka.version>
+    <protobuf.version>2.5.0</protobuf.version>
+
+    <hadoop.version>2.2.0</hadoop.version>
+    <yarn.version>${hadoop.version}</yarn.version>
+    <zookeeper.version>3.4.5</zookeeper.version>
+    <curator.version>2.4.0</curator.version>
+    <chill.version>0.7.4</chill.version>
+    <ivy.version>2.4.0</ivy.version>
+    <codahale.metrics.version>3.1.2</codahale.metrics.version>
+    <avro.version>1.7.7</avro.version>
+    <jets3t.version>0.7.1</jets3t.version>
+    <!-- org.apache.httpcomponents/httpclient -->
+    <commons.httpclient.version>4.5.2</commons.httpclient.version>
+    <commons.httpcore.version>4.4.4</commons.httpcore.version>
+    <!-- commons-httpclient/commons-httpclient -->
+    <httpclient.classic.version>3.1</httpclient.classic.version>
+    <commons.math3.version>3.4.1</commons.math3.version>
+    <!-- managed up from 3.2.1 for SPARK-11652 -->
+    <commons.collections.version>3.2.2</commons.collections.version>
+    <scala.version>2.11.8</scala.version>
+    <scala.binary.version>2.11</scala.binary.version>
+    <jline.version>${scala.version}</jline.version>
+    <jline.groupid>org.scala-lang</jline.groupid>
+    <fasterxml.jackson.version>2.5.3</fasterxml.jackson.version>
+    <snappy.version>1.1.2.4</snappy.version>
+    <netlib.java.version>1.1.2</netlib.java.version>
+    <commons-codec.version>1.10</commons-codec.version>
+    <commons-io.version>2.4</commons-io.version>
+    <!-- org.apache.commons/commons-lang -->
+    <commons-lang2.version>2.6</commons-lang2.version>
+    <!-- org.apache.commons/commons-lang3 -->
+    <commons-lang3.version>3.3.2</commons-lang3.version>
+    <jersey.version>2.22.2</jersey.version>
+    <joda.version>2.9.3</joda.version>
+    <jodd.version>3.5.2</jodd.version>
+    <jsr305.version>1.3.9</jsr305.version>
+    <libthrift.version>0.9.2</libthrift.version>
+    <antlr4.version>4.5.2-1</antlr4.version>
+    <jpam.version>1.1</jpam.version>
+    <selenium.version>2.52.0</selenium.version>
+
+    <test.java.home>${java.home}</test.java.home>
+    <test.exclude.tags></test.exclude.tags>
+
+    <!-- Package to use when relocating shaded classes. -->
+    <spark.shade.packageName>org.spark_project</spark.shade.packageName>
+
+    <!-- Modules that copy jars to the build directory should do so under this location. -->
+    <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
+
+    <!-- Allow modules to enable / disable certain build plugins easily. -->
+    <build.testJarPhase>prepare-package</build.testJarPhase>
+    <build.copyDependenciesPhase>none</build.copyDependenciesPhase>
+
+    <!--
+      Dependency scopes that can be overridden by enabling certain profiles. These profiles are
+      declared in the projects that build assemblies.
+
+      For other projects the scope should remain "compile", otherwise transitive dependencies
+      are not available during compilation (e.g. "graphx/" depending on "core/" and needing
+      Hadoop classes on the classpath to compile).
+    -->
+    <flume.deps.scope>compile</flume.deps.scope>
+    <hadoop.deps.scope>compile</hadoop.deps.scope>
+    <hive.deps.scope>compile</hive.deps.scope>
+    <parquet.deps.scope>compile</parquet.deps.scope>
+    <parquet.test.deps.scope>test</parquet.test.deps.scope>
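+    <!--
+      A minimal sketch (not part of this build) of how a profile in an assembly project
+      could override one of the scopes above, assuming a hypothetical profile id of
+      "hadoop-provided":
+
+        <profile>
+          <id>hadoop-provided</id>
+          <properties>
+            <hadoop.deps.scope>provided</hadoop.deps.scope>
+          </properties>
+        </profile>
+
+      Activating it with "mvn -Phadoop-provided package" would keep Hadoop classes out of
+      the assembled jar while leaving them available at compile time.
+    -->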
+
+    <!--
+      Overridable test home, so that individual POM files can be invoked directly without
+      things breaking.
+    -->
+    <spark.test.home>${session.executionRootDirectory}</spark.test.home>
+
+    <PermGen>64m</PermGen>
+    <MaxPermGen>512m</MaxPermGen>
+    <CodeCacheSize>512m</CodeCacheSize>
+  </properties>
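+  <!--
+    Any property above can also be overridden on the command line when a module's POM is
+    invoked directly; the values below are purely illustrative:
+
+      mvn -Dspark.version=2.0.0 -Dtest.java.home=/usr/lib/jvm/java-7 clean test
+  -->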
+
+  <repositories>
+    <repository>
+      <id>central</id>
+      <!-- Keeping this entry first makes Maven try the central repo before any others, which speeds up dependency resolution -->
+      <name>Maven Repository</name>
+      <url>https://repo1.maven.org/maven2</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+  </repositories>
+  <pluginRepositories>
+    <pluginRepository>
+      <id>central</id>
+      <url>https://repo1.maven.org/maven2</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </pluginRepository>
+  </pluginRepositories>
+
+  <dependencies>
+    <!--
+      This is a dummy dependency that is used to trigger the maven-shade plugin so that Spark's
+      published POMs are flattened and do not contain variables. Without this dependency, some
+      subprojects' published POMs would contain variables like ${scala.binary.version} that will
+      be substituted according to the default properties instead of the ones determined by the
+      profiles that were active during publishing, causing the Scala 2.10 build's POMs to have 2.11
+      dependencies due to the incorrect substitutions. By ensuring that maven-shade runs for all
+      subprojects, we eliminate this problem because the substitutions are baked into the final POM.
+
+      For more details, see SPARK-3812 and MNG-2971.
+    -->
+    <dependency>
+      <groupId>org.spark-project.spark</groupId>
+      <artifactId>unused</artifactId>
+      <version>1.0.0</version>
+    </dependency>
+    <!--
+         Needed by the scalatest plugin; declared here so it is available in every child
+         module, since scalatest runs in all of them.
+    -->
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.novocode</groupId>
+      <artifactId>junit-interface</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
+        <version>${spark.version}</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.twitter</groupId>
+        <artifactId>chill_${scala.binary.version}</artifactId>
+        <version>${chill.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.twitter</groupId>
+        <artifactId>chill-java</artifactId>
+        <version>${chill.version}</version>
+      </dependency>
+      <!-- This artifact is a shaded version of ASM 5.0.4. The POM that was used to produce this
+           is at https://github.com/apache/geronimo-xbean/tree/xbean-4.4/xbean-asm5-shaded
+           For context on why we shade ASM, see SPARK-782 and SPARK-6152. -->
+      <dependency>
+        <groupId>org.apache.xbean</groupId>
+        <artifactId>xbean-asm5-shaded</artifactId>
+        <version>4.4</version>
+      </dependency>
+
+      <!-- Shaded deps marked as provided. These are promoted to compile scope
+           in the modules where we want the shaded classes to appear in the
+           associated jar. -->
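+      <!--
+        Illustration only (no module here does this yet): a child module promotes one of
+        these shaded dependencies by re-declaring it without a scope, inheriting the
+        managed version:
+
+          <dependency>
+            <groupId>org.eclipse.jetty</groupId>
+            <artifactId>jetty-util</artifactId>
+          </dependency>
+
+        The default "compile" scope then applies, so the classes are packaged into that
+        module's jar when shading runs.
+      -->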
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-http</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-continuation</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-servlet</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-servlets</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-util</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-security</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-plus</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-server</artifactId>
+        <version>${jetty.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+        <version>14.0.1</version>
+        <scope>provided</scope>
+      </dependency>
+      <!-- End of shaded deps -->
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-lang3</artifactId>
+        <version>${commons-lang3.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-lang</groupId>
+        <artifactId>commons-lang</artifactId>
+        <version>${commons-lang2.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-io</groupId>
+        <artifactId>commons-io</artifactId>
+        <version>${commons-io.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-codec</groupId>
+        <artifactId>commons-codec</artifactId>
+        <version>${commons-codec.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-math3</artifactId>
+        <version>${commons.math3.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-collections</groupId>
+        <artifactId>commons-collections</artifactId>
+        <version>${commons.collections.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.ivy</groupId>
+        <artifactId>ivy</artifactId>
+        <version>${ivy.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.google.code.findbugs</groupId>
+        <artifactId>jsr305</artifactId>
+        <version>${jsr305.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-httpclient</groupId>
+        <artifactId>commons-httpclient</artifactId>
+        <version>${httpclient.classic.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpclient</artifactId>
+        <version>${commons.httpclient.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpmime</artifactId>
+        <version>${commons.httpclient.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpcore</artifactId>
+        <version>${commons.httpcore.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.seleniumhq.selenium</groupId>
+        <artifactId>selenium-java</artifactId>
+        <version>${selenium.version}</version>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.seleniumhq.selenium</groupId>
+        <artifactId>selenium-htmlunit-driver</artifactId>
+        <version>${selenium.version}</version>
+        <scope>test</scope>
+      </dependency>
+      <!-- Added for selenium only; the version should match the one selenium depends on: -->
+      <dependency>
+        <groupId>xml-apis</groupId>
+        <artifactId>xml-apis</artifactId>
+        <version>1.4.01</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-api</artifactId>
+        <version>${slf4j.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-log4j12</artifactId>
+        <version>${slf4j.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>jul-to-slf4j</artifactId>
+        <version>${slf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>jcl-over-slf4j</artifactId>
+        <version>${slf4j.version}</version>
+        <!-- <scope>runtime</scope> --> <!-- more correct, but scalac 2.10.3 doesn't like it -->
+      </dependency>
+      <dependency>
+        <groupId>log4j</groupId>
+        <artifactId>log4j</artifactId>
+        <version>${log4j.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.ning</groupId>
+        <artifactId>compress-lzf</artifactId>
+        <version>1.0.3</version>
+      </dependency>
+      <dependency>
+        <groupId>org.xerial.snappy</groupId>
+        <artifactId>snappy-java</artifactId>
+        <version>${snappy.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>net.jpountz.lz4</groupId>
+        <artifactId>lz4</artifactId>
+        <version>1.3.0</version>
+      </dependency>
+      <dependency>
+        <groupId>com.clearspring.analytics</groupId>
+        <artifactId>stream</artifactId>
+        <version>2.7.0</version>
+        <exclusions>
+          <!-- Only HyperLogLogPlus is used, which doesn't depend on fastutil -->
+          <exclusion>
+            <groupId>it.unimi.dsi</groupId>
+            <artifactId>fastutil</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <!-- In theory we need not directly depend on protobuf since Spark does not directly
+           use it. However, when building with Hadoop/YARN 2.2 Maven doesn't correctly bump
+           the protobuf version up from the one Mesos gives. For now we include this variable
+           to explicitly bump the version when building with YARN. It would be nice to figure
+           out why Maven can't resolve this correctly (like SBT does). -->
+      <dependency>
+        <groupId>com.google.protobuf</groupId>
+        <artifactId>protobuf-java</artifactId>
+        <version>${protobuf.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
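+      <!--
+        Hedged usage sketch: the protobuf version can be bumped at invocation time to match
+        a given YARN build, e.g. "mvn -Dhadoop.version=2.6.0 -Dprotobuf.version=2.5.0 package"
+        (versions shown are illustrative, not tested recommendations).
+      -->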
+      <dependency>
+        <groupId>${akka.group}</groupId>
+        <artifactId>akka-actor_${scala.binary.version}</artifactId>
+        <version>${akka.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>${akka.group}</groupId>
+        <artifactId>akka-remote_${scala.binary.version}</artifactId>
+        <version>${akka.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>${akka.group}</groupId>
+        <artifactId>akka-slf4j_${scala.binary.version}</artifactId>
+        <version>${akka.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>${akka.group}</groupId>
+        <artifactId>akka-testkit_${scala.binary.version}</artifactId>
+        <version>${akka.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>${akka.group}</groupId>
+        <artifactId>akka-zeromq_${scala.binary.version}</artifactId>
+        <version>${akka.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>${akka.group}</groupId>
+            <artifactId>akka-actor_${scala.binary.version}</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.roaringbitmap</groupId>
+        <artifactId>RoaringBitmap</artifactId>
+        <version>0.5.11</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-net</groupId>
+        <artifactId>commons-net</artifactId>
+        <version>2.2</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-all</artifactId>
+        <version>4.0.29.Final</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty</artifactId>
+        <version>3.8.0.Final</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.derby</groupId>
+        <artifactId>derby</artifactId>
+        <version>${derby.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-core</artifactId>
+        <version>${codahale.metrics.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-jvm</artifactId>
+        <version>${codahale.metrics.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-json</artifactId>
+        <version>${codahale.metrics.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-ganglia</artifactId>
+        <version>${codahale.metrics.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-graphite</artifactId>
+        <version>${codahale.metrics.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-databind</artifactId>
+        <version>${fasterxml.jackson.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-annotations</artifactId>
+        <version>${fasterxml.jackson.version}</version>
+      </dependency>
+      <!-- Guava is excluded because of SPARK-6149.  The Guava version referenced in this module is
+           15.0, which causes runtime incompatibility issues. -->
+      <dependency>
+        <groupId>com.fasterxml.jackson.module</groupId>
+        <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
+        <version>${fasterxml.jackson.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.module</groupId>
+        <artifactId>jackson-module-jaxb-annotations</artifactId>
+        <version>${fasterxml.jackson.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.glassfish.jersey.core</groupId>
+        <artifactId>jersey-server</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.glassfish.jersey.core</groupId>
+        <artifactId>jersey-common</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.glassfish.jersey.core</groupId>
+        <artifactId>jersey-client</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.glassfish.jersey.containers</groupId>
+        <artifactId>jersey-container-servlet</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.glassfish.jersey.containers</groupId>
+        <artifactId>jersey-container-servlet-core</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.glassfish.jersey</groupId>
+        <artifactId>jersey-client</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>javax.ws.rs</groupId>
+        <artifactId>javax.ws.rs-api</artifactId>
+        <version>2.0.1</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scalanlp</groupId>
+        <artifactId>breeze_${scala.binary.version}</artifactId>
+        <version>0.11.2</version>
+        <exclusions>
+          <!-- This is included as a compile-scoped dependency by jtransforms, which is
+               a dependency of breeze. -->
+          <exclusion>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.json4s</groupId>
+        <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
+        <version>3.2.11</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-compiler</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-reflect</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-library</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-actors</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scalap</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest_${scala.binary.version}</artifactId>
+        <version>2.2.6</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.mockito</groupId>
+        <artifactId>mockito-core</artifactId>
+        <version>1.10.19</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.scalacheck</groupId>
+        <artifactId>scalacheck_${scala.binary.version}</artifactId>
+        <version>1.12.5</version> <!-- 1.13.0 appears incompatible with scalatest 2.2.6 -->
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>junit</groupId>
+        <artifactId>junit</artifactId>
+        <version>4.12</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.hamcrest</groupId>
+        <artifactId>hamcrest-core</artifactId>
+        <version>1.3</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.hamcrest</groupId>
+        <artifactId>hamcrest-library</artifactId>
+        <version>1.3</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.novocode</groupId>
+        <artifactId>junit-interface</artifactId>
+        <version>0.11</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.spotify</groupId>
+        <artifactId>docker-client</artifactId>
+        <classifier>shaded</classifier>
+        <version>3.6.6</version>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <artifactId>guava</artifactId>
+            <groupId>com.google.guava</groupId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>mysql</groupId>
+        <artifactId>mysql-connector-java</artifactId>
+        <version>5.1.38</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.postgresql</groupId>
+        <artifactId>postgresql</artifactId>
+        <version>9.4.1207.jre7</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.curator</groupId>
+        <artifactId>curator-recipes</artifactId>
+        <version>${curator.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>jline</groupId>
+            <artifactId>jline</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.curator</groupId>
+        <artifactId>curator-client</artifactId>
+        <version>${curator.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.curator</groupId>
+        <artifactId>curator-framework</artifactId>
+        <version>${curator.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.curator</groupId>
+        <artifactId>curator-test</artifactId>
+        <version>${curator.version}</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-client</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.codehaus.jackson</groupId>
+            <artifactId>jackson-mapper-asl</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-all</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>servlet-api-2.5</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.contribs</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro</artifactId>
+        <version>${avro.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-ipc</artifactId>
+        <version>${avro.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty-util</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.velocity</groupId>
+            <artifactId>velocity</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <!-- avro-mapred for some reason depends on avro-ipc's test jar, so undo that. -->
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-ipc</artifactId>
+        <classifier>tests</classifier>
+        <version>${avro.version}</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-mapred</artifactId>
+        <version>${avro.version}</version>
+        <classifier>${avro.mapred.classifier}</classifier>
+        <scope>${hive.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty-util</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.velocity</groupId>
+            <artifactId>velocity</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <!-- See SPARK-1556 for info on this dependency: -->
+      <dependency>
+        <groupId>net.java.dev.jets3t</groupId>
+        <artifactId>jets3t</artifactId>
+        <version>${jets3t.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-api</artifactId>
+        <version>${yarn.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.contribs</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-common</artifactId>
+        <version>${yarn.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.contribs</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-server-tests</artifactId>
+        <version>${yarn.version}</version>
+        <classifier>tests</classifier>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.contribs</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-server-web-proxy</artifactId>
+        <version>${yarn.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.contribs</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-client</artifactId>
+        <version>${yarn.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey.contribs</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.zookeeper</groupId>
+        <artifactId>zookeeper</artifactId>
+        <version>${zookeeper.version}</version>
+        <scope>${hadoop.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+
+      <dependency>
+        <groupId>net.sf.jpam</groupId>
+        <artifactId>jpam</artifactId>
+        <scope>${hive.deps.scope}</scope>
+        <version>${jpam.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.parquet</groupId>
+        <artifactId>parquet-column</artifactId>
+        <version>${parquet.version}</version>
+        <scope>${parquet.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.parquet</groupId>
+        <artifactId>parquet-hadoop</artifactId>
+        <version>${parquet.version}</version>
+        <scope>${parquet.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.parquet</groupId>
+        <artifactId>parquet-avro</artifactId>
+        <version>${parquet.version}</version>
+        <scope>${parquet.test.deps.scope}</scope>
+      </dependency>
+      <dependency>
+        <groupId>com.twitter</groupId>
+        <artifactId>parquet-hadoop-bundle</artifactId>
+        <version>${hive.parquet.version}</version>
+        <scope>compile</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.flume</groupId>
+        <artifactId>flume-ng-core</artifactId>
+        <version>${flume.version}</version>
+        <scope>${flume.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.flume</groupId>
+            <artifactId>flume-ng-auth</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.thrift</groupId>
+            <artifactId>libthrift</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.flume</groupId>
+        <artifactId>flume-ng-sdk</artifactId>
+        <version>${flume.version}</version>
+        <scope>${flume.deps.scope}</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.thrift</groupId>
+            <artifactId>libthrift</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.calcite</groupId>
+        <artifactId>calcite-core</artifactId>
+        <version>${calcite.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-annotations</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.google.code.findbugs</groupId>
+            <artifactId>jsr305</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.codehaus.janino</groupId>
+            <artifactId>janino</artifactId>
+          </exclusion>
+          <!-- hsqldb interferes with using derby as the default database
+            in Hive's use of datanucleus.
+          -->
+          <exclusion>
+            <groupId>org.hsqldb</groupId>
+            <artifactId>hsqldb</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.pentaho</groupId>
+            <artifactId>pentaho-aggdesigner-algorithm</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>joda-time</groupId>
+        <artifactId>joda-time</artifactId>
+        <version>${joda.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.jodd</groupId>
+        <artifactId>jodd-core</artifactId>
+        <version>${jodd.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.thrift</groupId>
+        <artifactId>libthrift</artifactId>
+        <version>${libthrift.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.thrift</groupId>
+        <artifactId>libfb303</artifactId>
+        <version>${libthrift.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.antlr</groupId>
+        <artifactId>antlr4-runtime</artifactId>
+        <version>${antlr4.version}</version>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-enforcer-plugin</artifactId>
+          <version>1.4.1</version>
+          <executions>
+            <execution>
+              <id>enforce-versions</id>
+              <goals>
+                <goal>enforce</goal>
+              </goals>
+              <configuration>
+                <rules>
+                  <requireMavenVersion>
+                    <version>${maven.version}</version>
+                  </requireMavenVersion>
+                  <requireJavaVersion>
+                    <version>${java.version}</version>
+                  </requireJavaVersion>
+                  <bannedDependencies>
+                    <excludes>
+                      <!--
+                        Akka depends on io.netty:netty, which puts classes under the org.jboss.netty
+                        package. This conflicts with the classes in the org.jboss.netty:netty
+                        artifact, so we have to ban that artifact here. In Netty 4.x, the classes
+                        are under the io.netty package, so it's fine for us to depend on both
+                        io.netty:netty and io.netty:netty-all.
+                      -->
+                      <exclude>org.jboss.netty</exclude>
+                      <exclude>org.codehaus.groovy</exclude>
+                    </excludes>
+                    <searchTransitive>true</searchTransitive>
+                  </bannedDependencies>
+                </rules>
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>build-helper-maven-plugin</artifactId>
+          <version>1.10</version>
+        </plugin>
+        <plugin>
+          <groupId>net.alchim31.maven</groupId>
+          <artifactId>scala-maven-plugin</artifactId>
+          <version>3.2.2</version>
+          <executions>
+            <execution>
+              <id>eclipse-add-source</id>
+              <goals>
+                <goal>add-source</goal>
+              </goals>
+            </execution>
+            <execution>
+              <id>scala-compile-first</id>
+              <phase>process-resources</phase>
+              <goals>
+                <goal>compile</goal>
+              </goals>
+            </execution>
+            <execution>
+              <id>scala-test-compile-first</id>
+              <phase>process-test-resources</phase>
+              <goals>
+                <goal>testCompile</goal>
+              </goals>
+            </execution>
+            <execution>
+              <id>attach-scaladocs</id>
+              <phase>verify</phase>
+              <goals>
+                <goal>doc-jar</goal>
+              </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <scalaVersion>${scala.version}</scalaVersion>
+            <recompileMode>incremental</recompileMode>
+            <useZincServer>true</useZincServer>
+            <args>
+              <arg>-unchecked</arg>
+              <arg>-deprecation</arg>
+              <arg>-feature</arg>
+            </args>
+            <jvmArgs>
+              <jvmArg>-Xms1024m</jvmArg>
+              <jvmArg>-Xmx1024m</jvmArg>
+              <jvmArg>-XX:PermSize=${PermGen}</jvmArg>
+              <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
+              <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
+            </jvmArgs>
+            <javacArgs>
+              <javacArg>-source</javacArg>
+              <javacArg>${java.version}</javacArg>
+              <javacArg>-target</javacArg>
+              <javacArg>${java.version}</javacArg>
+              <javacArg>-Xlint:all,-serial,-path</javacArg>
+            </javacArgs>
+          </configuration>
+        </plugin>
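+        <!--
+          Sketch, assuming a standalone Typesafe zinc installation: with useZincServer
+          enabled above, starting a zinc server before compiling lets repeated builds reuse
+          a warm incremental compiler:
+
+            zinc -start
+            mvn compile
+
+          If no zinc server is listening, the plugin should fall back to its own incremental
+          compilation.
+        -->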
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>3.5.1</version>
+          <configuration>
+            <source>${java.version}</source>
+            <target>${java.version}</target>
+            <encoding>UTF-8</encoding>
+            <maxmem>1024m</maxmem>
+            <fork>true</fork>
+            <compilerArgs>
+              <arg>-Xlint:all,-serial,-path</arg>
+            </compilerArgs>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.antlr</groupId>
+          <artifactId>antlr3-maven-plugin</artifactId>
+          <version>3.5.2</version>
+        </plugin>
+        <plugin>
+          <groupId>org.antlr</groupId>
+          <artifactId>antlr4-maven-plugin</artifactId>
+          <version>${antlr4.version}</version>
+        </plugin>
+        <!-- Surefire runs all Java tests -->
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <version>2.19.1</version>
+          <!-- Note config is repeated in scalatest config -->
+          <configuration>
+            <includes>
+              <include>**/Test*.java</include>
+              <include>**/*Test.java</include>
+              <include>**/*TestCase.java</include>
+              <include>**/*Suite.java</include>
+            </includes>
+            <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+            <argLine>-Xmx3g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
+            <environmentVariables>
+              <!--
+                Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
+                launched by the tests have access to the correct test-time classpath.
+              -->
+              <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
+              <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+              <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+              <SPARK_TESTING>1</SPARK_TESTING>
+              <JAVA_HOME>${test.java.home}</JAVA_HOME>
+            </environmentVariables>
+            <systemProperties>
+              <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+              <derby.system.durability>test</derby.system.durability>
+              <java.awt.headless>true</java.awt.headless>
+              <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
+              <spark.test.home>${spark.test.home}</spark.test.home>
+              <spark.testing>1</spark.testing>
+              <spark.master.rest.enabled>false</spark.master.rest.enabled>
+              <spark.ui.enabled>false</spark.ui.enabled>
+              <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
+              <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
+              <!-- Needed by sql/hive tests. -->
+              <test.src.tables>src</test.src.tables>
+            </systemProperties>
+            <failIfNoTests>false</failIfNoTests>
+            <excludedGroups>${test.exclude.tags}</excludedGroups>
+          </configuration>
+          <executions>
+            <execution>
+              <id>test</id>
+              <goals>
+                <goal>test</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
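+        <!--
+          Usage sketch (the category class named here is hypothetical): both Surefire above
+          and the scalatest plugin below honor test.exclude.tags, so tagged tests can be
+          skipped via
+
+            mvn test -Dtest.exclude.tags=org.example.tags.SlowTest
+        -->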
+        <!-- Scalatest runs all Scala tests -->
+        <plugin>
+          <groupId>org.scalatest</groupId>
+          <artifactId>scalatest-maven-plugin</artifactId>
+          <version>1.0</version>
+          <!-- Note config is repeated in surefire config -->
+          <configuration>
+            <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+            <junitxml>.</junitxml>
+            <filereports>SparkTestSuite.txt</filereports>
+            <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
+            <stderr/>
+            <environmentVariables>
+              <!--
+                Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
+                launched by the tests have access to the correct test-time classpath.
+              -->
+              <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
+              <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
+              <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+              <SPARK_TESTING>1</SPARK_TESTING>
+              <JAVA_HOME>${test.java.home}</JAVA_HOME>
+            </environmentVariables>
+            <systemProperties>
+              <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+              <derby.system.durability>test</derby.system.durability>
+              <java.awt.headless>true</java.awt.headless>
+              <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
+              <spark.test.home>${spark.test.home}</spark.test.home>
+              <spark.testing>1</spark.testing>
+              <spark.ui.enabled>false</spark.ui.enabled>
+              <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
+              <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
+              <!-- Needed by sql/hive tests. -->
+              <test.src.tables>__not_used__</test.src.tables>
+            </systemProperties>
+            <tagsToExclude>${test.exclude.tags}</tagsToExclude>
+          </configuration>
+          <executions>
+            <execution>
+              <id>test</id>
+              <goals>
+                <goal>test</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>2.6</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-antrun-plugin</artifactId>
+          <version>1.8</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-source-plugin</artifactId>
+          <version>2.4</version>
+          <configuration>
+            <attach>true</attach>
+          </configuration>
+          <executions>
+            <execution>
+              <id>create-source-jar</id>
+              <goals>
+                <goal>jar-no-fork</goal>
+                <goal>test-jar-no-fork</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-clean-plugin</artifactId>
+          <version>3.0.0</version>
+          <configuration>
+            <filesets>
+              <fileset>
+                <directory>work</directory>
+              </fileset>
+              <fileset>
+                <directory>checkpoint</directory>
+              </fileset>
+              <fileset>
+                <directory>lib_managed</directory>
+              </fileset>
+            </filesets>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-javadoc-plugin</artifactId>
+          <version>2.10.3</version>
+        </plugin>
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>exec-maven-plugin</artifactId>
+          <version>1.4.0</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-assembly-plugin</artifactId>
+          <version>2.6</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-shade-plugin</artifactId>
+          <version>2.4.3</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-install-plugin</artifactId>
+          <version>2.5.2</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-deploy-plugin</artifactId>
+          <version>2.8.2</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-dependency-plugin</artifactId>
+          <executions>
+            <execution>
+              <id>default-cli</id>
+              <goals>
+                 <goal>build-classpath</goal>
+              </goals>
+              <configuration>
+                <!-- This includes dependencies with 'runtime' and 'compile' scopes;
+                     see the docs for includeScope for more details -->
+                <includeScope>runtime</includeScope>
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
+        <!-- This plugin's configuration is used to store Eclipse m2e settings only. -->
+        <!-- It has no influence on the Maven build itself. -->
+        <plugin>
+          <groupId>org.eclipse.m2e</groupId>
+          <artifactId>lifecycle-mapping</artifactId>
+          <version>1.0.0</version>
+          <configuration>
+            <lifecycleMappingMetadata>
+              <pluginExecutions>
+                <pluginExecution>
+                  <pluginExecutionFilter>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-dependency-plugin</artifactId>
+                    <versionRange>[2.8,)</versionRange>
+                    <goals>
+                      <goal>build-classpath</goal>
+                    </goals>
+                  </pluginExecutionFilter>
+                  <action>
+                    <ignore></ignore>
+                  </action>
+                </pluginExecution>
+                <pluginExecution>
+                  <pluginExecutionFilter>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-jar-plugin</artifactId>
+                    <versionRange>[2.6,)</versionRange>
+                    <goals>
+                      <goal>test-jar</goal>
+                    </goals>
+                  </pluginExecutionFilter>
+                  <action>
+                    <ignore></ignore>
+                  </action>
+                </pluginExecution>
+                <pluginExecution>
+                  <pluginExecutionFilter>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-antrun-plugin</artifactId>
+                    <versionRange>[1.8,)</versionRange>
+                    <goals>
+                      <goal>run</goal>
+                    </goals>
+                  </pluginExecutionFilter>
+                  <action>
+                    <ignore></ignore>
+                  </action>
+                </pluginExecution>
+              </pluginExecutions>
+            </lifecycleMappingMetadata>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+
+    <plugins>
+      <!-- This plugin dumps the test classpath into a file -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.10</version>
+        <executions>
+          <execution>
+            <id>generate-test-classpath</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>build-classpath</goal>
+            </goals>
+            <configuration>
+              <includeScope>test</includeScope>
+              <outputProperty>test_classpath</outputProperty>
+            </configuration>
+          </execution>
+          <execution>
+            <id>copy-module-dependencies</id>
+            <phase>${build.copyDependenciesPhase}</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <includeScope>runtime</includeScope>
+              <outputDirectory>${jars.target.dir}</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!--
+        The shade plug-in is used here to create effective POMs (see SPARK-3812), and also to
+        remove references to the shaded libraries from artifacts published by Spark.
+      -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <configuration>
+          <shadedArtifactAttached>false</shadedArtifactAttached>
+          <artifactSet>
+            <includes>
+              <include>org.spark-project.spark:unused</include>
+              <include>org.eclipse.jetty:jetty-io</include>
+              <include>org.eclipse.jetty:jetty-http</include>
+              <include>org.eclipse.jetty:jetty-continuation</include>
+              <include>org.eclipse.jetty:jetty-servlet</include>
+              <include>org.eclipse.jetty:jetty-servlets</include>
+              <include>org.eclipse.jetty:jetty-plus</include>
+              <include>org.eclipse.jetty:jetty-security</include>
+              <include>org.eclipse.jetty:jetty-util</include>
+              <include>org.eclipse.jetty:jetty-server</include>
+              <include>com.google.guava:guava</include>
+            </includes>
+          </artifactSet>
+          <relocations>
+            <relocation>
+              <pattern>org.eclipse.jetty</pattern>
+              <shadedPattern>${spark.shade.packageName}.jetty</shadedPattern>
+              <includes>
+                <include>org.eclipse.jetty.**</include>
+              </includes>
+            </relocation>
+            <relocation>
+              <pattern>com.google.common</pattern>
+              <shadedPattern>${spark.shade.packageName}.guava</shadedPattern>
+            </relocation>
+          </relocations>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>net.alchim31.maven</groupId>
+        <artifactId>scala-maven-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.scalastyle</groupId>
+        <artifactId>scalastyle-maven-plugin</artifactId>
+        <version>0.8.0</version>
+        <configuration>
+          <verbose>false</verbose>
+          <failOnViolation>true</failOnViolation>
+          <includeTestSourceDirectory>false</includeTestSourceDirectory>
+          <failOnWarning>false</failOnWarning>
+          <sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
+          <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
+          <configLocation>scalastyle-config.xml</configLocation>
+          <outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
+          <inputEncoding>${project.build.sourceEncoding}</inputEncoding>
+          <outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.17</version>
+        <configuration>
+          <verbose>false</verbose>
+          <failOnViolation>false</failOnViolation>
+          <includeTestSourceDirectory>true</includeTestSourceDirectory>
+          <failOnWarning>false</failOnWarning>
+          <sourceDirectories>${basedir}/src/main/java,${basedir}/src/main/scala</sourceDirectories>
+          <testSourceDirectory>${basedir}/src/test/java</testSourceDirectory>
+          <configLocation>dev/checkstyle.xml</configLocation>
+          <outputFile>${basedir}/target/checkstyle-output.xml</outputFile>
+          <inputEncoding>${project.build.sourceEncoding}</inputEncoding>
+          <outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>create-tmp-dir</id>
+            <phase>generate-test-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <mkdir dir="${project.build.directory}/tmp" />
+              </target>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Enable surefire and scalatest in all children, in one place: -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest-maven-plugin</artifactId>
+      </plugin>
+      <!-- Build test-jar's for all projects, since some projects depend on tests from others -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>prepare-test-jar</id>
+            <phase>${build.testJarPhase}</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+            <configuration>
+              <excludes>
+                <exclude>log4j.properties</exclude>
+              </excludes>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+
+    <!-- Ganglia integration is not included by default due to LGPL-licensed code -->
+    <profile>
+      <id>spark-ganglia-lgpl</id>
+      <modules>
+        <module>spark-ganglia-lgpl</module>
+      </modules>
+    </profile>
+
+    <profile>
+      <id>doclint-java8-disable</id>
+      <activation>
+        <jdk>[1.8,)</jdk>
+      </activation>
+
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-javadoc-plugin</artifactId>
+            <configuration>
+              <additionalparam>-Xdoclint:all -Xdoclint:-missing</additionalparam>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
+    <!-- A series of build profiles where customizations for particular Hadoop releases can be made -->
+
+    <!-- Hadoop-a.b.c dependencies can be found at
+    http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
+    -->
+
+    <profile>
+      <id>hadoop-2.2</id>
+    <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
+    </profile>
+
+    <profile>
+      <id>hadoop-2.3</id>
+      <properties>
+        <hadoop.version>2.3.0</hadoop.version>
+        <jets3t.version>0.9.3</jets3t.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>hadoop-2.4</id>
+      <properties>
+        <hadoop.version>2.4.0</hadoop.version>
+        <jets3t.version>0.9.3</jets3t.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>hadoop-2.6</id>
+      <properties>
+        <hadoop.version>2.6.0</hadoop.version>
+        <jets3t.version>0.9.3</jets3t.version>
+        <zookeeper.version>3.4.6</zookeeper.version>
+        <curator.version>2.6.0</curator.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>hadoop-2.7</id>
+      <properties>
+        <hadoop.version>2.7.0</hadoop.version>
+        <jets3t.version>0.9.3</jets3t.version>
+        <zookeeper.version>3.4.6</zookeeper.version>
+        <curator.version>2.6.0</curator.version>
+      </properties>
+    </profile>
+
+
+    <profile>
+      <id>scala-2.10</id>
+      <activation>
+        <property><name>scala-2.10</name></property>
+      </activation>
+      <properties>
+        <scala.version>2.10.6</scala.version>
+        <scala.binary.version>2.10</scala.binary.version>
+        <jline.version>${scala.version}</jline.version>
+        <jline.groupid>org.scala-lang</jline.groupid>
+      </properties>
+      <dependencyManagement>
+        <dependencies>
+          <dependency>
+            <groupId>${jline.groupid}</groupId>
+            <artifactId>jline</artifactId>
+            <version>${jline.version}</version>
+          </dependency>
+        </dependencies>
+      </dependencyManagement>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-enforcer-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>enforce-versions</id>
+                <goals>
+                  <goal>enforce</goal>
+                </goals>
+                <configuration>
+                  <rules>
+                    <bannedDependencies>
+                      <excludes combine.children="append">
+                        <exclude>*:*_2.11</exclude>
+                      </excludes>
+                    </bannedDependencies>
+                  </rules>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
+    <profile>
+      <id>test-java-home</id>
+      <activation>
+        <property><name>env.JAVA_HOME</name></property>
+      </activation>
+      <properties>
+        <test.java.home>${env.JAVA_HOME}</test.java.home>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>scala-2.11</id>
+      <activation>
+        <property><name>!scala-2.10</name></property>
+      </activation>
+      <properties>
+        <scala.version>2.11.8</scala.version>
+        <scala.binary.version>2.11</scala.binary.version>
+      </properties>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-enforcer-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>enforce-versions</id>
+                <goals>
+                  <goal>enforce</goal>
+                </goals>
+                <configuration>
+                  <rules>
+                    <bannedDependencies>
+                      <excludes combine.children="append">
+                        <exclude>*:*_2.10</exclude>
+                      </excludes>
+                    </bannedDependencies>
+                  </rules>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
+    <!--
+      These empty profiles are available in some sub-modules. Declare them here so that
+      maven does not complain when they're provided on the command line for a sub-module
+      that does not have them.
+    -->
+    <profile>
+      <id>flume-provided</id>
+    </profile>
+    <profile>
+      <id>hadoop-provided</id>
+    </profile>
+    <profile>
+      <id>hive-provided</id>
+    </profile>
+    <profile>
+      <id>parquet-provided</id>
+    </profile>
+    <profile>
+      <id>sparkr</id>
+    </profile>
+  </profiles>
+</project>


[37/50] [abbrv] bahir git commit: [MINOR] Fix typos in comments and testcase name of code

Posted by lr...@apache.org.
[MINOR] Fix typos in comments and testcase name of code

## What changes were proposed in this pull request?

This PR fixes typos in code comments and in a test case name.

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <do...@apache.org>

Closes #11481 from dongjoon-hyun/minor_fix_typos_in_code.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/9bff9b31
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/9bff9b31
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/9bff9b31

Branch: refs/heads/master
Commit: 9bff9b31bbd70e36363c0e535292db300499947b
Parents: 1d9e891
Author: Dongjoon Hyun <do...@apache.org>
Authored: Thu Mar 3 22:42:12 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu Mar 3 22:42:12 2016 +0000

----------------------------------------------------------------------
 .../spark/examples/streaming/twitter/TwitterPopularTags.scala      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/9bff9b31/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
index c386e39..5b69963 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterPopularTags.scala
@@ -44,7 +44,7 @@ object TwitterPopularTags {
     val filters = args.takeRight(args.length - 4)
 
     // Set the system properties so that Twitter4j library used by twitter stream
-    // can use them to generat OAuth credentials
+    // can use them to generate OAuth credentials
     System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
     System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
     System.setProperty("twitter4j.oauth.accessToken", accessToken)


[11/50] [abbrv] bahir git commit: [SPARK-11812][PYSPARK] invFunc=None works properly with python's reduceByKeyAndWindow

Posted by lr...@apache.org.
[SPARK-11812][PYSPARK] invFunc=None works properly with python's reduceByKeyAndWindow

invFunc is optional and can be None. Instead of invFunc (the parameter), invReduceFunc (a local function) was checked for truthiness, that is, for being not None. A local function is never None,
so the case of invFunc=None (a common one, since an inverse reduction is often not defined) was treated incorrectly, resulting in loss of data.

In addition, the docstring used the wrong parameter names; that is also fixed.
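
To make the failure mode concrete, here is a minimal standalone sketch of the truthiness bug; the function below is illustrative only, not the actual DStream code:

```
def make_stream_op(func, invFunc=None):
    """Mimics the buggy vs. fixed check; names mirror the PySpark method."""
    def invReduceFunc(a, b):                   # local wrapper, defined unconditionally
        return invFunc(a, b)

    buggy_uses_inverse = bool(invReduceFunc)   # always True: a function object is truthy
    fixed_uses_inverse = invFunc is not None   # respects invFunc=None
    return buggy_uses_inverse, fixed_uses_inverse

# With no inverse function supplied, the buggy check still takes the inverse path:
assert make_stream_op(lambda a, b: a + b) == (True, False)
```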

Author: David Tolpin <da...@gmail.com>

Closes #9775 from dtolpin/master.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/0845b564
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/0845b564
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/0845b564

Branch: refs/heads/master
Commit: 0845b56452984ac72e0d144f66ab1bfcbd4323b9
Parents: 502fd82
Author: David Tolpin <da...@gmail.com>
Authored: Thu Nov 19 13:57:23 2015 -0800
Committer: Tathagata Das <ta...@gmail.com>
Committed: Thu Nov 19 13:57:23 2015 -0800

----------------------------------------------------------------------
 streaming-mqtt/python/dstream.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/0845b564/streaming-mqtt/python/dstream.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/dstream.py b/streaming-mqtt/python/dstream.py
index 698336c..acec850 100644
--- a/streaming-mqtt/python/dstream.py
+++ b/streaming-mqtt/python/dstream.py
@@ -524,8 +524,8 @@ class DStream(object):
         `invFunc` can be None, then it will reduce all the RDDs in window, could be slower
         than having `invFunc`.
 
-        @param reduceFunc:     associative reduce function
-        @param invReduceFunc:  inverse function of `reduceFunc`
+        @param func:           associative reduce function
+        @param invFunc:        inverse function of `reduceFunc`
         @param windowDuration: width of the window; must be a multiple of this DStream's
                               batching interval
         @param slideDuration:  sliding interval of the window (i.e., the interval after which
@@ -556,7 +556,7 @@ class DStream(object):
                                     if kv[1] is not None else kv[0])
 
         jreduceFunc = TransformFunction(self._sc, reduceFunc, reduced._jrdd_deserializer)
-        if invReduceFunc:
+        if invFunc:
             jinvReduceFunc = TransformFunction(self._sc, invReduceFunc, reduced._jrdd_deserializer)
         else:
             jinvReduceFunc = None


[41/50] [abbrv] bahir git commit: [SPARK-13807] De-duplicate `Python*Helper` instantiation code in PySpark streaming

Posted by lr...@apache.org.
[SPARK-13807] De-duplicate `Python*Helper` instantiation code in PySpark streaming

This patch de-duplicates the code in PySpark streaming that loads the `Python*Helper` classes. I also changed a few `raise e` statements to a bare `raise` in order to preserve the full exception stack trace when re-throwing.

Here's a link to the whitespace-change-free diff: https://github.com/apache/spark/compare/master...JoshRosen:pyspark-reflection-deduplication?w=0
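
For context, a bare `raise` re-raises the active exception with its original traceback intact, whereas `raise e` (under the Python 2 semantics current at the time) restarts the traceback at the re-raise site. A standalone sketch, unrelated to the Spark code itself:

```
import traceback

def inner():
    raise ValueError("boom")   # original failure site

def rethrow():
    try:
        inner()
    except ValueError:
        raise                  # bare raise: keeps inner()'s frame in the traceback

try:
    rethrow()
except ValueError:
    assert "inner" in traceback.format_exc()   # original frame still visible
```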

Author: Josh Rosen <jo...@databricks.com>

Closes #11641 from JoshRosen/pyspark-reflection-deduplication.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/c66dcf7e
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/c66dcf7e
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/c66dcf7e

Branch: refs/heads/master
Commit: c66dcf7e38420893fe9c1d9fb4ed66b8d6f589ac
Parents: 41ae1d2
Author: Josh Rosen <jo...@databricks.com>
Authored: Fri Mar 11 11:18:51 2016 -0800
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Fri Mar 11 11:18:51 2016 -0800

----------------------------------------------------------------------
 streaming-mqtt/python/mqtt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/c66dcf7e/streaming-mqtt/python/mqtt.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/mqtt.py b/streaming-mqtt/python/mqtt.py
index 3a515ea..388e952 100644
--- a/streaming-mqtt/python/mqtt.py
+++ b/streaming-mqtt/python/mqtt.py
@@ -48,7 +48,7 @@ class MQTTUtils(object):
         except Py4JJavaError as e:
             if 'ClassNotFoundException' in str(e.java_exception):
                 MQTTUtils._printErrorMsg(ssc.sparkContext)
-            raise e
+            raise
 
         return DStream(jstream, ssc, UTF8Deserializer())
 


[31/50] [abbrv] bahir git commit: [SPARK-13177][EXAMPLES] Update ActorWordCount example to not directly use low level linked list as it is deprecated.

Posted by lr...@apache.org.
[SPARK-13177][EXAMPLES] Update ActorWordCount example to not directly use low level linked list as it is deprecated.

Author: sachin aggarwal <di...@gmail.com>

Closes #11113 from agsachin/master.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/898980d0
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/898980d0
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/898980d0

Branch: refs/heads/master
Commit: 898980d0674877147598b0ce9b93b3e5386ffa2d
Parents: 42d6c06
Author: sachin aggarwal <di...@gmail.com>
Authored: Tue Feb 9 08:52:58 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Tue Feb 9 08:52:58 2016 +0000

----------------------------------------------------------------------
 .../spark/examples/streaming/akka/ActorWordCount.scala       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/898980d0/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index 8e88987..9f7c7d5 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -18,7 +18,7 @@
 // scalastyle:off println
 package org.apache.spark.examples.streaming
 
-import scala.collection.mutable.LinkedList
+import scala.collection.mutable.LinkedHashSet
 import scala.reflect.ClassTag
 import scala.util.Random
 
@@ -39,7 +39,7 @@ case class UnsubscribeReceiver(receiverActor: ActorRef)
 class FeederActor extends Actor {
 
   val rand = new Random()
-  var receivers: LinkedList[ActorRef] = new LinkedList[ActorRef]()
+  val receivers = new LinkedHashSet[ActorRef]()
 
   val strings: Array[String] = Array("words ", "may ", "count ")
 
@@ -63,11 +63,11 @@ class FeederActor extends Actor {
   def receive: Receive = {
     case SubscribeReceiver(receiverActor: ActorRef) =>
       println("received subscribe from %s".format(receiverActor.toString))
-      receivers = LinkedList(receiverActor) ++ receivers
+      receivers += receiverActor
 
     case UnsubscribeReceiver(receiverActor: ActorRef) =>
       println("received unsubscribe from %s".format(receiverActor.toString))
-      receivers = receivers.dropWhile(x => x eq receiverActor)
+      receivers -= receiverActor
   }
 }
 


[16/50] [abbrv] bahir git commit: [SPARK-12091] [PYSPARK] Deprecate the JAVA-specific deserialized storage levels

Posted by lr...@apache.org.
[SPARK-12091] [PYSPARK] Deprecate the JAVA-specific deserialized storage levels

The current default storage level of the Python persist API is MEMORY_ONLY_SER. This is different from the default level MEMORY_ONLY in the official documentation and the RDD APIs.

davies, is this inconsistency intentional? Thanks!

Updates: Since the data is always serialized on the Python side, the storage levels named for JAVA-specific deserialized storage, such as MEMORY_ONLY, are not removed.

Updates: Based on the reviewers' feedback. In Python, stored objects will always be serialized with the [Pickle](https://docs.python.org/2/library/pickle.html) library, so it does not matter whether you choose a serialized level. The available storage levels in Python include `MEMORY_ONLY`, `MEMORY_ONLY_2`, `MEMORY_AND_DISK`, `MEMORY_AND_DISK_2`, `DISK_ONLY`, `DISK_ONLY_2` and `OFF_HEAP`.
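
As a hedged illustration of the resulting API, a caller now picks a level purely by medium and replication, with serialization implicit; the socket source and port below are placeholders:

```
from pyspark import SparkContext, StorageLevel
from pyspark.streaming import StreamingContext

sc = SparkContext(appName="StorageLevelExample")
ssc = StreamingContext(sc, 1)                      # 1-second batches

lines = ssc.socketTextStream("localhost", 9999)    # placeholder source
# Data is always pickled, so only the medium/replication choice matters:
lines.persist(StorageLevel.MEMORY_AND_DISK_2)
```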

Author: gatorsmile <ga...@gmail.com>

Closes #10092 from gatorsmile/persistStorageLevel.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/6b49590b
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/6b49590b
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/6b49590b

Branch: refs/heads/master
Commit: 6b49590b631651df6c6f9cb6cf44b206b3067411
Parents: c615575
Author: gatorsmile <ga...@gmail.com>
Authored: Fri Dec 18 20:06:05 2015 -0800
Committer: Davies Liu <da...@gmail.com>
Committed: Fri Dec 18 20:06:05 2015 -0800

----------------------------------------------------------------------
 streaming-mqtt/python/dstream.py | 4 ++--
 streaming-mqtt/python/mqtt.py    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/6b49590b/streaming-mqtt/python/dstream.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/dstream.py b/streaming-mqtt/python/dstream.py
index b994a53..adc2651 100644
--- a/streaming-mqtt/python/dstream.py
+++ b/streaming-mqtt/python/dstream.py
@@ -208,10 +208,10 @@ class DStream(object):
     def cache(self):
         """
         Persist the RDDs of this DStream with the default storage level
-        (C{MEMORY_ONLY_SER}).
+        (C{MEMORY_ONLY}).
         """
         self.is_cached = True
-        self.persist(StorageLevel.MEMORY_ONLY_SER)
+        self.persist(StorageLevel.MEMORY_ONLY)
         return self
 
     def persist(self, storageLevel):

http://git-wip-us.apache.org/repos/asf/bahir/blob/6b49590b/streaming-mqtt/python/mqtt.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/mqtt.py b/streaming-mqtt/python/mqtt.py
index 1ce4093..3a515ea 100644
--- a/streaming-mqtt/python/mqtt.py
+++ b/streaming-mqtt/python/mqtt.py
@@ -28,7 +28,7 @@ class MQTTUtils(object):
 
     @staticmethod
     def createStream(ssc, brokerUrl, topic,
-                     storageLevel=StorageLevel.MEMORY_AND_DISK_SER_2):
+                     storageLevel=StorageLevel.MEMORY_AND_DISK_2):
         """
         Create an input stream that pulls messages from a Mqtt Broker.
 


[25/50] [abbrv] bahir git commit: [SPARK-4628][BUILD] Remove all non-Maven-Central repositories from build

Posted by lr...@apache.org.
[SPARK-4628][BUILD] Remove all non-Maven-Central repositories from build

This patch removes all non-Maven-central repositories from Spark's build, thereby avoiding any risk of future build-breaks due to us accidentally depending on an artifact which is not present in an immutable public Maven repository.

I tested this by running

```
build/mvn \
        -Phive \
        -Phive-thriftserver \
        -Pkinesis-asl \
        -Pspark-ganglia-lgpl \
        -Pyarn \
        dependency:go-offline
```

inside of a fresh Ubuntu Docker container with no Ivy or Maven caches (I did a similar test for SBT).

Author: Josh Rosen <jo...@databricks.com>

Closes #10659 from JoshRosen/SPARK-4628.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/d29a61bf
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/d29a61bf
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/d29a61bf

Branch: refs/heads/master
Commit: d29a61bf6566e5dd82b3b15500004a6e203e3b8c
Parents: 2067a3c
Author: Josh Rosen <jo...@databricks.com>
Authored: Fri Jan 8 20:58:53 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Jan 8 20:58:53 2016 -0800

----------------------------------------------------------------------
 streaming-mqtt/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/d29a61bf/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index b3ba72a..d3a2bf5 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -51,7 +51,7 @@
     <dependency>
       <groupId>org.eclipse.paho</groupId>
       <artifactId>org.eclipse.paho.client.mqttv3</artifactId>
-      <version>1.0.1</version>
+      <version>1.0.2</version>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>


[10/50] [abbrv] bahir git commit: [SPARK-6328][PYTHON] Python API for StreamingListener

Posted by lr...@apache.org.
[SPARK-6328][PYTHON] Python API for StreamingListener

Author: Daniel Jalova <dj...@us.ibm.com>

Closes #9186 from djalova/SPARK-6328.
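
A minimal sketch of how the newly exported listener can be used from Python; the method and accessor names follow the PySpark listener API this change exposes, but treat the details as an assumption:

```
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.listener import StreamingListener

class BatchTimingListener(StreamingListener):
    """Prints the processing delay of every completed batch."""
    def onBatchCompleted(self, batchCompleted):
        info = batchCompleted.batchInfo()          # assumed py4j accessor
        print("batch took %s ms" % info.processingDelay())

sc = SparkContext(appName="ListenerExample")
ssc = StreamingContext(sc, 1)
ssc.addStreamingListener(BatchTimingListener())
```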


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/502fd825
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/502fd825
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/502fd825

Branch: refs/heads/master
Commit: 502fd8257d60393e8a2d5596feee95d02abd6f75
Parents: 3b1cc91
Author: Daniel Jalova <dj...@us.ibm.com>
Authored: Mon Nov 16 11:29:27 2015 -0800
Committer: Tathagata Das <ta...@gmail.com>
Committed: Mon Nov 16 11:29:27 2015 -0800

----------------------------------------------------------------------
 streaming-mqtt/python/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/502fd825/streaming-mqtt/python/__init__.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/__init__.py b/streaming-mqtt/python/__init__.py
index d2644a1..66e8f8e 100644
--- a/streaming-mqtt/python/__init__.py
+++ b/streaming-mqtt/python/__init__.py
@@ -17,5 +17,6 @@
 
 from pyspark.streaming.context import StreamingContext
 from pyspark.streaming.dstream import DStream
+from pyspark.streaming.listener import StreamingListener
 
-__all__ = ['StreamingContext', 'DStream']
+__all__ = ['StreamingContext', 'DStream', 'StreamingListener']


[04/50] [abbrv] bahir git commit: [SPARK-10300] [BUILD] [TESTS] Add support for test tags in run-tests.py.

Posted by lr...@apache.org.
[SPARK-10300] [BUILD] [TESTS] Add support for test tags in run-tests.py.

This change does two things:

- tags a few tests and adds the mechanism in the build to be able to disable those tags,
  both in Maven and sbt, for both JUnit and ScalaTest suites.
- adds some logic to run-tests.py to disable some tags depending on which files have
  changed; that's used to disable expensive tests when a module hasn't explicitly
  been changed, to speed up testing for changes that don't directly affect those
  modules (see the sketch below).
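
A rough sketch of the kind of file-to-tag mapping this implies for run-tests.py; the module and tag names here are illustrative, not the actual contents of the script:

```
# Illustrative only: decide which test tags can be skipped for a change set.
SLOW_TAGS_BY_MODULE = {
    "docker-integration-tests": "org.apache.spark.tags.DockerTest",
    "yarn": "org.apache.spark.tags.ExtendedYarnTest",
}

def tags_to_exclude(changed_files):
    """Return the tags safe to exclude given the modules touched by a change."""
    touched = {path.split("/", 1)[0] for path in changed_files}
    return [tag for module, tag in SLOW_TAGS_BY_MODULE.items()
            if module not in touched]

# A docs-only change can skip every expensive tag:
assert len(tags_to_exclude(["docs/index.md"])) == 2
```

On the Maven side, the chosen tags flow into the `test.exclude.tags` property consumed by the scalatest plugin (visible as `<tagsToExclude>${test.exclude.tags}</tagsToExclude>` in the POM above).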

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #8437 from vanzin/test-tags.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/762497f3
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/762497f3
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/762497f3

Branch: refs/heads/master
Commit: 762497f3c3547b0460eccfdfa90fdd609a35b7a4
Parents: e59132d
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Tue Sep 15 10:45:02 2015 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Tue Sep 15 10:45:02 2015 -0700

----------------------------------------------------------------------
 streaming-mqtt/pom.xml    | 10 ----------
 streaming-twitter/pom.xml | 10 ----------
 streaming-zeromq/pom.xml  | 10 ----------
 3 files changed, 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/762497f3/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 05e6338..913c47d 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -59,16 +59,6 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.novocode</groupId>
-      <artifactId>junit-interface</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.activemq</groupId>
       <artifactId>activemq-core</artifactId>
       <version>5.7.0</version>

http://git-wip-us.apache.org/repos/asf/bahir/blob/762497f3/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 244ad58..9137bf2 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -58,16 +58,6 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.novocode</groupId>
-      <artifactId>junit-interface</artifactId>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>

http://git-wip-us.apache.org/repos/asf/bahir/blob/762497f3/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 171df86..6fec4f0 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -57,16 +57,6 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.novocode</groupId>
-      <artifactId>junit-interface</artifactId>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>


[39/50] [abbrv] bahir git commit: [SPARK-13702][CORE][SQL][MLLIB] Use diamond operator for generic instance creation in Java code.

Posted by lr...@apache.org.
[SPARK-13702][CORE][SQL][MLLIB] Use diamond operator for generic instance creation in Java code.

## What changes were proposed in this pull request?

In order to make `docs/examples` (and other related code) simpler, more readable, and more user-friendly, this PR replaces existing code like the following with the `diamond` operator.

```
-    final ArrayList<Product2<Object, Object>> dataToWrite =
-      new ArrayList<Product2<Object, Object>>();
+    final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<>();
```

Java 7 and higher support the **diamond** operator, which replaces the type arguments required to invoke the constructor of a generic class with an empty set of type parameters (`<>`). Currently, the Spark Java code mixes both styles.

## How was this patch tested?

Manual.
Pass the existing tests.

Author: Dongjoon Hyun <do...@apache.org>

Closes #11541 from dongjoon-hyun/SPARK-13702.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/84a14711
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/84a14711
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/84a14711

Branch: refs/heads/master
Commit: 84a147111c86c7bc219c959fa652829991c2b074
Parents: 3b4a1c0
Author: Dongjoon Hyun <do...@apache.org>
Authored: Wed Mar 9 10:31:26 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Mar 9 10:31:26 2016 +0000

----------------------------------------------------------------------
 .../apache/spark/examples/streaming/akka/JavaActorWordCount.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/84a14711/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
index 7bb70d0..7884b8c 100644
--- a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
+++ b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
@@ -129,7 +129,7 @@ public class JavaActorWordCount {
     }).mapToPair(new PairFunction<String, String, Integer>() {
       @Override
       public Tuple2<String, Integer> call(String s) {
-        return new Tuple2<String, Integer>(s, 1);
+        return new Tuple2<>(s, 1);
       }
     }).reduceByKey(new Function2<Integer, Integer, Integer>() {
       @Override


[42/50] [abbrv] bahir git commit: [SPARK-13823][CORE][STREAMING][SQL] Always specify Charset in String <-> byte[] conversions (and remaining Coverity items)

Posted by lr...@apache.org.
[SPARK-13823][CORE][STREAMING][SQL] Always specify Charset in String <-> byte[] conversions (and remaining Coverity items)

## What changes were proposed in this pull request?

- Fixes calls to `new String(byte[])` or `String.getBytes()` that rely on the platform default encoding, so that they use UTF-8
- Same for `InputStreamReader` and `OutputStreamWriter` constructors
- Standardizes on UTF-8 everywhere
- Standardizes on specifying the encoding with `StandardCharsets.UTF_8`, not the Guava constant or "UTF-8" (which means handling `UnsupportedEncodingException`)
- (also addresses the other remaining Coverity scan issues, which are pretty trivial; these are separated into commit https://github.com/srowen/spark/commit/1deecd8d9ca986d8adb1a42d315890ce5349d29c )

## How was this patch tested?

Jenkins tests

Author: Sean Owen <so...@cloudera.com>

Closes #11657 from srowen/SPARK-13823.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/a8dc23af
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/a8dc23af
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/a8dc23af

Branch: refs/heads/master
Commit: a8dc23aff14399c939e8ed6d8dfdf22973c1ef4e
Parents: c66dcf7
Author: Sean Owen <so...@cloudera.com>
Authored: Sun Mar 13 21:03:49 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Sun Mar 13 21:03:49 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala | 4 +++-
 .../scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala    | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/a8dc23af/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
index 079bd8a..cbad6f7 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.streaming.mqtt
 
+import java.nio.charset.StandardCharsets
+
 import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken
 import org.eclipse.paho.client.mqttv3.MqttCallback
 import org.eclipse.paho.client.mqttv3.MqttClient
@@ -75,7 +77,7 @@ class MQTTReceiver(
 
       // Handles Mqtt message
       override def messageArrived(topic: String, message: MqttMessage) {
-        store(new String(message.getPayload(), "utf-8"))
+        store(new String(message.getPayload(), StandardCharsets.UTF_8))
       }
 
       override def deliveryComplete(token: IMqttDeliveryToken) {

http://git-wip-us.apache.org/repos/asf/bahir/blob/a8dc23af/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
index 26c6dc4..3680c13 100644
--- a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
+++ b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
@@ -18,10 +18,10 @@
 package org.apache.spark.streaming.mqtt
 
 import java.net.{ServerSocket, URI}
+import java.nio.charset.StandardCharsets
 
 import scala.language.postfixOps
 
-import com.google.common.base.Charsets.UTF_8
 import org.apache.activemq.broker.{BrokerService, TransportConnector}
 import org.apache.commons.lang3.RandomUtils
 import org.eclipse.paho.client.mqttv3._
@@ -85,7 +85,7 @@ private[mqtt] class MQTTTestUtils extends Logging {
       client.connect()
       if (client.isConnected) {
         val msgTopic = client.getTopic(topic)
-        val message = new MqttMessage(data.getBytes(UTF_8))
+        val message = new MqttMessage(data.getBytes(StandardCharsets.UTF_8))
         message.setQos(1)
         message.setRetained(true)
 


[03/50] [abbrv] bahir git commit: Update version to 1.6.0-SNAPSHOT.

Posted by lr...@apache.org.
Update version to 1.6.0-SNAPSHOT.

Author: Reynold Xin <rx...@databricks.com>

Closes #8350 from rxin/1.6.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e59132d4
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e59132d4
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e59132d4

Branch: refs/heads/master
Commit: e59132d40e8e4b5b452a0ce21a7699239e53ea3e
Parents: 17a28c6
Author: Reynold Xin <rx...@databricks.com>
Authored: Tue Sep 15 00:54:20 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Sep 15 00:54:20 2015 -0700

----------------------------------------------------------------------
 streaming-mqtt/pom.xml    | 2 +-
 streaming-twitter/pom.xml | 2 +-
 streaming-zeromq/pom.xml  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e59132d4/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 69b3098..05e6338 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.6.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/e59132d4/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 178ae8d..244ad58 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.6.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/e59132d4/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 37bfd10..171df86 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
+    <version>1.6.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 


[12/50] [abbrv] bahir git commit: [SPARK-12023][BUILD] Fix warnings while packaging spark with maven.

Posted by lr...@apache.org.
[SPARK-12023][BUILD] Fix warnings while packaging spark with maven.

this is a trivial fix, discussed [here](http://stackoverflow.com/questions/28500401/maven-assembly-plugin-warning-the-assembly-descriptor-contains-a-filesystem-roo/).

Author: Prashant Sharma <sc...@gmail.com>

Closes #10014 from ScrapCodes/assembly-warning.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/be8b3585
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/be8b3585
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/be8b3585

Branch: refs/heads/master
Commit: be8b358551232d91373558c7cb447c42efafc400
Parents: 0845b56
Author: Prashant Sharma <sc...@gmail.com>
Authored: Mon Nov 30 10:11:27 2015 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Mon Nov 30 10:11:27 2015 +0000

----------------------------------------------------------------------
 streaming-mqtt/src/main/assembly/assembly.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/be8b3585/streaming-mqtt/src/main/assembly/assembly.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/assembly/assembly.xml b/streaming-mqtt/src/main/assembly/assembly.xml
index ecab5b3..c110b01 100644
--- a/streaming-mqtt/src/main/assembly/assembly.xml
+++ b/streaming-mqtt/src/main/assembly/assembly.xml
@@ -24,7 +24,7 @@
   <fileSets>
     <fileSet>
       <directory>${project.build.directory}/scala-${scala.binary.version}/test-classes</directory>
-      <outputDirectory>/</outputDirectory>
+      <outputDirectory></outputDirectory>
     </fileSet>
   </fileSets>
 


[40/50] [abbrv] bahir git commit: [SPARK-3854][BUILD] Scala style: require spaces before `{`.

Posted by lr...@apache.org.
[SPARK-3854][BUILD] Scala style: require spaces before `{`.

## What changes were proposed in this pull request?

Since the opening curly brace, '{', has many usages as discussed in [SPARK-3854](https://issues.apache.org/jira/browse/SPARK-3854), this PR adds a ScalaStyle rule preventing the `){` pattern in the majority case shown below, and fixes the code accordingly. Enforcing this in ScalaStyle from now on will improve Scala code quality and reduce review time.
```
// Correct:
if (true) {
  println("Wow!")
}

// Incorrect:
if (true){
   println("Wow!")
}
```
IntelliJ also shows new warnings based on this.

## How was this patch tested?

Pass the Jenkins ScalaStyle test.

Author: Dongjoon Hyun <do...@apache.org>

Closes #11637 from dongjoon-hyun/SPARK-3854.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/41ae1d27
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/41ae1d27
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/41ae1d27

Branch: refs/heads/master
Commit: 41ae1d27524342aa9bbf854a7cbfddd37e8e8370
Parents: 84a1471
Author: Dongjoon Hyun <do...@apache.org>
Authored: Thu Mar 10 15:57:22 2016 -0800
Committer: Andrew Or <an...@databricks.com>
Committed: Thu Mar 10 15:57:22 2016 -0800

----------------------------------------------------------------------
 .../org/apache/spark/examples/streaming/akka/ActorWordCount.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/41ae1d27/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index 2770b8a..844772a 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -100,7 +100,7 @@ class SampleActorReceiver[T](urlOfPublisher: String) extends ActorReceiver {
 object FeederActor {
 
   def main(args: Array[String]) {
-    if (args.length < 2){
+    if (args.length < 2) {
       System.err.println("Usage: FeederActor <hostname> <port>\n")
       System.exit(1)
     }


[33/50] [abbrv] bahir git commit: [SPARK-13069][STREAMING] Add "ask" style store() to ActorReceiver

Posted by lr...@apache.org.
[SPARK-13069][STREAMING] Add "ask" style store() to ActorReceiver

Introduces an "ask"-style ```store``` in ```ActorReceiver``` as a way to allow an actor receiver to be blocked by back pressure or maxRate.

Author: Lin Zhao <li...@exabeam.com>

Closes #11176 from lin-zhao/SPARK-13069.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/6ac5a189
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/6ac5a189
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/6ac5a189

Branch: refs/heads/master
Commit: 6ac5a1893dbab0f44c2723611aa2f1e6cac82a7c
Parents: 5d45050
Author: Lin Zhao <li...@exabeam.com>
Authored: Thu Feb 25 12:32:17 2016 -0800
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Thu Feb 25 12:32:24 2016 -0800

----------------------------------------------------------------------
 .../spark/streaming/akka/ActorReceiver.scala    | 39 +++++++++++++++++++-
 .../streaming/akka/JavaAkkaUtilsSuite.java      |  2 +
 .../spark/streaming/akka/AkkaUtilsSuite.scala   |  3 ++
 3 files changed, 43 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/6ac5a189/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
index c75dc92..33415c1 100644
--- a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
+++ b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
@@ -20,12 +20,15 @@ package org.apache.spark.streaming.akka
 import java.nio.ByteBuffer
 import java.util.concurrent.atomic.AtomicInteger
 
+import scala.concurrent.Future
 import scala.concurrent.duration._
 import scala.language.postfixOps
 import scala.reflect.ClassTag
 
 import akka.actor._
 import akka.actor.SupervisorStrategy.{Escalate, Restart}
+import akka.pattern.ask
+import akka.util.Timeout
 import com.typesafe.config.ConfigFactory
 
 import org.apache.spark.{Logging, TaskContext}
@@ -105,13 +108,26 @@ abstract class ActorReceiver extends Actor {
   }
 
   /**
-   * Store a single item of received data to Spark's memory.
+   * Store a single item of received data to Spark's memory asynchronously.
    * These single items will be aggregated together into data blocks before
    * being pushed into Spark's memory.
    */
   def store[T](item: T) {
     context.parent ! SingleItemData(item)
   }
+
+  /**
+   * Store a single item of received data to Spark's memory and returns a `Future`.
+   * The `Future` will be completed when the operator finishes, or with an
+   * `akka.pattern.AskTimeoutException` after the given timeout has expired.
+   * These single items will be aggregated together into data blocks before
+   * being pushed into Spark's memory.
+   *
+   * This method allows the user to control the flow speed using `Future`
+   */
+  def store[T](item: T, timeout: Timeout): Future[Unit] = {
+    context.parent.ask(AskStoreSingleItemData(item))(timeout).map(_ => ())(context.dispatcher)
+  }
 }
 
 /**
@@ -162,6 +178,19 @@ abstract class JavaActorReceiver extends UntypedActor {
   def store[T](item: T) {
     context.parent ! SingleItemData(item)
   }
+
+  /**
+   * Store a single item of received data to Spark's memory and return a `Future`.
+   * The `Future` will be completed when the operation finishes, or with an
+   * `akka.pattern.AskTimeoutException` after the given timeout has expired.
+   * These single items will be aggregated together into data blocks before
+   * being pushed into Spark's memory.
+   *
+   * This method allows the user to control the flow speed using the returned `Future`.
+   */
+  def store[T](item: T, timeout: Timeout): Future[Unit] = {
+    context.parent.ask(AskStoreSingleItemData(item))(timeout).map(_ => ())(context.dispatcher)
+  }
 }
 
 /**
@@ -179,8 +208,10 @@ case class Statistics(numberOfMsgs: Int,
 /** Case class to receive data sent by child actors */
 private[akka] sealed trait ActorReceiverData
 private[akka] case class SingleItemData[T](item: T) extends ActorReceiverData
+private[akka] case class AskStoreSingleItemData[T](item: T) extends ActorReceiverData
 private[akka] case class IteratorData[T](iterator: Iterator[T]) extends ActorReceiverData
 private[akka] case class ByteBufferData(bytes: ByteBuffer) extends ActorReceiverData
+private[akka] object Ack extends ActorReceiverData
 
 /**
  * Provides Actors as receivers for receiving stream.
@@ -233,6 +264,12 @@ private[akka] class ActorReceiverSupervisor[T: ClassTag](
         store(msg.asInstanceOf[T])
         n.incrementAndGet
 
+      case AskStoreSingleItemData(msg) =>
+        logDebug("received single sync")
+        store(msg.asInstanceOf[T])
+        n.incrementAndGet
+        sender() ! Ack
+
       case ByteBufferData(bytes) =>
         logDebug("received bytes")
         store(bytes)

http://git-wip-us.apache.org/repos/asf/bahir/blob/6ac5a189/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
----------------------------------------------------------------------
diff --git a/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java b/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
index b732506..ac5ef31 100644
--- a/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
+++ b/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
@@ -20,6 +20,7 @@ package org.apache.spark.streaming.akka;
 import akka.actor.ActorSystem;
 import akka.actor.Props;
 import akka.actor.SupervisorStrategy;
+import akka.util.Timeout;
 import org.apache.spark.streaming.Duration;
 import org.apache.spark.streaming.api.java.JavaStreamingContext;
 import org.junit.Test;
@@ -62,5 +63,6 @@ class JavaTestActor extends JavaActorReceiver {
   @Override
   public void onReceive(Object message) throws Exception {
     store((String) message);
+    store((String) message, new Timeout(1000));
   }
 }

http://git-wip-us.apache.org/repos/asf/bahir/blob/6ac5a189/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala b/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
index f437585..ce95d9d 100644
--- a/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
+++ b/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.streaming.akka
 
+import scala.concurrent.duration._
+
 import akka.actor.{Props, SupervisorStrategy}
 
 import org.apache.spark.SparkFunSuite
@@ -60,5 +62,6 @@ class AkkaUtilsSuite extends SparkFunSuite {
 class TestActor extends ActorReceiver {
   override def receive: Receive = {
     case m: String => store(m)
+    case m => store(m, 10.seconds)
   }
 }
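
For flow control, a minimal sketch of how a receiver actor could use the new ask-based `store` (the actor name `ThrottledActor` and the 10-second timeout are illustrative, not part of this patch):

    import scala.concurrent.duration._

    import akka.util.Timeout

    import org.apache.spark.streaming.akka.ActorReceiver

    class ThrottledActor extends ActorReceiver {
      override def receive: Receive = {
        case m: String =>
          // Returns a Future[Unit] that completes once the supervisor has
          // stored the item, or fails with an AskTimeoutException after
          // the timeout expires.
          val stored = store(m, Timeout(10.seconds))
          // Chaining further work on `stored` lets the actor throttle
          // ingestion to the speed at which Spark stores blocks.
      }
    }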


[14/50] [abbrv] bahir git commit: [SPARK-11904][PYSPARK] reduceByKeyAndWindow does not require checkpointing when invFunc is None

Posted by lr...@apache.org.
[SPARK-11904][PYSPARK] reduceByKeyAndWindow does not require checkpointing when invFunc is None

when invFunc is None, `reduceByKeyAndWindow(func, None, winsize, slidesize)` is equivalent to

     reduceByKey(func).window(winsize, slidesize).reduceByKey(func)

and no checkpoint is necessary. The corresponding Scala code does exactly that, but the Python code always created a windowed stream with obligatory checkpointing. This patch fixes that.
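
A Scala sketch of the checkpoint-free path this enables (the stream type and window sizes are illustrative; it mirrors what the Scala API already did):

    import org.apache.spark.streaming.Seconds
    import org.apache.spark.streaming.dstream.DStream

    // Without an inverse function there is no incremental window state
    // to recover, so a windowed reduce is just a reduce over the window.
    def windowedCounts(pairs: DStream[(String, Int)]): DStream[(String, Int)] =
      pairs
        .reduceByKey(_ + _)
        .window(Seconds(30), Seconds(10))
        .reduceByKey(_ + _)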

I do not know how to unit-test this.

Author: David Tolpin <da...@gmail.com>

Closes #9888 from dtolpin/master.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/c25b799a
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/c25b799a
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/c25b799a

Branch: refs/heads/master
Commit: c25b799a8df1a79f66feb94e7a60be34d2a2b262
Parents: e30f0c2
Author: David Tolpin <da...@gmail.com>
Authored: Wed Dec 16 22:10:24 2015 -0800
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Wed Dec 16 22:10:24 2015 -0800

----------------------------------------------------------------------
 streaming-mqtt/python/dstream.py | 45 ++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/c25b799a/streaming-mqtt/python/dstream.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/dstream.py b/streaming-mqtt/python/dstream.py
index f61137c..b994a53 100644
--- a/streaming-mqtt/python/dstream.py
+++ b/streaming-mqtt/python/dstream.py
@@ -542,31 +542,32 @@ class DStream(object):
 
         reduced = self.reduceByKey(func, numPartitions)
 
-        def reduceFunc(t, a, b):
-            b = b.reduceByKey(func, numPartitions)
-            r = a.union(b).reduceByKey(func, numPartitions) if a else b
-            if filterFunc:
-                r = r.filter(filterFunc)
-            return r
-
-        def invReduceFunc(t, a, b):
-            b = b.reduceByKey(func, numPartitions)
-            joined = a.leftOuterJoin(b, numPartitions)
-            return joined.mapValues(lambda kv: invFunc(kv[0], kv[1])
-                                    if kv[1] is not None else kv[0])
-
-        jreduceFunc = TransformFunction(self._sc, reduceFunc, reduced._jrdd_deserializer)
         if invFunc:
+            def reduceFunc(t, a, b):
+                b = b.reduceByKey(func, numPartitions)
+                r = a.union(b).reduceByKey(func, numPartitions) if a else b
+                if filterFunc:
+                    r = r.filter(filterFunc)
+                return r
+
+            def invReduceFunc(t, a, b):
+                b = b.reduceByKey(func, numPartitions)
+                joined = a.leftOuterJoin(b, numPartitions)
+                return joined.mapValues(lambda kv: invFunc(kv[0], kv[1])
+                                        if kv[1] is not None else kv[0])
+
+            jreduceFunc = TransformFunction(self._sc, reduceFunc, reduced._jrdd_deserializer)
             jinvReduceFunc = TransformFunction(self._sc, invReduceFunc, reduced._jrdd_deserializer)
+            if slideDuration is None:
+                slideDuration = self._slideDuration
+            dstream = self._sc._jvm.PythonReducedWindowedDStream(
+                reduced._jdstream.dstream(),
+                jreduceFunc, jinvReduceFunc,
+                self._ssc._jduration(windowDuration),
+                self._ssc._jduration(slideDuration))
+            return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)
         else:
-            jinvReduceFunc = None
-        if slideDuration is None:
-            slideDuration = self._slideDuration
-        dstream = self._sc._jvm.PythonReducedWindowedDStream(reduced._jdstream.dstream(),
-                                                             jreduceFunc, jinvReduceFunc,
-                                                             self._ssc._jduration(windowDuration),
-                                                             self._ssc._jduration(slideDuration))
-        return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)
+            return reduced.window(windowDuration, slideDuration).reduceByKey(func, numPartitions)
 
     def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None):
         """


[20/50] [abbrv] bahir git commit: [SPARK-3873][EXAMPLES] Import ordering fixes.

Posted by lr...@apache.org.
[SPARK-3873][EXAMPLES] Import ordering fixes.

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #10575 from vanzin/SPARK-3873-examples.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/b509629b
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/b509629b
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/b509629b

Branch: refs/heads/master
Commit: b509629bb1f1fc095cbfe29b9f395c57d45e2504
Parents: d2d4833
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Mon Jan 4 22:42:54 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Jan 4 22:42:54 2016 -0800

----------------------------------------------------------------------
 .../spark/examples/streaming/akka/ActorWordCount.scala       | 6 +++---
 .../examples/streaming/twitter/TwitterAlgebirdHLL.scala      | 4 ++--
 .../streaming/twitter/TwitterHashTagJoinSentiments.scala     | 2 +-
 .../spark/examples/streaming/zeromq/ZeroMQWordCount.scala    | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/b509629b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index e9c9907..8b8dae0 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -22,13 +22,13 @@ import scala.collection.mutable.LinkedList
 import scala.reflect.ClassTag
 import scala.util.Random
 
-import akka.actor.{Actor, ActorRef, Props, actorRef2Scala}
+import akka.actor.{actorRef2Scala, Actor, ActorRef, Props}
 
-import org.apache.spark.{SparkConf, SecurityManager}
+import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions
-import org.apache.spark.util.AkkaUtils
 import org.apache.spark.streaming.receiver.ActorHelper
+import org.apache.spark.util.AkkaUtils
 
 case class SubscribeReceiver(receiverActor: ActorRef)
 case class UnsubscribeReceiver(receiverActor: ActorRef)

http://git-wip-us.apache.org/repos/asf/bahir/blob/b509629b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
index 49826ed..0ec6214 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
@@ -18,13 +18,13 @@
 // scalastyle:off println
 package org.apache.spark.examples.streaming
 
-import com.twitter.algebird.HyperLogLogMonoid
 import com.twitter.algebird.HyperLogLog._
+import com.twitter.algebird.HyperLogLogMonoid
 
+import org.apache.spark.SparkConf
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.twitter._
-import org.apache.spark.SparkConf
 
 // scalastyle:off
 /**

http://git-wip-us.apache.org/repos/asf/bahir/blob/b509629b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
index 0328fa8..edf0e0b 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
@@ -19,8 +19,8 @@
 package org.apache.spark.examples.streaming
 
 import org.apache.spark.SparkConf
-import org.apache.spark.streaming.twitter.TwitterUtils
 import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.twitter.TwitterUtils
 
 /**
  * Displays the most positive hash tags by joining the streaming Twitter data with a static RDD of

http://git-wip-us.apache.org/repos/asf/bahir/blob/b509629b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
index 6ac9a72..9644890 100644
--- a/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
+++ b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
@@ -18,18 +18,18 @@
 // scalastyle:off println
 package org.apache.spark.examples.streaming
 
+import scala.language.implicitConversions
+
 import akka.actor.ActorSystem
 import akka.actor.actorRef2Scala
+import akka.util.ByteString
 import akka.zeromq._
 import akka.zeromq.Subscribe
-import akka.util.ByteString
 
+import org.apache.spark.SparkConf
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.zeromq._
 
-import scala.language.implicitConversions
-import org.apache.spark.SparkConf
-
 /**
  * A simple publisher for demonstration purposes, repeatedly publishes random Messages
  * every one second.


[18/50] [abbrv] bahir git commit: [SPARK-12353][STREAMING][PYSPARK] Fix countByValue inconsistent output in Python API

Posted by lr...@apache.org.
[SPARK-12353][STREAMING][PYSPARK] Fix countByValue inconsistent output in Python API

The semantics of the Python countByValue differ from the Scala API; it behaves more like a countDistinctValues. This change makes it consistent with the Scala/Java API.
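
A small illustration of the corrected semantics using plain Scala collections (an analogy only, not the DStream API):

    // countByValue should yield (value, count) pairs per batch,
    // not a single count of distinct values.
    val batch = Seq("a", "b", "a")
    val counts = batch.groupBy(identity).mapValues(_.size)
    // counts == Map("a" -> 2, "b" -> 1), matching Scala/Java countByValue
    val distinct = counts.size
    // distinct == 2 is what the old Python code effectively returned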

Author: jerryshao <ss...@hortonworks.com>

Closes #10350 from jerryshao/SPARK-12353.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/47e0e0ed
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/47e0e0ed
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/47e0e0ed

Branch: refs/heads/master
Commit: 47e0e0edd2974140e7ca88924681319487cfbc92
Parents: e2d30ef
Author: jerryshao <ss...@hortonworks.com>
Authored: Mon Dec 28 10:43:23 2015 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Mon Dec 28 10:43:23 2015 +0000

----------------------------------------------------------------------
 streaming-mqtt/python/dstream.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/47e0e0ed/streaming-mqtt/python/dstream.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/dstream.py b/streaming-mqtt/python/dstream.py
index adc2651..86447f5 100644
--- a/streaming-mqtt/python/dstream.py
+++ b/streaming-mqtt/python/dstream.py
@@ -247,7 +247,7 @@ class DStream(object):
         Return a new DStream in which each RDD contains the counts of each
         distinct value in each RDD of this DStream.
         """
-        return self.map(lambda x: (x, None)).reduceByKey(lambda x, y: None).count()
+        return self.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x+y)
 
     def saveAsTextFiles(self, prefix, suffix=None):
         """
@@ -493,7 +493,7 @@ class DStream(object):
         keyed = self.map(lambda x: (x, 1))
         counted = keyed.reduceByKeyAndWindow(operator.add, operator.sub,
                                              windowDuration, slideDuration, numPartitions)
-        return counted.filter(lambda kv: kv[1] > 0).count()
+        return counted.filter(lambda kv: kv[1] > 0)
 
     def groupByKeyAndWindow(self, windowDuration, slideDuration, numPartitions=None):
         """


[47/50] [abbrv] bahir git commit: Add git configuration files

Posted by lr...@apache.org.
Add git configuration files


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e1871c00
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e1871c00
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e1871c00

Branch: refs/heads/master
Commit: e1871c00222c347af08c06194ab4a778cc0bd9ff
Parents: 5907de3
Author: Luciano Resende <lr...@apache.org>
Authored: Thu May 19 18:21:47 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Tue Jun 7 22:54:31 2016 -0700

----------------------------------------------------------------------
 .gitattributes | 13 +++++++++++++
 .gitignore     | 21 +++++++++++++++++++++
 2 files changed, 34 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e1871c00/.gitattributes
----------------------------------------------------------------------
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a8edefd
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,13 @@
+# Set the default behavior to have all files normalized to Unix-style
+# line endings upon check-in.
+* text=auto
+
+# Declare files that will always have CRLF line endings on checkout.
+*.bat text eol=crlf
+
+# Denote all files that are truly binary and should not be modified.
+*.dll binary
+*.exp binary
+*.lib binary
+*.pdb binary
+*.exe binary

http://git-wip-us.apache.org/repos/asf/bahir/blob/e1871c00/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1801b67
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+# Mac
+.DS_Store
+
+# Eclipse
+.classpath
+.project
+.settings/
+target/
+
+# Intellij
+.idea/
+.idea_modules/
+*.iml
+*.iws
+*.class
+*.log
+
+# Others
+.checkstyle
+.fbExcludeFilterFile
+dependency-reduced-pom.xml


[46/50] [abbrv] bahir git commit: Update README.md

Posted by lr...@apache.org.
Update README.md

Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/5907de31
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/5907de31
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/5907de31

Branch: refs/heads/master
Commit: 5907de317b3ae7d692e9cb2fa4aabec2572604d2
Parents: 73a768a
Author: Christian Kadner <ck...@users.noreply.github.com>
Authored: Mon Jun 6 22:40:32 2016 -0700
Committer: Christian Kadner <ck...@users.noreply.github.com>
Committed: Mon Jun 6 22:40:32 2016 -0700

----------------------------------------------------------------------
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/5907de31/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 000b3fe..8cdbc5e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Bahir Repository
 
-Initial Bahir repository containing the source for the Spark streaming connectors for akka, mqtt, twitter, zeromq 
+Initial Bahir repository (see issue [BAHIR-1](https://issues.apache.org/jira/browse/BAHIR-1)) containing the source for the Spark streaming connectors for akka, mqtt, twitter, zeromq 
 extracted from [Apache Spark revision 8301fad](https://github.com/apache/spark/tree/8301fadd8d269da11e72870b7a889596e3337839)
 (before the [deletion of the streaming connectors akka, mqtt, twitter, zeromq](https://issues.apache.org/jira/browse/SPARK-13843)). 
 


[49/50] [abbrv] bahir git commit: [BAHIR-2] Initial maven build for Bahir spark extensions

Posted by lr...@apache.org.
http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
new file mode 100644
index 0000000..270104f
--- /dev/null
+++ b/scalastyle-config.xml
@@ -0,0 +1,321 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<!--
+
+If you wish to turn off checking for a section of code, you can put a comment in the source
+before and after the section, with the following syntax:
+
+  // scalastyle:off
+  ...  // stuff that breaks the styles
+  // scalastyle:on
+
+You can also disable only one rule, by specifying its rule id, as specified in:
+  http://www.scalastyle.org/rules-0.7.0.html
+
+  // scalastyle:off no.finalize
+  override def finalize(): Unit = ...
+  // scalastyle:on no.finalize
+
+This file is divided into 3 sections:
+ (1) rules that we enforce.
+ (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
+     (or we need to make the scalastyle rule more configurable).
+ (3) rules that we don't want to enforce.
+-->
+
+<scalastyle>
+  <name>Scalastyle standard configuration</name>
+
+  <!-- ================================================================================ -->
+  <!--                               rules we enforce                                   -->
+  <!-- ================================================================================ -->
+
+  <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
+    <parameters>
+       <parameter name="header"><![CDATA[/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */]]></parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
+    <parameters>
+      <parameter name="maxLineLength"><![CDATA[100]]></parameter>
+      <parameter name="tabSize"><![CDATA[2]]></parameter>
+      <parameter name="ignoreImports">true</parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
+    <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
+    <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
+    <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
+    <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
+    <parameters>
+      <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
+      <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
+
+  <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
+   <parameters>
+     <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+   </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
+    <parameters>
+     <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+    </parameters>
+  </check>
+
+  <!-- ??? usually shouldn't be checked into the code base. -->
+  <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
+
+  <!-- As of SPARK-7558, all tests in Spark should extend o.a.s.SparkFunSuite instead of FunSuite directly -->
+  <check customId="funsuite" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">^FunSuite[A-Za-z]*$</parameter></parameters>
+    <customMessage>Tests must extend org.apache.spark.SparkFunSuite instead.</customMessage>
+  </check>
+
+  <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
+  <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">^println$</parameter></parameters>
+    <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
+      // scalastyle:off println
+      println(...)
+      // scalastyle:on println]]></customMessage>
+  </check>
+
+  <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
+    <customMessage><![CDATA[
+      @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
+    ]]></customMessage>
+  </check>
+
+  <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
+      ShutdownHookManager.addShutdownHook instead.
+      If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
+      // scalastyle:off runtimeaddshutdownhook
+      Runtime.getRuntime.addShutdownHook(...)
+      // scalastyle:on runtimeaddshutdownhook
+    ]]></customMessage>
+  </check>
+
+  <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
+      java.util.concurrent.ConcurrentLinkedQueue instead.
+      If you must use mutable.SynchronizedBuffer, wrap the code block with
+      // scalastyle:off mutablesynchronizedbuffer
+      mutable.SynchronizedBuffer[...]
+      // scalastyle:on mutablesynchronizedbuffer
+    ]]></customMessage>
+  </check>
+
+  <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Class\.forName</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
+      If you must use Class.forName, wrap the code block with
+      // scalastyle:off classforname
+      Class.forName(...)
+      // scalastyle:on classforname
+    ]]></customMessage>
+  </check>
+
+  <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Await\.result</parameter></parameters>
+    <customMessage><![CDATA[
+      Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
+      If you must use Await.result, wrap the code block with
+      // scalastyle:off awaitresult
+      Await.result(...)
+      // scalastyle:on awaitresult
+    ]]></customMessage>
+  </check>
+
+  <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
+  <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">JavaConversions</parameter></parameters>
+    <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
+    scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
+    <parameters>
+      <parameter name="groups">java,scala,3rdParty,spark</parameter>
+      <parameter name="group.java">javax?\..*</parameter>
+      <parameter name="group.scala">scala\..*</parameter>
+      <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
+      <parameter name="group.spark">org\.apache\.spark\..*</parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
+    <parameters>
+      <parameter name="tokens">COMMA</parameter>
+    </parameters>
+  </check>
+
+  <!-- SPARK-3854: Single Space between ')' and '{' -->
+  <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">\)\{</parameter></parameters>
+    <customMessage><![CDATA[
+      Single Space between ')' and `{`.
+    ]]></customMessage>
+  </check>
+
+  <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1  [*]</parameter></parameters>
+    <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
+  </check>
+
+  <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
+    <customMessage>Omit braces in case clauses.</customMessage>
+  </check>
+
+  <!-- ================================================================================ -->
+  <!--       rules we'd like to enforce, but haven't cleaned up the codebase yet        -->
+  <!-- ================================================================================ -->
+
+  <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
+  <!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
+  <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
+  <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
+
+  <!-- This breaks symbolic method names so we don't turn it on. -->
+  <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
+  <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
+    <parameters>
+    <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
+    </parameters>
+  </check>
+
+  <!-- Should turn this on, but we have a few places that need to be fixed first -->
+  <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
+
+  <!-- ================================================================================ -->
+  <!--                               rules we don't want                                -->
+  <!-- ================================================================================ -->
+
+  <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
+    <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
+  </check>
+
+  <!-- We want the opposite of this: NewLineAtEofChecker -->
+  <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
+
+  <!-- This one complains about all kinds of random things. Disable. -->
+  <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
+
+  <!-- We use return quite a bit for control flows and guards -->
+  <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
+
+  <!-- We use null a lot in low level code and to interface with 3rd party code -->
+  <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
+
+  <!-- Doesn't seem super big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
+
+  <!-- Doesn't seem super big deal here ... -->
+  <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
+    <parameters><parameter name="maxFileLength">800></parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem super big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
+    <parameters><parameter name="maxTypes">30</parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem super big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
+    <parameters><parameter name="maximum">10</parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem super big deal here ... -->
+  <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
+    <parameters><parameter name="maxLength">50</parameter></parameters>
+  </check>
+
+  <!-- Not exactly feasible to enforce this right now. -->
+  <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
+  <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
+    <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
+  </check>
+
+  <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
+  <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
+    <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
+  </check>
+
+</scalastyle>

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index bbe644e..0269244 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -19,32 +19,32 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
+    <groupId>org.apache.bahir</groupId>
+    <artifactId>bahir-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
-    <relativePath>../../pom.xml</relativePath>
+    <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <groupId>org.apache.spark</groupId>
+  <groupId>org.apache.bahir</groupId>
   <artifactId>spark-streaming-akka_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-akka</sbt.project.name>
   </properties>
   <packaging>jar</packaging>
-  <name>Spark Project External Akka</name>
-  <url>http://spark.apache.org/</url>
+  <name>Apache Bahir - Spark Streaming Akka</name>
+  <url>http://bahir.apache.org/</url>
 
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
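
For downstream builds, a hedged sketch of depending on the renamed artifact (the snapshot version comes from this patch and will differ for actual releases):

    <dependency>
      <groupId>org.apache.bahir</groupId>
      <artifactId>spark-streaming-akka_2.11</artifactId>
      <version>2.0.0-SNAPSHOT</version>
    </dependency>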

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
index 33415c1..e3be880 100644
--- a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
+++ b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
@@ -31,8 +31,9 @@ import akka.pattern.ask
 import akka.util.Timeout
 import com.typesafe.config.ConfigFactory
 
-import org.apache.spark.{Logging, TaskContext}
+import org.apache.spark.TaskContext
 import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.internal.Logging
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.receiver.Receiver
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index d0d9687..0036591 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -19,32 +19,32 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
+    <groupId>org.apache.bahir</groupId>
+    <artifactId>bahir-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
-    <relativePath>../../pom.xml</relativePath>
+    <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <groupId>org.apache.spark</groupId>
+  <groupId>org.apache.bahir</groupId>
   <artifactId>spark-streaming-mqtt_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-mqtt</sbt.project.name>
   </properties>
   <packaging>jar</packaging>
-  <name>Spark Project External MQTT</name>
-  <url>http://spark.apache.org/</url>
+  <name>Apache Bahir - Spark Streaming MQTT</name>
+  <url>http://bahir.apache.org/</url>
 
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
index 3680c13..80ab27b 100644
--- a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
+++ b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
@@ -27,7 +27,8 @@ import org.apache.commons.lang3.RandomUtils
 import org.eclipse.paho.client.mqttv3._
 import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
 
-import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
 
 /**

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 5d4053a..b0fb582 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -19,32 +19,32 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
+    <groupId>org.apache.bahir</groupId>
+    <artifactId>bahir-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
-    <relativePath>../../pom.xml</relativePath>
+    <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <groupId>org.apache.spark</groupId>
+  <groupId>org.apache.bahir</groupId>
   <artifactId>spark-streaming-twitter_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-twitter</sbt.project.name>
   </properties>
   <packaging>jar</packaging>
-  <name>Spark Project External Twitter</name>
-  <url>http://spark.apache.org/</url>
+  <name>Apache Bahir - Spark Streaming Twitter</name>
+  <url>http://bahir.apache.org/</url>
 
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index bdd57fd..bd23a12 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -22,7 +22,7 @@ import twitter4j.auth.Authorization
 import twitter4j.auth.OAuthAuthorization
 import twitter4j.conf.ConfigurationBuilder
 
-import org.apache.spark.Logging
+import org.apache.spark.internal.Logging
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming._
 import org.apache.spark.streaming.dstream._

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala b/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
index 7e5fc0c..bd23831 100644
--- a/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
+++ b/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
@@ -21,7 +21,8 @@ import org.scalatest.BeforeAndAfter
 import twitter4j.Status
 import twitter4j.auth.{Authorization, NullAuthorization}
 
-import org.apache.spark.{Logging, SparkFunSuite}
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.internal.Logging
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.dstream.ReceiverInputDStream

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index f16bc0f..e248c66 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -19,37 +19,37 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
+    <groupId>org.apache.bahir</groupId>
+    <artifactId>bahir-parent_2.11</artifactId>
     <version>2.0.0-SNAPSHOT</version>
-    <relativePath>../../pom.xml</relativePath>
+    <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <groupId>org.apache.spark</groupId>
+  <groupId>org.apache.bahir</groupId>
   <artifactId>spark-streaming-zeromq_2.11</artifactId>
   <properties>
     <sbt.project.name>streaming-zeromq</sbt.project.name>
   </properties>
   <packaging>jar</packaging>
-  <name>Spark Project External ZeroMQ</name>
-  <url>http://spark.apache.org/</url>
+  <name>Apache Bahir - Spark Streaming ZeroMQ</name>
+  <url>http://bahir.apache.org/</url>
 
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>org.apache.spark</groupId>
+      <groupId>org.apache.bahir</groupId>
       <artifactId>spark-streaming-akka_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${spark.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>

http://git-wip-us.apache.org/repos/asf/bahir/blob/d3254248/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
index dd367cd..4f6f006 100644
--- a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
+++ b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
@@ -22,7 +22,7 @@ import scala.reflect.ClassTag
 import akka.util.ByteString
 import akka.zeromq._
 
-import org.apache.spark.Logging
+import org.apache.spark.internal.Logging
 import org.apache.spark.streaming.akka.ActorReceiver
 
 /**


[24/50] [abbrv] bahir git commit: [SPARK-12618][CORE][STREAMING][SQL] Clean up build warnings: 2.0.0 edition

Posted by lr...@apache.org.
[SPARK-12618][CORE][STREAMING][SQL] Clean up build warnings: 2.0.0 edition

Fix most build warnings, mostly deprecated API usages. I'll annotate some of the changes below. CC rxin, who is leading the charge to remove the deprecated APIs.

Author: Sean Owen <so...@cloudera.com>

Closes #10570 from srowen/SPARK-12618.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/2067a3c4
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/2067a3c4
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/2067a3c4

Branch: refs/heads/master
Commit: 2067a3c4e3944db228850e26b60a8d875b2aa1f9
Parents: cca0efe
Author: Sean Owen <so...@cloudera.com>
Authored: Fri Jan 8 17:47:44 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Jan 8 17:47:44 2016 +0000

----------------------------------------------------------------------
 .../JavaTwitterHashTagJoinSentiments.java       | 36 ++++++++------------
 1 file changed, 15 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/2067a3c4/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java b/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
index 030ee30..d869768 100644
--- a/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
+++ b/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
@@ -17,13 +17,13 @@
 
 package org.apache.spark.examples.streaming;
 
-import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.Function;
 import org.apache.spark.api.java.function.Function2;
 import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.function.VoidFunction;
 import org.apache.spark.streaming.Duration;
 import org.apache.spark.streaming.api.java.JavaDStream;
 import org.apache.spark.streaming.api.java.JavaPairDStream;
@@ -33,8 +33,6 @@ import org.apache.spark.streaming.twitter.TwitterUtils;
 import scala.Tuple2;
 import twitter4j.Status;
 
-import java.io.IOException;
-import java.net.URI;
 import java.util.Arrays;
 import java.util.List;
 
@@ -44,7 +42,7 @@ import java.util.List;
  */
 public class JavaTwitterHashTagJoinSentiments {
 
-  public static void main(String[] args) throws IOException {
+  public static void main(String[] args) {
     if (args.length < 4) {
       System.err.println("Usage: JavaTwitterHashTagJoinSentiments <consumer key> <consumer secret>" +
         " <access token> <access token secret> [<filters>]");
@@ -79,7 +77,7 @@ public class JavaTwitterHashTagJoinSentiments {
 
     JavaDStream<String> hashTags = words.filter(new Function<String, Boolean>() {
       @Override
-      public Boolean call(String word) throws Exception {
+      public Boolean call(String word) {
         return word.startsWith("#");
       }
     });
@@ -91,8 +89,7 @@ public class JavaTwitterHashTagJoinSentiments {
         @Override
         public Tuple2<String, Double> call(String line) {
           String[] columns = line.split("\t");
-          return new Tuple2<String, Double>(columns[0],
-            Double.parseDouble(columns[1]));
+          return new Tuple2<>(columns[0], Double.parseDouble(columns[1]));
         }
       });
 
@@ -101,7 +98,7 @@ public class JavaTwitterHashTagJoinSentiments {
         @Override
         public Tuple2<String, Integer> call(String s) {
           // leave out the # character
-          return new Tuple2<String, Integer>(s.substring(1), 1);
+          return new Tuple2<>(s.substring(1), 1);
         }
       });
 
@@ -120,9 +117,8 @@ public class JavaTwitterHashTagJoinSentiments {
       hashTagTotals.transformToPair(new Function<JavaPairRDD<String, Integer>,
         JavaPairRDD<String, Tuple2<Double, Integer>>>() {
         @Override
-        public JavaPairRDD<String, Tuple2<Double, Integer>> call(JavaPairRDD<String,
-          Integer> topicCount)
-          throws Exception {
+        public JavaPairRDD<String, Tuple2<Double, Integer>> call(
+            JavaPairRDD<String, Integer> topicCount) {
           return wordSentiments.join(topicCount);
         }
       });
@@ -131,9 +127,9 @@ public class JavaTwitterHashTagJoinSentiments {
       new PairFunction<Tuple2<String, Tuple2<Double, Integer>>, String, Double>() {
         @Override
         public Tuple2<String, Double> call(Tuple2<String,
-          Tuple2<Double, Integer>> topicAndTuplePair) throws Exception {
+          Tuple2<Double, Integer>> topicAndTuplePair) {
           Tuple2<Double, Integer> happinessAndCount = topicAndTuplePair._2();
-          return new Tuple2<String, Double>(topicAndTuplePair._1(),
+          return new Tuple2<>(topicAndTuplePair._1(),
             happinessAndCount._1() * happinessAndCount._2());
         }
       });
@@ -141,9 +137,8 @@ public class JavaTwitterHashTagJoinSentiments {
     JavaPairDStream<Double, String> happinessTopicPairs = topicHappiness.mapToPair(
       new PairFunction<Tuple2<String, Double>, Double, String>() {
         @Override
-        public Tuple2<Double, String> call(Tuple2<String, Double> topicHappiness)
-          throws Exception {
-          return new Tuple2<Double, String>(topicHappiness._2(),
+        public Tuple2<Double, String> call(Tuple2<String, Double> topicHappiness) {
+          return new Tuple2<>(topicHappiness._2(),
             topicHappiness._1());
         }
       });
@@ -151,17 +146,17 @@ public class JavaTwitterHashTagJoinSentiments {
     JavaPairDStream<Double, String> happiest10 = happinessTopicPairs.transformToPair(
       new Function<JavaPairRDD<Double, String>, JavaPairRDD<Double, String>>() {
         @Override
-        public JavaPairRDD<Double, String> call(JavaPairRDD<Double,
-          String> happinessAndTopics) throws Exception {
+        public JavaPairRDD<Double, String> call(
+            JavaPairRDD<Double, String> happinessAndTopics) {
           return happinessAndTopics.sortByKey(false);
         }
       }
     );
 
     // Print hash tags with the most positive sentiment values
-    happiest10.foreachRDD(new Function<JavaPairRDD<Double, String>, Void>() {
+    happiest10.foreachRDD(new VoidFunction<JavaPairRDD<Double, String>>() {
       @Override
-      public Void call(JavaPairRDD<Double, String> happinessTopicPairs) throws Exception {
+      public void call(JavaPairRDD<Double, String> happinessTopicPairs) {
         List<Tuple2<Double, String>> topList = happinessTopicPairs.take(10);
         System.out.println(
           String.format("\nHappiest topics in last 10 seconds (%s total):",
@@ -170,7 +165,6 @@ public class JavaTwitterHashTagJoinSentiments {
           System.out.println(
             String.format("%s (%s happiness)", pair._2(), pair._1()));
         }
-        return null;
       }
     });
 


[38/50] [abbrv] bahir git commit: Fixing the type of the sentiment happiness value

Posted by lr...@apache.org.
Fixing the type of the sentiment happiness value

## What changes were proposed in this pull request?

Added a conversion to Int for the 'happiness value' read from the file. Without it, the multiplication on line 75 multiplies a String by a number, which in Scala repeats the string, yielding values like "-2-2" instead of -4.
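
A minimal Scala sketch of the failure mode, with illustrative values (not taken from the patch):

    // StringOps defines * as string repetition, so multiplying the
    // unconverted String by a count concatenates copies of it:
    val happinessValue = "-2"
    happinessValue * 2        // yields "-2-2" (string repetition, not arithmetic)
    happinessValue.toInt * 2  // yields -4 (the intended numeric result)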

## How was this patch tested?

Tested manually.

Author: Yury Liavitski <se...@gmail.com>
Author: Yury Liavitski <yu...@il111.ice.local>

Closes #11540 from heliocentrist/fix-sentiment-value-type.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/3b4a1c0e
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/3b4a1c0e
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/3b4a1c0e

Branch: refs/heads/master
Commit: 3b4a1c0e2609ec7c852495f9d4aefac7e8730691
Parents: 9bff9b3
Author: Yury Liavitski <se...@gmail.com>
Authored: Mon Mar 7 10:54:33 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Mon Mar 7 10:54:33 2016 +0000

----------------------------------------------------------------------
 .../streaming/twitter/TwitterHashTagJoinSentiments.scala         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/3b4a1c0e/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
index edf0e0b..a8d392c 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
@@ -56,8 +56,8 @@ object TwitterHashTagJoinSentiments {
     val wordSentimentFilePath = "data/streaming/AFINN-111.txt"
     val wordSentiments = ssc.sparkContext.textFile(wordSentimentFilePath).map { line =>
       val Array(word, happinessValue) = line.split("\t")
-      (word, happinessValue)
-    } cache()
+      (word, happinessValue.toInt)
+    }.cache()
 
     // Determine the hash tags with the highest sentiment values by joining the streaming RDD
     // with the static RDD inside the transform() method and then multiplying


[43/50] [abbrv] bahir git commit: [SPARK-13848][SPARK-5185] Update to Py4J 0.9.2 in order to fix classloading issue

Posted by lr...@apache.org.
[SPARK-13848][SPARK-5185] Update to Py4J 0.9.2 in order to fix classloading issue

This patch upgrades Py4J from 0.9.1 to 0.9.2 to pick up a fix that makes Py4J use the current thread's ContextClassLoader when performing reflection / class loading. This is necessary to fix [SPARK-5185](https://issues.apache.org/jira/browse/SPARK-5185), a longstanding issue affecting the use of `--jars` and `--packages` in PySpark.

In order to demonstrate that the fix works, I removed the workarounds which were added as part of [SPARK-6027](https://issues.apache.org/jira/browse/SPARK-6027) / #4779 and other patches.
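
For context, a sketch of the JVM-side lookup that the removed Python workaround mirrored (illustrative Scala, not part of this patch): classes shipped via --jars are visible through the thread's context class loader, so the helper had to be loaded reflectively instead of referenced directly.

    // Reflective lookup through the context class loader:
    val cls = Thread.currentThread().getContextClassLoader
      .loadClass("org.apache.spark.streaming.mqtt.MQTTUtilsPythonHelper")
    val helper = cls.newInstance()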

Py4J diff: https://github.com/bartdag/py4j/compare/0.9.1...0.9.2

/cc zsxwing tdas davies brkyvz

Author: Josh Rosen <jo...@databricks.com>

Closes #11687 from JoshRosen/py4j-0.9.2.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/54040f37
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/54040f37
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/54040f37

Branch: refs/heads/master
Commit: 54040f372efee60b3283fbd15d030b5e8d3aba87
Parents: a8dc23a
Author: Josh Rosen <jo...@databricks.com>
Authored: Mon Mar 14 12:22:02 2016 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Mon Mar 14 12:22:02 2016 -0700

----------------------------------------------------------------------
 streaming-mqtt/python/mqtt.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/54040f37/streaming-mqtt/python/mqtt.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/mqtt.py b/streaming-mqtt/python/mqtt.py
index 388e952..8848a70 100644
--- a/streaming-mqtt/python/mqtt.py
+++ b/streaming-mqtt/python/mqtt.py
@@ -38,18 +38,15 @@ class MQTTUtils(object):
         :param storageLevel:  RDD storage level.
         :return: A DStream object
         """
-        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
-
         try:
-            helperClass = ssc._jvm.java.lang.Thread.currentThread().getContextClassLoader() \
-                .loadClass("org.apache.spark.streaming.mqtt.MQTTUtilsPythonHelper")
-            helper = helperClass.newInstance()
-            jstream = helper.createStream(ssc._jssc, brokerUrl, topic, jlevel)
-        except Py4JJavaError as e:
-            if 'ClassNotFoundException' in str(e.java_exception):
+            helper = ssc._jvm.org.apache.spark.streaming.mqtt.MQTTUtilsPythonHelper()
+        except TypeError as e:
+            if str(e) == "'JavaPackage' object is not callable":
                 MQTTUtils._printErrorMsg(ssc.sparkContext)
             raise
 
+        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
+        jstream = helper.createStream(ssc._jssc, brokerUrl, topic, jlevel)
         return DStream(jstream, ssc, UTF8Deserializer())
 
     @staticmethod


[15/50] [abbrv] bahir git commit: [SPARK-9057][STREAMING] Twitter example joining to static RDD of word sentiment values

Posted by lr...@apache.org.
[SPARK-9057][STREAMING] Twitter example joining to static RDD of word sentiment values

Example of joining a static RDD of word sentiments to a streaming RDD of Tweets, demonstrating the usage of the transform() method.
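
In outline, the transform() pattern the example demonstrates (a Scala sketch; the names are stand-ins for those used in the full example below):

    // Assuming wordSentiments: RDD[(String, Int)] (static) and
    // hashTagCounts: DStream[(String, Int)] (streaming). transform() exposes
    // each micro-batch RDD so it can be joined against the static RDD:
    val joined = hashTagCounts.transform(rdd => wordSentiments.join(rdd))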

Author: Jeff L <sh...@alumni.carnegiemellon.edu>

Closes #8431 from Agent007/SPARK-9057.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/c6155756
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/c6155756
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/c6155756

Branch: refs/heads/master
Commit: c61557569466855728f41c8180bbde8a78e2fb20
Parents: c25b799
Author: Jeff L <sh...@alumni.carnegiemellon.edu>
Authored: Fri Dec 18 15:06:54 2015 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Dec 18 15:06:54 2015 +0000

----------------------------------------------------------------------
 .../JavaTwitterHashTagJoinSentiments.java       | 180 +++++++++++++++++++
 .../twitter/TwitterHashTagJoinSentiments.scala  |  96 ++++++++++
 2 files changed, 276 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/c6155756/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java b/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
new file mode 100644
index 0000000..030ee30
--- /dev/null
+++ b/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.streaming.twitter.TwitterUtils;
+import scala.Tuple2;
+import twitter4j.Status;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Displays the most positive hash tags by joining the streaming Twitter data with a static RDD of
+ * the AFINN word list (http://neuro.imm.dtu.dk/wiki/AFINN)
+ */
+public class JavaTwitterHashTagJoinSentiments {
+
+  public static void main(String[] args) throws IOException {
+    if (args.length < 4) {
+      System.err.println("Usage: JavaTwitterHashTagJoinSentiments <consumer key> <consumer secret>" +
+        " <access token> <access token secret> [<filters>]");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    String consumerKey = args[0];
+    String consumerSecret = args[1];
+    String accessToken = args[2];
+    String accessTokenSecret = args[3];
+    String[] filters = Arrays.copyOfRange(args, 4, args.length);
+
+    // Set the system properties so that Twitter4j library used by Twitter stream
+    // can use them to generate OAuth credentials
+    System.setProperty("twitter4j.oauth.consumerKey", consumerKey);
+    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret);
+    System.setProperty("twitter4j.oauth.accessToken", accessToken);
+    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret);
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaTwitterHashTagJoinSentiments");
+    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
+    JavaReceiverInputDStream<Status> stream = TwitterUtils.createStream(jssc, filters);
+
+    JavaDStream<String> words = stream.flatMap(new FlatMapFunction<Status, String>() {
+      @Override
+      public Iterable<String> call(Status s) {
+        return Arrays.asList(s.getText().split(" "));
+      }
+    });
+
+    JavaDStream<String> hashTags = words.filter(new Function<String, Boolean>() {
+      @Override
+      public Boolean call(String word) throws Exception {
+        return word.startsWith("#");
+      }
+    });
+
+    // Read in the word-sentiment list and create a static RDD from it
+    String wordSentimentFilePath = "data/streaming/AFINN-111.txt";
+    final JavaPairRDD<String, Double> wordSentiments = jssc.sparkContext().textFile(wordSentimentFilePath)
+      .mapToPair(new PairFunction<String, String, Double>(){
+        @Override
+        public Tuple2<String, Double> call(String line) {
+          String[] columns = line.split("\t");
+          return new Tuple2<String, Double>(columns[0],
+            Double.parseDouble(columns[1]));
+        }
+      });
+
+    JavaPairDStream<String, Integer> hashTagCount = hashTags.mapToPair(
+      new PairFunction<String, String, Integer>() {
+        @Override
+        public Tuple2<String, Integer> call(String s) {
+          // leave out the # character
+          return new Tuple2<String, Integer>(s.substring(1), 1);
+        }
+      });
+
+    JavaPairDStream<String, Integer> hashTagTotals = hashTagCount.reduceByKeyAndWindow(
+      new Function2<Integer, Integer, Integer>() {
+        @Override
+        public Integer call(Integer a, Integer b) {
+          return a + b;
+        }
+      }, new Duration(10000));
+
+    // Determine the hash tags with the highest sentiment values by joining the streaming RDD
+    // with the static RDD inside the transform() method and then multiplying
+    // the frequency of the hash tag by its sentiment value
+    JavaPairDStream<String, Tuple2<Double, Integer>> joinedTuples =
+      hashTagTotals.transformToPair(new Function<JavaPairRDD<String, Integer>,
+        JavaPairRDD<String, Tuple2<Double, Integer>>>() {
+        @Override
+        public JavaPairRDD<String, Tuple2<Double, Integer>> call(JavaPairRDD<String,
+          Integer> topicCount)
+          throws Exception {
+          return wordSentiments.join(topicCount);
+        }
+      });
+
+    JavaPairDStream<String, Double> topicHappiness = joinedTuples.mapToPair(
+      new PairFunction<Tuple2<String, Tuple2<Double, Integer>>, String, Double>() {
+        @Override
+        public Tuple2<String, Double> call(Tuple2<String,
+          Tuple2<Double, Integer>> topicAndTuplePair) throws Exception {
+          Tuple2<Double, Integer> happinessAndCount = topicAndTuplePair._2();
+          return new Tuple2<String, Double>(topicAndTuplePair._1(),
+            happinessAndCount._1() * happinessAndCount._2());
+        }
+      });
+
+    JavaPairDStream<Double, String> happinessTopicPairs = topicHappiness.mapToPair(
+      new PairFunction<Tuple2<String, Double>, Double, String>() {
+        @Override
+        public Tuple2<Double, String> call(Tuple2<String, Double> topicHappiness)
+          throws Exception {
+          return new Tuple2<Double, String>(topicHappiness._2(),
+            topicHappiness._1());
+        }
+      });
+
+    JavaPairDStream<Double, String> happiest10 = happinessTopicPairs.transformToPair(
+      new Function<JavaPairRDD<Double, String>, JavaPairRDD<Double, String>>() {
+        @Override
+        public JavaPairRDD<Double, String> call(JavaPairRDD<Double,
+          String> happinessAndTopics) throws Exception {
+          return happinessAndTopics.sortByKey(false);
+        }
+      }
+    );
+
+    // Print hash tags with the most positive sentiment values
+    happiest10.foreachRDD(new Function<JavaPairRDD<Double, String>, Void>() {
+      @Override
+      public Void call(JavaPairRDD<Double, String> happinessTopicPairs) throws Exception {
+        List<Tuple2<Double, String>> topList = happinessTopicPairs.take(10);
+        System.out.println(
+          String.format("\nHappiest topics in last 10 seconds (%s total):",
+            happinessTopicPairs.count()));
+        for (Tuple2<Double, String> pair : topList) {
+          System.out.println(
+            String.format("%s (%s happiness)", pair._2(), pair._1()));
+        }
+        return null;
+      }
+    });
+
+    jssc.start();
+    jssc.awaitTermination();
+  }
+}

http://git-wip-us.apache.org/repos/asf/bahir/blob/c6155756/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
new file mode 100644
index 0000000..0328fa8
--- /dev/null
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming.twitter.TwitterUtils
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+/**
+ * Displays the most positive hash tags by joining the streaming Twitter data with a static RDD of
+ * the AFINN word list (http://neuro.imm.dtu.dk/wiki/AFINN)
+ */
+object TwitterHashTagJoinSentiments {
+  def main(args: Array[String]) {
+    if (args.length < 4) {
+      System.err.println("Usage: TwitterHashTagJoinSentiments <consumer key> <consumer secret> " +
+        "<access token> <access token secret> [<filters>]")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    val Array(consumerKey, consumerSecret, accessToken, accessTokenSecret) = args.take(4)
+    val filters = args.takeRight(args.length - 4)
+
+    // Set the system properties so that Twitter4j library used by Twitter stream
+    // can use them to generate OAuth credentials
+    System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
+    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
+    System.setProperty("twitter4j.oauth.accessToken", accessToken)
+    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret)
+
+    val sparkConf = new SparkConf().setAppName("TwitterHashTagJoinSentiments")
+    val ssc = new StreamingContext(sparkConf, Seconds(2))
+    val stream = TwitterUtils.createStream(ssc, None, filters)
+
+    val hashTags = stream.flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))
+
+    // Read in the word-sentiment list and create a static RDD from it
+    val wordSentimentFilePath = "data/streaming/AFINN-111.txt"
+    val wordSentiments = ssc.sparkContext.textFile(wordSentimentFilePath).map { line =>
+      val Array(word, happinessValue) = line.split("\t")
+      (word, happinessValue)
+    } cache()
+
+    // Determine the hash tags with the highest sentiment values by joining the streaming RDD
+    // with the static RDD inside the transform() method and then multiplying
+    // the frequency of the hash tag by its sentiment value
+    val happiest60 = hashTags.map(hashTag => (hashTag.tail, 1))
+      .reduceByKeyAndWindow(_ + _, Seconds(60))
+      .transform{topicCount => wordSentiments.join(topicCount)}
+      .map{case (topic, tuple) => (topic, tuple._1 * tuple._2)}
+      .map{case (topic, happinessValue) => (happinessValue, topic)}
+      .transform(_.sortByKey(false))
+
+    val happiest10 = hashTags.map(hashTag => (hashTag.tail, 1))
+      .reduceByKeyAndWindow(_ + _, Seconds(10))
+      .transform{topicCount => wordSentiments.join(topicCount)}
+      .map{case (topic, tuple) => (topic, tuple._1 * tuple._2)}
+      .map{case (topic, happinessValue) => (happinessValue, topic)}
+      .transform(_.sortByKey(false))
+
+    // Print hash tags with the most positive sentiment values
+    happiest60.foreachRDD(rdd => {
+      val topList = rdd.take(10)
+      println("\nHappiest topics in last 60 seconds (%s total):".format(rdd.count()))
+      topList.foreach{case (happiness, tag) => println("%s (%s happiness)".format(tag, happiness))}
+    })
+
+    happiest10.foreachRDD(rdd => {
+      val topList = rdd.take(10)
+      println("\nHappiest topics in last 10 seconds (%s total):".format(rdd.count()))
+      topList.foreach{case (happiness, tag) => println("%s (%s happiness)".format(tag, happiness))}
+    })
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println


[26/50] [abbrv] bahir git commit: [SPARK-12692][BUILD][STREAMING] Scala style: Fix the style violation (Space before ", " or ":")

Posted by lr...@apache.org.
[SPARK-12692][BUILD][STREAMING] Scala style: Fix the style violation (Space before "," or ":")

Fix the style violations (space before "," and ":").
This PR is a followup for #10643.
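
The space was there for a reason: a Scala identifier ending in an underscore absorbs a following ':' into the name, so 'ssc_ :' needs the space just to parse. Renaming the parameter sidesteps the violation entirely. A minimal sketch:

    import org.apache.spark.streaming.StreamingContext

    class A(ssc_ : StreamingContext)  // legal only with the space before ':'
    class B(_ssc: StreamingContext)   // renamed: no space needed, style-clean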

Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>

Closes #10685 from sarutak/SPARK-12692-followup-streaming.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/618b8620
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/618b8620
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/618b8620

Branch: refs/heads/master
Commit: 618b8620c3bbbb7c9b47240ffb20729a5185afc8
Parents: d29a61b
Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>
Authored: Mon Jan 11 21:06:22 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Jan 11 21:06:22 2016 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala | 4 ++--
 .../org/apache/spark/streaming/twitter/TwitterInputDStream.scala | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/618b8620/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
index 116c170..079bd8a 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
@@ -38,11 +38,11 @@ import org.apache.spark.streaming.receiver.Receiver
 
 private[streaming]
 class MQTTInputDStream(
-    ssc_ : StreamingContext,
+    _ssc: StreamingContext,
     brokerUrl: String,
     topic: String,
     storageLevel: StorageLevel
-  ) extends ReceiverInputDStream[String](ssc_) {
+  ) extends ReceiverInputDStream[String](_ssc) {
 
   private[streaming] override def name: String = s"MQTT stream [$id]"
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/618b8620/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index a48eec7..bdd57fd 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -39,11 +39,11 @@ import org.apache.spark.streaming.receiver.Receiver
 */
 private[streaming]
 class TwitterInputDStream(
-    ssc_ : StreamingContext,
+    _ssc: StreamingContext,
     twitterAuth: Option[Authorization],
     filters: Seq[String],
     storageLevel: StorageLevel
-  ) extends ReceiverInputDStream[Status](ssc_)  {
+  ) extends ReceiverInputDStream[Status](_ssc)  {
 
   private def createOAuthAuthorization(): Authorization = {
     new OAuthAuthorization(new ConfigurationBuilder().build())


[35/50] [abbrv] bahir git commit: [SPARK-13423][WIP][CORE][SQL][STREAMING] Static analysis fixes for 2.x

Posted by lr...@apache.org.
[SPARK-13423][WIP][CORE][SQL][STREAMING] Static analysis fixes for 2.x

## What changes were proposed in this pull request?

Make some cross-cutting code improvements according to static analysis. These are individually up for discussion since they exist in separate commits that can be reverted. The changes are broadly:

- Inner class should be static
- Mismatched hashCode/equals
- Overflow in compareTo
- Unchecked warnings
- Misuse of assert, vs junit.assert
- get(a) + getOrElse(b) -> getOrElse(a,b)
- Array/String .size -> .length (occasionally, -> .isEmpty / .nonEmpty) to avoid implicit conversions
- Dead code
- tailrec
- exists(_ == ) -> contains
- find + nonEmpty -> exists
- filter + size -> count
- reduce(_+_) -> sum
- map + flatten -> flatMap

The most controversial may be .size -> .length, simply because of the sheer number of occurrences. It is intended to avoid implicit conversions that might be expensive in some places; a few before/after sketches follow below.
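
A few of the mechanical rewrites above, sketched with illustrative values (not taken from the patch):

    val m = Map("a" -> 1)
    m.get("a").getOrElse(0)      // before
    m.getOrElse("a", 0)          // after

    val xs = Array(1, 2, 3)
    xs.size                      // before: goes through an implicit conversion
    xs.length                    // after: direct access, no wrapper allocation

    Seq(1, 2, 3).exists(_ == 2)  // before
    Seq(1, 2, 3).contains(2)     // after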

## How was this patch tested?

Existing Jenkins unit tests.

Author: Sean Owen <so...@cloudera.com>

Closes #11292 from srowen/SPARK-13423.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/a784af6c
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/a784af6c
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/a784af6c

Branch: refs/heads/master
Commit: a784af6c3e2de0e883cd04d445d7a14749dc8cfe
Parents: 8ad5ddd
Author: Sean Owen <so...@cloudera.com>
Authored: Thu Mar 3 09:54:09 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu Mar 3 09:54:09 2016 +0000

----------------------------------------------------------------------
 .../apache/spark/examples/streaming/akka/JavaActorWordCount.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/a784af6c/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
index cf77466..7bb70d0 100644
--- a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
+++ b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
@@ -60,7 +60,9 @@ class JavaSampleActorReceiver<T> extends JavaActorReceiver {
 
   @Override
   public void onReceive(Object msg) throws Exception {
-    store((T) msg);
+    @SuppressWarnings("unchecked")
+    T msgT = (T) msg;
+    store(msgT);
   }
 
   @Override


[07/50] [abbrv] bahir git commit: [SPARK-10300] [BUILD] [TESTS] Add support for test tags in run-tests.py.

Posted by lr...@apache.org.
[SPARK-10300] [BUILD] [TESTS] Add support for test tags in run-tests.py.

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #8775 from vanzin/SPARK-10300.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/56189117
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/56189117
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/56189117

Branch: refs/heads/master
Commit: 56189117ac5b438384b27b0f2617c29432ff66f0
Parents: 2c8702f
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Wed Oct 7 14:11:21 2015 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Wed Oct 7 14:11:21 2015 -0700

----------------------------------------------------------------------
 streaming-mqtt/pom.xml    | 14 ++++----------
 streaming-twitter/pom.xml | 10 ++--------
 streaming-zeromq/pom.xml  | 10 ++--------
 3 files changed, 8 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/56189117/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 05e6338..59fba8b 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -59,21 +59,15 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.novocode</groupId>
-      <artifactId>junit-interface</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.activemq</groupId>
       <artifactId>activemq-core</artifactId>
       <version>5.7.0</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>

http://git-wip-us.apache.org/repos/asf/bahir/blob/56189117/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 244ad58..4c22ec8 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -59,14 +59,8 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.novocode</groupId>
-      <artifactId>junit-interface</artifactId>
-      <scope>test</scope>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
     </dependency>
   </dependencies>
   <build>

http://git-wip-us.apache.org/repos/asf/bahir/blob/56189117/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 171df86..02d6b81 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -58,14 +58,8 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.novocode</groupId>
-      <artifactId>junit-interface</artifactId>
-      <scope>test</scope>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
     </dependency>
   </dependencies>
   <build>


[06/50] [abbrv] bahir git commit: [DOC] [PYSPARK] [MLLIB] Added newlines to docstrings to fix parameter formatting

Posted by lr...@apache.org.
[DOC] [PYSPARK] [MLLIB] Added newlines to docstrings to fix parameter formatting

Added newlines before `:param ...:` and `:return:` markup. Without these, parameter lists aren't formatted correctly in the API docs, e.g.:

![screen shot 2015-09-21 at 21 49 26](https://cloud.githubusercontent.com/assets/11915197/10004686/de3c41d4-60aa-11e5-9c50-a46dcb51243f.png)

... and looks like this once the newline is added:

![screen shot 2015-09-21 at 21 50 14](https://cloud.githubusercontent.com/assets/11915197/10004706/f86bfb08-60aa-11e5-8524-ae4436713502.png)

Author: noelsmith <ma...@noelsmith.com>

Closes #8851 from noel-smith/docstring-missing-newline-fix.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/2c8702fc
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/2c8702fc
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/2c8702fc

Branch: refs/heads/master
Commit: 2c8702fc1198bd0746a4f91a0e49a00fb2b3c9b0
Parents: e5d4bb7
Author: noelsmith <ma...@noelsmith.com>
Authored: Mon Sep 21 14:24:19 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Mon Sep 21 14:24:19 2015 -0700

----------------------------------------------------------------------
 streaming-mqtt/python/mqtt.py | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/2c8702fc/streaming-mqtt/python/mqtt.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/mqtt.py b/streaming-mqtt/python/mqtt.py
index f065989..fa83006 100644
--- a/streaming-mqtt/python/mqtt.py
+++ b/streaming-mqtt/python/mqtt.py
@@ -31,6 +31,7 @@ class MQTTUtils(object):
                      storageLevel=StorageLevel.MEMORY_AND_DISK_SER_2):
         """
         Create an input stream that pulls messages from a Mqtt Broker.
+
         :param ssc:  StreamingContext object
         :param brokerUrl:  Url of remote mqtt publisher
         :param topic:  topic name to subscribe to


[29/50] [abbrv] bahir git commit: [SPARK-3369][CORE][STREAMING] Java mapPartitions Iterator->Iterable is inconsistent with Scala's Iterator->Iterator

Posted by lr...@apache.org.
[SPARK-3369][CORE][STREAMING] Java mapPartitions Iterator->Iterable is inconsistent with Scala's Iterator->Iterator

Fix Java function API methods for flatMap and mapPartitions to require producing only an Iterator, not Iterable. Also fix DStream.flatMap to require a function producing TraversableOnce only, not Traversable.
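
A sketch of the Scala side of the contract (illustrative; assumes lines: DStream[String]): an Iterator is a TraversableOnce, so per-record expansion no longer has to materialize an intermediate collection.

    // Returning an Iterator avoids building an Iterable per record:
    val words = lines.flatMap(line => line.split("\\s+").iterator)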

CC rxin pwendell for API change; tdas since it also touches streaming.

Author: Sean Owen <so...@cloudera.com>

Closes #10413 from srowen/SPARK-3369.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/881d1af6
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/881d1af6
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/881d1af6

Branch: refs/heads/master
Commit: 881d1af6c9b6af77780cf2880f20579073ba10a4
Parents: bbb1ddc
Author: Sean Owen <so...@cloudera.com>
Authored: Tue Jan 26 11:55:28 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Tue Jan 26 11:55:28 2016 +0000

----------------------------------------------------------------------
 .../spark/examples/streaming/akka/JavaActorWordCount.java       | 5 +++--
 .../streaming/twitter/JavaTwitterHashTagJoinSentiments.java     | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/881d1af6/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
index 62e5633..cf77466 100644
--- a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
+++ b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
@@ -18,6 +18,7 @@
 package org.apache.spark.examples.streaming;
 
 import java.util.Arrays;
+import java.util.Iterator;
 
 import scala.Tuple2;
 
@@ -120,8 +121,8 @@ public class JavaActorWordCount {
     // compute wordcount
     lines.flatMap(new FlatMapFunction<String, String>() {
       @Override
-      public Iterable<String> call(String s) {
-        return Arrays.asList(s.split("\\s+"));
+      public Iterator<String> call(String s) {
+        return Arrays.asList(s.split("\\s+")).iterator();
       }
     }).mapToPair(new PairFunction<String, String, Integer>() {
       @Override

http://git-wip-us.apache.org/repos/asf/bahir/blob/881d1af6/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java b/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
index d869768..f0ae9a9 100644
--- a/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
+++ b/streaming-twitter/examples/src/main/java/org/apache/spark/examples/streaming/twitter/JavaTwitterHashTagJoinSentiments.java
@@ -34,6 +34,7 @@ import scala.Tuple2;
 import twitter4j.Status;
 
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 
 /**
@@ -70,8 +71,8 @@ public class JavaTwitterHashTagJoinSentiments {
 
     JavaDStream<String> words = stream.flatMap(new FlatMapFunction<Status, String>() {
       @Override
-      public Iterable<String> call(Status s) {
-        return Arrays.asList(s.getText().split(" "));
+      public Iterator<String> call(Status s) {
+        return Arrays.asList(s.getText().split(" ")).iterator();
       }
     });
 


[19/50] [abbrv] bahir git commit: [SPARK-3873][STREAMING] Import order fixes for streaming.

Posted by lr...@apache.org.
[SPARK-3873][STREAMING] Import order fixes for streaming.

Also included a few miscellaneous other modules that had very few violations.

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #10532 from vanzin/SPARK-3873-streaming.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/d2d48331
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/d2d48331
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/d2d48331

Branch: refs/heads/master
Commit: d2d4833158c0959df6bbad7733b11553e6644c7f
Parents: 47e0e0e
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Thu Dec 31 01:34:13 2015 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Thu Dec 31 01:34:13 2015 -0800

----------------------------------------------------------------------
 .../apache/spark/streaming/twitter/TwitterInputDStream.scala   | 6 +++---
 .../org/apache/spark/streaming/twitter/TwitterUtils.scala      | 5 +++--
 .../scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala  | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/d2d48331/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index 9a85a65..a48eec7 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -19,13 +19,13 @@ package org.apache.spark.streaming.twitter
 
 import twitter4j._
 import twitter4j.auth.Authorization
-import twitter4j.conf.ConfigurationBuilder
 import twitter4j.auth.OAuthAuthorization
+import twitter4j.conf.ConfigurationBuilder
 
+import org.apache.spark.Logging
+import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming._
 import org.apache.spark.streaming.dstream._
-import org.apache.spark.storage.StorageLevel
-import org.apache.spark.Logging
 import org.apache.spark.streaming.receiver.Receiver
 
 /* A stream of Twitter statuses, potentially filtered by one or more keywords.

http://git-wip-us.apache.org/repos/asf/bahir/blob/d2d48331/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
index c6a9a2b..3e843e9 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
@@ -19,10 +19,11 @@ package org.apache.spark.streaming.twitter
 
 import twitter4j.Status
 import twitter4j.auth.Authorization
+
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.StreamingContext
-import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaDStream, JavaStreamingContext}
-import org.apache.spark.streaming.dstream.{ReceiverInputDStream, DStream}
+import org.apache.spark.streaming.api.java.{JavaDStream, JavaReceiverInputDStream, JavaStreamingContext}
+import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
 
 object TwitterUtils {
   /**

http://git-wip-us.apache.org/repos/asf/bahir/blob/d2d48331/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
index 4ea218e..63cd8a2 100644
--- a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
+++ b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
@@ -17,8 +17,8 @@
 
 package org.apache.spark.streaming.zeromq
 
-import scala.reflect.ClassTag
 import scala.collection.JavaConverters._
+import scala.reflect.ClassTag
 
 import akka.actor.{Props, SupervisorStrategy}
 import akka.util.ByteString


[48/50] [abbrv] bahir git commit: Add project LICENSE and NOTICE files

Posted by lr...@apache.org.
Add project LICENSE and NOTICE files


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/ab21f447
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/ab21f447
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/ab21f447

Branch: refs/heads/master
Commit: ab21f4473ce45b3bdadcdf5f878a9b987c56d776
Parents: e1871c0
Author: Luciano Resende <lr...@apache.org>
Authored: Thu May 19 18:22:18 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Tue Jun 7 22:54:36 2016 -0700

----------------------------------------------------------------------
 LICENSE | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 NOTICE  |   5 ++
 2 files changed, 207 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/ab21f447/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8f71f43
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+

http://git-wip-us.apache.org/repos/asf/bahir/blob/ab21f447/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000..8bf7751
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,5 @@
+Apache Bahir
+Copyright (c) 2016 The Apache Software Foundation.
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
\ No newline at end of file


[17/50] [abbrv] bahir git commit: Bump master version to 2.0.0-SNAPSHOT.

Posted by lr...@apache.org.
Bump master version to 2.0.0-SNAPSHOT.

Author: Reynold Xin <rx...@databricks.com>

Closes #10387 from rxin/version-bump.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e2d30ef9
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e2d30ef9
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e2d30ef9

Branch: refs/heads/master
Commit: e2d30ef964c527b856638dba9ac510d891b32dde
Parents: 6b49590
Author: Reynold Xin <rx...@databricks.com>
Authored: Sat Dec 19 15:13:05 2015 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Sat Dec 19 15:13:05 2015 -0800

----------------------------------------------------------------------
 streaming-mqtt/pom.xml    | 2 +-
 streaming-twitter/pom.xml | 2 +-
 streaming-zeromq/pom.xml  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e2d30ef9/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 59fba8b..b3ba72a 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.6.0-SNAPSHOT</version>
+    <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/e2d30ef9/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 087270d..7b628b0 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.6.0-SNAPSHOT</version>
+    <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/bahir/blob/e2d30ef9/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 02d6b81..a725988 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.6.0-SNAPSHOT</version>
+    <version>2.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 


[02/50] [abbrv] bahir git commit: [SPARK-10547] [TEST] Streamline / improve style of Java API tests

Posted by lr...@apache.org.
[SPARK-10547] [TEST] Streamline / improve style of Java API tests

Fix a few Java API test style issues: unused generic types, exceptions, wrong assert argument order

Author: Sean Owen <so...@cloudera.com>

Closes #8706 from srowen/SPARK-10547.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/17a28c63
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/17a28c63
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/17a28c63

Branch: refs/heads/master
Commit: 17a28c639bface5562776c7bb8dfbb8ae8060a60
Parents: 0092cb9
Author: Sean Owen <so...@cloudera.com>
Authored: Sat Sep 12 10:40:10 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Sat Sep 12 10:40:10 2015 +0100

----------------------------------------------------------------------
 .../apache/spark/streaming/twitter/JavaTwitterStreamSuite.java   | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/17a28c63/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java b/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
index e46b4e5..26ec8af 100644
--- a/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
+++ b/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
@@ -17,8 +17,6 @@
 
 package org.apache.spark.streaming.twitter;
 
-import java.util.Arrays;
-
 import org.junit.Test;
 import twitter4j.Status;
 import twitter4j.auth.Authorization;
@@ -30,7 +28,7 @@ import org.apache.spark.streaming.api.java.JavaDStream;
 public class JavaTwitterStreamSuite extends LocalJavaStreamingContext {
   @Test
   public void testTwitterStream() {
-    String[] filters = (String[])Arrays.<String>asList("filter1", "filter2").toArray();
+    String[] filters = { "filter1", "filter2" };
     Authorization auth = NullAuthorization.getInstance();
 
     // tests the API, does not actually test data receiving


[27/50] [abbrv] bahir git commit: [SPARK-7799][SPARK-12786][STREAMING] Add "streaming-akka" project

Posted by lr...@apache.org.
[SPARK-7799][SPARK-12786][STREAMING] Add "streaming-akka" project

Include the following changes:

1. Add "streaming-akka" project and org.apache.spark.streaming.akka.AkkaUtils for creating an actorStream
2. Remove "StreamingContext.actorStream" and "JavaStreamingContext.actorStream"
3. Update the ActorWordCount example and add the JavaActorWordCount example
4. Make "streaming-zeromq" depend on "streaming-akka" and update the codes accordingly

Author: Shixiong Zhu <sh...@databricks.com>

Closes #10744 from zsxwing/streaming-akka-2.
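
To make the new entry point concrete, here is a minimal end-to-end sketch in Scala. It assumes spark-streaming-akka is on the classpath; the receiver class, application name, and batch interval are illustrative:

    import akka.actor.Props
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.akka.{ActorReceiver, AkkaUtils}

    // A trivial receiver actor: every String it is sent is stored into Spark.
    class EchoReceiver extends ActorReceiver {
      override def receive: Receive = {
        case s: String => store(s)
      }
    }

    val ssc = new StreamingContext("local[2]", "akka-demo", Seconds(2))
    // AkkaUtils.createStream replaces the removed ssc.actorStream(...); the
    // storage level, ActorSystem creator, and supervisor strategy all have
    // defaults and can be overridden via the extra parameters.
    val lines = AkkaUtils.createStream[String](ssc, Props[EchoReceiver](), "EchoReceiver")
    lines.print()
    ssc.start()
    ssc.awaitTermination()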


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e3591971
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e3591971
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e3591971

Branch: refs/heads/master
Commit: e3591971054e18618fd6ce400efac15411619285
Parents: 618b862
Author: Shixiong Zhu <sh...@databricks.com>
Authored: Wed Jan 20 13:55:41 2016 -0800
Committer: Tathagata Das <ta...@gmail.com>
Committed: Wed Jan 20 13:55:41 2016 -0800

----------------------------------------------------------------------
 .../streaming/akka/JavaActorWordCount.java      |  14 +-
 .../streaming/akka/ActorWordCount.scala         |  37 +++--
 streaming-akka/pom.xml                          |  73 +++++++++
 .../spark/streaming/akka/ActorReceiver.scala    |  64 +++++---
 .../apache/spark/streaming/akka/AkkaUtils.scala | 147 +++++++++++++++++++
 .../streaming/akka/JavaAkkaUtilsSuite.java      |  66 +++++++++
 .../spark/streaming/akka/AkkaUtilsSuite.scala   |  64 ++++++++
 .../streaming/zeromq/ZeroMQWordCount.scala      |  13 +-
 streaming-zeromq/pom.xml                        |   5 +
 .../spark/streaming/zeromq/ZeroMQReceiver.scala |   2 +-
 .../spark/streaming/zeromq/ZeroMQUtils.scala    |  76 +++++++---
 .../streaming/zeromq/JavaZeroMQStreamSuite.java |  31 ++--
 .../streaming/zeromq/ZeroMQStreamSuite.scala    |  16 +-
 13 files changed, 526 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
index 2377207..62e5633 100644
--- a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
+++ b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
@@ -31,7 +31,8 @@ import org.apache.spark.api.java.function.PairFunction;
 import org.apache.spark.streaming.Duration;
 import org.apache.spark.streaming.api.java.JavaDStream;
 import org.apache.spark.streaming.api.java.JavaStreamingContext;
-import org.apache.spark.streaming.receiver.JavaActorReceiver;
+import org.apache.spark.streaming.akka.AkkaUtils;
+import org.apache.spark.streaming.akka.JavaActorReceiver;
 
 /**
  * A sample actor as receiver, is also simplest. This receiver actor
@@ -56,6 +57,7 @@ class JavaSampleActorReceiver<T> extends JavaActorReceiver {
     remotePublisher.tell(new SubscribeReceiver(getSelf()), getSelf());
   }
 
+  @Override
   public void onReceive(Object msg) throws Exception {
     store((T) msg);
   }
@@ -100,18 +102,20 @@ public class JavaActorWordCount {
     String feederActorURI = "akka.tcp://test@" + host + ":" + port + "/user/FeederActor";
 
     /*
-     * Following is the use of actorStream to plug in custom actor as receiver
+     * Following is the use of AkkaUtils.createStream to plug in custom actor as receiver
      *
      * An important point to note:
      * Since Actor may exist outside the spark framework, It is thus user's responsibility
      * to ensure the type safety, i.e type of data received and InputDstream
      * should be same.
      *
-     * For example: Both actorStream and JavaSampleActorReceiver are parameterized
+     * For example: Both AkkaUtils.createStream and JavaSampleActorReceiver are parameterized
      * to same type to ensure type safety.
      */
-    JavaDStream<String> lines = jssc.actorStream(
-        Props.create(JavaSampleActorReceiver.class, feederActorURI), "SampleReceiver");
+    JavaDStream<String> lines = AkkaUtils.createStream(
+        jssc,
+        Props.create(JavaSampleActorReceiver.class, feederActorURI),
+        "SampleReceiver");
 
     // compute wordcount
     lines.flatMap(new FlatMapFunction<String, String>() {

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index 88cdc6b..8e88987 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -22,12 +22,12 @@ import scala.collection.mutable.LinkedList
 import scala.reflect.ClassTag
 import scala.util.Random
 
-import akka.actor.{actorRef2Scala, Actor, ActorRef, Props}
+import akka.actor._
+import com.typesafe.config.ConfigFactory
 
-import org.apache.spark.{SecurityManager, SparkConf}
+import org.apache.spark.SparkConf
 import org.apache.spark.streaming.{Seconds, StreamingContext}
-import org.apache.spark.streaming.receiver.ActorReceiver
-import org.apache.spark.util.AkkaUtils
+import org.apache.spark.streaming.akka.{ActorReceiver, AkkaUtils}
 
 case class SubscribeReceiver(receiverActor: ActorRef)
 case class UnsubscribeReceiver(receiverActor: ActorRef)
@@ -78,8 +78,7 @@ class FeederActor extends Actor {
  *
  * @see [[org.apache.spark.examples.streaming.FeederActor]]
  */
-class SampleActorReceiver[T: ClassTag](urlOfPublisher: String)
-extends ActorReceiver {
+class SampleActorReceiver[T](urlOfPublisher: String) extends ActorReceiver {
 
   lazy private val remotePublisher = context.actorSelection(urlOfPublisher)
 
@@ -108,9 +107,13 @@ object FeederActor {
     }
     val Seq(host, port) = args.toSeq
 
-    val conf = new SparkConf
-    val actorSystem = AkkaUtils.createActorSystem("test", host, port.toInt, conf = conf,
-      securityManager = new SecurityManager(conf))._1
+    val akkaConf = ConfigFactory.parseString(
+      s"""akka.actor.provider = "akka.remote.RemoteActorRefProvider"
+         |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
+         |akka.remote.netty.tcp.hostname = "$host"
+         |akka.remote.netty.tcp.port = $port
+         |""".stripMargin)
+       val actorSystem = ActorSystem("test", akkaConf)
     val feeder = actorSystem.actorOf(Props[FeederActor], "FeederActor")
 
     println("Feeder started as:" + feeder)
@@ -121,6 +124,7 @@ object FeederActor {
 
 /**
  * A sample word count program demonstrating the use of plugging in
+ *
  * Actor as Receiver
  * Usage: ActorWordCount <hostname> <port>
  *   <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on.
@@ -146,20 +150,21 @@ object ActorWordCount {
     val ssc = new StreamingContext(sparkConf, Seconds(2))
 
     /*
-     * Following is the use of actorStream to plug in custom actor as receiver
+     * Following is the use of AkkaUtils.createStream to plug in custom actor as receiver
      *
      * An important point to note:
      * Since Actor may exist outside the spark framework, It is thus user's responsibility
-     * to ensure the type safety, i.e type of data received and InputDstream
+     * to ensure the type safety, i.e type of data received and InputDStream
      * should be same.
      *
-     * For example: Both actorStream and SampleActorReceiver are parameterized
+     * For example: Both AkkaUtils.createStream and SampleActorReceiver are parameterized
      * to same type to ensure type safety.
      */
-
-    val lines = ssc.actorStream[String](
-      Props(new SampleActorReceiver[String]("akka.tcp://test@%s:%s/user/FeederActor".format(
-        host, port.toInt))), "SampleReceiver")
+    val lines = AkkaUtils.createStream[String](
+      ssc,
+      Props(classOf[SampleActorReceiver[String]],
+        "akka.tcp://test@%s:%s/user/FeederActor".format(host, port.toInt)),
+      "SampleReceiver")
 
     // compute wordcount
     lines.flatMap(_.split("\\s+")).map(x => (x, 1)).reduceByKey(_ + _).print()

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
new file mode 100644
index 0000000..34de9ba
--- /dev/null
+++ b/streaming-akka/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.10</artifactId>
+    <version>2.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-streaming-akka_2.10</artifactId>
+  <properties>
+    <sbt.project.name>streaming-akka</sbt.project.name>
+  </properties>
+  <packaging>jar</packaging>
+  <name>Spark Project External Akka</name>
+  <url>http://spark.apache.org/</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${akka.group}</groupId>
+      <artifactId>akka-actor_${scala.binary.version}</artifactId>
+      <version>${akka.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${akka.group}</groupId>
+      <artifactId>akka-remote_${scala.binary.version}</artifactId>
+      <version>${akka.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
index 0eabf3d..c75dc92 100644
--- a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
+++ b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.streaming.receiver
+package org.apache.spark.streaming.akka
 
 import java.nio.ByteBuffer
 import java.util.concurrent.atomic.AtomicInteger
@@ -26,23 +26,44 @@ import scala.reflect.ClassTag
 
 import akka.actor._
 import akka.actor.SupervisorStrategy.{Escalate, Restart}
+import com.typesafe.config.ConfigFactory
 
-import org.apache.spark.{Logging, SparkEnv}
+import org.apache.spark.{Logging, TaskContext}
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.receiver.Receiver
 
 /**
  * :: DeveloperApi ::
  * A helper with set of defaults for supervisor strategy
  */
 @DeveloperApi
-object ActorSupervisorStrategy {
+object ActorReceiver {
 
-  val defaultStrategy = OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange =
+  /**
+   * A OneForOneStrategy supervisor strategy with `maxNrOfRetries = 10` and
+   * `withinTimeRange = 15 millis`. For RuntimeException, it will restart the ActorReceiver; for
+   * others, it just escalates the failure to the supervisor of the supervisor.
+   */
+  val defaultSupervisorStrategy = OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange =
     15 millis) {
     case _: RuntimeException => Restart
     case _: Exception => Escalate
   }
+
+  /**
+   * A default ActorSystem creator. It will use a unique system name
+   * (streaming-actor-system-<spark-task-attempt-id>) to start an ActorSystem that supports remote
+   * communication.
+   */
+  val defaultActorSystemCreator: () => ActorSystem = () => {
+    val uniqueSystemName = s"streaming-actor-system-${TaskContext.get().taskAttemptId()}"
+    val akkaConf = ConfigFactory.parseString(
+      s"""akka.actor.provider = "akka.remote.RemoteActorRefProvider"
+         |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
+         |""".stripMargin)
+    ActorSystem(uniqueSystemName, akkaConf)
+  }
 }
 
 /**
@@ -58,13 +79,12 @@ object ActorSupervisorStrategy {
  *      }
  *  }
  *
- *  // Can be used with an actorStream as follows
- *  ssc.actorStream[String](Props(new MyActor),"MyActorReceiver")
+ *  AkkaUtils.createStream[String](ssc, Props[MyActor](),"MyActorReceiver")
  *
  * }}}
  *
  * @note Since Actor may exist outside the spark framework, It is thus user's responsibility
- *       to ensure the type safety, i.e parametrized type of push block and InputDStream
+ *       to ensure the type safety, i.e. parametrized type of push block and InputDStream
  *       should be same.
  */
 @DeveloperApi
@@ -103,18 +123,18 @@ abstract class ActorReceiver extends Actor {
  *
  * @example {{{
  *  class MyActor extends JavaActorReceiver {
- *      def receive {
- *          case anything: String => store(anything)
+ *      @Override
+ *      public void onReceive(Object msg) throws Exception {
+ *          store((String) msg);
  *      }
  *  }
  *
- *  // Can be used with an actorStream as follows
- *  ssc.actorStream[String](Props(new MyActor),"MyActorReceiver")
+ *  AkkaUtils.<String>createStream(jssc, Props.create(MyActor.class), "MyActorReceiver");
  *
  * }}}
  *
  * @note Since Actor may exist outside the spark framework, It is thus user's responsibility
- *       to ensure the type safety, i.e parametrized type of push block and InputDStream
+ *       to ensure the type safety, i.e. parametrized type of push block and InputDStream
  *       should be same.
  */
 @DeveloperApi
@@ -147,8 +167,8 @@ abstract class JavaActorReceiver extends UntypedActor {
 /**
  * :: DeveloperApi ::
  * Statistics for querying the supervisor about state of workers. Used in
- * conjunction with `StreamingContext.actorStream` and
- * [[org.apache.spark.streaming.receiver.ActorReceiver]].
+ * conjunction with `AkkaUtils.createStream` and
+ * [[org.apache.spark.streaming.akka.ActorReceiverSupervisor]].
  */
 @DeveloperApi
 case class Statistics(numberOfMsgs: Int,
@@ -157,10 +177,10 @@ case class Statistics(numberOfMsgs: Int,
   otherInfo: String)
 
 /** Case class to receive data sent by child actors */
-private[streaming] sealed trait ActorReceiverData
-private[streaming] case class SingleItemData[T](item: T) extends ActorReceiverData
-private[streaming] case class IteratorData[T](iterator: Iterator[T]) extends ActorReceiverData
-private[streaming] case class ByteBufferData(bytes: ByteBuffer) extends ActorReceiverData
+private[akka] sealed trait ActorReceiverData
+private[akka] case class SingleItemData[T](item: T) extends ActorReceiverData
+private[akka] case class IteratorData[T](iterator: Iterator[T]) extends ActorReceiverData
+private[akka] case class ByteBufferData(bytes: ByteBuffer) extends ActorReceiverData
 
 /**
  * Provides Actors as receivers for receiving stream.
@@ -181,14 +201,16 @@ private[streaming] case class ByteBufferData(bytes: ByteBuffer) extends ActorRec
  *  context.parent ! Props(new Worker, "Worker")
  * }}}
  */
-private[streaming] class ActorReceiverSupervisor[T: ClassTag](
+private[akka] class ActorReceiverSupervisor[T: ClassTag](
+    actorSystemCreator: () => ActorSystem,
     props: Props,
     name: String,
     storageLevel: StorageLevel,
     receiverSupervisorStrategy: SupervisorStrategy
   ) extends Receiver[T](storageLevel) with Logging {
 
-  protected lazy val actorSupervisor = SparkEnv.get.actorSystem.actorOf(Props(new Supervisor),
+  private lazy val actorSystem = actorSystemCreator()
+  protected lazy val actorSupervisor = actorSystem.actorOf(Props(new Supervisor),
     "Supervisor" + streamId)
 
   class Supervisor extends Actor {
@@ -241,5 +263,7 @@ private[streaming] class ActorReceiverSupervisor[T: ClassTag](
 
   def onStop(): Unit = {
     actorSupervisor ! PoisonPill
+    actorSystem.shutdown()
+    actorSystem.awaitTermination()
   }
 }

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/AkkaUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/AkkaUtils.scala b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/AkkaUtils.scala
new file mode 100644
index 0000000..38c35c5
--- /dev/null
+++ b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/AkkaUtils.scala
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.akka
+
+import scala.reflect.ClassTag
+
+import akka.actor.{ActorSystem, Props, SupervisorStrategy}
+
+import org.apache.spark.api.java.function.{Function0 => JFunction0}
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext}
+import org.apache.spark.streaming.dstream.ReceiverInputDStream
+
+object AkkaUtils {
+
+  /**
+   * Create an input stream with a user-defined actor. See [[ActorReceiver]] for more details.
+   *
+   * @param ssc The StreamingContext instance
+   * @param propsForActor Props object defining creation of the actor
+   * @param actorName Name of the actor
+   * @param storageLevel RDD storage level (default: StorageLevel.MEMORY_AND_DISK_SER_2)
+   * @param actorSystemCreator A function to create ActorSystem in executors. `ActorSystem` will
+   *                           be shut down when the receiver is stopping (default:
+   *                           ActorReceiver.defaultActorSystemCreator)
+   * @param supervisorStrategy the supervisor strategy (default: ActorReceiver.defaultStrategy)
+   *
+   * @note An important point to note:
+   *       Since Actor may exist outside the spark framework, It is thus user's responsibility
+   *       to ensure the type safety, i.e. parametrized type of data received and createStream
+   *       should be same.
+   */
+  def createStream[T: ClassTag](
+      ssc: StreamingContext,
+      propsForActor: Props,
+      actorName: String,
+      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
+      actorSystemCreator: () => ActorSystem = ActorReceiver.defaultActorSystemCreator,
+      supervisorStrategy: SupervisorStrategy = ActorReceiver.defaultSupervisorStrategy
+    ): ReceiverInputDStream[T] = ssc.withNamedScope("actor stream") {
+    val cleanF = ssc.sc.clean(actorSystemCreator)
+    ssc.receiverStream(new ActorReceiverSupervisor[T](
+      cleanF,
+      propsForActor,
+      actorName,
+      storageLevel,
+      supervisorStrategy))
+  }
+
+  /**
+   * Create an input stream with a user-defined actor. See [[JavaActorReceiver]] for more details.
+   *
+   * @param jssc The StreamingContext instance
+   * @param propsForActor Props object defining creation of the actor
+   * @param actorName Name of the actor
+   * @param storageLevel Storage level to use for storing the received objects
+   * @param actorSystemCreator A function to create ActorSystem in executors. `ActorSystem` will
+   *                           be shut down when the receiver is stopping.
+   * @param supervisorStrategy the supervisor strategy
+   *
+   * @note An important point to note:
+   *       Since Actor may exist outside the spark framework, It is thus user's responsibility
+   *       to ensure the type safety, i.e. parametrized type of data received and createStream
+   *       should be same.
+   */
+  def createStream[T](
+      jssc: JavaStreamingContext,
+      propsForActor: Props,
+      actorName: String,
+      storageLevel: StorageLevel,
+      actorSystemCreator: JFunction0[ActorSystem],
+      supervisorStrategy: SupervisorStrategy
+    ): JavaReceiverInputDStream[T] = {
+    implicit val cm: ClassTag[T] =
+      implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
+    createStream[T](
+      jssc.ssc,
+      propsForActor,
+      actorName,
+      storageLevel,
+      () => actorSystemCreator.call(),
+      supervisorStrategy)
+  }
+
+  /**
+   * Create an input stream with a user-defined actor. See [[JavaActorReceiver]] for more details.
+   *
+   * @param jssc The StreamingContext instance
+   * @param propsForActor Props object defining creation of the actor
+   * @param actorName Name of the actor
+   * @param storageLevel Storage level to use for storing the received objects
+   *
+   * @note An important point to note:
+   *       Since Actor may exist outside the spark framework, It is thus user's responsibility
+   *       to ensure the type safety, i.e. parametrized type of data received and createStream
+   *       should be same.
+   */
+  def createStream[T](
+      jssc: JavaStreamingContext,
+      propsForActor: Props,
+      actorName: String,
+      storageLevel: StorageLevel
+    ): JavaReceiverInputDStream[T] = {
+    implicit val cm: ClassTag[T] =
+      implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
+    createStream[T](jssc.ssc, propsForActor, actorName, storageLevel)
+  }
+
+  /**
+   * Create an input stream with a user-defined actor. Storage level of the data will be the default
+   * StorageLevel.MEMORY_AND_DISK_SER_2. See [[JavaActorReceiver]] for more details.
+   *
+   * @param jssc The StreamingContext instance
+   * @param propsForActor Props object defining creation of the actor
+   * @param actorName Name of the actor
+   *
+   * @note An important point to note:
+   *       Since Actor may exist outside the spark framework, It is thus user's responsibility
+   *       to ensure the type safety, i.e. parametrized type of data received and createStream
+   *       should be same.
+   */
+  def createStream[T](
+      jssc: JavaStreamingContext,
+      propsForActor: Props,
+      actorName: String
+    ): JavaReceiverInputDStream[T] = {
+    implicit val cm: ClassTag[T] =
+      implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
+    createStream[T](jssc.ssc, propsForActor, actorName)
+  }
+}

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
----------------------------------------------------------------------
diff --git a/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java b/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
new file mode 100644
index 0000000..b732506
--- /dev/null
+++ b/streaming-akka/src/test/java/org/apache/spark/streaming/akka/JavaAkkaUtilsSuite.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.akka;
+
+import akka.actor.ActorSystem;
+import akka.actor.Props;
+import akka.actor.SupervisorStrategy;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.junit.Test;
+
+import org.apache.spark.api.java.function.Function0;
+import org.apache.spark.storage.StorageLevel;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+
+public class JavaAkkaUtilsSuite {
+
+  @Test // tests the API, does not actually test data receiving
+  public void testAkkaUtils() {
+    JavaStreamingContext jsc = new JavaStreamingContext("local[2]", "test", new Duration(1000));
+    try {
+      JavaReceiverInputDStream<String> test1 = AkkaUtils.<String>createStream(
+        jsc, Props.create(JavaTestActor.class), "test");
+      JavaReceiverInputDStream<String> test2 = AkkaUtils.<String>createStream(
+        jsc, Props.create(JavaTestActor.class), "test", StorageLevel.MEMORY_AND_DISK_SER_2());
+      JavaReceiverInputDStream<String> test3 = AkkaUtils.<String>createStream(
+        jsc,
+        Props.create(JavaTestActor.class),
+        "test", StorageLevel.MEMORY_AND_DISK_SER_2(),
+        new ActorSystemCreatorForTest(),
+        SupervisorStrategy.defaultStrategy());
+    } finally {
+      jsc.stop();
+    }
+  }
+}
+
+class ActorSystemCreatorForTest implements Function0<ActorSystem> {
+  @Override
+  public ActorSystem call() {
+    return null;
+  }
+}
+
+
+class JavaTestActor extends JavaActorReceiver {
+  @Override
+  public void onReceive(Object message) throws Exception {
+    store((String) message);
+  }
+}

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala b/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
new file mode 100644
index 0000000..f437585
--- /dev/null
+++ b/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaUtilsSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.akka
+
+import akka.actor.{Props, SupervisorStrategy}
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.dstream.ReceiverInputDStream
+
+class AkkaUtilsSuite extends SparkFunSuite {
+
+  test("createStream") {
+    val ssc: StreamingContext = new StreamingContext("local[2]", "test", Seconds(1000))
+    try {
+      // tests the API, does not actually test data receiving
+      val test1: ReceiverInputDStream[String] = AkkaUtils.createStream(
+        ssc, Props[TestActor](), "test")
+      val test2: ReceiverInputDStream[String] = AkkaUtils.createStream(
+        ssc, Props[TestActor](), "test", StorageLevel.MEMORY_AND_DISK_SER_2)
+      val test3: ReceiverInputDStream[String] = AkkaUtils.createStream(
+        ssc,
+        Props[TestActor](),
+        "test",
+        StorageLevel.MEMORY_AND_DISK_SER_2,
+        supervisorStrategy = SupervisorStrategy.defaultStrategy)
+      val test4: ReceiverInputDStream[String] = AkkaUtils.createStream(
+        ssc, Props[TestActor](), "test", StorageLevel.MEMORY_AND_DISK_SER_2, () => null)
+      val test5: ReceiverInputDStream[String] = AkkaUtils.createStream(
+        ssc, Props[TestActor](), "test", StorageLevel.MEMORY_AND_DISK_SER_2, () => null)
+      val test6: ReceiverInputDStream[String] = AkkaUtils.createStream(
+        ssc,
+        Props[TestActor](),
+        "test",
+        StorageLevel.MEMORY_AND_DISK_SER_2,
+        () => null,
+        SupervisorStrategy.defaultStrategy)
+    } finally {
+      ssc.stop()
+    }
+  }
+}
+
+class TestActor extends ActorReceiver {
+  override def receive: Receive = {
+    case m: String => store(m)
+  }
+}

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
index 9644890..f612e50 100644
--- a/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
+++ b/streaming-zeromq/examples/src/main/scala/org/apache/spark/examples/streaming/zeromq/ZeroMQWordCount.scala
@@ -25,8 +25,9 @@ import akka.actor.actorRef2Scala
 import akka.util.ByteString
 import akka.zeromq._
 import akka.zeromq.Subscribe
+import com.typesafe.config.ConfigFactory
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, TaskContext}
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.zeromq._
 
@@ -69,10 +70,10 @@ object SimpleZeroMQPublisher {
  *
  * To run this example locally, you may run publisher as
  *    `$ bin/run-example \
- *      org.apache.spark.examples.streaming.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar`
+ *      org.apache.spark.examples.streaming.SimpleZeroMQPublisher tcp://127.0.0.1:1234 foo`
  * and run the example as
  *    `$ bin/run-example \
- *      org.apache.spark.examples.streaming.ZeroMQWordCount tcp://127.0.1.1:1234 foo`
+ *      org.apache.spark.examples.streaming.ZeroMQWordCount tcp://127.0.0.1:1234 foo`
  */
 // scalastyle:on
 object ZeroMQWordCount {
@@ -90,7 +91,11 @@ object ZeroMQWordCount {
     def bytesToStringIterator(x: Seq[ByteString]): Iterator[String] = x.map(_.utf8String).iterator
 
     // For this stream, a zeroMQ publisher should be running.
-    val lines = ZeroMQUtils.createStream(ssc, url, Subscribe(topic), bytesToStringIterator _)
+    val lines = ZeroMQUtils.createStream(
+      ssc,
+      url,
+      Subscribe(topic),
+      bytesToStringIterator _)
     val words = lines.flatMap(_.split(" "))
     val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
     wordCounts.print()

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index a725988..7781aae 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -43,6 +43,11 @@
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming-akka_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
index 506ba87..dd367cd 100644
--- a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
+++ b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQReceiver.scala
@@ -23,7 +23,7 @@ import akka.util.ByteString
 import akka.zeromq._
 
 import org.apache.spark.Logging
-import org.apache.spark.streaming.receiver.ActorReceiver
+import org.apache.spark.streaming.akka.ActorReceiver
 
 /**
  * A receiver to subscribe to ZeroMQ stream.

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
index 63cd8a2..1784d6e 100644
--- a/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
+++ b/streaming-zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala
@@ -20,29 +20,33 @@ package org.apache.spark.streaming.zeromq
 import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 
-import akka.actor.{Props, SupervisorStrategy}
+import akka.actor.{ActorSystem, Props, SupervisorStrategy}
 import akka.util.ByteString
 import akka.zeromq.Subscribe
 
-import org.apache.spark.api.java.function.{Function => JFunction}
+import org.apache.spark.api.java.function.{Function => JFunction, Function0 => JFunction0}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.akka.{ActorReceiver, AkkaUtils}
 import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext}
 import org.apache.spark.streaming.dstream.ReceiverInputDStream
-import org.apache.spark.streaming.receiver.ActorSupervisorStrategy
 
 object ZeroMQUtils {
   /**
    * Create an input stream that receives messages pushed by a zeromq publisher.
-   * @param ssc            StreamingContext object
-   * @param publisherUrl   Url of remote zeromq publisher
-   * @param subscribe      Topic to subscribe to
+   * @param ssc StreamingContext object
+   * @param publisherUrl Url of remote zeromq publisher
+   * @param subscribe Topic to subscribe to
    * @param bytesToObjects A zeroMQ stream publishes sequence of frames for each topic
    *                       and each frame has sequence of byte thus it needs the converter
    *                       (which might be deserializer of bytes) to translate from sequence
    *                       of sequence of bytes, where sequence refer to a frame
    *                       and sub sequence refer to its payload.
    * @param storageLevel   RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
+   * @param actorSystemCreator A function to create ActorSystem in executors. `ActorSystem` will
+   *                           be shut down when the receiver is stopping (default:
+   *                           ActorReceiver.defaultActorSystemCreator)
+   * @param supervisorStrategy the supervisor strategy (default: ActorReceiver.defaultStrategy)
    */
   def createStream[T: ClassTag](
       ssc: StreamingContext,
@@ -50,22 +54,31 @@ object ZeroMQUtils {
       subscribe: Subscribe,
       bytesToObjects: Seq[ByteString] => Iterator[T],
       storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
-      supervisorStrategy: SupervisorStrategy = ActorSupervisorStrategy.defaultStrategy
+      actorSystemCreator: () => ActorSystem = ActorReceiver.defaultActorSystemCreator,
+      supervisorStrategy: SupervisorStrategy = ActorReceiver.defaultSupervisorStrategy
     ): ReceiverInputDStream[T] = {
-    ssc.actorStream(Props(new ZeroMQReceiver(publisherUrl, subscribe, bytesToObjects)),
-      "ZeroMQReceiver", storageLevel, supervisorStrategy)
+    AkkaUtils.createStream(
+      ssc,
+      Props(new ZeroMQReceiver(publisherUrl, subscribe, bytesToObjects)),
+      "ZeroMQReceiver",
+      storageLevel,
+      actorSystemCreator,
+      supervisorStrategy)
   }
 
   /**
    * Create an input stream that receives messages pushed by a zeromq publisher.
-   * @param jssc           JavaStreamingContext object
-   * @param publisherUrl   Url of remote ZeroMQ publisher
-   * @param subscribe      Topic to subscribe to
+   * @param jssc JavaStreamingContext object
+   * @param publisherUrl Url of remote ZeroMQ publisher
+   * @param subscribe Topic to subscribe to
    * @param bytesToObjects A zeroMQ stream publishes sequence of frames for each topic and each
    *                       frame has sequence of byte thus it needs the converter(which might be
    *                       deserializer of bytes) to translate from sequence of sequence of bytes,
    *                       where sequence refer to a frame and sub sequence refer to its payload.
-   * @param storageLevel  Storage level to use for storing the received objects
+   * @param storageLevel Storage level to use for storing the received objects
+   * @param actorSystemCreator A function to create ActorSystem in executors. `ActorSystem` will
+   *                           be shut down when the receiver is stopping.
+   * @param supervisorStrategy the supervisor strategy (default: ActorReceiver.defaultStrategy)
    */
   def createStream[T](
       jssc: JavaStreamingContext,
@@ -73,25 +86,33 @@ object ZeroMQUtils {
       subscribe: Subscribe,
       bytesToObjects: JFunction[Array[Array[Byte]], java.lang.Iterable[T]],
       storageLevel: StorageLevel,
+      actorSystemCreator: JFunction0[ActorSystem],
       supervisorStrategy: SupervisorStrategy
     ): JavaReceiverInputDStream[T] = {
     implicit val cm: ClassTag[T] =
       implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
     val fn =
       (x: Seq[ByteString]) => bytesToObjects.call(x.map(_.toArray).toArray).iterator().asScala
-    createStream[T](jssc.ssc, publisherUrl, subscribe, fn, storageLevel, supervisorStrategy)
+    createStream[T](
+      jssc.ssc,
+      publisherUrl,
+      subscribe,
+      fn,
+      storageLevel,
+      () => actorSystemCreator.call(),
+      supervisorStrategy)
   }
 
   /**
    * Create an input stream that receives messages pushed by a zeromq publisher.
-   * @param jssc           JavaStreamingContext object
-   * @param publisherUrl   Url of remote zeromq publisher
-   * @param subscribe      Topic to subscribe to
+   * @param jssc JavaStreamingContext object
+   * @param publisherUrl Url of remote zeromq publisher
+   * @param subscribe Topic to subscribe to
    * @param bytesToObjects A zeroMQ stream publishes sequence of frames for each topic and each
    *                       frame has sequence of byte thus it needs the converter(which might be
    *                       deserializer of bytes) to translate from sequence of sequence of bytes,
    *                       where sequence refer to a frame and sub sequence refer to its payload.
-   * @param storageLevel   RDD storage level.
+   * @param storageLevel RDD storage level.
    */
   def createStream[T](
       jssc: JavaStreamingContext,
@@ -104,14 +125,19 @@ object ZeroMQUtils {
       implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
     val fn =
       (x: Seq[ByteString]) => bytesToObjects.call(x.map(_.toArray).toArray).iterator().asScala
-    createStream[T](jssc.ssc, publisherUrl, subscribe, fn, storageLevel)
+    createStream[T](
+      jssc.ssc,
+      publisherUrl,
+      subscribe,
+      fn,
+      storageLevel)
   }
 
   /**
    * Create an input stream that receives messages pushed by a zeromq publisher.
-   * @param jssc           JavaStreamingContext object
-   * @param publisherUrl   Url of remote zeromq publisher
-   * @param subscribe      Topic to subscribe to
+   * @param jssc JavaStreamingContext object
+   * @param publisherUrl Url of remote zeromq publisher
+   * @param subscribe Topic to subscribe to
    * @param bytesToObjects A zeroMQ stream publishes sequence of frames for each topic and each
    *                       frame has sequence of byte thus it needs the converter(which might
    *                       be deserializer of bytes) to translate from sequence of sequence of
@@ -128,6 +154,10 @@ object ZeroMQUtils {
       implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
     val fn =
       (x: Seq[ByteString]) => bytesToObjects.call(x.map(_.toArray).toArray).iterator().asScala
-    createStream[T](jssc.ssc, publisherUrl, subscribe, fn)
+    createStream[T](
+      jssc.ssc,
+      publisherUrl,
+      subscribe,
+      fn)
   }
 }
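
With the reworked signature, callers can rely on the defaults for the new actorSystemCreator and supervisorStrategy parameters or override them explicitly. A minimal sketch of the Scala call, assuming a StreamingContext ssc is in scope (URL and topic are illustrative, echoing the ZeroMQWordCount example above):

    import akka.util.ByteString
    import akka.zeromq.Subscribe
    import org.apache.spark.streaming.zeromq.ZeroMQUtils

    // ZeroMQ publishes each message as a sequence of frames; convert the
    // payload frames to Strings.
    def bytesToStringIterator(frames: Seq[ByteString]): Iterator[String] =
      frames.map(_.utf8String).iterator

    // Storage level, ActorSystem creator, and supervisor strategy are defaulted.
    val lines = ZeroMQUtils.createStream(
      ssc, "tcp://127.0.0.1:1234", Subscribe("foo"), bytesToStringIterator _)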

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-zeromq/src/test/java/org/apache/spark/streaming/zeromq/JavaZeroMQStreamSuite.java
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/test/java/org/apache/spark/streaming/zeromq/JavaZeroMQStreamSuite.java b/streaming-zeromq/src/test/java/org/apache/spark/streaming/zeromq/JavaZeroMQStreamSuite.java
index 417b91e..9ff4b41 100644
--- a/streaming-zeromq/src/test/java/org/apache/spark/streaming/zeromq/JavaZeroMQStreamSuite.java
+++ b/streaming-zeromq/src/test/java/org/apache/spark/streaming/zeromq/JavaZeroMQStreamSuite.java
@@ -17,14 +17,17 @@
 
 package org.apache.spark.streaming.zeromq;
 
-import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
-import org.junit.Test;
+import akka.actor.ActorSystem;
 import akka.actor.SupervisorStrategy;
 import akka.util.ByteString;
 import akka.zeromq.Subscribe;
+import org.junit.Test;
+
 import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function0;
 import org.apache.spark.storage.StorageLevel;
 import org.apache.spark.streaming.LocalJavaStreamingContext;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
 
 public class JavaZeroMQStreamSuite extends LocalJavaStreamingContext {
 
@@ -32,19 +35,29 @@ public class JavaZeroMQStreamSuite extends LocalJavaStreamingContext {
   public void testZeroMQStream() {
     String publishUrl = "abc";
     Subscribe subscribe = new Subscribe((ByteString)null);
-    Function<byte[][], Iterable<String>> bytesToObjects = new Function<byte[][], Iterable<String>>() {
-      @Override
-      public Iterable<String> call(byte[][] bytes) throws Exception {
-        return null;
-      }
-    };
+    Function<byte[][], Iterable<String>> bytesToObjects = new BytesToObjects();
+    Function0<ActorSystem> actorSystemCreator = new ActorSystemCreatorForTest();
 
     JavaReceiverInputDStream<String> test1 = ZeroMQUtils.<String>createStream(
       ssc, publishUrl, subscribe, bytesToObjects);
     JavaReceiverInputDStream<String> test2 = ZeroMQUtils.<String>createStream(
       ssc, publishUrl, subscribe, bytesToObjects, StorageLevel.MEMORY_AND_DISK_SER_2());
     JavaReceiverInputDStream<String> test3 = ZeroMQUtils.<String>createStream(
-      ssc,publishUrl, subscribe, bytesToObjects, StorageLevel.MEMORY_AND_DISK_SER_2(),
+      ssc, publishUrl, subscribe, bytesToObjects, StorageLevel.MEMORY_AND_DISK_SER_2(), actorSystemCreator,
       SupervisorStrategy.defaultStrategy());
   }
 }
+
+class BytesToObjects implements Function<byte[][], Iterable<String>> {
+  @Override
+  public Iterable<String> call(byte[][] bytes) throws Exception {
+    return null;
+  }
+}
+
+class ActorSystemCreatorForTest implements Function0<ActorSystem> {
+  @Override
+  public ActorSystem call() {
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/bahir/blob/e3591971/streaming-zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala b/streaming-zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
index 35d2e62..bac2679 100644
--- a/streaming-zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
+++ b/streaming-zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
@@ -42,14 +42,22 @@ class ZeroMQStreamSuite extends SparkFunSuite {
 
     // tests the API, does not actually test data receiving
     val test1: ReceiverInputDStream[String] =
-      ZeroMQUtils.createStream(ssc, publishUrl, subscribe, bytesToObjects)
+      ZeroMQUtils.createStream(
+        ssc, publishUrl, subscribe, bytesToObjects, actorSystemCreator = () => null)
     val test2: ReceiverInputDStream[String] = ZeroMQUtils.createStream(
-      ssc, publishUrl, subscribe, bytesToObjects, StorageLevel.MEMORY_AND_DISK_SER_2)
+      ssc, publishUrl, subscribe, bytesToObjects, StorageLevel.MEMORY_AND_DISK_SER_2, () => null)
     val test3: ReceiverInputDStream[String] = ZeroMQUtils.createStream(
       ssc, publishUrl, subscribe, bytesToObjects,
-      StorageLevel.MEMORY_AND_DISK_SER_2, SupervisorStrategy.defaultStrategy)
+      StorageLevel.MEMORY_AND_DISK_SER_2, () => null, SupervisorStrategy.defaultStrategy)
+    val test4: ReceiverInputDStream[String] =
+      ZeroMQUtils.createStream(ssc, publishUrl, subscribe, bytesToObjects)
+    val test5: ReceiverInputDStream[String] = ZeroMQUtils.createStream(
+      ssc, publishUrl, subscribe, bytesToObjects, StorageLevel.MEMORY_AND_DISK_SER_2)
+    val test6: ReceiverInputDStream[String] = ZeroMQUtils.createStream(
+      ssc, publishUrl, subscribe, bytesToObjects,
+      StorageLevel.MEMORY_AND_DISK_SER_2, supervisorStrategy = SupervisorStrategy.defaultStrategy)
 
-    // TODO: Actually test data receiving
+    // TODO: Actually test data receiving. A real test needs the native ZeroMQ library
     ssc.stop()
   }
 }


[28/50] [abbrv] bahir git commit: [HOTFIX][BUILD][TEST-MAVEN] Remove duplicate dependency

Posted by lr...@apache.org.
[HOTFIX][BUILD][TEST-MAVEN] Remove duplicate dependency

Author: Shixiong Zhu <sh...@databricks.com>

Closes #10868 from zsxwing/hotfix-akka-pom.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/bbb1ddcd
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/bbb1ddcd
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/bbb1ddcd

Branch: refs/heads/master
Commit: bbb1ddcd0072c0fe25a3385dd55e1915891b0f8b
Parents: e359197
Author: Shixiong Zhu <sh...@databricks.com>
Authored: Fri Jan 22 12:33:18 2016 -0800
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Fri Jan 22 12:33:18 2016 -0800

----------------------------------------------------------------------
 streaming-akka/pom.xml | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/bbb1ddcd/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 34de9ba..06c8e8a 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -49,6 +49,10 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
       <groupId>${akka.group}</groupId>
       <artifactId>akka-actor_${scala.binary.version}</artifactId>
       <version>${akka.version}</version>
@@ -58,13 +62,6 @@
       <artifactId>akka-remote_${scala.binary.version}</artifactId>
       <version>${akka.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>


[09/50] [abbrv] bahir git commit: [SPARK-11245] update twitter4j to 4.0.4 version

Posted by lr...@apache.org.
[SPARK-11245] update twitter4j to 4.0.4 version

Update twitter4j to version 4.0.4.
https://issues.apache.org/jira/browse/SPARK-11245

Author: dima <pr...@gmail.com>

Closes #9221 from pronix/twitter4j_update.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/3b1cc918
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/3b1cc918
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/3b1cc918

Branch: refs/heads/master
Commit: 3b1cc918d9e17ad45ed9b8610a0ab2007c3c0018
Parents: c03f462
Author: dima <pr...@gmail.com>
Authored: Sat Oct 24 18:16:45 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Sat Oct 24 18:16:45 2015 +0100

----------------------------------------------------------------------
 streaming-twitter/pom.xml                                          | 2 +-
 .../org/apache/spark/streaming/twitter/TwitterInputDStream.scala   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/3b1cc918/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 4c22ec8..087270d 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -51,7 +51,7 @@
     <dependency>
       <groupId>org.twitter4j</groupId>
       <artifactId>twitter4j-stream</artifactId>
-      <version>3.0.3</version>
+      <version>4.0.4</version>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>

http://git-wip-us.apache.org/repos/asf/bahir/blob/3b1cc918/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index d7de74b..9a85a65 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -87,7 +87,7 @@ class TwitterReceiver(
 
       val query = new FilterQuery
       if (filters.size > 0) {
-        query.track(filters.toArray)
+        query.track(filters.mkString(","))
         newTwitterStream.filter(query)
       } else {
         newTwitterStream.sample()
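
A note on the `track` change above: twitter4j 4.x declares `FilterQuery.track` as Java varargs
(`String...`), and the streaming API splits a comma-separated value into separate track phrases,
so joining the filters into one string preserves the old behaviour. A minimal sketch, with
`filters` standing in for the receiver's `filters: Seq[String]`:

```
import twitter4j.FilterQuery

val filters = Seq("spark", "bahir")
val query = new FilterQuery

// One comma-separated string; the API treats it as multiple track phrases.
query.track(filters.mkString(","))

// An explicit Scala-to-varargs expansion would also compile against 4.x:
// query.track(filters: _*)
```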


[08/50] [abbrv] bahir git commit: [SPARK-10447][SPARK-3842][PYSPARK] upgrade pyspark to py4j0.9

Posted by lr...@apache.org.
[SPARK-10447][SPARK-3842][PYSPARK] upgrade pyspark to py4j0.9

Upgrade to Py4J 0.9.

Author: Holden Karau <ho...@pigscanfly.ca>
Author: Holden Karau <ho...@us.ibm.com>

Closes #8615 from holdenk/SPARK-10447-upgrade-pyspark-to-py4j0.9.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/c03f462b
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/c03f462b
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/c03f462b

Branch: refs/heads/master
Commit: c03f462ba694e34dd191d2edba8881484e5e76b3
Parents: 5618911
Author: Holden Karau <ho...@pigscanfly.ca>
Authored: Tue Oct 20 10:52:49 2015 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Tue Oct 20 10:52:49 2015 -0700

----------------------------------------------------------------------
 streaming-mqtt/python/mqtt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/c03f462b/streaming-mqtt/python/mqtt.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/mqtt.py b/streaming-mqtt/python/mqtt.py
index fa83006..1ce4093 100644
--- a/streaming-mqtt/python/mqtt.py
+++ b/streaming-mqtt/python/mqtt.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-from py4j.java_gateway import Py4JJavaError
+from py4j.protocol import Py4JJavaError
 
 from pyspark.storagelevel import StorageLevel
 from pyspark.serializers import UTF8Deserializer


[44/50] [abbrv] bahir git commit: Create README.md

Posted by lr...@apache.org.
Create README.md

Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e1dfd0e8
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e1dfd0e8
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e1dfd0e8

Branch: refs/heads/master
Commit: e1dfd0e8731418d39eaf4cf66a5641849e34f051
Parents: 54040f3
Author: Christian Kadner <ck...@users.noreply.github.com>
Authored: Mon Jun 6 22:36:01 2016 -0700
Committer: Christian Kadner <ck...@users.noreply.github.com>
Committed: Mon Jun 6 22:36:01 2016 -0700

----------------------------------------------------------------------
 README.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e1dfd0e8/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a809a78
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# Bahir Repository with a single "src" folder
+
+Initial Bahir repository containing the source for the Spark streaming connectors for akka, mqtt, twitter, zeromq 
+extracted from [Apache Spark revision 8301fad](https://github.com/apache/spark/tree/8301fadd8d269da11e72870b7a889596e3337839)
+(before the [deletion of the streaming connectors akka, mqtt, twitter, zeromq](https://issues.apache.org/jira/browse/SPARK-13843)). 
+
+This repo still needs license files, build scripts, READMEs, etc., but it does have all of
+the commit history as well as the respective examples (with rewritten revision trees to preserve pre-Bahir history).
+
+
+Folder structure:
+```
+- streaming-akka
+  - examples/src/main/...
+  - src/main/...
+- streaming-mqtt
+  - examples
+  - src
+  - python
+- ...
+```


[34/50] [abbrv] bahir git commit: [MINOR][STREAMING] Replace deprecated `apply` with `create` in example.

Posted by lr...@apache.org.
[MINOR][STREAMING] Replace deprecated `apply` with `create` in example.

## What changes were proposed in this pull request?

Twitter Algebird deprecated `apply` in HyperLogLog.scala.
```
@deprecated("Use toHLL", since = "0.10.0 / 2015-05")
def apply[T <% Array[Byte]](t: T) = create(t)
```
This PR replaces the deprecated `apply` usage with the new `create`,
following the upstream change.

## How was this patch tested?
manual.
```
/bin/spark-submit --class org.apache.spark.examples.streaming.TwitterAlgebirdHLL examples/target/scala-2.11/spark-examples-2.0.0-SNAPSHOT-hadoop2.2.0.jar
```

Author: Dongjoon Hyun <do...@apache.org>

Closes #11451 from dongjoon-hyun/replace_deprecated_hll_apply.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/8ad5dddf
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/8ad5dddf
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/8ad5dddf

Branch: refs/heads/master
Commit: 8ad5dddfc9465d02d8cdb184e2c4fd10084f4e19
Parents: 6ac5a18
Author: Dongjoon Hyun <do...@apache.org>
Authored: Wed Mar 2 11:48:23 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Mar 2 11:48:23 2016 +0000

----------------------------------------------------------------------
 .../spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/8ad5dddf/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
index 0ec6214..6442b2a 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterAlgebirdHLL.scala
@@ -62,7 +62,7 @@ object TwitterAlgebirdHLL {
     var userSet: Set[Long] = Set()
 
     val approxUsers = users.mapPartitions(ids => {
-      ids.map(id => hll(id))
+      ids.map(id => hll.create(id))
     }).reduce(_ + _)
 
     val exactUsers = users.map(id => Set(id)).reduce(_ ++ _)
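
For context, `hll` in this example is an Algebird `HyperLogLogMonoid`, and `create` wraps a
value in a one-element HLL that the monoid can merge. A minimal sketch of the pattern,
assuming Algebird 0.10+; the bit width and sample ids are illustrative, not taken from the diff:

```
import com.twitter.algebird.{HLL, HyperLogLogMonoid}
import com.twitter.algebird.HyperLogLog._  // implicit Long => Array[Byte] conversion

val hll = new HyperLogLogMonoid(12)  // 12 bits of precision (illustrative)
val ids = Seq(1L, 2L, 2L, 3L)

// create(...) builds a single-element HLL per id; plus(...) merges them.
val approx: HLL = ids.map(id => hll.create(id)).reduce(hll.plus(_, _))
println(approx.estimatedSize)  // approximate distinct count, close to 3
```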


[21/50] [abbrv] bahir git commit: [SPARK-3873][TESTS] Import ordering fixes.

Posted by lr...@apache.org.
[SPARK-3873][TESTS] Import ordering fixes.

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #10582 from vanzin/SPARK-3873-tests.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/289c3408
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/289c3408
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/289c3408

Branch: refs/heads/master
Commit: 289c34081ed1418417e4772fb0872d5e682deff1
Parents: b509629
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Tue Jan 5 19:07:39 2016 -0800
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Tue Jan 5 19:07:39 2016 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala   | 2 +-
 .../org/apache/spark/streaming/twitter/TwitterStreamSuite.scala | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/289c3408/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
index 1618e2c..26c6dc4 100644
--- a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
+++ b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTTestUtils.scala
@@ -27,8 +27,8 @@ import org.apache.commons.lang3.RandomUtils
 import org.eclipse.paho.client.mqttv3._
 import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
 
-import org.apache.spark.util.Utils
 import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.util.Utils
 
 /**
  * Share codes for Scala and Python unit tests

http://git-wip-us.apache.org/repos/asf/bahir/blob/289c3408/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala b/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
index d9acb56..7e5fc0c 100644
--- a/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
+++ b/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
@@ -17,14 +17,13 @@
 
 package org.apache.spark.streaming.twitter
 
-
 import org.scalatest.BeforeAndAfter
 import twitter4j.Status
-import twitter4j.auth.{NullAuthorization, Authorization}
+import twitter4j.auth.{Authorization, NullAuthorization}
 
 import org.apache.spark.{Logging, SparkFunSuite}
-import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.dstream.ReceiverInputDStream
 
 class TwitterStreamSuite extends SparkFunSuite with BeforeAndAfter with Logging {
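
The reordering in these hunks follows Spark's import convention: groups for java/javax, scala,
third-party, and org.apache.spark packages, separated by blank lines and alphabetized within
each group. A small sketch of the target layout (the particular imports are illustrative):

```
import java.util.Properties

import scala.collection.mutable

import twitter4j.Status

import org.apache.spark.{Logging, SparkFunSuite}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
```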


[05/50] [abbrv] bahir git commit: Revert "[SPARK-10300] [BUILD] [TESTS] Add support for test tags in run-tests.py."

Posted by lr...@apache.org.
Revert "[SPARK-10300] [BUILD] [TESTS] Add support for test tags in run-tests.py."

This reverts commit 8abef21dac1a6538c4e4e0140323b83d804d602b.


Project: http://git-wip-us.apache.org/repos/asf/bahir/repo
Commit: http://git-wip-us.apache.org/repos/asf/bahir/commit/e5d4bb75
Tree: http://git-wip-us.apache.org/repos/asf/bahir/tree/e5d4bb75
Diff: http://git-wip-us.apache.org/repos/asf/bahir/diff/e5d4bb75

Branch: refs/heads/master
Commit: e5d4bb75495eec5ee9095e99101532b68b75fb29
Parents: 762497f
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Tue Sep 15 13:03:38 2015 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Tue Sep 15 13:03:38 2015 -0700

----------------------------------------------------------------------
 streaming-mqtt/pom.xml    | 10 ++++++++++
 streaming-twitter/pom.xml | 10 ++++++++++
 streaming-zeromq/pom.xml  | 10 ++++++++++
 3 files changed, 30 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bahir/blob/e5d4bb75/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 913c47d..05e6338 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -59,6 +59,16 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.novocode</groupId>
+      <artifactId>junit-interface</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.apache.activemq</groupId>
       <artifactId>activemq-core</artifactId>
       <version>5.7.0</version>

http://git-wip-us.apache.org/repos/asf/bahir/blob/e5d4bb75/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 9137bf2..244ad58 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -58,6 +58,16 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.novocode</groupId>
+      <artifactId>junit-interface</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>

http://git-wip-us.apache.org/repos/asf/bahir/blob/e5d4bb75/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 6fec4f0..171df86 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -57,6 +57,16 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.novocode</groupId>
+      <artifactId>junit-interface</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>