Posted to commits@livy.apache.org by lr...@apache.org on 2017/07/20 01:36:47 UTC

[01/50] [abbrv] incubator-livy-website git commit: [BAHIR-61] Enable publishing release artifacts from a tag

Repository: incubator-livy-website
Updated Branches:
  refs/heads/master [created] 0d9725d4c
  refs/remotes/origin/HEAD [created] 0d9725d4c
  refs/remotes/origin/master [created] 0d9725d4c
Updated Tags:  refs/tags/2.0.0-preview-rc1 [created] 1ffcb07fc
  refs/tags/v2.0.0-preview [created] 1ffcb07fc
  refs/tags/v2.0.0-rc1 [created] 03fb206e7
  refs/tags/v2.0.1-rc1 [created] aebdfb8e2
  refs/tags/v2.0.2-rc1 [created] 888f5db13
  refs/tags/v2.0.2-rc2 [created] bd6b7f3fb
  refs/tags/v2.0.2-rc3 [created] 6e1622166
  refs/tags/v2.1.0-rc1 [created] bc34dda31
  refs/tags/v2.1.1-rc1 [created] cfcb17e9a
  refs/tags/v2.1.1-rc2 [created] 9bb36b54c


[BAHIR-61] Enable publishing release artifacts from a tag

Add a --gitTag parameter that identifies the RC tag to use
when publishing artifacts to Maven.


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/415576ba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/415576ba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/415576ba

Branch: refs/heads/master
Commit: 415576ba702206ba9cfc5c8bdbdee4869a1e52ac
Parents: 28f034f
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Sep 28 12:41:35 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Sep 28 12:41:35 2016 -0700

----------------------------------------------------------------------
 dev/release-build.sh | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/415576ba/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index 0c39031..2575538 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -64,6 +64,7 @@ release-build.sh --release-prepare --releaseVersion="2.0.0" --developmentVersion
 release-build.sh --release-prepare --releaseVersion="2.0.0" --developmentVersion="2.1.0-SNAPSHOT" --releaseRc="rc1" --tag="v2.0.0"  --gitCommitHash="a874b73" --dryRun
 
 release-build.sh --release-publish --gitCommitHash="a874b73"
+release-build.sh --release-publish --gitTag="v2.0.0rc1"
 
 release-build.sh --release-snapshot
 release-build.sh --release-snapshot --gitCommitHash="a874b73"
@@ -102,6 +103,10 @@ while [ "${1+defined}" ]; do
       GIT_REF="${PARTS[1]}"
       shift
       ;;
+    --gitTag)
+      GIT_TAG="${PARTS[1]}"
+      shift
+      ;;
     --releaseVersion)
       RELEASE_VERSION="${PARTS[1]}"
       shift
@@ -155,9 +160,15 @@ if [[ "$RELEASE_PREPARE" == "true" && -z "$DEVELOPMENT_VERSION" ]]; then
     exit_with_usage
 fi
 
-if [[ "$RELEASE_PUBLISH" == "true" && -z "$GIT_REF" ]]; then
-    echo "ERROR: --gitCommitHash must be passed as an argument to run this script"
-    exit_with_usage
+if [[ "$RELEASE_PUBLISH" == "true"  ]]; then
+    if [[ "$GIT_REF" && "$GIT_TAG" ]]; then
+        echo "ERROR: Only one argumented permitted when publishing : --gitCommitHash or --gitTag"
+        exit_with_usage
+    fi
+    if [[ -z "$GIT_REF" && -z "$GIT_TAG" ]]; then
+        echo "ERROR: --gitCommitHash OR --gitTag must be passed as an argument to run this script"
+        exit_with_usage
+    fi
 fi
 
 if [[ "$RELEASE_PUBLISH" == "true" && "$DRY_RUN" ]]; then
@@ -172,6 +183,9 @@ fi
 
 # Commit ref to checkout when building
 GIT_REF=${GIT_REF:-master}
+if [[ "$RELEASE_PUBLISH" == "true" && "$GIT_TAG" ]]; then
+    GIT_REF="tags/$GIT_TAG"
+fi
 
 BASE_DIR=$(pwd)
 


[35/50] [abbrv] incubator-livy-website git commit: [BAHIR-102] Initial support of Cloudant Query and examples

Posted by lr...@apache.org.
[BAHIR-102] Initial support of Cloudant Query and examples

Add an optimization that uses Cloudant Query (the _find endpoint) in particular
scenarios, so that predicates on non-primary-key fields can be pushed down to
the datastore.

Closes #41.
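
For illustration, here is a minimal Scala sketch of reading through the
sql-cloudant connector with the new query path enabled. It is a sketch only:
the host, credentials and database name are placeholders, and it assumes the
connector is on the Spark classpath.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .appName("Cloudant Query example")
      .config("cloudant.host", "ACCOUNT.cloudant.com")   // placeholder account
      .config("cloudant.username", "USERNAME")           // placeholder credentials
      .config("cloudant.password", "PASSWORD")
      .config("cloudant.useQuery", "true")               // route eligible predicates to _find
      .config("cloudant.queryLimit", "25")               // max results per _find request
      .getOrCreate()

    // With useQuery enabled and no view/index configured, predicates on
    // non-_id fields can be answered by the _find endpoint instead of _all_docs.
    val df = spark.read.format("org.apache.bahir.cloudant").load("n_airportcodemapping")
    df.filter(df("airportName") > "Moscow").select("_id", "airportName").show()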


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/fd4c35fc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/fd4c35fc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/fd4c35fc

Branch: refs/heads/master
Commit: fd4c35fc9f7ebb57464d231cf5d66e7bc4096a1b
Parents: abfdc70
Author: Yang Lei <ge...@gmail.com>
Authored: Fri Apr 7 19:23:43 2017 -0400
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Apr 13 12:15:10 2017 -0700

----------------------------------------------------------------------
 sql-cloudant/README.md                          |   2 +
 sql-cloudant/examples/python/CloudantQuery.py   |  65 ++++++++++
 sql-cloudant/examples/python/CloudantQueryDF.py |  61 +++++++++
 .../src/main/resources/application.conf         |   2 +
 .../apache/bahir/cloudant/CloudantConfig.scala  |  94 ++++++--------
 .../apache/bahir/cloudant/DefaultSource.scala   |  32 +----
 .../common/JsonStoreConfigManager.scala         |  62 ++-------
 .../cloudant/common/JsonStoreDataAccess.scala   |  79 ++++++------
 .../bahir/cloudant/common/JsonStoreRDD.scala    | 129 ++++++++++++++++---
 9 files changed, 338 insertions(+), 188 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/README.md
----------------------------------------------------------------------
diff --git a/sql-cloudant/README.md b/sql-cloudant/README.md
index eaa8893..38d2bbb 100644
--- a/sql-cloudant/README.md
+++ b/sql-cloudant/README.md
@@ -62,6 +62,8 @@ cloudant.protocol|https|protocol to use to transfer data: http or https
 cloudant.host||cloudant host url
 cloudant.username||cloudant userid
 cloudant.password||cloudant password
+cloudant.useQuery|false|By default, the _all_docs endpoint is used if the 'view' and 'index' configuration options (see below) are not set. When useQuery is enabled, the _find endpoint is used in place of _all_docs when the query condition is not on the primary key field (_id), so that query predicates may be pushed down to the datastore.
+cloudant.queryLimit|25|The maximum number of results returned when querying the _find endpoint.
 jsonstore.rdd.partitions|10|the number of partitions intent used to drive JsonStoreRDD loading query result in parallel. The actual number is calculated based on total rows returned and satisfying maxInPartition and minInPartition
 jsonstore.rdd.maxInPartition|-1|the max rows in a partition. -1 means unlimited
 jsonstore.rdd.minInPartition|10|the min rows in a partition.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/examples/python/CloudantQuery.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantQuery.py b/sql-cloudant/examples/python/CloudantQuery.py
new file mode 100644
index 0000000..5ca5c44
--- /dev/null
+++ b/sql-cloudant/examples/python/CloudantQuery.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pprint
+from pyspark.sql import SparkSession
+
+# define cloudant related configuration
+# set protocol to http if needed, default value=https
+# config("cloudant.protocol","http")
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using query")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .config("jsonstore.rdd.partitions", 8)\
+    .config("cloudant.useQuery", "true")\
+    .config("schemaSampleSize",1)\
+    .getOrCreate()
+
+
+spark.sql(" CREATE TEMPORARY VIEW airportTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
+airportData = spark.sql("SELECT _id, airportName FROM airportTable1 WHERE airportName == 'Moscow' ")
+airportData.printSchema()
+print 'Total # of rows in airportData: ' + str(airportData.count())
+airportData.show()
+
+spark.sql(" CREATE TEMPORARY VIEW airportTable2 USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
+airportData = spark.sql("SELECT _id, airportName FROM airportTable2 WHERE airportName > 'Moscow' ORDER BY _id")
+airportData.printSchema()
+print 'Total # of rows in airportData: ' + str(airportData.count())
+airportData.show()
+
+spark.sql(" CREATE TEMPORARY VIEW airportTable3 USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
+airportData = spark.sql("SELECT _id, airportName FROM airportTable3 WHERE airportName > 'Moscow' AND  airportName < 'Sydney'  ORDER BY _id")
+airportData.printSchema()
+print 'Total # of rows in airportData: ' + str(airportData.count())
+airportData.show()
+
+spark.sql(" CREATE TEMPORARY VIEW flight1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight')")
+flightData = spark.sql("SELECT flightSegmentId, economyClassBaseCost, numFirstClassSeats FROM flight1 WHERE economyClassBaseCost >=200 AND numFirstClassSeats<=10")
+flightData.printSchema()
+print 'Total # of rows in flightData: ' + str(flightData.count())
+flightData.show()
+
+spark.sql(" CREATE TEMPORARY VIEW flight2 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight')")
+flightData = spark.sql("SELECT flightSegmentId, scheduledDepartureTime, scheduledArrivalTime FROM flight2 WHERE scheduledDepartureTime >='2014-12-15T05:00:00.000Z' AND scheduledArrivalTime <='2014-12-15T11:04:00.000Z'")
+flightData.printSchema()
+print 'Total # of rows in flightData: ' + str(flightData.count())
+flightData.show()
+
+

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/examples/python/CloudantQueryDF.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantQueryDF.py b/sql-cloudant/examples/python/CloudantQueryDF.py
new file mode 100644
index 0000000..c8fa296
--- /dev/null
+++ b/sql-cloudant/examples/python/CloudantQueryDF.py
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pprint
+from pyspark.sql import SparkSession
+
+# define cloudant related configuration
+# set protocol to http if needed, default value=https
+# config("cloudant.protocol","http")
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using query")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .config("jsonstore.rdd.partitions", 8)\
+    .config("cloudant.useQuery", "true")\
+    .config("schemaSampleSize",1)\
+    .getOrCreate()
+
+
+# ***0. Loading dataframe from Cloudant db with one String field condition
+df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
+df.printSchema()
+df.filter(df.airportName == 'Moscow').select("_id",'airportName').show()
+
+
+# ***1. Loading dataframe from Cloudant db with one String field condition
+df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
+df.printSchema()
+df.filter(df.airportName > 'Moscow').select("_id",'airportName').show()
+
+# ***2. Loading dataframe from Cloudant db with two String field conditions
+df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
+df.printSchema()
+df.filter(df.airportName > 'Moscow').filter(df.airportName < 'Sydney').select("_id",'airportName').show()
+
+# ***3. Loading dataframe from Cloudant db with two int field conditions
+df = spark.read.load("n_flight", "org.apache.bahir.cloudant")
+df.printSchema()
+df.filter(df.economyClassBaseCost >= 200).filter(df.numFirstClassSeats <=10).select('flightSegmentId','scheduledDepartureTime', 'scheduledArrivalTime').show()
+
+# ***4. Loading dataframe from Cloudant db with two timestamp field conditions
+df = spark.read.load("n_flight", "org.apache.bahir.cloudant")
+df.printSchema()
+df.filter(df.scheduledDepartureTime >= "2014-12-15T05:00:00.000Z").filter(df.scheduledArrivalTime <="2014-12-15T11:04:00.000Z").select('flightSegmentId','scheduledDepartureTime', 'scheduledArrivalTime').show()
+
+

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/src/main/resources/application.conf
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/resources/application.conf b/sql-cloudant/src/main/resources/application.conf
index 2d8b236..80dea91 100644
--- a/sql-cloudant/src/main/resources/application.conf
+++ b/sql-cloudant/src/main/resources/application.conf
@@ -10,5 +10,7 @@ spark-sql {
     }
     cloudant = {
         protocol = https
+        useQuery = false
+        queryLimit = 25
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
index ac14f4b..c4e27b9 100644
--- a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
@@ -34,20 +34,16 @@ class CloudantConfig(val protocol: String, val host: String,
     (implicit val username: String, val password: String,
     val partitions: Int, val maxInPartition: Int, val minInPartition: Int,
     val requestTimeout: Long, val bulkSize: Int, val schemaSampleSize: Int,
-    val createDBOnSave: Boolean, val selector: String)
+    val createDBOnSave: Boolean, val selector: String, val useQuery: Boolean = false,
+    val queryLimit: Int)
     extends Serializable{
 
-  private val SCHEMA_FOR_ALL_DOCS_NUM = -1
   private lazy val dbUrl = {protocol + "://" + host + "/" + dbName}
 
   val pkField = "_id"
   val defaultIndex = "_all_docs" // "_changes" does not work for partition
   val default_filter: String = "*:*"
 
-  def getChangesUrl(): String = {
-    dbUrl + "/_changes?include_docs=true&feed=normal"
-  }
-
   def getContinuousChangesUrl(): String = {
     var url = dbUrl + "/_changes?include_docs=true&feed=continuous&heartbeat=3000"
     if (selector != null) {
@@ -64,11 +60,6 @@ class CloudantConfig(val protocol: String, val host: String,
     dbUrl
   }
 
-  def getLastUrl(skip: Int): String = {
-    if (skip ==0 ) null
-    else s"$dbUrl/$defaultIndex?limit=$skip"
-  }
-
   def getSchemaSampleSize(): Int = {
     schemaSampleSize
   }
@@ -77,8 +68,6 @@ class CloudantConfig(val protocol: String, val host: String,
     createDBOnSave
   }
 
-  def getLastNum(result: JsValue): JsValue = (result \ "last_seq").get
-
   def getTotalUrl(url: String): String = {
     if (url.contains('?')) {
       url + "&limit=1"
@@ -91,37 +80,24 @@ class CloudantConfig(val protocol: String, val host: String,
     dbName
   }
 
-  def allowPartition(): Boolean = {indexName==null}
+  def queryEnabled(): Boolean = {useQuery && indexName==null && viewName==null}
 
-  def getOneUrl(): String = {
-    dbUrl + "/_all_docs?limit=1&include_docs=true"
-  }
+  def allowPartition(queryUsed: Boolean): Boolean = {indexName==null && !queryUsed}
 
-  def getOneUrlExcludeDDoc1(): String = {
-    dbUrl + "/_all_docs?endkey=%22_design/%22&limit=1&include_docs=true"
-  }
+  def getAllDocsUrl(limit: Int, excludeDDoc: Boolean = false): String = {
 
-  def getOneUrlExcludeDDoc2(): String = {
-    dbUrl + "/_all_docs?startkey=%22_design0/%22&limit=1&include_docs=true"
-  }
-
-  def getAllDocsUrlExcludeDDoc(limit: Int): String = {
     if (viewName == null) {
-      dbUrl + "/_all_docs?startkey=%22_design0/%22&limit=" + limit + "&include_docs=true"
-    } else {
-      dbUrl + "/" + viewName + "?limit=1"
-    }
-  }
-
-  def getAllDocsUrl(limit: Int): String = {
-    if (viewName == null) {
-      if (limit == SCHEMA_FOR_ALL_DOCS_NUM) {
-        dbUrl + "/_all_docs?include_docs=true"
+      val baseUrl = (
+          if ( excludeDDoc) dbUrl + "/_all_docs?startkey=%22_design0/%22&include_docs=true"
+          else dbUrl + "/_all_docs?include_docs=true"
+          )
+      if (limit == JsonStoreConfigManager.ALL_DOCS_LIMIT) {
+        baseUrl
       } else {
-        dbUrl + "/_all_docs?limit=" + limit + "&include_docs=true"
+        baseUrl + "&limit=" + limit
       }
     } else {
-      if (limit == JsonStoreConfigManager.SCHEMA_FOR_ALL_DOCS_NUM) {
+      if (limit == JsonStoreConfigManager.ALL_DOCS_LIMIT) {
         dbUrl + "/" + viewName
       } else {
         dbUrl + "/" + viewName + "?limit=" + limit
@@ -132,22 +108,23 @@ class CloudantConfig(val protocol: String, val host: String,
   def getRangeUrl(field: String = null, start: Any = null,
       startInclusive: Boolean = false, end: Any = null,
       endInclusive: Boolean = false,
-      includeDoc: Boolean = true): (String, Boolean) = {
-    val (url: String, pusheddown: Boolean) =
-      calculate(field, start, startInclusive, end, endInclusive)
-    if (includeDoc) {
+      includeDoc: Boolean = true,
+      allowQuery: Boolean = false): (String, Boolean, Boolean) = {
+    val (url: String, pusheddown: Boolean, queryUsed: Boolean) =
+      calculate(field, start, startInclusive, end, endInclusive, allowQuery)
+    if (includeDoc && !queryUsed ) {
       if (url.indexOf('?') > 0) {
-        (url + "&include_docs=true", pusheddown)
+        (url + "&include_docs=true", pusheddown, queryUsed)
       } else {
-        (url + "?include_docs=true", pusheddown)
+        (url + "?include_docs=true", pusheddown, queryUsed)
       }
     } else {
-      (url, pusheddown)
+      (url, pusheddown, queryUsed)
     }
   }
 
   private def calculate(field: String, start: Any, startInclusive: Boolean,
-      end: Any, endInclusive: Boolean): (String, Boolean) = {
+      end: Any, endInclusive: Boolean, allowQuery: Boolean): (String, Boolean, Boolean) = {
     if (field != null && field.equals(pkField)) {
       var condition = ""
       if (start != null && end != null && start.equals(end)) {
@@ -166,16 +143,18 @@ class CloudantConfig(val protocol: String, val host: String,
           condition += "endkey=%22" + URLEncoder.encode(end.toString(), "UTF-8") + "%22"
         }
       }
-      (dbUrl + "/_all_docs" + condition, true)
+      (dbUrl + "/_all_docs" + condition, true, false)
     } else if (indexName!=null) {
       //  push down to indexName
       val condition = calculateCondition(field, start, startInclusive,
         end, endInclusive)
-      (dbUrl + "/" + indexName + "?q=" + condition, true)
+      (dbUrl + "/" + indexName + "?q=" + condition, true, false)
     } else if (viewName != null) {
-      (dbUrl + "/" + viewName, true)
+      (dbUrl + "/" + viewName, false, false)
+    } else if (allowQuery && useQuery) {
+      (s"$dbUrl/_find", false, true)
     } else {
-      (s"$dbUrl/$defaultIndex", false)
+      (s"$dbUrl/$defaultIndex", false, false)
     }
 
   }
@@ -215,20 +194,21 @@ class CloudantConfig(val protocol: String, val host: String,
     }
   }
 
-  def getSubSetUrl (url: String, skip: Int, limit: Int)
-      (implicit convertSkip: (Int) => String): String = {
+  def getSubSetUrl (url: String, skip: Int, limit: Int, queryUsed: Boolean): String = {
     val suffix = {
       if (url.indexOf("_all_docs")>0) "include_docs=true&limit=" +
         limit + "&skip=" + skip
-      else if (url.indexOf("_changes")>0) "include_docs=true&limit=" +
-          limit + "&since=" + convertSkip(skip)
       else if (viewName != null) {
         "limit=" + limit + "&skip=" + skip
+      } else if (queryUsed) {
+        ""
       } else {
         "include_docs=true&limit=" + limit
       } // TODO Index query does not support subset query. Should disable Partitioned loading?
     }
-    if (url.indexOf('?') > 0) {
+    if (suffix.length==0) {
+      url
+    } else if (url.indexOf('?') > 0) {
       url + "&" + suffix
     }
     else {
@@ -246,8 +226,10 @@ class CloudantConfig(val protocol: String, val host: String,
     }
   }
 
-  def getRows(result: JsValue): Seq[JsValue] = {
-    if (viewName == null) {
+  def getRows(result: JsValue, queryUsed: Boolean): Seq[JsValue] = {
+    if ( queryUsed ) {
+      ((result \ "docs").as[JsArray]).value.map(row => row)
+    } else if ( viewName == null) {
       ((result \ "rows").as[JsArray]).value.map(row => (row \ "doc").get)
     } else {
       ((result \ "rows").as[JsArray]).value.map(row => row)

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
index 4c973f7..deab22a 100644
--- a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types._
 
-import org.apache.bahir.cloudant.common.{FilterInterpreter, JsonStoreDataAccess, JsonStoreRDD, _}
+import org.apache.bahir.cloudant.common.{JsonStoreDataAccess, JsonStoreRDD, _}
 
 case class CloudantReadWriteRelation (config: CloudantConfig,
                                       schema: StructType,
@@ -49,23 +49,11 @@ case class CloudantReadWriteRelation (config: CloudantConfig,
           allDocsDF.select(requiredColumns(0), colsExceptCol0: _*).rdd
         }
       } else {
-        val filterInterpreter = new FilterInterpreter(filters)
-        var searchField: String = {
-          if (filterInterpreter.containsFiltersFor(config.pkField)) {
-            config.pkField
-          } else {
-            filterInterpreter.firstField
-          }
-        }
-
-        val (min, minInclusive, max, maxInclusive) = filterInterpreter.getInfo(searchField)
-        implicit val columns = requiredColumns
-        val (url: String, pusheddown: Boolean) = config.getRangeUrl(searchField,
-            min, minInclusive, max, maxInclusive, false)
-        if (!pusheddown) searchField = null
-        implicit val attrToFilters = filterInterpreter.getFiltersForPostProcess(searchField)
+        implicit val columns : Array[String] = requiredColumns
+        implicit val origFilters : Array[Filter] = filters
 
-        val cloudantRDD = new JsonStoreRDD(sqlContext.sparkContext, config, url)
+        logger.info("buildScan:" + columns + "," + origFilters)
+        val cloudantRDD = new JsonStoreRDD(sqlContext.sparkContext, config)
         val df = sqlContext.read.json(cloudantRDD)
         if (colsLength > 1) {
           val colsExceptCol0 = for (i <- 1 until colsLength) yield requiredColumns(i)
@@ -117,16 +105,10 @@ class DefaultSource extends RelationProvider
           inSchema
         } else {
           val df = if (config.getSchemaSampleSize() ==
-            JsonStoreConfigManager.SCHEMA_FOR_ALL_DOCS_NUM &&
+            JsonStoreConfigManager.ALL_DOCS_LIMIT &&
             config.viewName == null
             && config.indexName == null) {
-            val filterInterpreter = new FilterInterpreter(null)
-            var searchField = null
-            val (min, minInclusive, max, maxInclusive) =
-                filterInterpreter.getInfo(searchField)
-            val (url: String, pusheddown: Boolean) = config.getRangeUrl(searchField,
-                min, minInclusive, max, maxInclusive, false)
-            val cloudantRDD = new JsonStoreRDD(sqlContext.sparkContext, config, url)
+            val cloudantRDD = new JsonStoreRDD(sqlContext.sparkContext, config)
             allDocsDF = sqlContext.read.json(cloudantRDD)
             allDocsDF
           } else {

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
index 92192bb..38c5006 100644
--- a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
@@ -25,13 +25,14 @@ import org.apache.bahir.cloudant.CloudantConfig
 
  object JsonStoreConfigManager {
   val CLOUDANT_CONNECTOR_VERSION = "2.0.0"
-  val SCHEMA_FOR_ALL_DOCS_NUM = -1
+  val ALL_DOCS_LIMIT = -1
 
   private val CLOUDANT_HOST_CONFIG = "cloudant.host"
   private val CLOUDANT_USERNAME_CONFIG = "cloudant.username"
   private val CLOUDANT_PASSWORD_CONFIG = "cloudant.password"
   private val CLOUDANT_PROTOCOL_CONFIG = "cloudant.protocol"
-
+  private val USE_QUERY_CONFIG = "cloudant.useQuery"
+  private val QUERY_LIMIT_CONFIG = "cloudant.queryLimit"
 
   private val PARTITION_CONFIG = "jsonstore.rdd.partitions"
   private val MAX_IN_PARTITION_CONFIG = "jsonstore.rdd.maxInPartition"
@@ -39,7 +40,7 @@ import org.apache.bahir.cloudant.CloudantConfig
   private val REQUEST_TIMEOUT_CONFIG = "jsonstore.rdd.requestTimeout"
   private val BULK_SIZE_CONFIG = "bulkSize"
   private val SCHEMA_SAMPLE_SIZE_CONFIG = "schemaSampleSize"
-  private val CREATE_DB_ON_SAVE = "createDBOnSave"
+  private val CREATE_DB_ON_SAVE_CONFIG = "createDBOnSave"
 
 
   private val configFactory = ConfigFactory.load()
@@ -139,6 +140,10 @@ import org.apache.bahir.cloudant.CloudantConfig
   def getConfig(context: SQLContext, parameters: Map[String, String]): CloudantConfig = {
 
     val sparkConf = context.sparkContext.getConf
+    getConfig(sparkConf, parameters)
+  }
+
+  def getConfig (sparkConf: SparkConf, parameters: Map[String, String]): CloudantConfig = {
 
     implicit val total = getInt(sparkConf, parameters, PARTITION_CONFIG)
     implicit val max = getInt(sparkConf, parameters, MAX_IN_PARTITION_CONFIG)
@@ -146,67 +151,28 @@ import org.apache.bahir.cloudant.CloudantConfig
     implicit val requestTimeout = getLong(sparkConf, parameters, REQUEST_TIMEOUT_CONFIG)
     implicit val bulkSize = getInt(sparkConf, parameters, BULK_SIZE_CONFIG)
     implicit val schemaSampleSize = getInt(sparkConf, parameters, SCHEMA_SAMPLE_SIZE_CONFIG)
-    implicit val createDBOnSave = getBool(sparkConf, parameters, CREATE_DB_ON_SAVE)
+    implicit val createDBOnSave = getBool(sparkConf, parameters, CREATE_DB_ON_SAVE_CONFIG)
+
+    implicit val useQuery = getBool(sparkConf, parameters, USE_QUERY_CONFIG)
+    implicit val queryLimit = getInt(sparkConf, parameters, QUERY_LIMIT_CONFIG)
 
     val dbName = parameters.getOrElse("database", parameters.getOrElse("path", null))
     val indexName = parameters.getOrElse("index", null)
     val viewName = parameters.getOrElse("view", null)
-
-    // FIXME: Add logger
-    // scalastyle:off println
-    println(s"Use connectorVersion=$CLOUDANT_CONNECTOR_VERSION, dbName=$dbName, " +
-        s"indexName=$indexName, viewName=$viewName," +
-        s"$PARTITION_CONFIG=$total, $MAX_IN_PARTITION_CONFIG=$max," +
-        s"$MIN_IN_PARTITION_CONFIG=$min, $REQUEST_TIMEOUT_CONFIG=$requestTimeout," +
-        s"$BULK_SIZE_CONFIG=$bulkSize, $SCHEMA_SAMPLE_SIZE_CONFIG=$schemaSampleSize")
-    // scalastyle:on println
+    val selector = parameters.getOrElse("selector", null)
 
     val protocol = getString(sparkConf, parameters, CLOUDANT_PROTOCOL_CONFIG)
     val host = getString( sparkConf, parameters, CLOUDANT_HOST_CONFIG)
     val user = getString(sparkConf, parameters, CLOUDANT_USERNAME_CONFIG)
     val passwd = getString(sparkConf, parameters, CLOUDANT_PASSWORD_CONFIG)
-    val selector = getString(sparkConf, parameters, "selector")
 
     if (host != null) {
       new CloudantConfig(protocol, host, dbName, indexName,
         viewName) (user, passwd, total, max, min, requestTimeout, bulkSize,
-        schemaSampleSize, createDBOnSave, selector)
+        schemaSampleSize, createDBOnSave, selector, useQuery, queryLimit)
     } else {
       throw new RuntimeException("Spark configuration is invalid! " +
         "Please make sure to supply required values for cloudant.host.")
       }
   }
-
-  def getConfig(sparkConf: SparkConf, parameters: Map[String, String]): CloudantConfig = {
-
-    implicit val total = getInt(sparkConf, parameters, PARTITION_CONFIG)
-    implicit val max = getInt(sparkConf, parameters, MAX_IN_PARTITION_CONFIG)
-    implicit val min = getInt(sparkConf, parameters, MIN_IN_PARTITION_CONFIG)
-    implicit val requestTimeout = getLong(sparkConf, parameters, REQUEST_TIMEOUT_CONFIG)
-    implicit val bulkSize = getInt(sparkConf, parameters, BULK_SIZE_CONFIG)
-    implicit val schemaSampleSize = getInt(sparkConf, parameters, SCHEMA_SAMPLE_SIZE_CONFIG)
-    implicit val createDBOnSave = getBool(sparkConf, parameters, CREATE_DB_ON_SAVE)
-
-    val dbName = parameters.getOrElse("database", null)
-
-    // scalastyle:off println
-    println(s"Use connectorVersion=$CLOUDANT_CONNECTOR_VERSION, dbName=$dbName, " +
-      s"$REQUEST_TIMEOUT_CONFIG=$requestTimeout")
-    // scalastyle:on println
-
-    val protocol = getString(sparkConf, parameters, CLOUDANT_PROTOCOL_CONFIG)
-    val host = getString( sparkConf, parameters, CLOUDANT_HOST_CONFIG)
-    val user = getString(sparkConf, parameters, CLOUDANT_USERNAME_CONFIG)
-    val passwd = getString(sparkConf, parameters, CLOUDANT_PASSWORD_CONFIG)
-    val selector = getString(sparkConf, parameters, "selector")
-
-    if (host != null) {
-      new CloudantConfig(protocol, host, dbName)(user, passwd,
-        total, max, min, requestTimeout, bulkSize,
-        schemaSampleSize, createDBOnSave, selector)
-    } else {
-      throw new RuntimeException("Cloudant parameters are invalid!" +
-          "Please make sure to supply required values for cloudant.host.")
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
index e84a44c..ac79359 100644
--- a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
@@ -39,19 +39,6 @@ class JsonStoreDataAccess (config: CloudantConfig)  {
   lazy val logger = LoggerFactory.getLogger(getClass)
   implicit lazy val timeout = config.requestTimeout
 
-  def getOne()( implicit columns: Array[String] = null): Seq[String] = {
-    var r = this.getQueryResult[Seq[String]](config.getOneUrlExcludeDDoc1(), processAll)
-    if (r.size == 0 ) {
-      r = this.getQueryResult[Seq[String]](config.getOneUrlExcludeDDoc2(), processAll)
-    }
-    if (r.size == 0) {
-      throw new RuntimeException("Database " + config.getDbname() +
-        " doesn't have any non-design documents!")
-    } else {
-      r
-    }
-  }
-
   def getMany(limit: Int)(implicit columns: Array[String] = null): Seq[String] = {
     if (limit == 0) {
       throw new RuntimeException("Database " + config.getDbname() +
@@ -63,7 +50,7 @@ class JsonStoreDataAccess (config: CloudantConfig)  {
     }
     var r = this.getQueryResult[Seq[String]](config.getAllDocsUrl(limit), processAll)
     if (r.size == 0) {
-      r = this.getQueryResult[Seq[String]](config.getAllDocsUrlExcludeDDoc(limit), processAll)
+      r = this.getQueryResult[Seq[String]](config.getAllDocsUrl(limit, true), processAll)
     }
     if (r.size == 0) {
       throw new RuntimeException("Database " + config.getDbname() +
@@ -74,40 +61,34 @@ class JsonStoreDataAccess (config: CloudantConfig)  {
   }
 
   def getAll[T](url: String)
-      (implicit columns: Array[String] = null,
-      attrToFilters: Map[String, Array[Filter]] = null): Seq[String] = {
+      (implicit columns: Array[String] = null): Seq[String] = {
     this.getQueryResult[Seq[String]](url, processAll)
   }
 
   def getIterator(skip: Int, limit: Int, url: String)
       (implicit columns: Array[String] = null,
-      attrToFilters: Map[String, Array[Filter]] = null): Iterator[String] = {
-    implicit def convertSkip(skip: Int): String = {
-      val url = config.getLastUrl(skip)
-      if (url == null) {
-        skip.toString()
-      } else {
-        this.getQueryResult[String](url,
-          { result => config.getLastNum(Json.parse(result)).as[JsString].value})
-      }
-    }
-    val newUrl = config.getSubSetUrl(url, skip, limit)
+      postData: String = null): Iterator[String] = {
+    val newUrl = config.getSubSetUrl(url, skip, limit, postData!=null)
     this.getQueryResult[Iterator[String]](newUrl, processIterator)
   }
 
-  def getTotalRows(url: String): Int = {
-    val totalUrl = config.getTotalUrl(url)
-    this.getQueryResult[Int](totalUrl,
-        { result => config.getTotalRows(Json.parse(result))})
+  def getTotalRows(url: String, queryUsed: Boolean)
+      (implicit postData: String = null): Int = {
+      if (queryUsed) config.queryLimit // Query can not retrieve total row now.
+      else {
+        val totalUrl = config.getTotalUrl(url)
+        this.getQueryResult[Int](totalUrl,
+          { result => config.getTotalRows(Json.parse(result))})
+      }
   }
 
   private def processAll (result: String)
       (implicit columns: Array[String],
-      attrToFilters: Map[String, Array[Filter]] = null) = {
-    logger.debug(s"processAll columns:$columns, attrToFilters:$attrToFilters")
+      postData: String = null) = {
+    logger.debug(s"processAll:$result, columns:$columns")
     val jsonResult: JsValue = Json.parse(result)
-    var rows = config.getRows(jsonResult)
-    if (config.viewName == null) {
+    var rows = config.getRows(jsonResult, postData!=null )
+    if (config.viewName == null && postData==null) {
       // filter design docs
       rows = rows.filter(r => FilterDDocs.filter(r))
     }
@@ -116,7 +97,7 @@ class JsonStoreDataAccess (config: CloudantConfig)  {
 
   private def processIterator (result: String)
     (implicit columns: Array[String],
-    attrToFilters: Map[String, Array[Filter]] = null): Iterator[String] = {
+      postData: String = null): Iterator[String] = {
     processAll(result).iterator
   }
 
@@ -137,23 +118,39 @@ class JsonStoreDataAccess (config: CloudantConfig)  {
     getQueryResult(url, processResults)
   }
 
-
   private def getQueryResult[T]
       (url: String, postProcessor: (String) => T)
       (implicit columns: Array[String] = null,
-      attrToFilters: Map[String, Array[Filter]] = null) : T = {
-    logger.warn("Loading data from Cloudant using query: " + url)
+      postData: String = null) : T = {
+    logger.info(s"Loading data from Cloudant using: $url , postData: $postData")
     val requestTimeout = config.requestTimeout.toInt
     val clRequest: HttpRequest = config.username match {
       case null =>
-        Http(url)
+        if (postData!=null) {
+          Http(url)
+          .postData(postData)
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+          .header("Content-Type", "application/json")
+          .header("User-Agent", "spark-cloudant")
+        } else {
+          Http(url)
             .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
             .header("User-Agent", "spark-cloudant")
+        }
       case _ =>
-        Http(url)
+        if (postData!=null) {
+          Http(url)
+          .postData(postData)
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+          .header("Content-Type", "application/json")
+          .header("User-Agent", "spark-cloudant")
+          .auth(config.username, config.password)
+        } else {
+          Http(url)
             .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
             .header("User-Agent", "spark-cloudant")
             .auth(config.username, config.password)
+        }
     }
 
     val clResponse: HttpResponse[String] = clRequest.execute()

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/fd4c35fc/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
index 46774f5..46ba912 100644
--- a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
@@ -17,12 +17,13 @@
 package org.apache.bahir.cloudant.common
 
 import org.slf4j.LoggerFactory
+import play.api.libs.json.{JsNull, Json, JsString, JsValue}
 
 import org.apache.spark.Partition
 import org.apache.spark.SparkContext
 import org.apache.spark.TaskContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.sources._
 
 import org.apache.bahir.cloudant.CloudantConfig
 
@@ -31,9 +32,9 @@ import org.apache.bahir.cloudant.CloudantConfig
   * the limit rows returns and the skipped rows.
  */
 
-private[cloudant] class JsonStoreRDDPartition(val skip: Int, val limit: Int,
-    val idx: Int, val config: CloudantConfig,
-    val attrToFilters: Map[String, Array[Filter]])
+private[cloudant] class JsonStoreRDDPartition(val url: String, val skip: Int, val limit: Int,
+    val idx: Int, val config: CloudantConfig, val selector: JsValue, val fields: JsValue,
+    val queryUsed: Boolean)
     extends Partition with Serializable{
   val index = idx
 }
@@ -46,16 +47,15 @@ private[cloudant] class JsonStoreRDDPartition(val skip: Int, val limit: Int,
  *  and minInPartition / maxInPartition )
  *  maxRowsInPartition: -1 means unlimited
  */
-class JsonStoreRDD(sc: SparkContext, config: CloudantConfig,
-    url: String)(implicit requiredcolumns: Array[String] = null,
-    attrToFilters: Map[String, Array[Filter]] = null)
+class JsonStoreRDD(sc: SparkContext, config: CloudantConfig)
+    (implicit requiredcolumns: Array[String] = null,
+              filters: Array[Filter] = null)
   extends RDD[String](sc, Nil) {
 
-  lazy val totalRows = {
-      new JsonStoreDataAccess(config).getTotalRows(url)
-  }
-  lazy val totalPartition = {
-    if (totalRows == 0 || ! config.allowPartition() )  1
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  private def getTotalPartition(totalRows: Int, queryUsed: Boolean): Int = {
+    if (totalRows == 0 || ! config.allowPartition(queryUsed) )  1
     else if (totalRows < config.partitions * config.minInPartition) {
       val total = totalRows / config.minInPartition
       if (total == 0 ) {
@@ -76,7 +76,7 @@ class JsonStoreRDD(sc: SparkContext, config: CloudantConfig,
     }
   }
 
-  lazy val limitPerPartition = {
+  private def getLimitPerPartition(totalRows: Int, totalPartition: Int): Int = {
     val limit = totalRows/totalPartition
     if (totalRows % totalPartition != 0) {
       limit + 1
@@ -85,22 +85,115 @@ class JsonStoreRDD(sc: SparkContext, config: CloudantConfig,
     }
   }
 
+  private def convertToMangoJson(f: Filter): (String, JsValue) = {
+    val (op, value): (String, Any) = f match {
+      case EqualTo(attr, v) => ("$eq", v)
+      case GreaterThan(attr, v) => ("$gt", v)
+      case LessThan(attr, v) => ("$lt", v)
+      case GreaterThanOrEqual(attr, v) => ("$gte", v)
+      case LessThanOrEqual(attr, v) => ("$lte", v)
+      case _ => (null, null)
+    }
+    val convertedV: JsValue = {
+      // TODO Better handing of other types
+      if (value != null) {
+        value match {
+          case s: String => Json.toJson(s)
+          case l: Long => Json.toJson(l)
+          case d: Double => Json.toJson(d)
+          case i: Int => Json.toJson(i)
+          case b: Boolean => Json.toJson(b)
+          case t: java.sql.Timestamp => Json.toJson(t)
+          case a: Any => logger.debug(s"Ignore field:$name, cannot handle its datatype: $a"); null
+        }
+      } else null
+    }
+    (op, convertedV)
+  }
+
+  private def convertAttrToMangoJson(filters: Array[Filter]): Map[String, JsValue] = {
+    filters.map(af => convertToMangoJson(af))
+            .filter(x => x._2 != null)
+            .toMap
+  }
+
   override def getPartitions: Array[Partition] = {
-    val logger = LoggerFactory.getLogger(getClass)
+
+    logger.info("getPartitions:" + requiredcolumns + "," + filters)
+
+    val filterInterpreter = new FilterInterpreter(filters)
+    val origAttrToFilters = ( if (filters==null || filters.length==0) null
+                              else filterInterpreter.getFiltersForPostProcess(null))
+
+    val (selector, fields) : (JsValue, JsValue) = {
+      if (!config.queryEnabled() || origAttrToFilters == null) (null, null)
+      else {
+        val selectors: Map[String, Map[String, JsValue]] =
+          origAttrToFilters.transform( (name, attrFilters) => convertAttrToMangoJson(attrFilters))
+        val filteredSelectors = selectors.filter((t) => ! t._2.isEmpty)
+
+        if (! filteredSelectors.isEmpty) {
+          val queryColumns = (
+              if (requiredcolumns == null || requiredcolumns.size == 0) null
+              else Json.toJson(requiredcolumns))
+          (Json.toJson(filteredSelectors), queryColumns)
+        } else (null, null)
+      }
+    }
+
+    logger.info("calculated selector and fields:" + selector + "," + fields)
+
+    var searchField: String = {
+          if (origAttrToFilters ==null ) null
+          else if (filterInterpreter.containsFiltersFor(config.pkField)) {
+            config.pkField
+          } else {
+            filterInterpreter.firstField
+          }
+        }
+
+    val (min, minInclusive, max, maxInclusive) = filterInterpreter.getInfo(searchField)
+    val (url: String, pusheddown: Boolean, queryUsed: Boolean) = config.getRangeUrl(searchField,
+            min, minInclusive, max, maxInclusive, false, selector!=null)
+
+    implicit val postData : String = {
+      if (queryUsed) {
+        Json.stringify(Json.obj("selector" -> selector, "limit" -> 1))
+      } else {
+        null
+      }
+    }
+    val totalRows = new JsonStoreDataAccess(config).getTotalRows(url, queryUsed)
+    val totalPartition = getTotalPartition(totalRows, queryUsed)
+    val limitPerPartition = getLimitPerPartition(totalRows, totalPartition)
+
     logger.info(s"Partition config - total=$totalPartition, " +
         s"limit=$limitPerPartition for totalRows of $totalRows")
 
-    (0 until totalPartition).map(i => {
+   logger.info(s"Partition query info - url=$url, queryUsed=$queryUsed")
+
+   (0 until totalPartition).map(i => {
       val skip = i * limitPerPartition
-      new JsonStoreRDDPartition(skip, limitPerPartition, i, config,
-          attrToFilters).asInstanceOf[Partition]
+      new JsonStoreRDDPartition(url, skip, limitPerPartition, i,
+          config, selector, fields, queryUsed).asInstanceOf[Partition]
     }).toArray
   }
 
   override def compute(splitIn: Partition, context: TaskContext):
       Iterator[String] = {
     val myPartition = splitIn.asInstanceOf[JsonStoreRDDPartition]
+    implicit val postData : String = {
+      if (myPartition.queryUsed && myPartition.fields !=null) {
+        Json.stringify(Json.obj("selector" -> myPartition.selector, "fields" -> myPartition.fields,
+            "limit" -> myPartition.limit, "skip" -> myPartition.skip))
+      } else if (myPartition.queryUsed) {
+        Json.stringify(Json.obj("selector" -> myPartition.selector, "limit" -> myPartition.limit,
+            "skip" -> myPartition.skip))
+      } else {
+        null
+      }
+    }
     new JsonStoreDataAccess(myPartition.config).getIterator(myPartition.skip,
-        myPartition.limit, url)
+        myPartition.limit, myPartition.url)
   }
 }
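
For reference, a hypothetical example of the per-partition request body that
compute() above builds and POSTs to _find, using the same play-json calls. The
predicate on airportName, the projected fields and the limit/skip values are
made-up illustrations of what the pushed-down Spark filters and the partition
window would produce.

    import play.api.libs.json.Json

    // selector: derived from the Spark filters; fields: the required columns;
    // limit/skip: this partition's window over the query result.
    val postData = Json.stringify(Json.obj(
      "selector" -> Json.obj("airportName" -> Json.obj("$gt" -> "Moscow")),
      "fields"   -> Json.toJson(Seq("_id", "airportName")),
      "limit"    -> 25,
      "skip"     -> 0))
    // Produces JSON like:
    // {"selector":{"airportName":{"$gt":"Moscow"}},"fields":["_id","airportName"],"limit":25,"skip":0}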



[16/50] [abbrv] incubator-livy-website git commit: [MINOR] update ImportOrderChecker

Posted by lr...@apache.org.
[MINOR] update ImportOrderChecker

Update Scalastyle configuration to group org.apache.bahir.* import statements below org.apache.spark.* imports

Closes #30
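
For reference, a short hypothetical import block that satisfies the updated
checker, with the groups ordered java, scala, third-party, spark and then
bahir, separated by blank lines:

    import java.io.File

    import scala.collection.mutable.ArrayBuffer

    import org.eclipse.paho.client.mqttv3.MqttClient

    import org.apache.spark.SparkConf

    import org.apache.bahir.utils.Logging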


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/560a799c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/560a799c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/560a799c

Branch: refs/heads/master
Commit: 560a799cf7900d960dc7381962c23b78dc46a183
Parents: b7cb52b
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Mon Jan 9 14:46:17 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Wed Jan 18 14:46:16 2017 -0800

----------------------------------------------------------------------
 scalastyle-config.xml                                 | 14 +++++++-------
 .../bahir/sql/streaming/mqtt/MQTTStreamSource.scala   |  3 ++-
 .../bahir/sql/streaming/mqtt/MessageStore.scala       |  3 ++-
 .../sql/streaming/mqtt/LocalMessageStoreSuite.scala   |  4 +++-
 .../sql/streaming/mqtt/MQTTStreamSourceSuite.scala    |  4 +++-
 .../bahir/sql/streaming/mqtt/MQTTTestUtils.scala      |  4 +++-
 6 files changed, 20 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/560a799c/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index c6aa3d9..0422503 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -212,13 +212,14 @@ This file is divided into 3 sections:
 
   <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
     <parameters>
-      <parameter name="groups">java,scala,3rdParty,spark</parameter>
+      <parameter name="groups">java,scala,3rdParty,spark,bahir</parameter>
       <parameter name="group.java">javax?\..*</parameter>
       <parameter name="group.scala">scala\..*</parameter>
-      <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
+      <parameter name="group.3rdParty">(?!org\.apache\.(spark|bahir)\.).*</parameter>
       <parameter name="group.spark">org\.apache\.spark\..*</parameter>
+      <parameter name="group.bahir">org\.apache\.bahir\..*</parameter>
     </parameters>
-    <customMessage>Imports should be grouped and ordered: java,scala,3rdParty,spark</customMessage>
+    <customMessage>Imports should be grouped and ordered: java, scala, 3rd-party, spark, bahir</customMessage>
   </check>
 
   <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
@@ -245,6 +246,8 @@ This file is divided into 3 sections:
     <customMessage>Omit braces in case clauses.</customMessage>
   </check>
 
+  <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
+
   <!-- ================================================================================ -->
   <!--       rules we'd like to enforce, but haven't cleaned up the codebase yet        -->
   <!-- ================================================================================ -->
@@ -262,9 +265,6 @@ This file is divided into 3 sections:
     </parameters>
   </check>
 
-  <!-- Should turn this on, but we have a few places that need to be fixed first -->
-  <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
-
   <!-- ================================================================================ -->
   <!--                               rules we don't want                                -->
   <!-- ================================================================================ -->
@@ -274,7 +274,7 @@ This file is divided into 3 sections:
     <customMessage>Illegal import of sun._ or java.awt._</customMessage>
   </check>
 
-  <!-- We want the opposite of this: NewLineAtEofChecker -->
+  <!-- We want the opposite of this: NewLineAtEofChecker is enabled above -->
   <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
 
   <!-- This one complains about all kinds of random things. Disable. -->

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/560a799c/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
index 8857edb..1739ff3 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSource.scala
@@ -27,7 +27,6 @@ import scala.collection.concurrent.TrieMap
 import scala.collection.mutable.ArrayBuffer
 import scala.util.{Failure, Success, Try}
 
-import org.apache.bahir.utils.Logging
 import org.eclipse.paho.client.mqttv3._
 import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence}
 
@@ -36,6 +35,8 @@ import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
 import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
 import org.apache.spark.sql.types.{StringType, StructField, StructType, TimestampType}
 
+import org.apache.bahir.utils.Logging
+
 
 object MQTTStreamConstants {
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/560a799c/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
index e8e0f7d..84fd8c4 100644
--- a/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
+++ b/sql-streaming-mqtt/src/main/scala/org/apache/bahir/sql/streaming/mqtt/MessageStore.scala
@@ -23,12 +23,13 @@ import java.util
 
 import scala.reflect.ClassTag
 
-import org.apache.bahir.utils.Logging
 import org.eclipse.paho.client.mqttv3.{MqttClientPersistence, MqttPersistable, MqttPersistenceException}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.serializer.{JavaSerializer, Serializer, SerializerInstance}
 
+import org.apache.bahir.utils.Logging
+
 
 /** A message store for MQTT stream source for SQL Streaming. */
 trait MessageStore {

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/560a799c/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
index 44da041..9c678cb 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/LocalMessageStoreSuite.scala
@@ -19,13 +19,15 @@ package org.apache.bahir.sql.streaming.mqtt
 
 import java.io.File
 
-import org.apache.bahir.utils.BahirUtils
 import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
 import org.scalatest.BeforeAndAfter
 
 import org.apache.spark.{SparkConf, SparkFunSuite}
 import org.apache.spark.serializer.JavaSerializer
 
+import org.apache.bahir.utils.BahirUtils
+
+
 class LocalMessageStoreSuite extends SparkFunSuite with BeforeAndAfter {
 
   private val testData = Seq(1, 2, 3, 4, 5, 6)

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/560a799c/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index f9a4bed..38971a0 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -24,7 +24,6 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.concurrent.Future
 
-import org.apache.bahir.utils.BahirUtils
 import org.eclipse.paho.client.mqttv3.MqttException
 import org.scalatest.BeforeAndAfter
 
@@ -32,6 +31,9 @@ import org.apache.spark.{SharedSparkContext, SparkFunSuite}
 import org.apache.spark.sql.{DataFrame, SQLContext}
 import org.apache.spark.sql.execution.streaming.LongOffset
 
+import org.apache.bahir.utils.BahirUtils
+
+
 class MQTTStreamSourceSuite extends SparkFunSuite with SharedSparkContext with BeforeAndAfter {
 
   protected var mqttTestUtils: MQTTTestUtils = _

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/560a799c/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
index bebeeef..9c7399f 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTTestUtils.scala
@@ -21,10 +21,12 @@ import java.io.File
 import java.net.{ServerSocket, URI}
 
 import org.apache.activemq.broker.{BrokerService, TransportConnector}
-import org.apache.bahir.utils.Logging
 import org.eclipse.paho.client.mqttv3._
 import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
 
+import org.apache.bahir.utils.Logging
+
+
 class MQTTTestUtils(tempDir: File, port: Int = 0) extends Logging {
 
   private val persistenceDir = tempDir.getAbsolutePath


[41/50] [abbrv] incubator-livy-website git commit: [BAHIR-88] Produce distributions without release temp files

Posted by lr...@apache.org.
[BAHIR-88] Produce distributions without release temp files


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/ba68b358
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/ba68b358
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/ba68b358

Branch: refs/heads/master
Commit: ba68b3587ad4011a093bcaad921035f26907967c
Parents: 38c1578
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 20:13:45 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 20:13:45 2017 -0700

----------------------------------------------------------------------
 dev/release-build.sh | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/ba68b358/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index b207b93..fdb1f37 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -254,6 +254,13 @@ if [[ "$RELEASE_PREPARE" == "true" ]]; then
     if [ -z "$DRY_RUN" ]; then
         svn co $RELEASE_STAGING_LOCATION svn-bahir
         mkdir -p svn-bahir/$RELEASE_VERSION-$RELEASE_RC
+
+        cd "$BASE_DIR/target/bahir"
+        git checkout $RELEASE_TAG
+        git clean -d -f -x
+
+        $MVN $PUBLISH_PROFILES clean install -DskipTests -Darguments="-DskipTests"
+
         cp bahir/distribution/target/*.tar.gz svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
         cp bahir/distribution/target/*.zip    svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
 


[46/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.1.1-rc2

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.1.1-rc2


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/26bdee0b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/26bdee0b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/26bdee0b

Branch: refs/heads/master
Commit: 26bdee0bacb9c922abda09314036e9df0e97d88f
Parents: dcb4bbd
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 21:36:35 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 21:36:35 2017 -0700

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-cloudant/pom.xml       | 2 +-
 sql-streaming-akka/pom.xml | 2 +-
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-pubsub/pom.xml   | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 77104d1..f617db7 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.2.0-SNAPSHOT</version>
+        <version>2.1.1</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 81f2e28..7d4e89b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.2.0-SNAPSHOT</version>
+  <version>2.1.1</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.1.1-rc2</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/sql-cloudant/pom.xml
----------------------------------------------------------------------
diff --git a/sql-cloudant/pom.xml b/sql-cloudant/pom.xml
index 5860033..bfd0571 100644
--- a/sql-cloudant/pom.xml
+++ b/sql-cloudant/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/sql-streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/pom.xml b/sql-streaming-akka/pom.xml
index 4d7040b..9e134d5 100644
--- a/sql-streaming-akka/pom.xml
+++ b/sql-streaming-akka/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.2.0-SNAPSHOT</version>
+        <version>2.1.1</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 4a01ef5..20518b0 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 36ce385..2e95ac0 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 2935f51..b5b9ac3 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/streaming-pubsub/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-pubsub/pom.xml b/streaming-pubsub/pom.xml
index c3da90f..1458a19 100644
--- a/streaming-pubsub/pom.xml
+++ b/streaming-pubsub/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>bahir-parent_2.11</artifactId>
     <groupId>org.apache.bahir</groupId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 949e4b3..8004aa2 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/26bdee0b/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index db6616d..eb93b7b 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[21/50] [abbrv] incubator-livy-website git commit: [MINOR] Fix Maven artifact IDs in README.md files

Posted by lr...@apache.org.
[MINOR] Fix Maven artifact IDs in README.md files

The spark-shell usage examples show an underscore instead of a dash in
the Maven artifact ID ("spark-streaming_xxx" vs "spark-streaming-xxx").
Consequently, Spark cannot resolve these broken Maven coordinates.

  $ bin/spark-shell \
      --packages org.apache.bahir:spark-streaming_akka_2.11:2.0.2

  $ bin/spark-shell \
      --packages org.apache.bahir:spark-streaming-akka_2.11:2.0.2
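
For comparison, the dash-separated artifact name is also what the "Using SBT"
sections of the Bahir READMEs expect; the %% operator appends the Scala binary
version suffix (the underscore part) automatically. A minimal sketch of the
corresponding sbt coordinate, matching the corrected spark-shell example above:

    // sbt build definition (Scala DSL); %% expands the name to spark-streaming-akka_2.11
    libraryDependencies += "org.apache.bahir" %% "spark-streaming-akka" % "2.0.2"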


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/cf0d7408
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/cf0d7408
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/cf0d7408

Branch: refs/heads/master
Commit: cf0d7408157f458bb4abc8ef76759ad084ff6e1f
Parents: 748d056
Author: Christian Kadner <ck...@apache.org>
Authored: Sat Jan 28 05:37:36 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Sat Jan 28 05:37:36 2017 -0800

----------------------------------------------------------------------
 streaming-akka/README.md    | 2 +-
 streaming-mqtt/README.md    | 2 +-
 streaming-twitter/README.md | 2 +-
 streaming-zeromq/README.md  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/cf0d7408/streaming-akka/README.md
----------------------------------------------------------------------
diff --git a/streaming-akka/README.md b/streaming-akka/README.md
index bd5fb11..9c642aa 100644
--- a/streaming-akka/README.md
+++ b/streaming-akka/README.md
@@ -18,7 +18,7 @@ Using Maven:
 This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
 For example, to include it when starting the spark shell:
 
-    $ bin/spark-shell --packages org.apache.bahir:spark-streaming_akka_2.11:2.1.0-SNAPSHOT
+    $ bin/spark-shell --packages org.apache.bahir:spark-streaming-akka_2.11:2.1.0-SNAPSHOT
 
 Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
 The `--packages` argument can also be used with `bin/spark-submit`.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/cf0d7408/streaming-mqtt/README.md
----------------------------------------------------------------------
diff --git a/streaming-mqtt/README.md b/streaming-mqtt/README.md
index 2ec0128..872375d 100644
--- a/streaming-mqtt/README.md
+++ b/streaming-mqtt/README.md
@@ -18,7 +18,7 @@ Using Maven:
 This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
 For example, to include it when starting the spark shell:
 
-    $ bin/spark-shell --packages org.apache.bahir:spark-streaming_mqtt_2.11:2.1.0-SNAPSHOT
+    $ bin/spark-shell --packages org.apache.bahir:spark-streaming-mqtt_2.11:2.1.0-SNAPSHOT
 
 Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
 The `--packages` argument can also be used with `bin/spark-submit`.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/cf0d7408/streaming-twitter/README.md
----------------------------------------------------------------------
diff --git a/streaming-twitter/README.md b/streaming-twitter/README.md
index d1ea428..18f228f 100644
--- a/streaming-twitter/README.md
+++ b/streaming-twitter/README.md
@@ -18,7 +18,7 @@ Using Maven:
 This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
 For example, to include it when starting the spark shell:
 
-    $ bin/spark-shell --packages org.apache.bahir:spark-streaming_twitter_2.11:2.1.0-SNAPSHOT
+    $ bin/spark-shell --packages org.apache.bahir:spark-streaming-twitter_2.11:2.1.0-SNAPSHOT
 
 Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
 The `--packages` argument can also be used with `bin/spark-submit`.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/cf0d7408/streaming-zeromq/README.md
----------------------------------------------------------------------
diff --git a/streaming-zeromq/README.md b/streaming-zeromq/README.md
index 453d1a1..37835e3 100644
--- a/streaming-zeromq/README.md
+++ b/streaming-zeromq/README.md
@@ -18,7 +18,7 @@ Using Maven:
 This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
 For example, to include it when starting the spark shell:
 
-    $ bin/spark-shell --packages org.apache.bahir:spark-streaming_zeromq_2.11:2.1.0-SNAPSHOT
+    $ bin/spark-shell --packages org.apache.bahir:spark-streaming-zeromq_2.11:2.1.0-SNAPSHOT
 
 Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
 The `--packages` argument can also be used with `bin/spark-submit`.


[47/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/a7e9ecb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/a7e9ecb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/a7e9ecb6

Branch: refs/heads/master
Commit: a7e9ecb6474c0d150bee5461aa6e3368b99efb90
Parents: 26bdee0
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 21:36:50 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 21:36:50 2017 -0700

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-cloudant/pom.xml       | 2 +-
 sql-streaming-akka/pom.xml | 2 +-
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-pubsub/pom.xml   | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index f617db7..77104d1 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.1</version>
+        <version>2.2.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7d4e89b..81f2e28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.1</version>
+  <version>2.2.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.1.1-rc2</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/sql-cloudant/pom.xml
----------------------------------------------------------------------
diff --git a/sql-cloudant/pom.xml b/sql-cloudant/pom.xml
index bfd0571..5860033 100644
--- a/sql-cloudant/pom.xml
+++ b/sql-cloudant/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/sql-streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/pom.xml b/sql-streaming-akka/pom.xml
index 9e134d5..4d7040b 100644
--- a/sql-streaming-akka/pom.xml
+++ b/sql-streaming-akka/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.1</version>
+        <version>2.2.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 20518b0..4a01ef5 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 2e95ac0..36ce385 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index b5b9ac3..2935f51 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/streaming-pubsub/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-pubsub/pom.xml b/streaming-pubsub/pom.xml
index 1458a19..c3da90f 100644
--- a/streaming-pubsub/pom.xml
+++ b/streaming-pubsub/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>bahir-parent_2.11</artifactId>
     <groupId>org.apache.bahir</groupId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 8004aa2..949e4b3 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/a7e9ecb6/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index eb93b7b..db6616d 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[48/50] [abbrv] incubator-livy-website git commit: [MINOR] Fix data file path in the streaming-twitter sample app

Posted by lr...@apache.org.
[MINOR] Fix data file path in the streaming-twitter sample app

Closes #46


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/dca8d4c2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/dca8d4c2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/dca8d4c2

Branch: refs/heads/master
Commit: dca8d4c2dca29947611afbb7f8a788481e53ea9b
Parents: a7e9ecb
Author: Dheeraj Dwivedi <dh...@gmail.com>
Authored: Sat Jun 24 13:19:40 2017 +0530
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Jun 24 05:29:04 2017 -0700

----------------------------------------------------------------------
 .../examples/streaming/twitter/TwitterHashTagJoinSentiments.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dca8d4c2/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
index 957e4c9..6243344 100644
--- a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterHashTagJoinSentiments.scala
@@ -64,7 +64,7 @@ object TwitterHashTagJoinSentiments {
     val hashTags = stream.flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))
 
     // Read in the word-sentiment list and create a static RDD from it
-    val wordSentimentFilePath = "data/streaming/AFINN-111.txt"
+    val wordSentimentFilePath = "streaming-twitter/examples/data/AFINN-111.txt"
     val wordSentiments = ssc.sparkContext.textFile(wordSentimentFilePath).map { line =>
       val Array(word, happinessValue) = line.split("\t")
       (word, happinessValue.toInt)


[08/50] [abbrv] incubator-livy-website git commit: [BAHIR-82] Bump Apache Spark dependency to release 2.0.2

Posted by lr...@apache.org.
[BAHIR-82] Bump Apache Spark dependency to release 2.0.2


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/08aa06cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/08aa06cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/08aa06cf

Branch: refs/heads/master
Commit: 08aa06cfc38ee503164a58c414cfe654aaed6c61
Parents: d43dad2
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Dec 3 09:15:46 2016 -0800
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Dec 3 09:15:46 2016 -0800

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/08aa06cf/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 070c011..a7ae91e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,7 +96,7 @@
     <log4j.version>1.2.17</log4j.version>
 
     <!-- Spark version -->
-    <spark.version>2.0.1</spark.version>
+    <spark.version>2.0.2</spark.version>
 
     <!-- Streaming Akka connector -->
     <akka.group>com.typesafe.akka</akka.group>


[29/50] [abbrv] incubator-livy-website git commit: [BAHIR-101] Spark SQL datasource for CouchDB/Cloudant

Posted by lr...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
new file mode 100644
index 0000000..e84a44c
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreDataAccess.scala
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant.common
+
+import java.util.concurrent.atomic.AtomicInteger
+
+import scala.collection.mutable.HashMap
+import scala.concurrent._
+import scala.concurrent.duration._
+import scala.language.implicitConversions
+import scala.util.{Failure, Success}
+
+import scalaj.http.{Http, HttpRequest, HttpResponse}
+import ExecutionContext.Implicits.global
+import org.slf4j.LoggerFactory
+import play.api.libs.json._
+
+import org.apache.spark.sql.sources._
+
+import org.apache.bahir.cloudant.CloudantConfig
+import org.apache.bahir.cloudant.common._
+
+
+class JsonStoreDataAccess (config: CloudantConfig)  {
+  lazy val logger = LoggerFactory.getLogger(getClass)
+  implicit lazy val timeout = config.requestTimeout
+
+  def getOne()( implicit columns: Array[String] = null): Seq[String] = {
+    var r = this.getQueryResult[Seq[String]](config.getOneUrlExcludeDDoc1(), processAll)
+    if (r.size == 0 ) {
+      r = this.getQueryResult[Seq[String]](config.getOneUrlExcludeDDoc2(), processAll)
+    }
+    if (r.size == 0) {
+      throw new RuntimeException("Database " + config.getDbname() +
+        " doesn't have any non-design documents!")
+    } else {
+      r
+    }
+  }
+
+  def getMany(limit: Int)(implicit columns: Array[String] = null): Seq[String] = {
+    if (limit == 0) {
+      throw new RuntimeException("Database " + config.getDbname() +
+        " schema sample size is 0!")
+    }
+    if (limit < -1) {
+      throw new RuntimeException("Database " + config.getDbname() +
+        " schema sample size is " + limit + "!")
+    }
+    var r = this.getQueryResult[Seq[String]](config.getAllDocsUrl(limit), processAll)
+    if (r.size == 0) {
+      r = this.getQueryResult[Seq[String]](config.getAllDocsUrlExcludeDDoc(limit), processAll)
+    }
+    if (r.size == 0) {
+      throw new RuntimeException("Database " + config.getDbname() +
+        " doesn't have any non-design documents!")
+    } else {
+      r
+    }
+  }
+
+  def getAll[T](url: String)
+      (implicit columns: Array[String] = null,
+      attrToFilters: Map[String, Array[Filter]] = null): Seq[String] = {
+    this.getQueryResult[Seq[String]](url, processAll)
+  }
+
+  def getIterator(skip: Int, limit: Int, url: String)
+      (implicit columns: Array[String] = null,
+      attrToFilters: Map[String, Array[Filter]] = null): Iterator[String] = {
+    implicit def convertSkip(skip: Int): String = {
+      val url = config.getLastUrl(skip)
+      if (url == null) {
+        skip.toString()
+      } else {
+        this.getQueryResult[String](url,
+          { result => config.getLastNum(Json.parse(result)).as[JsString].value})
+      }
+    }
+    val newUrl = config.getSubSetUrl(url, skip, limit)
+    this.getQueryResult[Iterator[String]](newUrl, processIterator)
+  }
+
+  def getTotalRows(url: String): Int = {
+    val totalUrl = config.getTotalUrl(url)
+    this.getQueryResult[Int](totalUrl,
+        { result => config.getTotalRows(Json.parse(result))})
+  }
+
+  private def processAll (result: String)
+      (implicit columns: Array[String],
+      attrToFilters: Map[String, Array[Filter]] = null) = {
+    logger.debug(s"processAll columns:$columns, attrToFilters:$attrToFilters")
+    val jsonResult: JsValue = Json.parse(result)
+    var rows = config.getRows(jsonResult)
+    if (config.viewName == null) {
+      // filter design docs
+      rows = rows.filter(r => FilterDDocs.filter(r))
+    }
+    rows.map(r => convert(r))
+  }
+
+  private def processIterator (result: String)
+    (implicit columns: Array[String],
+    attrToFilters: Map[String, Array[Filter]] = null): Iterator[String] = {
+    processAll(result).iterator
+  }
+
+  private def convert(rec: JsValue)(implicit columns: Array[String]): String = {
+    if (columns == null) return Json.stringify(Json.toJson(rec))
+    val m = new HashMap[String, JsValue]()
+    for ( x <- columns) {
+        val field = JsonUtil.getField(rec, x).getOrElse(JsNull)
+        m.put(x, field)
+    }
+    val result = Json.stringify(Json.toJson(m.toMap))
+    logger.debug(s"converted: $result")
+    result
+  }
+
+
+  def getChanges(url: String, processResults: (String) => String): String = {
+    getQueryResult(url, processResults)
+  }
+
+
+  private def getQueryResult[T]
+      (url: String, postProcessor: (String) => T)
+      (implicit columns: Array[String] = null,
+      attrToFilters: Map[String, Array[Filter]] = null) : T = {
+    logger.warn("Loading data from Cloudant using query: " + url)
+    val requestTimeout = config.requestTimeout.toInt
+    val clRequest: HttpRequest = config.username match {
+      case null =>
+        Http(url)
+            .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+            .header("User-Agent", "spark-cloudant")
+      case _ =>
+        Http(url)
+            .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+            .header("User-Agent", "spark-cloudant")
+            .auth(config.username, config.password)
+    }
+
+    val clResponse: HttpResponse[String] = clRequest.execute()
+    if (! clResponse.isSuccess) {
+      throw new RuntimeException("Database " + config.getDbname() +
+          " request error: " + clResponse.body)
+    }
+    val data = postProcessor(clResponse.body)
+    logger.debug(s"got result:$data")
+    data
+  }
+
+
+  def createDB(): Unit = {
+    val url = config.getDbUrl()
+    val requestTimeout = config.requestTimeout.toInt
+    val clRequest: HttpRequest = config.username match {
+      case null =>
+        Http(url)
+          .method("put")
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+          .header("User-Agent", "spark-cloudant")
+      case _ =>
+        Http(url)
+          .method("put")
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+          .header("User-Agent", "spark-cloudant")
+          .auth(config.username, config.password)
+    }
+
+    val clResponse: HttpResponse[String] = clRequest.execute()
+    if (! clResponse.isSuccess) {
+      throw new RuntimeException("Database " + config.getDbname() +
+        " create error: " + clResponse.body)
+    } else {
+      logger.warn(s"Database ${config.getDbname()} was created.")
+    }
+  }
+
+
+  def getClPostRequest(data: String): HttpRequest = {
+    val url = config.getBulkPostUrl()
+    val requestTimeout = config.requestTimeout.toInt
+    config.username match {
+      case null =>
+        Http(url)
+          .postData(data)
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+          .header("Content-Type", "application/json")
+          .header("User-Agent", "spark-cloudant")
+      case _ =>
+        Http(url)
+          .postData(data)
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = requestTimeout)
+          .header("Content-Type", "application/json")
+          .header("User-Agent", "spark-cloudant")
+          .auth(config.username, config.password)
+    }
+  }
+
+
+  def saveAll(rows: List[String]): Unit = {
+    if (rows.size == 0) return
+    val bulkSize = config.bulkSize
+    val bulks = rows.grouped(bulkSize).toList
+    val totalBulks = bulks.size
+    logger.debug(s"total records:${rows.size}=bulkSize:$bulkSize * totalBulks:$totalBulks")
+
+    val futures = bulks.map( bulk => {
+        val data = config.getBulkRows(bulk)
+        val clRequest: HttpRequest = getClPostRequest(data)
+        Future {
+          clRequest.execute()
+        }
+      }
+    )
+    // remaining - number of requests that still need to succeed
+    val remaining = new AtomicInteger(futures.length)
+    val p = Promise[HttpResponse[String]]
+    futures foreach {
+      _ onComplete {
+        case Success(clResponse: HttpResponse[String]) =>
+          // find if there was error in saving at least one of docs
+          val resBody: String = clResponse.body
+          val isErr = (resBody contains config.getConflictErrStr()) ||
+            (resBody contains config.getForbiddenErrStr())
+          if (!(clResponse.isSuccess) || isErr) {
+            val e = new RuntimeException("Save to database:" + config.getDbname() +
+                " failed with reason: " + clResponse.body)
+            p.tryFailure(e)
+          } else if (remaining.decrementAndGet() == 0) {
+            // succeed the whole save operation if all requests success
+            p.trySuccess(clResponse)
+          }
+        // if at least one save request fails - fail the whole save operation
+        case Failure(e) =>
+          p.tryFailure(e)
+      }
+    }
+
+    val mainFtr = p.future
+    mainFtr onSuccess {
+      case clResponsesList =>
+        logger.warn(s"Saved total ${rows.length} " +
+          s"with bulkSize $bulkSize " +
+          s"for database: ${config.getDbname()}")
+    }
+    mainFtr onFailure  {
+      case e =>
+        throw new RuntimeException("Save to database:" + config.getDbname() +
+          " failed with reason: " + e.getMessage)
+    }
+    Await.result(mainFtr, (config.requestTimeout * totalBulks).millis) // scalastyle:ignore
+  }
+
+}
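
The saveAll method above groups the rows into bulks of config.bulkSize, posts
each bulk asynchronously, decrements an AtomicInteger as responses come back,
and completes a single Promise once every bulk has succeeded (failing fast on
the first error). A self-contained sketch of that pattern, with a hypothetical
postBulk stub standing in for the Cloudant HTTP call:

    import java.util.concurrent.atomic.AtomicInteger

    import scala.concurrent.{Await, Future, Promise}
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration._
    import scala.util.{Failure, Success}

    // Hypothetical stub for the real scalaj-http bulk POST; returns true on success.
    def postBulk(bulk: List[String]): Boolean = true

    def saveAllSketch(rows: List[String], bulkSize: Int): Unit = {
      if (rows.isEmpty) return
      val bulks = rows.grouped(bulkSize).toList
      val remaining = new AtomicInteger(bulks.size) // bulk requests still to succeed
      val done = Promise[Unit]()

      bulks.map(bulk => Future(postBulk(bulk))) foreach {
        _ onComplete {
          case Success(true) =>
            // succeed the whole save only when every bulk request has succeeded
            if (remaining.decrementAndGet() == 0) done.trySuccess(())
          case Success(false) =>
            done.tryFailure(new RuntimeException("bulk save failed"))
          case Failure(e) =>
            done.tryFailure(e)
        }
      }
      Await.result(done.future, (10L * bulks.size).seconds)
    }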

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
new file mode 100644
index 0000000..46774f5
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreRDD.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant.common
+
+import org.slf4j.LoggerFactory
+
+import org.apache.spark.Partition
+import org.apache.spark.SparkContext
+import org.apache.spark.TaskContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.sources.Filter
+
+import org.apache.bahir.cloudant.CloudantConfig
+
+/**
+ * JsonStoreRDDPartition defines each partition as a subset of a query result:
+ * the number of rows to return (limit) and the number of rows to skip.
+ */
+
+private[cloudant] class JsonStoreRDDPartition(val skip: Int, val limit: Int,
+    val idx: Int, val config: CloudantConfig,
+    val attrToFilters: Map[String, Array[Filter]])
+    extends Partition with Serializable{
+  val index = idx
+}
+
+/**
+ *  The main purpose of JsonStoreRDD is to enable parallel reads, partition by
+ *  partition, for data-access getAll (by condition) scenarios.
+ *  defaultPartitions: how many partitions are intended; the actual number is
+ *  recalculated based on the total number of rows
+ *  and minInPartition / maxInPartition.
+ *  maxRowsInPartition: -1 means unlimited
+ */
+class JsonStoreRDD(sc: SparkContext, config: CloudantConfig,
+    url: String)(implicit requiredcolumns: Array[String] = null,
+    attrToFilters: Map[String, Array[Filter]] = null)
+  extends RDD[String](sc, Nil) {
+
+  lazy val totalRows = {
+      new JsonStoreDataAccess(config).getTotalRows(url)
+  }
+  lazy val totalPartition = {
+    if (totalRows == 0 || ! config.allowPartition() )  1
+    else if (totalRows < config.partitions * config.minInPartition) {
+      val total = totalRows / config.minInPartition
+      if (total == 0 ) {
+        total + 1
+      } else {
+        total
+      }
+    }
+    else if (config.maxInPartition <=0) config.partitions
+    else {
+      val total = totalRows / config.maxInPartition
+      if ( totalRows % config.maxInPartition != 0) {
+        total + 1
+      }
+      else {
+        total
+      }
+    }
+  }
+
+  lazy val limitPerPartition = {
+    val limit = totalRows/totalPartition
+    if (totalRows % totalPartition != 0) {
+      limit + 1
+    } else {
+      limit
+    }
+  }
+
+  override def getPartitions: Array[Partition] = {
+    val logger = LoggerFactory.getLogger(getClass)
+    logger.info(s"Partition config - total=$totalPartition, " +
+        s"limit=$limitPerPartition for totalRows of $totalRows")
+
+    (0 until totalPartition).map(i => {
+      val skip = i * limitPerPartition
+      new JsonStoreRDDPartition(skip, limitPerPartition, i, config,
+          attrToFilters).asInstanceOf[Partition]
+    }).toArray
+  }
+
+  override def compute(splitIn: Partition, context: TaskContext):
+      Iterator[String] = {
+    val myPartition = splitIn.asInstanceOf[JsonStoreRDDPartition]
+    new JsonStoreDataAccess(myPartition.config).getIterator(myPartition.skip,
+        myPartition.limit, url)
+  }
+}
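
To make the partition sizing above concrete, the following sketch reproduces
the totalPartition / limitPerPartition arithmetic for one hypothetical
configuration (the numbers are illustrative only, and the allowPartition()
check is omitted):

    // Hypothetical configuration values, chosen only to illustrate the arithmetic.
    val totalRows      = 25000
    val partitions     = 10    // intended number of partitions
    val minInPartition = 10
    val maxInPartition = 2000  // -1 would mean unlimited

    val totalPartition =
      if (totalRows == 0) 1
      else if (totalRows < partitions * minInPartition) {
        val total = totalRows / minInPartition
        if (total == 0) total + 1 else total
      } else if (maxInPartition <= 0) partitions
      else {
        val total = totalRows / maxInPartition
        if (totalRows % maxInPartition != 0) total + 1 else total
      }

    val limitPerPartition =
      if (totalRows % totalPartition != 0) totalRows / totalPartition + 1
      else totalRows / totalPartition

    // 25000 / 2000 = 12 with a remainder, so totalPartition = 13,
    // and limitPerPartition = 25000 / 13 + 1 = 1924 rows at most per partition.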

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonUtil.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonUtil.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonUtil.scala
new file mode 100644
index 0000000..cd46b16
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonUtil.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant.common
+
+import play.api.libs.json.{JsUndefined, JsValue}
+import scala.util.control.Breaks._
+
+object JsonUtil {
+  def getField(row: JsValue, field: String) : Option[JsValue] = {
+    var path = field.split('.')
+    var currentValue = row
+    var finalValue: Option[JsValue] = None
+    breakable {
+      for (i <- path.indices) {
+        val f: Option[JsValue] = (currentValue \ path(i)).toOption
+        f match {
+          case Some(f2) => currentValue = f2
+          case None => break
+        }
+        if (i == path.length -1) {
+          // The leaf node
+          finalValue = Some(currentValue)
+        }
+      }
+    }
+    finalValue
+  }
+}
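
JsonUtil.getField walks a dot-separated path into a Play JSON value and returns
None as soon as any path segment is missing. A small usage sketch (the sample
document is made up):

    import play.api.libs.json.Json

    import org.apache.bahir.cloudant.common.JsonUtil

    val doc = Json.parse("""{"address": {"city": "Berlin", "zip": "10115"}, "age": 42}""")

    JsonUtil.getField(doc, "address.city")   // Some(JsString("Berlin"))
    JsonUtil.getField(doc, "age")            // Some(JsNumber(42))
    JsonUtil.getField(doc, "address.street") // None -- leaf is missing
    JsonUtil.getField(doc, "missing.city")   // None -- intermediate node is missing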


[23/50] [abbrv] incubator-livy-website git commit: [BAHIR-87] Prepare release based on Apache Spark 2.1.0

Posted by lr...@apache.org.
[BAHIR-87] Prepare release based on Apache Spark 2.1.0


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/f97f2dfa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/f97f2dfa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/f97f2dfa

Branch: refs/heads/master
Commit: f97f2dfa2fc51a8dcdec82d45aa68b2cddc22b3a
Parents: 695ca98
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Fri Feb 3 19:02:31 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Feb 3 19:02:31 2017 -0800

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f97f2dfa/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e7cb338..0903858 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,7 +96,7 @@
     <log4j.version>1.2.17</log4j.version>
 
     <!-- Spark version -->
-    <spark.version>2.0.2</spark.version>
+    <spark.version>2.1.0</spark.version>
 
     <!-- Streaming Akka connector -->
     <akka.group>com.typesafe.akka</akka.group>


[39/50] [abbrv] incubator-livy-website git commit: [BAHIR-116] Add spark streaming connector to Google Cloud Pub/Sub

Posted by lr...@apache.org.
[BAHIR-116] Add spark streaming connector to Google Cloud Pub/Sub

Closes #42.


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/56613263
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/56613263
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/56613263

Branch: refs/heads/master
Commit: 56613263ca405aa5b45f32565ad4641f0a7b9752
Parents: 2a43076
Author: Chen Bin <bc...@talend.com>
Authored: Thu Apr 27 17:18:32 2017 +0800
Committer: Luciano Resende <lr...@apache.org>
Committed: Tue Jun 6 21:03:49 2017 -0700

----------------------------------------------------------------------
 pom.xml                                         |   1 +
 streaming-pubsub/README.md                      |  45 +++
 .../PubsubWordCount.scala                       | 159 +++++++++++
 streaming-pubsub/pom.xml                        |  86 ++++++
 .../streaming/pubsub/PubsubInputDStream.scala   | 286 +++++++++++++++++++
 .../spark/streaming/pubsub/PubsubUtils.scala    | 105 +++++++
 .../streaming/pubsub/SparkGCPCredentials.scala  | 166 +++++++++++
 .../streaming/LocalJavaStreamingContext.java    |  44 +++
 .../streaming/pubsub/JavaPubsubStreamSuite.java |  38 +++
 .../src/test/resources/log4j.properties         |  28 ++
 .../spark/streaming/pubsub/PubsubFunSuite.scala |  46 +++
 .../streaming/pubsub/PubsubStreamSuite.scala    | 138 +++++++++
 .../streaming/pubsub/PubsubTestUtils.scala      | 142 +++++++++
 .../SparkGCPCredentialsBuilderSuite.scala       |  95 ++++++
 14 files changed, 1379 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index f76aac5..81f2e28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -82,6 +82,7 @@
     <module>sql-streaming-mqtt</module>
     <module>streaming-twitter</module>
     <module>streaming-zeromq</module>
+    <module>streaming-pubsub</module>
   </modules>
 
   <properties>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/README.md
----------------------------------------------------------------------
diff --git a/streaming-pubsub/README.md b/streaming-pubsub/README.md
new file mode 100644
index 0000000..faf9826
--- /dev/null
+++ b/streaming-pubsub/README.md
@@ -0,0 +1,45 @@
+A library for reading data from [Google Cloud Pub/Sub](https://cloud.google.com/pubsub/) using Spark Streaming.
+
+## Linking
+
+Using SBT:
+    
+    libraryDependencies += "org.apache.bahir" %% "spark-streaming-pubsub" % "2.2.0-SNAPSHOT"
+    
+Using Maven:
+    
+    <dependency>
+        <groupId>org.apache.bahir</groupId>
+        <artifactId>spark-streaming-pubsub_2.11</artifactId>
+        <version>2.2.0-SNAPSHOT</version>
+    </dependency>
+
+This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
+For example, to include it when starting the spark shell:
+
+    $ bin/spark-shell --packages org.apache.bahir:spark-streaming-pubsub_2.11:2.2.0-SNAPSHOT
+
+Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
+The `--packages` argument can also be used with `bin/spark-submit`.
+
+## Examples
+
+First you need to create credentials using SparkGCPCredentials, which supports four types of credentials:
+* application default
+    `SparkGCPCredentials.builder.build()`
+* json type service account
+    `SparkGCPCredentials.builder.jsonServiceAccount(PATH_TO_JSON_KEY).build()`
+* p12 type service account
+    `SparkGCPCredentials.builder.p12ServiceAccount(PATH_TO_P12_KEY, EMAIL_ACCOUNT).build()`
+* metadata service account (running on Dataproc)
+    `SparkGCPCredentials.builder.metadataServiceAccount().build()`
+
+### Scala API
+    
+    val lines = PubsubUtils.createStream(ssc, projectId, subscriptionName, credential, ..)
+    
+### Java API
+    
+    JavaDStream<SparkPubsubMessage> lines = PubsubUtils.createStream(jssc, projectId, subscriptionName, credential...) 
+
+See end-to-end examples at [Google Cloud Pubsub Examples](streaming-pubsub/examples)
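
Putting the README pieces together, a minimal sketch of a consumer that uses a
JSON service-account key might look like the following; the project id,
subscription name and key path are placeholders, and the createStream arguments
mirror the PubsubWordCount example added later in this commit:

    import org.apache.spark.SparkConf
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.pubsub.{PubsubUtils, SparkGCPCredentials}

    val sparkConf = new SparkConf().setAppName("PubsubSketch").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // JSON service-account credentials; the key path is a placeholder.
    val credentials = SparkGCPCredentials.builder
      .jsonServiceAccount("/path/to/key.json")
      .build()

    val messages = PubsubUtils.createStream(
      ssc, "my-project", None, "my-subscription",   // project id, optional topic, subscription
      credentials, StorageLevel.MEMORY_AND_DISK_SER_2)

    messages.map(m => new String(m.getData())).print()

    ssc.start()
    ssc.awaitTermination()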

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/examples/src/main/scala/org.apache.spark.examples.streaming.pubsub/PubsubWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/examples/src/main/scala/org.apache.spark.examples.streaming.pubsub/PubsubWordCount.scala b/streaming-pubsub/examples/src/main/scala/org.apache.spark.examples.streaming.pubsub/PubsubWordCount.scala
new file mode 100644
index 0000000..00f1fa1
--- /dev/null
+++ b/streaming-pubsub/examples/src/main/scala/org.apache.spark.examples.streaming.pubsub/PubsubWordCount.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming.pubsub
+
+import scala.collection.JavaConverters._
+import scala.util.Random
+
+import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
+import com.google.api.client.json.jackson2.JacksonFactory
+import com.google.api.services.pubsub.Pubsub.Builder
+import com.google.api.services.pubsub.model.PublishRequest
+import com.google.api.services.pubsub.model.PubsubMessage
+import com.google.cloud.hadoop.util.RetryHttpInitializer
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.dstream.ReceiverInputDStream
+import org.apache.spark.streaming.pubsub.ConnectionUtils
+import org.apache.spark.streaming.pubsub.PubsubTestUtils
+import org.apache.spark.streaming.pubsub.PubsubUtils
+import org.apache.spark.streaming.pubsub.SparkGCPCredentials
+import org.apache.spark.streaming.pubsub.SparkPubsubMessage
+import org.apache.spark.streaming.Milliseconds
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.SparkConf
+
+
+/**
+ * Consumes messages from a Google Cloud Pub/Sub subscription and does a word count.
+ * In this example it uses application default credentials, so you need to use the gcloud
+ * client to generate a token file before running the example.
+ *
+ * Usage: PubsubWordCount <projectId> <subscription>
+ *   <projectId> is the name of the Google Cloud project
+ *   <subscription> is the subscription to a topic
+ *
+ * Example:
+ *  # use gcloud client generate token file
+ *  $ gcloud init
+ *  $ gcloud auth application-default login
+ *
+ *  # run the example
+ *  $ bin/run-example \
+ *      org.apache.spark.examples.streaming.pubsub.PubsubWordCount project_1 subscription_1
+ *
+ */
+object PubsubWordCount {
+  def main(args: Array[String]): Unit = {
+    if (args.length != 2) {
+      System.err.println(
+        """
+          |Usage: PubsubWordCount <projectId> <subscription>
+          |
+          |     <projectId> is the name of the Google Cloud project
+          |     <subscription> is the subscription to a topic
+          |
+        """.stripMargin)
+      System.exit(1)
+    }
+
+    val Seq(projectId, subscription) = args.toSeq
+
+    val sparkConf = new SparkConf().setAppName("PubsubWordCount")
+    val ssc = new StreamingContext(sparkConf, Milliseconds(2000))
+
+    val pubsubStream: ReceiverInputDStream[SparkPubsubMessage] = PubsubUtils.createStream(
+      ssc, projectId, None, subscription,
+      SparkGCPCredentials.builder.build(), StorageLevel.MEMORY_AND_DISK_SER_2)
+
+    val wordCounts =
+      pubsubStream.map(message => (new String(message.getData()), 1)).reduceByKey(_ + _)
+
+    wordCounts.print()
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+
+}
+
+/**
+ * A Pub/Sub publisher for demonstration purposes; it publishes messages in 10 batches (one per second).
+ * You can set the number of messages in each batch via <records-per-sec>,
+ * and each message will contain only one word from this list:
+ * ("google", "cloud", "pubsub", "say", "hello")
+ *
+ * Usage: PubsubPublisher <projectId> <topic> <records-per-sec>
+ *
+ *   <projectId> is the name of the Google Cloud project
+ *   <topic> is the topic of Google Cloud Pub/Sub
+ *   <records-per-sec> is the rate of records per second to put onto the topic
+ *
+ * Example:
+ *    `$ bin/run-example \
+ *      org.apache.spark.examples.streaming.pubsub.PubsubPublisher project_1 topic_1 10`
+ */
+object PubsubPublisher {
+  def main(args: Array[String]): Unit = {
+    if (args.length != 3) {
+      System.err.println(
+        """
+          |Usage: PubsubPublisher <projectId> <topic> <records-per-sec>
+          |
+          |     <projectId> is the name of the Google Cloud project
+          |     <topic> is the topic of Google cloud Pub/Sub
+          |     <records-per-sec> is the rate of records per second to put onto the topic
+          |
+        """.stripMargin)
+      System.exit(1)
+    }
+
+    val Seq(projectId, topic, recordsPerSecond) = args.toSeq
+
+    val APP_NAME = this.getClass.getSimpleName
+
+    val client = new Builder(
+      GoogleNetHttpTransport.newTrustedTransport(),
+      JacksonFactory.getDefaultInstance(),
+      new RetryHttpInitializer(
+        SparkGCPCredentials.builder.build().provider,
+        APP_NAME
+      ))
+        .setApplicationName(APP_NAME)
+        .build()
+
+    val randomWords = List("google", "cloud", "pubsub", "say", "hello")
+    val publishRequest = new PublishRequest()
+    for (i <- 1 to 10) {
+      val messages = (1 to recordsPerSecond.toInt).map { recordNum =>
+          val randomWordIndex = Random.nextInt(randomWords.size)
+          new PubsubMessage().encodeData(randomWords(randomWordIndex).getBytes())
+      }
+      publishRequest.setMessages(messages.asJava)
+      client.projects().topics()
+          .publish(s"projects/$projectId/topics/$topic", publishRequest)
+          .execute()
+      println(s"Published data. topic: $topic; Message: $publishRequest")
+
+      Thread.sleep(1000)
+    }
+
+  }
+}
+// scalastyle:on

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-pubsub/pom.xml b/streaming-pubsub/pom.xml
new file mode 100644
index 0000000..c3da90f
--- /dev/null
+++ b/streaming-pubsub/pom.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>bahir-parent_2.11</artifactId>
+    <groupId>org.apache.bahir</groupId>
+    <version>2.2.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.bahir</groupId>
+  <artifactId>spark-streaming-pubsub_2.11</artifactId>
+  <properties>
+    <sbt.project.name>streaming-pubsub</sbt.project.name>
+  </properties>
+  <packaging>jar</packaging>
+  <name>Apache Bahir - Spark Streaming Google PubSub</name>
+  <url>http://bahir.apache.org/</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-tags_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-pubsub</artifactId>
+      <version>v1-rev355-1.22.0</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud.bigdataoss</groupId>
+      <artifactId>util</artifactId>
+      <version>1.6.0</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud.bigdataoss</groupId>
+      <artifactId>util-hadoop</artifactId>
+      <version>1.6.0-hadoop2</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+</project>
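
For projects built with sbt instead of Maven, the coordinates declared in this POM translate to a single library dependency. A minimal sketch only: the version shown is the development snapshot from the parent POM above, so substitute whichever released Bahir version you actually depend on.

    // build.sbt -- sketch; "%%" appends the Scala binary version (_2.11) to the artifactId
    libraryDependencies += "org.apache.bahir" %% "spark-streaming-pubsub" % "2.2.0-SNAPSHOT"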

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubInputDStream.scala b/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubInputDStream.scala
new file mode 100644
index 0000000..e769f2e
--- /dev/null
+++ b/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubInputDStream.scala
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import java.io.{Externalizable, ObjectInput, ObjectOutput}
+
+import scala.collection.JavaConverters._
+import scala.util.control.NonFatal
+
+import com.google.api.client.auth.oauth2.Credential
+import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
+import com.google.api.client.googleapis.json.GoogleJsonResponseException
+import com.google.api.client.json.jackson2.JacksonFactory
+import com.google.api.services.pubsub.Pubsub.Builder
+import com.google.api.services.pubsub.model.{AcknowledgeRequest, PubsubMessage, PullRequest}
+import com.google.api.services.pubsub.model.Subscription
+import com.google.cloud.hadoop.util.RetryHttpInitializer
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.dstream.ReceiverInputDStream
+import org.apache.spark.streaming.receiver.Receiver
+import org.apache.spark.util.Utils
+
+/**
+ * Input stream that subscribes to messages from a Google Cloud Pub/Sub subscription.
+ * @param project         Google cloud project id
+ * @param topic           Topic name for creating the subscription if needed
+ * @param subscription    Pub/Sub subscription name
+ * @param credential      Google cloud project credential to access Pub/Sub service
+ */
+private[streaming]
+class PubsubInputDStream(
+    _ssc: StreamingContext,
+    val project: String,
+    val topic: Option[String],
+    val subscription: String,
+    val credential: SparkGCPCredentials,
+    val _storageLevel: StorageLevel
+) extends ReceiverInputDStream[SparkPubsubMessage](_ssc) {
+
+  override def getReceiver(): Receiver[SparkPubsubMessage] = {
+    new PubsubReceiver(project, topic, subscription, credential, _storageLevel)
+  }
+}
+
+/**
+ * A wrapper class for PubsubMessage with a custom serialization format.
+ *
+ * This is necessary because PubsubMessage uses inner data structures
+ * which are not serializable.
+ */
+class SparkPubsubMessage() extends Externalizable {
+
+  private[pubsub] var message = new PubsubMessage
+
+  def getData(): Array[Byte] = message.decodeData()
+
+  def getAttributes(): java.util.Map[String, String] = message.getAttributes
+
+  def getMessageId(): String = message.getMessageId
+
+  def getPublishTime(): String = message.getPublishTime
+
+  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
+    message.decodeData() match {
+      case null => out.writeInt(-1)
+      case data =>
+        out.writeInt(data.size)
+        out.write(data)
+    }
+
+    message.getMessageId match {
+      case null => out.writeInt(-1)
+      case id =>
+        val idBuff = Utils.serialize(id)
+        out.writeInt(idBuff.length)
+        out.write(idBuff)
+    }
+
+    message.getPublishTime match {
+      case null => out.writeInt(-1)
+      case time =>
+        val publishTimeBuff = Utils.serialize(time)
+        out.writeInt(publishTimeBuff.length)
+        out.write(publishTimeBuff)
+    }
+
+    message.getAttributes match {
+      case null => out.writeInt(-1)
+      case attrs =>
+        out.writeInt(attrs.size())
+        for ((k, v) <- message.getAttributes.asScala) {
+          val keyBuff = Utils.serialize(k)
+          out.writeInt(keyBuff.length)
+          out.write(keyBuff)
+          val valBuff = Utils.serialize(v)
+          out.writeInt(valBuff.length)
+          out.write(valBuff)
+        }
+    }
+  }
+
+  override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
+    in.readInt() match {
+      case -1 => message.encodeData(null)
+      case bodyLength =>
+        val data = new Array[Byte](bodyLength)
+        in.readFully(data)
+        message.encodeData(data)
+    }
+
+    in.readInt() match {
+      case -1 => message.setMessageId(null)
+      case idLength =>
+        val idBuff = new Array[Byte](idLength)
+        in.readFully(idBuff)
+        val id: String = Utils.deserialize(idBuff)
+        message.setMessageId(id)
+    }
+
+    in.readInt() match {
+      case -1 => message.setPublishTime(null)
+      case publishTimeLength =>
+        val publishTimeBuff = new Array[Byte](publishTimeLength)
+        in.readFully(publishTimeBuff)
+        val publishTime: String = Utils.deserialize(publishTimeBuff)
+        message.setPublishTime(publishTime)
+    }
+
+    in.readInt() match {
+      case -1 => message.setAttributes(null)
+      case numAttributes =>
+        val attributes = new java.util.HashMap[String, String]
+        for (i <- 0 until numAttributes) {
+          val keyLength = in.readInt()
+          val keyBuff = new Array[Byte](keyLength)
+          in.readFully(keyBuff)
+          val key: String = Utils.deserialize(keyBuff)
+
+          val valLength = in.readInt()
+          val valBuff = new Array[Byte](valLength)
+          in.readFully(valBuff)
+          val value: String = Utils.deserialize(valBuff)
+
+          attributes.put(key, value)
+        }
+        message.setAttributes(attributes)
+    }
+  }
+}
+
+private[pubsub]
+object ConnectionUtils {
+  val transport = GoogleNetHttpTransport.newTrustedTransport()
+  val jacksonFactory = JacksonFactory.getDefaultInstance
+
+  // The topic or subscription already exists.
+  // This is an error on creation operations.
+  val ALREADY_EXISTS = 409
+
+  /**
+   * Client can retry with these response status
+   */
+  val RESOURCE_EXHAUSTED = 429
+
+  val CANCELLED = 499
+
+  val INTERNAL = 500
+
+  val UNAVAILABLE = 503
+
+  val DEADLINE_EXCEEDED = 504
+
+  def retryable(status: Int): Boolean = {
+    status match {
+      case RESOURCE_EXHAUSTED | CANCELLED | INTERNAL | UNAVAILABLE | DEADLINE_EXCEEDED => true
+      case _ => false
+    }
+  }
+}
+
+
+private[pubsub]
+class PubsubReceiver(
+    project: String,
+    topic: Option[String],
+    subscription: String,
+    credential: SparkGCPCredentials,
+    storageLevel: StorageLevel)
+    extends Receiver[SparkPubsubMessage](storageLevel) {
+
+  val APP_NAME = "sparkstreaming-pubsub-receiver"
+
+  val INIT_BACKOFF = 100 // 100ms
+
+  val MAX_BACKOFF = 10 * 1000 // 10s
+
+  val MAX_MESSAGE = 1000
+
+  lazy val client = new Builder(
+    ConnectionUtils.transport,
+    ConnectionUtils.jacksonFactory,
+    new RetryHttpInitializer(credential.provider, APP_NAME))
+      .setApplicationName(APP_NAME)
+      .build()
+
+  val projectFullName: String = s"projects/$project"
+  val subscriptionFullName: String = s"$projectFullName/subscriptions/$subscription"
+
+  override def onStart(): Unit = {
+    topic match {
+      case Some(t) =>
+        val sub: Subscription = new Subscription
+        sub.setTopic(s"$projectFullName/topics/$t")
+        try {
+          client.projects().subscriptions().create(subscriptionFullName, sub).execute()
+        } catch {
+          case e: GoogleJsonResponseException =>
+            if (e.getDetails.getCode == ConnectionUtils.ALREADY_EXISTS) {
+              // Ignore subscription already exists exception.
+            } else {
+              reportError("Failed to create subscription", e)
+            }
+          case NonFatal(e) =>
+            reportError("Failed to create subscription", e)
+        }
+      case None => // do nothing
+    }
+    new Thread() {
+      override def run() {
+        receive()
+      }
+    }.start()
+  }
+
+  def receive(): Unit = {
+    val pullRequest = new PullRequest().setMaxMessages(MAX_MESSAGE).setReturnImmediately(false)
+    var backoff = INIT_BACKOFF
+    while (!isStopped()) {
+      try {
+        val pullResponse =
+          client.projects().subscriptions().pull(subscriptionFullName, pullRequest).execute()
+        val receivedMessages = pullResponse.getReceivedMessages.asScala.toList
+        store(receivedMessages
+            .map(x => {
+              val sm = new SparkPubsubMessage
+              sm.message = x.getMessage
+              sm
+            })
+            .iterator)
+
+        val ackRequest = new AcknowledgeRequest()
+        ackRequest.setAckIds(receivedMessages.map(x => x.getAckId).asJava)
+        client.projects().subscriptions().acknowledge(subscriptionFullName, ackRequest).execute()
+        backoff = INIT_BACKOFF
+      } catch {
+        case e: GoogleJsonResponseException =>
+          if (ConnectionUtils.retryable(e.getDetails.getCode)) {
+            Thread.sleep(backoff)
+            backoff = Math.min(backoff * 2, MAX_BACKOFF)
+          } else {
+            reportError("Failed to pull messages", e)
+          }
+        case NonFatal(e) => reportError("Failed to pull messages", e)
+      }
+    }
+  }
+
+  override def onStop(): Unit = {}
+}
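
Because SparkPubsubMessage implements Externalizable, it survives the Java serialization path Spark uses when storing received blocks. Below is a minimal round-trip sketch, not part of the commit: the object name is hypothetical, it assumes it is compiled inside the org.apache.spark.streaming.pubsub package (the `message` field is package-private), and it reuses Spark's internal org.apache.spark.util.Utils helper the same way the test suites in this commit do.

    package org.apache.spark.streaming.pubsub

    import com.google.api.services.pubsub.model.PubsubMessage

    import org.apache.spark.util.Utils

    object SparkPubsubMessageRoundTrip {
      def main(args: Array[String]): Unit = {
        // Wrap a raw Pub/Sub message in the serializable wrapper.
        val wrapped = new SparkPubsubMessage()
        wrapped.message = new PubsubMessage()
            .encodeData("hello".getBytes("UTF-8"))
            .setMessageId("id-1")

        // Java-serialize and deserialize, exercising writeExternal/readExternal.
        val copy = Utils.deserialize[SparkPubsubMessage](Utils.serialize(wrapped))
        println(new String(copy.getData(), "UTF-8"))  // hello
        println(copy.getMessageId())                  // id-1
      }
    }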

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubUtils.scala b/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubUtils.scala
new file mode 100644
index 0000000..b4f02b9
--- /dev/null
+++ b/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/PubsubUtils.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream
+import org.apache.spark.streaming.api.java.JavaStreamingContext
+import org.apache.spark.streaming.dstream.ReceiverInputDStream
+
+object PubsubUtils {
+
+  /**
+   * Create an input stream that receives messages pushed by a Pub/Sub publisher
+   * using service account authentication.
+   *
+   * If a topic is given and the subscription doesn't exist,
+   * a subscription with the given name is created.
+   * Note: this receiver only receives messages that arrive after the subscription is created.
+   * If no topic is given and the subscription doesn't exist, a not-found exception is thrown.
+   *
+   * @param ssc             StreamingContext object
+   * @param project         Google cloud project id
+   * @param topic           Topic name for creating the subscription if needed
+   * @param subscription    Subscription name to subscribe to
+   * @param credentials     SparkGCPCredentials to use for authenticating
+   * @param storageLevel    RDD storage level
+   * @return
+   */
+  def createStream(
+      ssc: StreamingContext,
+      project: String,
+      topic: Option[String],
+      subscription: String,
+      credentials: SparkGCPCredentials,
+      storageLevel: StorageLevel): ReceiverInputDStream[SparkPubsubMessage] = {
+    ssc.withNamedScope("pubsub stream") {
+
+      new PubsubInputDStream(
+        ssc,
+        project,
+        topic,
+        subscription,
+        credentials,
+        storageLevel)
+    }
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a Pub/Sub publisher
+   * using the given credentials.
+   *
+   * Throws a not-found exception if the subscription doesn't exist.
+   *
+   * @param jssc         JavaStreamingContext object
+   * @param project      Google cloud project id
+   * @param subscription Subscription name to subscribe to
+   * @param credentials  SparkGCPCredentials to use for authenticating
+   * @param storageLevel RDD storage level
+   * @return
+   */
+  def createStream(jssc: JavaStreamingContext, project: String, subscription: String,
+      credentials: SparkGCPCredentials, storageLevel: StorageLevel
+      ): JavaReceiverInputDStream[SparkPubsubMessage] = {
+    createStream(jssc.ssc, project, None, subscription, credentials, storageLevel)
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a Pub/Sub publisher
+   * using the given credentials.
+   *
+   * If the subscription doesn't exist, a subscription with the given name is created.
+   * Note: this receiver only receives messages that arrive after the subscription is created.
+   *
+   * @param jssc            JavaStreamingContext object
+   * @param project         Google cloud project id
+   * @param topic           Topic name for creating the subscription if needed
+   * @param subscription    Subscription name to subscribe to
+   * @param credentials     SparkGCPCredentials to use for authenticating
+   * @param storageLevel    RDD storage level
+   * @return
+   */
+  def createStream(jssc: JavaStreamingContext,
+      project: String, topic: String, subscription: String,
+      credentials: SparkGCPCredentials, storageLevel: StorageLevel
+  ): JavaReceiverInputDStream[SparkPubsubMessage] = {
+    createStream(jssc.ssc, project, Some(topic), subscription, credentials, storageLevel)
+  }
+}
+
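
A minimal Scala sketch of wiring this API into a streaming job, using the Scala createStream variant defined above with application-default credentials; the object name and the project, topic, and subscription values are placeholders.

    import org.apache.spark.SparkConf
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.pubsub.{PubsubUtils, SparkGCPCredentials}

    object PubsubStreamSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setAppName("PubsubStreamSketch")
        val ssc = new StreamingContext(conf, Seconds(2))

        // Subscribe to "my-subscription", creating it on "my-topic" if it doesn't exist yet.
        val stream = PubsubUtils.createStream(
          ssc, "my-gcp-project", Some("my-topic"), "my-subscription",
          SparkGCPCredentials.builder.build(),   // application-default credentials
          StorageLevel.MEMORY_AND_DISK_SER_2)

        // Each element is a SparkPubsubMessage; decode the payload bytes.
        stream.map(msg => new String(msg.getData())).print()

        ssc.start()
        ssc.awaitTermination()
      }
    }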

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentials.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentials.scala b/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentials.scala
new file mode 100644
index 0000000..5cadde3
--- /dev/null
+++ b/streaming-pubsub/src/main/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentials.scala
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import com.google.api.client.auth.oauth2.Credential
+import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
+import com.google.api.services.pubsub.PubsubScopes
+import com.google.cloud.hadoop.util.{EntriesCredentialConfiguration, HadoopCredentialConfiguration}
+import java.util
+import org.apache.hadoop.conf.Configuration
+
+/**
+ * Serializable interface providing a method executors can call to obtain a
+ * Credential instance for authenticating to GCP services.
+ */
+private[pubsub] sealed trait SparkGCPCredentials extends Serializable {
+
+  def provider: Credential
+}
+
+/**
+ * Returns an application-default credential.
+ */
+private[pubsub] final case object ApplicationDefaultCredentials extends SparkGCPCredentials {
+
+  override def provider: Credential = {
+    GoogleCredential.getApplicationDefault.createScoped(PubsubScopes.all())
+  }
+}
+
+/**
+ * Returns a Service Account type Credential instance.
+ * If all parameters are None, the metadata service type is tried.
+ * If jsonFilePath is available, the json type is tried.
+ * If jsonFilePath is None and both p12FilePath and emailAccount are available, the p12 type is tried.
+ *
+ * @param jsonFilePath file path for json
+ * @param p12FilePath  file path for p12
+ * @param emailAccount email account for p12
+ */
+private[pubsub] final case class ServiceAccountCredentials(
+    jsonFilePath: Option[String] = None,
+    p12FilePath: Option[String] = None,
+    emailAccount: Option[String] = None)
+    extends SparkGCPCredentials {
+
+  override def provider: Credential = {
+    val conf = new Configuration(false)
+    conf.setBoolean(
+      EntriesCredentialConfiguration.BASE_KEY_PREFIX
+          + EntriesCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX,
+      true)
+    jsonFilePath match {
+      case Some(jsonFilePath) =>
+        conf.set(
+          EntriesCredentialConfiguration.BASE_KEY_PREFIX
+              + EntriesCredentialConfiguration.JSON_KEYFILE_SUFFIX,
+          jsonFilePath
+        )
+      case _ => // do nothing
+    }
+    p12FilePath match {
+      case Some(p12FilePath) =>
+        conf.set(
+          EntriesCredentialConfiguration.BASE_KEY_PREFIX
+              + EntriesCredentialConfiguration.SERVICE_ACCOUNT_KEYFILE_SUFFIX,
+          p12FilePath
+        )
+      case _ => // do nothing
+    }
+    emailAccount match {
+      case Some(emailAccount) =>
+        conf.set(
+          EntriesCredentialConfiguration.BASE_KEY_PREFIX
+              + EntriesCredentialConfiguration.SERVICE_ACCOUNT_EMAIL_SUFFIX,
+          emailAccount
+        )
+      case _ => // do nothing
+    }
+
+    HadoopCredentialConfiguration
+        .newBuilder()
+        .withConfiguration(conf)
+        .build()
+        .getCredential(new util.ArrayList(PubsubScopes.all()))
+  }
+
+}
+
+object SparkGCPCredentials {
+
+  /**
+   * Builder for SparkGCPCredentials instance.
+   */
+  class Builder {
+    private var creds: Option[SparkGCPCredentials] = None
+
+    /**
+     * Use a json type key file for service account credential
+     *
+     * @param jsonFilePath json type key file
+     * @return Reference to this SparkGCPCredentials.Builder
+     */
+    def jsonServiceAccount(jsonFilePath: String): Builder = {
+      creds = Option(ServiceAccountCredentials(Option(jsonFilePath)))
+      this
+    }
+
+    /**
+     * Use a p12 type key file for service account credential
+     *
+     * @param p12FilePath p12 type key file
+     * @param emailAccount email of service account
+     * @return Reference to this SparkGCPCredentials.Builder
+     */
+    def p12ServiceAccount(p12FilePath: String, emailAccount: String): Builder = {
+      creds = Option(ServiceAccountCredentials(
+        p12FilePath = Option(p12FilePath), emailAccount = Option(emailAccount)))
+      this
+    }
+
+    /**
+     * Use the metadata service to obtain a service account credential
+     * @return Reference to this SparkGCPCredentials.Builder
+     */
+    def metadataServiceAccount(): Builder = {
+      creds = Option(ServiceAccountCredentials())
+      this
+    }
+
+    /**
+     * Returns the appropriate instance of SparkGCPCredentials given the configured
+     * parameters.
+     *
+     * - The service account credentials will be returned if they were provided.
+     *
+     * - The application default credentials will be returned otherwise.
+     * @return
+     */
+    def build(): SparkGCPCredentials = creds.getOrElse(ApplicationDefaultCredentials)
+
+  }
+
+  /**
+   * Creates a SparkGCPCredentials.Builder for constructing
+   * SparkGCPCredentials instance.
+   *
+   * @return SparkGCPCredentials.Builder instance
+   */
+  def builder: Builder = new Builder
+}
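
A brief sketch of the builder paths defined above; the key-file paths and the service-account email are placeholders, and which path applies depends on how your GCP service account is provisioned.

    import org.apache.spark.streaming.pubsub.SparkGCPCredentials

    object CredentialsSketch {
      // JSON key file for a service account (path is a placeholder).
      val jsonCreds = SparkGCPCredentials.builder
          .jsonServiceAccount("/path/to/key.json")
          .build()

      // P12 key file plus the service account email (both placeholders).
      val p12Creds = SparkGCPCredentials.builder
          .p12ServiceAccount("/path/to/key.p12", "svc-account@my-gcp-project.iam.gserviceaccount.com")
          .build()

      // Credentials obtained from the GCE metadata service.
      val metadataCreds = SparkGCPCredentials.builder.metadataServiceAccount().build()

      // No builder calls at all falls back to application-default credentials.
      val defaultCreds = SparkGCPCredentials.builder.build()
    }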

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java b/streaming-pubsub/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
new file mode 100644
index 0000000..cfedb5a
--- /dev/null
+++ b/streaming-pubsub/src/test/java/org/apache/spark/streaming/LocalJavaStreamingContext.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.junit.After;
+import org.junit.Before;
+
+public abstract class LocalJavaStreamingContext {
+
+    protected transient JavaStreamingContext ssc;
+
+    @Before
+    public void setUp() {
+        SparkConf conf = new SparkConf()
+            .setMaster("local[2]")
+            .setAppName("test")
+            .set("spark.streaming.clock", "org.apache.spark.util.ManualClock");
+        ssc = new JavaStreamingContext(conf, new Duration(1000));
+        ssc.checkpoint("checkpoint");
+    }
+
+    @After
+    public void tearDown() {
+        ssc.stop();
+        ssc = null;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/java/org/apache/spark/streaming/pubsub/JavaPubsubStreamSuite.java
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/java/org/apache/spark/streaming/pubsub/JavaPubsubStreamSuite.java b/streaming-pubsub/src/test/java/org/apache/spark/streaming/pubsub/JavaPubsubStreamSuite.java
new file mode 100644
index 0000000..360b9a9
--- /dev/null
+++ b/streaming-pubsub/src/test/java/org/apache/spark/streaming/pubsub/JavaPubsubStreamSuite.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub;
+
+import org.apache.spark.storage.StorageLevel;
+import org.apache.spark.streaming.LocalJavaStreamingContext;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.junit.Test;
+
+public class JavaPubsubStreamSuite extends LocalJavaStreamingContext {
+    @Test
+    public void testPubsubStream() {
+        // tests the API, does not actually test data receiving
+        JavaReceiverInputDStream<SparkPubsubMessage> stream1 = PubsubUtils.createStream(
+                ssc, "project", "subscription",
+                new SparkGCPCredentials.Builder().build(), StorageLevel.MEMORY_AND_DISK_SER_2());
+
+        JavaReceiverInputDStream<SparkPubsubMessage> stream2 = PubsubUtils.createStream(
+                ssc, "project", "topic", "subscription",
+                new SparkGCPCredentials.Builder().build(), StorageLevel.MEMORY_AND_DISK_SER_2());
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/resources/log4j.properties b/streaming-pubsub/src/test/resources/log4j.properties
new file mode 100644
index 0000000..75e3b53
--- /dev/null
+++ b/streaming-pubsub/src/test/resources/log4j.properties
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+log4j.rootCategory=INFO, file
+log4j.appender.file=org.apache.log4j.FileAppender
+log4j.appender.file.append=true
+log4j.appender.file.file=target/unit-tests.log
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+log4j.logger.org.spark-project.jetty=WARN
+

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubFunSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubFunSuite.scala b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubFunSuite.scala
new file mode 100644
index 0000000..acdceb7
--- /dev/null
+++ b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubFunSuite.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import org.apache.spark.SparkFunSuite
+
+/**
+ * Helper trait that runs Google Cloud Pub/Sub real data transfer tests or
+ * ignores them depending on whether the env variable is set.
+ */
+trait PubsubFunSuite extends SparkFunSuite {
+  import PubsubTestUtils._
+
+  /** Run the test if the environment variable is set, otherwise ignore it */
+  def testIfEnabled(testName: String)(testBody: => Unit) {
+    if (shouldRunTests) {
+      test(testName)(testBody)
+    } else {
+      ignore(s"$testName [enable by setting env var $envVarNameForEnablingTests=1]")(testBody)
+    }
+  }
+
+  /** Run the given body of code only if Pub/Sub tests are enabled */
+  def runIfTestsEnabled(message: String)(body: => Unit): Unit = {
+    if (shouldRunTests) {
+      body
+    } else {
+      ignore(s"$message [enable by setting env var $envVarNameForEnablingTests=1]")(())
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubStreamSuite.scala b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubStreamSuite.scala
new file mode 100644
index 0000000..284950c
--- /dev/null
+++ b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubStreamSuite.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import java.util.UUID
+
+import scala.concurrent.duration._
+
+import org.scalatest.BeforeAndAfter
+import org.scalatest.concurrent.Eventually
+
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.Seconds
+
+class PubsubStreamSuite extends PubsubFunSuite with Eventually with BeforeAndAfter {
+
+  val batchDuration = Seconds(1)
+
+  private val master: String = "local[2]"
+
+  private val appName: String = this.getClass.getSimpleName
+
+  private val topicName: String = s"bahirStreamTestTopic_${UUID.randomUUID()}"
+
+  private val subscriptionName: String = s"${topicName}_sub"
+
+  private val subForCreateName: String = s"${topicName}_create_me"
+
+  private var ssc: StreamingContext = null
+  private var pubsubTestUtils: PubsubTestUtils = null
+  private var topicFullName: String = null
+  private var subscriptionFullName: String = null
+  private var subForCreateFullName: String = null
+
+  override def beforeAll(): Unit = {
+    runIfTestsEnabled("Prepare PubsubTestUtils") {
+      pubsubTestUtils = new PubsubTestUtils
+      topicFullName = pubsubTestUtils.getFullTopicPath(topicName)
+      subscriptionFullName = pubsubTestUtils.getFullSubscriptionPath(subscriptionName)
+      subForCreateFullName = pubsubTestUtils.getFullSubscriptionPath(subForCreateName)
+      pubsubTestUtils.createTopic(topicFullName)
+      pubsubTestUtils.createSubscription(topicFullName, subscriptionFullName)
+    }
+  }
+
+  override def afterAll(): Unit = {
+    if (pubsubTestUtils != null) {
+      pubsubTestUtils.removeSubscription(subForCreateFullName)
+      pubsubTestUtils.removeSubscription(subscriptionFullName)
+      pubsubTestUtils.removeTopic(topicFullName)
+    }
+  }
+
+  before {
+    ssc = new StreamingContext(master, appName, batchDuration)
+  }
+
+  after {
+    if (ssc != null) {
+      ssc.stop()
+    }
+  }
+
+  test("PubsubUtils API") {
+    val pubsubStream1 = PubsubUtils.createStream(
+      ssc, "project", None, "subscription",
+      PubsubTestUtils.credential, StorageLevel.MEMORY_AND_DISK_SER_2)
+
+    val pubsubStream2 = PubsubUtils.createStream(
+      ssc, "project", Some("topic"), "subscription",
+      PubsubTestUtils.credential, StorageLevel.MEMORY_AND_DISK_SER_2)
+  }
+
+  testIfEnabled("pubsub input stream") {
+    val receiveStream = PubsubUtils.createStream(
+      ssc, PubsubTestUtils.projectId, Some(topicName), subscriptionName,
+      PubsubTestUtils.credential, StorageLevel.MEMORY_AND_DISK_SER_2)
+
+    @volatile var receiveMessages: List[SparkPubsubMessage] = List()
+    receiveStream.foreachRDD { rdd =>
+      if (rdd.collect().length > 0) {
+        receiveMessages = receiveMessages ::: List(rdd.first)
+        receiveMessages
+      }
+    }
+
+    ssc.start()
+
+    eventually(timeout(10000 milliseconds), interval(100 milliseconds)) {
+      val sendMessages = pubsubTestUtils.generatorMessages(10)
+      pubsubTestUtils.publishData(topicFullName, sendMessages)
+      assert(sendMessages.map(m => new String(m.getData))
+          .contains(new String(receiveMessages(0).getData)))
+      assert(sendMessages.map(_.getAttributes).contains(receiveMessages(0).getAttributes))
+    }
+  }
+
+  testIfEnabled("pubsub input stream, create pubsub") {
+    val receiveStream = PubsubUtils.createStream(
+      ssc, PubsubTestUtils.projectId, Some(topicName), subForCreateName,
+      PubsubTestUtils.credential, StorageLevel.MEMORY_AND_DISK_SER_2)
+
+    @volatile var receiveMessages: List[SparkPubsubMessage] = List()
+    receiveStream.foreachRDD { rdd =>
+      if (rdd.collect().length > 0) {
+        receiveMessages = receiveMessages ::: List(rdd.first)
+        receiveMessages
+      }
+    }
+
+    ssc.start()
+
+    eventually(timeout(10000 milliseconds), interval(100 milliseconds)) {
+      val sendMessages = pubsubTestUtils.generatorMessages(10)
+      pubsubTestUtils.publishData(topicFullName, sendMessages)
+      assert(sendMessages.map(m => new String(m.getData))
+          .contains(new String(receiveMessages(0).getData)))
+      assert(sendMessages.map(_.getAttributes).contains(receiveMessages(0).getAttributes))
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubTestUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubTestUtils.scala b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubTestUtils.scala
new file mode 100644
index 0000000..9dd719a
--- /dev/null
+++ b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/PubsubTestUtils.scala
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import scala.collection.JavaConverters._
+
+import com.google.api.services.pubsub.Pubsub
+import com.google.api.services.pubsub.Pubsub.Builder
+import com.google.api.services.pubsub.model.PublishRequest
+import com.google.api.services.pubsub.model.PubsubMessage
+import com.google.api.services.pubsub.model.Subscription
+import com.google.api.services.pubsub.model.Topic
+import com.google.cloud.hadoop.util.RetryHttpInitializer
+
+import org.apache.spark.internal.Logging
+
+private[pubsub] class PubsubTestUtils extends Logging {
+
+  val APP_NAME = this.getClass.getSimpleName
+
+  val client: Pubsub = {
+    new Builder(
+      ConnectionUtils.transport,
+      ConnectionUtils.jacksonFactory,
+      new RetryHttpInitializer(
+        PubsubTestUtils.credential.provider,
+        APP_NAME
+      ))
+        .setApplicationName(APP_NAME)
+        .build()
+  }
+
+  def createTopic(topic: String): Unit = {
+    val topicRequest = new Topic()
+    client.projects().topics().create(topic, topicRequest.setName(topic)).execute()
+  }
+
+  def createSubscription(topic: String, subscription: String): Unit = {
+    val subscriptionRequest = new Subscription()
+    client.projects().subscriptions().create(subscription,
+      subscriptionRequest.setTopic(topic).setName(subscription)).execute()
+  }
+
+  def publishData(topic: String, messages: List[SparkPubsubMessage]): Unit = {
+    val publishRequest = new PublishRequest()
+    publishRequest.setMessages(messages.map(m => m.message).asJava)
+    client.projects().topics().publish(topic, publishRequest).execute()
+  }
+
+  def removeSubscription(subscription: String): Unit = {
+    client.projects().subscriptions().delete(subscription).execute()
+  }
+
+  def removeTopic(topic: String): Unit = {
+    client.projects().topics().delete(topic).execute()
+  }
+
+  def generatorMessages(num: Int): List[SparkPubsubMessage] = {
+    (1 to num)
+        .map(n => {
+          val m = new PubsubMessage()
+          m.encodeData(s"data$n".getBytes)
+          m.setAttributes(Map("a1" -> s"v1$n", "a2" -> s"v2$n").asJava)
+        })
+        .map(m => {
+          val sm = new SparkPubsubMessage()
+          sm.message = m
+          sm
+        })
+        .toList
+  }
+
+  def getFullTopicPath(topic: String): String =
+    s"projects/${PubsubTestUtils.projectId}/topics/$topic"
+
+  def getFullSubscriptionPath(subscription: String): String =
+    s"projects/${PubsubTestUtils.projectId}/subscriptions/$subscription"
+
+}
+
+private[pubsub] object PubsubTestUtils {
+
+  val envVarNameForEnablingTests = "ENABLE_PUBSUB_TESTS"
+  val envVarNameForGoogleCloudProjectId = "GCP_TEST_PROJECT_ID"
+  val envVarNameForJsonKeyPath = "GCP_TEST_JSON_KEY_PATH"
+  val envVarNameForP12KeyPath = "GCP_TEST_P12_KEY_PATH"
+  val envVarNameForAccount = "GCP_TEST_ACCOUNT"
+
+  lazy val shouldRunTests = {
+    val isEnvSet = sys.env.get(envVarNameForEnablingTests) == Some("1")
+    if (isEnvSet) {
+      // scalastyle:off println
+      // Print this so that they are easily visible on the console and not hidden in the log4j logs.
+      println(
+        s"""
+           |Google Pub/Sub tests that actually send data have been enabled by setting the environment
+           |variable $envVarNameForEnablingTests to 1.
+           |This will create Pub/Sub topics and subscriptions on Google Cloud Platform.
+           |Please be aware that this may incur some Google cloud costs.
+           |Set the environment variable $envVarNameForGoogleCloudProjectId to the desired project.
+        """.stripMargin)
+      // scalastyle:on println
+    }
+    isEnvSet
+  }
+
+  lazy val projectId = {
+    val id = sys.env.getOrElse(envVarNameForGoogleCloudProjectId,
+      throw new IllegalArgumentException(
+        s"Need to set environment varibable $envVarNameForGoogleCloudProjectId if enable test."))
+    // scalastyle:off println
+    // Print this so that they are easily visible on the console and not hidden in the log4j logs.
+    println(s"Using project $id for creating Pub/Sub topic and subscription for tests.")
+    // scalastyle:on println
+    id
+  }
+
+  lazy val credential =
+    sys.env.get(envVarNameForJsonKeyPath)
+        .map(path => SparkGCPCredentials.builder.jsonServiceAccount(path).build())
+        .getOrElse(
+          sys.env.get(envVarNameForP12KeyPath)
+            .map(path => SparkGCPCredentials.builder.p12ServiceAccount(
+              path, sys.env.get(envVarNameForAccount).get
+            ).build())
+            .getOrElse(SparkGCPCredentials.builder.build()))
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/56613263/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentialsBuilderSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentialsBuilderSuite.scala b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentialsBuilderSuite.scala
new file mode 100644
index 0000000..e47b0b2
--- /dev/null
+++ b/streaming-pubsub/src/test/scala/org/apache/spark/streaming/pubsub/SparkGCPCredentialsBuilderSuite.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.pubsub
+
+import java.io.FileNotFoundException
+
+import org.scalatest.concurrent.Timeouts
+
+import org.apache.spark.util.Utils
+import org.apache.spark.SparkFunSuite
+
+class SparkGCPCredentialsBuilderSuite extends SparkFunSuite with Timeouts {
+  private def builder = SparkGCPCredentials.builder
+
+  private val jsonCreds = ServiceAccountCredentials(
+    jsonFilePath = Option("json-key-path")
+  )
+
+  private val p12Creds = ServiceAccountCredentials(
+    p12FilePath = Option("p12-key-path"),
+    emailAccount = Option("email")
+  )
+
+  private val metadataCreds = ServiceAccountCredentials()
+
+  test("should build application default") {
+    assert(builder.build() === ApplicationDefaultCredentials)
+  }
+
+  test("should build json service account") {
+    assertResult(jsonCreds) {
+      builder.jsonServiceAccount(jsonCreds.jsonFilePath.get).build()
+    }
+  }
+
+  test("should provide json creds") {
+    val thrown = intercept[FileNotFoundException] {
+      jsonCreds.provider
+    }
+    assert(thrown.getMessage === "json-key-path (No such file or directory)")
+  }
+
+  test("should build p12 service account") {
+    assertResult(p12Creds) {
+      builder.p12ServiceAccount(p12Creds.p12FilePath.get, p12Creds.emailAccount.get).build()
+    }
+  }
+
+  test("should provide p12 creds") {
+    val thrown = intercept[FileNotFoundException] {
+      p12Creds.provider
+    }
+    assert(thrown.getMessage === "p12-key-path (No such file or directory)")
+  }
+
+  test("should build metadata service account") {
+    assertResult(metadataCreds) {
+      builder.metadataServiceAccount().build()
+    }
+  }
+
+  test("SparkGCPCredentials classes should be serializable") {
+    assertResult(jsonCreds) {
+      Utils.deserialize[ServiceAccountCredentials](Utils.serialize(jsonCreds))
+    }
+
+    assertResult(p12Creds) {
+      Utils.deserialize[ServiceAccountCredentials](Utils.serialize(p12Creds))
+    }
+
+    assertResult(metadataCreds) {
+      Utils.deserialize[ServiceAccountCredentials](Utils.serialize(metadataCreds))
+    }
+
+    assertResult(ApplicationDefaultCredentials) {
+      Utils.deserialize[ServiceAccountCredentials](Utils.serialize(ApplicationDefaultCredentials))
+    }
+  }
+
+}


[14/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/2aad4e08
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/2aad4e08
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/2aad4e08

Branch: refs/heads/master
Commit: 2aad4e087ac7f5999b364a29d66f181c111319ed
Parents: dd82363
Author: Luciano Resende <lr...@apache.org>
Authored: Tue Jan 10 18:22:07 2017 -0800
Committer: Luciano Resende <lr...@apache.org>
Committed: Tue Jan 10 18:22:07 2017 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 713a95c..186dafd 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.0.2</version>
+        <version>2.1.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 8ba1cc1..a7ae91e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.0.2</version>
+  <version>2.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.0.2-rc2</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 118a98f..678aeee 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index a7f2670..b108502 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 5c0f1a6..8cdba98 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 608efa0..7647ba8 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2aad4e08/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 07dce62..f9ae3bc 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[06/50] [abbrv] incubator-livy-website git commit: [MINOR] Add build temp files to rat exclude list

Posted by lr...@apache.org.
[MINOR] Add build temp files to rat exclude list


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/50ecf205
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/50ecf205
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/50ecf205

Branch: refs/heads/master
Commit: 50ecf2058863e8f5d33bb288d3e59c40c1410558
Parents: 942b43d
Author: Luciano Resende <lr...@apache.org>
Authored: Thu Nov 17 11:24:29 2016 +0100
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Nov 17 11:24:29 2016 +0100

----------------------------------------------------------------------
 pom.xml | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/50ecf205/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 65e6462..070c011 100644
--- a/pom.xml
+++ b/pom.xml
@@ -449,6 +449,7 @@
               <exclude>**/target/**</exclude>
               <exclude>**/README.md</exclude>
               <exclude>**/examples/data/*.txt</exclude>
+              <exclude>**/*.iml</exclude>
             </excludes>
           </configuration>
         </plugin>


[05/50] [abbrv] incubator-livy-website git commit: [BAHIR-69] Clean build between different scala version

Posted by lr...@apache.org.
[BAHIR-69] Clean build between different scala version

During release-publish, execute a mvn clean between different scala
version builds


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/942b43dc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/942b43dc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/942b43dc

Branch: refs/heads/master
Commit: 942b43dc428c7ade2789fb09df0fda360cf94024
Parents: d8601f3
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Oct 15 11:19:19 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Oct 15 11:20:51 2016 -0700

----------------------------------------------------------------------
 dev/release-build.sh | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/942b43dc/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index 2575538..06762cf 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -281,11 +281,13 @@ if [[ "$RELEASE_PUBLISH" == "true" ]]; then
     cd target/bahir
 
     #Deploy default scala 2.11
-    mvn -DaltDeploymentRepository=apache.releases.https::default::https://repository.apache.org/service/local/staging/deploy/maven2 clean verify gpg:sign install:install deploy:deploy -DskiptTests -Dgpg.passphrase=$GPG_PASSPHRASE $PUBLISH_PROFILES
+    mvn $PUBLISH_PROFILES -DaltDeploymentRepository=apache.releases.https::default::https://repository.apache.org/service/local/staging/deploy/maven2 clean package gpg:sign install:install deploy:deploy -DskiptTests -Darguments="-DskipTests" -Dgpg.passphrase=$GPG_PASSPHRASE
+
+    mvn clean
 
     #Deploy scala 2.10
     ./dev/change-scala-version.sh 2.10
-    mvn -DaltDeploymentRepository=apache.releases.https::default::https://repository.apache.org/service/local/staging/deploy/maven2 clean verify gpg:sign install:install deploy:deploy -DskiptTests -Dscala-2.10 -Dgpg.passphrase=$GPG_PASSPHRASE $PUBLISH_PROFILES
+    mvn $PUBLISH_PROFILES -DaltDeploymentRepository=apache.releases.https::default::https://repository.apache.org/service/local/staging/deploy/maven2 clean package gpg:sign install:install deploy:deploy -DskiptTests -Darguments="-DskipTests" -Dscala-2.10 -Dgpg.passphrase=$GPG_PASSPHRASE
 
     cd "$BASE_DIR" #exit target
 
@@ -311,11 +313,11 @@ if [[ "$RELEASE_SNAPSHOT" == "true" ]]; then
     fi
 
     #Deploy default scala 2.11
-    $MVN -DaltDeploymentRepository=apache.snapshots.https::default::https://repository.apache.org/content/repositories/snapshots clean verify gpg:sign install:install deploy:deploy -DskiptTests -Dgpg.passphrase=$GPG_PASSPHRASE $PUBLISH_PROFILES
+    $MVN $PUBLISH_PROFILES -DaltDeploymentRepository=apache.snapshots.https::default::https://repository.apache.org/content/repositories/snapshots clean package gpg:sign install:install deploy:deploy -DskiptTests -Darguments="-DskipTests" -Dgpg.passphrase=$GPG_PASSPHRASE
 
     #Deploy scala 2.10
     ./dev/change-scala-version.sh 2.10
-    $MVN -DaltDeploymentRepository=apache.snapshots.https::default::https://repository.apache.org/content/repositories/snapshots clean verify gpg:sign install:install deploy:deploy -DskiptTests -Dscala-2.10 -Dgpg.passphrase=$GPG_PASSPHRASE $PUBLISH_PROFILES
+    $MVN $PUBLISH_PROFILES -DaltDeploymentRepository=apache.snapshots.https::default::https://repository.apache.org/content/repositories/snapshots clean package gpg:sign install:install deploy:deploy -DskiptTests -Darguments="-DskipTests" -Dscala-2.10 -Dgpg.passphrase=$GPG_PASSPHRASE
 
     cd "$BASE_DIR" #exit target
     exit 0


[25/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/daf001fd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/daf001fd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/daf001fd

Branch: refs/heads/master
Commit: daf001fd6c8db14362da8a6a51961859768ca081
Parents: e61cc3d
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Fri Feb 3 20:21:59 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Feb 3 20:21:59 2017 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index c7fbff0..4ff93fc 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.0</version>
+        <version>2.2.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 210c608..f9ee4a0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.0</version>
+  <version>2.2.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.1.0-rc1</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 28840e6..4a01ef5 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 4510801..36ce385 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index e5fef2a..2935f51 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index a33842d..949e4b3 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/daf001fd/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index bba10f0..db6616d 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[04/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/d8601f32
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/d8601f32
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/d8601f32

Branch: refs/heads/master
Commit: d8601f32a1be8c204e3a143a74f41d56687c25b6
Parents: bd00d2e
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Oct 15 09:56:42 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Oct 15 09:56:42 2016 -0700

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 1689a26..186dafd 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.0.1</version>
+        <version>2.1.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 891dd71..65e6462 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.0.1</version>
+  <version>2.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.0.1-rc1</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index a42406c..678aeee 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.1</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index bbf638b..b108502 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.1</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 281a125..8cdba98 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.1</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 2429ee4..7647ba8 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.1</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d8601f32/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index b2592d7..f9ae3bc 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.1</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[26/50] [abbrv] incubator-livy-website git commit: [MINOR] Code Style Fixes for Akka Example

Posted by lr...@apache.org.
[MINOR] Code Style Fixes for Akka Example

* the mutable LinkedHashSet must be referenced through the mutable package prefix
* use interpolated string formatting instead of format() (see the sketch below)

Closes #36
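
For illustration, here is a minimal before/after sketch of the two style rules above. It is a hypothetical fragment (the Subscribers class name is invented and akka-actor is assumed to be on the classpath), not code from this commit; the actual change is in the ActorWordCount diff below.

```scala
import scala.collection.mutable   // keep the package prefix rather than importing LinkedHashSet directly

import akka.actor.ActorRef

// Hypothetical holder class used only to illustrate the two style rules.
class Subscribers {
  // Before: `new LinkedHashSet[ActorRef]()` hides that the collection is mutable.
  // After: the `mutable.` prefix makes mutability explicit at the use site.
  val receivers = new mutable.LinkedHashSet[ActorRef]()

  def subscribe(receiverActor: ActorRef): Unit = {
    // Before: println("received subscribe from %s".format(receiverActor.toString))
    // After: interpolated string formatting.
    println(s"received subscribe from ${receiverActor.toString}")
    receivers += receiverActor
  }
}
```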


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/8d46b396
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/8d46b396
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/8d46b396

Branch: refs/heads/master
Commit: 8d46b396142624f40ab0359c7fb2407cb7f06951
Parents: daf001f
Author: Scott Walker <sc...@digital.hmrc.gov.uk>
Authored: Thu Feb 23 14:13:12 2017 +0000
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Mon Feb 27 03:39:37 2017 -0800

----------------------------------------------------------------------
 .../spark/examples/streaming/akka/ActorWordCount.scala    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/8d46b396/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
index 4af5d94..3a06da8 100644
--- a/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
+++ b/streaming-akka/examples/src/main/scala/org/apache/spark/examples/streaming/akka/ActorWordCount.scala
@@ -18,7 +18,7 @@
 // scalastyle:off println
 package org.apache.spark.examples.streaming.akka
 
-import scala.collection.mutable.LinkedHashSet
+import scala.collection.mutable
 import scala.util.Random
 
 import akka.actor.{Props, _}
@@ -39,7 +39,7 @@ case class UnsubscribeReceiver(receiverActor: ActorRef)
 class FeederActor extends Actor {
 
   val rand = new Random()
-  val receivers = new LinkedHashSet[ActorRef]()
+  val receivers = new mutable.LinkedHashSet[ActorRef]()
 
   val strings: Array[String] = Array("words ", "may ", "count ")
 
@@ -62,11 +62,11 @@ class FeederActor extends Actor {
 
   def receive: Receive = {
     case SubscribeReceiver(receiverActor: ActorRef) =>
-      println("received subscribe from %s".format(receiverActor.toString))
+      println(s"received subscribe from ${receiverActor.toString}")
       receivers += receiverActor
 
     case UnsubscribeReceiver(receiverActor: ActorRef) =>
-      println("received unsubscribe from %s".format(receiverActor.toString))
+      println(s"received unsubscribe from ${receiverActor.toString}")
       receivers -= receiverActor
   }
 }
@@ -172,7 +172,7 @@ object ActorWordCount {
     val lines = AkkaUtils.createStream[String](
       ssc,
       Props(classOf[SampleActorReceiver[String]],
-        "akka.tcp://test@%s:%s/user/FeederActor".format(host, port.toInt)),
+        s"akka.tcp://test@$host:${port.toInt}/user/FeederActor"),
       "SampleReceiver")
 
     // compute wordcount


[50/50] [abbrv] incubator-livy-website git commit: [MINOR] Fix copy+paste typo

Posted by lr...@apache.org.
[MINOR] Fix copy+paste typo


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/0d9725d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/0d9725d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/0d9725d4

Branch: refs/heads/master
Commit: 0d9725d4ccc315760ecc35eeeb25c58a15258b3a
Parents: e3d9e69
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jul 19 18:24:04 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jul 19 18:24:04 2017 -0700

----------------------------------------------------------------------
 streaming-mqtt/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/0d9725d4/streaming-mqtt/README.md
----------------------------------------------------------------------
diff --git a/streaming-mqtt/README.md b/streaming-mqtt/README.md
index eb08b51..5117594 100644
--- a/streaming-mqtt/README.md
+++ b/streaming-mqtt/README.md
@@ -52,7 +52,7 @@ this actor can be configured to handle failures, etc.
 
     val lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
     val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topic)
-    val lines = MQTTUtils.createPairedByteArrayStreamStream(ssc, brokerUrl, topic)
+    val lines = MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topic)
 
 Additional mqtt connection options can be provided:
 


[22/50] [abbrv] incubator-livy-website git commit: [MINOR] Update Scaladoc in MQTTWordCount example

Posted by lr...@apache.org.
[MINOR] Update Scaladoc in MQTTWordCount example

to match the more descriptive text from the Python example

Closes #35


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/695ca982
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/695ca982
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/695ca982

Branch: refs/heads/master
Commit: 695ca982d2bb8606d7b40bbba11f3fe94ab5899e
Parents: cf0d740
Author: Prabeesh <pr...@gmail.com>
Authored: Fri Feb 3 02:21:32 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Feb 3 02:21:32 2017 -0800

----------------------------------------------------------------------
 .../examples/streaming/mqtt/MQTTWordCount.scala | 31 +++++++++++++-------
 1 file changed, 21 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/695ca982/streaming-mqtt/examples/src/main/scala/org/apache/spark/examples/streaming/mqtt/MQTTWordCount.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/examples/src/main/scala/org/apache/spark/examples/streaming/mqtt/MQTTWordCount.scala b/streaming-mqtt/examples/src/main/scala/org/apache/spark/examples/streaming/mqtt/MQTTWordCount.scala
index e09e75e..bc41ffa 100644
--- a/streaming-mqtt/examples/src/main/scala/org/apache/spark/examples/streaming/mqtt/MQTTWordCount.scala
+++ b/streaming-mqtt/examples/src/main/scala/org/apache/spark/examples/streaming/mqtt/MQTTWordCount.scala
@@ -79,24 +79,35 @@ object MQTTPublisher {
 }
 
 /**
- * A sample wordcount with MqttStream stream
+ * A sample wordcount with MQTTInputDStream
  *
- * To work with Mqtt, Mqtt Message broker/server required.
- * Mosquitto (http://mosquitto.org/) is an open source Mqtt Broker
- * In ubuntu mosquitto can be installed using the command  `$ sudo apt-get install mosquitto`
- * Eclipse paho project provides Java library for Mqtt Client http://www.eclipse.org/paho/
- * Example Java code for Mqtt Publisher and Subscriber can be found here
- * https://bitbucket.org/mkjinesh/mqttclient
  * Usage: MQTTWordCount <MqttbrokerUrl> <topic>
- *   <MqttbrokerUrl> and <topic> describe where Mqtt publisher is running.
  *
- * To run this example locally, you may run publisher as
+ * To run this example on your local machine, you first need to setup a MQTT broker and publisher,
+ * like Mosquitto (http://mosquitto.org/) an easy to use and install open source MQTT Broker.
+ * On Mac OS, Mosquitto can be installed with Homebrew `$ brew install mosquitto`.
+ * On Ubuntu, Mosquitto can be installed with the command `$ sudo apt-get install mosquitto`.
+ *
+ * Alternatively, checkout the Eclipse paho project which provides a number of clients and utilities
+ * for working with MQTT (http://www.eclipse.org/paho/#getting-started).
+ *
+ * How to run this example locally:
+ *
+ * (1) Start a MQTT message broker/server, i.e. Mosquitto:
+ *
+ *    `$ mosquitto -p 1883`
+ *
+ * (2) Run the publisher:
+ *
  *    `$ bin/run-example \
  *      org.apache.spark.examples.streaming.mqtt.MQTTPublisher tcp://localhost:1883 foo`
- * and run the example as
+ *
+ * (3) Run the example:
+ *
  *    `$ bin/run-example \
  *      org.apache.spark.examples.streaming.mqtt.MQTTWordCount tcp://localhost:1883 foo`
  */
+
 object MQTTWordCount {
 
   def main(args: Array[String]) {


[49/50] [abbrv] incubator-livy-website git commit: [BAHIR-100] Enhance MQTT connector to support byte arrays

Posted by lr...@apache.org.
[BAHIR-100] Enhance MQTT connector to support byte arrays

Closes #47
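
For context, here is a minimal usage sketch of the byte-array paired stream introduced by this change. The broker URL, topic names, app name, and the word-count logic are illustrative assumptions; only the MQTTUtils.createPairedByteArrayStream call reflects the API added in the diff below.

```scala
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.mqtt.MQTTUtils

object PairedByteArraySketch {
  def main(args: Array[String]): Unit = {
    val brokerUrl = "tcp://localhost:1883"   // assumed local MQTT broker, e.g. Mosquitto
    val topics    = Array("foo", "bar")      // example topic names

    val conf = new SparkConf().setMaster("local[2]").setAppName("PairedByteArraySketch")
    val ssc = new StreamingContext(conf, Seconds(2))

    // DStream of (topic, payload) pairs, where the payload is the raw MQTT message bytes.
    val lines = MQTTUtils.createPairedByteArrayStream(
      ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2)

    // Decode the payload explicitly before running a simple word count.
    val words = lines.flatMap { case (_, payload) => new String(payload, "UTF-8").split(" ") }
    words.map(word => (word, 1)).reduceByKey(_ + _).print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```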


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/e3d9e696
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/e3d9e696
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/e3d9e696

Branch: refs/heads/master
Commit: e3d9e6960941696ba073735e9d039c85146c217a
Parents: dca8d4c
Author: drosenst <da...@intel.com>
Authored: Wed Jul 5 23:41:02 2017 +0300
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jul 19 16:47:26 2017 -0700

----------------------------------------------------------------------
 streaming-mqtt/README.md                        |   3 +
 .../spark/streaming/mqtt/MQTTInputDStream.scala |   1 +
 .../mqtt/MQTTPairedByteArrayInputDStream.scala  | 144 ++++++++
 .../streaming/mqtt/MQTTPairedInputDStream.scala |   1 +
 .../apache/spark/streaming/mqtt/MQTTUtils.scala | 366 ++++++++++++++-----
 .../streaming/mqtt/JavaMQTTStreamSuite.java     |  20 +-
 .../spark/streaming/mqtt/MQTTStreamSuite.scala  |  26 ++
 7 files changed, 477 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/README.md
----------------------------------------------------------------------
diff --git a/streaming-mqtt/README.md b/streaming-mqtt/README.md
index 6b89136..eb08b51 100644
--- a/streaming-mqtt/README.md
+++ b/streaming-mqtt/README.md
@@ -52,12 +52,14 @@ this actor can be configured to handle failures, etc.
 
     val lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
     val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topic)
+    val lines = MQTTUtils.createPairedByteArrayStreamStream(ssc, brokerUrl, topic)
 
 Additional mqtt connection options can be provided:
 
 ```Scala
 val lines = MQTTUtils.createStream(ssc, brokerUrl, topic, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
 val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topics, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
+val lines = MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topics, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
 ```
 
 ### Java API
@@ -67,5 +69,6 @@ this actor can be configured to handle failures, etc.
 
     JavaDStream<String> lines = MQTTUtils.createStream(jssc, brokerUrl, topic);
     JavaReceiverInputDStream<Tuple2<String, String>> lines = MQTTUtils.createPairedStream(jssc, brokerUrl, topics);
+    JavaReceiverInputDStream<Tuple2<String, String>> lines = MQTTUtils.createPairedByteArrayStream(jssc, brokerUrl, topics);
 
 See end-to-end examples at [MQTT Examples](https://github.com/apache/bahir/tree/master/streaming-mqtt/examples)

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
index 328656b..cf27440 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala
@@ -30,6 +30,7 @@ import org.apache.spark.streaming.receiver.Receiver
 /**
  * Input stream that subscribe messages from a Mqtt Broker.
  * Uses eclipse paho as MqttClient http://www.eclipse.org/paho/
+ * @param _ssc               Spark Streaming StreamingContext
  * @param brokerUrl          Url of remote mqtt publisher
  * @param topic              topic name to subscribe to
  * @param storageLevel       RDD storage level.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedByteArrayInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedByteArrayInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedByteArrayInputDStream.scala
new file mode 100644
index 0000000..07c0b18
--- /dev/null
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedByteArrayInputDStream.scala
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.mqtt
+
+import org.eclipse.paho.client.mqttv3._
+import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.dstream._
+import org.apache.spark.streaming.receiver.Receiver
+
+/**
+ * Input stream that subscribe messages from a Mqtt Broker.
+ * Uses eclipse paho as MqttClient http://www.eclipse.org/paho/
+ * @param _ssc:              Spark Streaming StreamingContext,
+ * @param brokerUrl          Url of remote mqtt publisher
+ * @param topics             topic name Array to subscribe to
+ * @param storageLevel       RDD storage level.
+ * @param clientId           ClientId to use for the mqtt connection
+ * @param username           Username for authentication to the mqtt publisher
+ * @param password           Password for authentication to the mqtt publisher
+ * @param cleanSession       Sets the mqtt cleanSession parameter
+ * @param qos                Quality of service to use for the topic subscription
+ * @param connectionTimeout  Connection timeout for the mqtt connection
+ * @param keepAliveInterval  Keepalive interal for the mqtt connection
+ * @param mqttVersion        Version to use for the mqtt connection
+ */
+private[streaming] class MQTTPairedByteArrayInputDStream(
+    _ssc: StreamingContext,
+    brokerUrl: String,
+    topics: Array[String],
+    storageLevel: StorageLevel,
+    clientId: Option[String] = None,
+    username: Option[String] = None,
+    password: Option[String] = None,
+    cleanSession: Option[Boolean] = None,
+    qos: Option[Int] = None,
+    connectionTimeout: Option[Int] = None,
+    keepAliveInterval: Option[Int] = None,
+    mqttVersion: Option[Int] = None) extends ReceiverInputDStream[(String, Array[Byte])](_ssc) {
+
+  private[streaming] override def name: String = s"MQTT stream [$id]"
+
+  def getReceiver(): Receiver[(String, Array[Byte])] = {
+    new MQTTByteArrayPairReceiver(brokerUrl, topics, storageLevel, clientId, username,
+        password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
+  }
+}
+
+private[streaming] class MQTTByteArrayPairReceiver(
+  brokerUrl: String,
+  topics: Array[String],
+  storageLevel: StorageLevel,
+  clientId: Option[String],
+  username: Option[String],
+  password: Option[String],
+  cleanSession: Option[Boolean],
+  qos: Option[Int],
+  connectionTimeout: Option[Int],
+  keepAliveInterval: Option[Int],
+  mqttVersion: Option[Int]) extends Receiver[(String, Array[Byte])](storageLevel) {
+
+  def onStop() {
+
+  }
+
+  def onStart() {
+
+    // Set up persistence for messages
+    val persistence = new MemoryPersistence()
+
+    // Initializing Mqtt Client specifying brokerUrl, clientID and MqttClientPersistance
+    val client = new MqttClient(brokerUrl, clientId.getOrElse(MqttClient.generateClientId()),
+      persistence)
+
+    // Initialize mqtt parameters
+    val mqttConnectionOptions = new MqttConnectOptions()
+    if (username.isDefined && password.isDefined) {
+      mqttConnectionOptions.setUserName(username.get)
+      mqttConnectionOptions.setPassword(password.get.toCharArray)
+    }
+    mqttConnectionOptions.setCleanSession(cleanSession.getOrElse(true))
+    if (connectionTimeout.isDefined) {
+      mqttConnectionOptions.setConnectionTimeout(connectionTimeout.get)
+    }
+    if (keepAliveInterval.isDefined) {
+      mqttConnectionOptions.setKeepAliveInterval(keepAliveInterval.get)
+    }
+    if (mqttVersion.isDefined) {
+      mqttConnectionOptions.setMqttVersion(mqttVersion.get)
+    }
+
+    // Callback automatically triggers as and when new message arrives on specified topic
+    val callback = new MqttCallback() {
+
+      // Handles Mqtt message
+      override def messageArrived(topic: String, message: MqttMessage) {
+        store((topic, message.getPayload()))
+      }
+
+      override def deliveryComplete(token: IMqttDeliveryToken) {
+      }
+
+      override def connectionLost(cause: Throwable) {
+        restart("Connection lost ", cause)
+      }
+    }
+
+    // Set up callback for MqttClient. This needs to happen before
+    // connecting or subscribing, otherwise messages may be lost
+    client.setCallback(callback)
+
+    // Connect to MqttBroker
+    client.connect(mqttConnectionOptions)
+
+    // Subscribe to Mqtt topic
+    var i = 0
+    val qosArray = Array.ofDim[Int](topics.length)
+    for (i <- qosArray.indices) {
+      qosArray(i) = qos.getOrElse(1)
+    }
+    client.subscribe(topics, qosArray)
+
+  }
+}
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
index 050777b..ec89ed7 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
@@ -30,6 +30,7 @@ import org.apache.spark.streaming.receiver.Receiver
 /**
  * Input stream that subscribe messages from a Mqtt Broker.
  * Uses eclipse paho as MqttClient http://www.eclipse.org/paho/
+ * @param _ssc               Spark Streaming StreamingContext
  * @param brokerUrl          Url of remote mqtt publisher
  * @param topics             topic name Array to subscribe to
  * @param storageLevel       RDD storage level.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
index 0accb80..f42275f 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
@@ -215,22 +215,39 @@ object MQTTUtils {
     new MQTTPairedInputDStream(ssc, brokerUrl, topics, storageLevel)
   }
 
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  * @param ssc           StreamingContext object
+  * @param brokerUrl     Url of remote MQTT publisher
+  * @param topics        Array of topic names to subscribe to
+  * @param storageLevel  RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
+  */
 
-  /**
-   * Create an input stream that receives messages pushed by a MQTT publisher.
-   * @param ssc                StreamingContext object
-   * @param brokerUrl          Url of remote MQTT publisher
-   * @param topics             Array of topic names to subscribe to
-   * @param storageLevel       RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
-   * @param clientId           ClientId to use for the mqtt connection
-   * @param username           Username for authentication to the mqtt publisher
-   * @param password           Password for authentication to the mqtt publisher
-   * @param cleanSession       Sets the mqtt cleanSession parameter
-   * @param qos                Quality of service to use for the topic subscription
-   * @param connectionTimeout  Connection timeout for the mqtt connection
-   * @param keepAliveInterval  Keepalive interal for the mqtt connection
-   * @param mqttVersion        Version to use for the mqtt connection
-   */
+   def createPairedByteArrayStream(
+        ssc: StreamingContext,
+        brokerUrl: String,
+        topics: Array[String],
+        storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2
+        ): ReceiverInputDStream[(String, Array[Byte])] = {
+    new MQTTPairedByteArrayInputDStream(ssc, brokerUrl, topics, storageLevel)
+   }
+
+/**
+ * Create an input stream that receives messages pushed by a MQTT publisher.
+ *
+ * @param ssc               StreamingContext object
+ * @param brokerUrl         Url of remote MQTT publisher
+ * @param topics            Array of topic names to subscribe to
+ * @param storageLevel      RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
+ * @param clientId          ClientId to use for the mqtt connection
+ * @param username          Username for authentication to the mqtt publisher
+ * @param password          Password for authentication to the mqtt publisher
+ * @param cleanSession      Sets the mqtt cleanSession parameter
+ * @param qos               Quality of service to use for the topic subscription
+ * @param connectionTimeout Connection timeout for the mqtt connection
+ * @param keepAliveInterval Keepalive interal for the mqtt connection
+ * @param mqttVersion       Version to use for the mqtt connection
+ */
   def createPairedStream(
       ssc: StreamingContext,
       brokerUrl: String,
@@ -246,57 +263,130 @@ object MQTTUtils {
       mqttVersion: Option[Int]
     ): ReceiverInputDStream[(String, String)] = {
     new MQTTPairedInputDStream(ssc, brokerUrl, topics, storageLevel, clientId, username, password,
-          cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
+      cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
   }
 
-  /**
-   * Create an input stream that receives messages pushed by a MQTT publisher.
-   * Storage level of the data will be the default StorageLevel.MEMORY_AND_DISK_SER_2.
-   * @param jssc      JavaStreamingContext object
-   * @param brokerUrl Url of remote MQTT publisher
-   * @param topics    Array of topic names to subscribe to
-   */
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  *
+  * @param ssc               StreamingContext object
+  * @param brokerUrl         Url of remote MQTT publisher
+  * @param topics            Array of topic names to subscribe to
+  * @param storageLevel      RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
+  * @param clientId          ClientId to use for the mqtt connection
+  * @param username          Username for authentication to the mqtt publisher
+  * @param password          Password for authentication to the mqtt publisher
+  * @param cleanSession      Sets the mqtt cleanSession parameter
+  * @param qos               Quality of service to use for the topic subscription
+  * @param connectionTimeout Connection timeout for the mqtt connection
+  * @param keepAliveInterval Keepalive interal for the mqtt connection
+  * @param mqttVersion       Version to use for the mqtt connection
+  */
+  def createPairedByteArrayStream(
+                                   ssc: StreamingContext,
+                                   brokerUrl: String,
+                                   topics: Array[String],
+                                   storageLevel: StorageLevel,
+                                   clientId: Option[String],
+                                   username: Option[String],
+                                   password: Option[String],
+                                   cleanSession: Option[Boolean],
+                                   qos: Option[Int],
+                                   connectionTimeout: Option[Int],
+                                   keepAliveInterval: Option[Int],
+                                   mqttVersion: Option[Int]
+                                 ): ReceiverInputDStream[(String, Array[Byte])] = {
+    new MQTTPairedByteArrayInputDStream(ssc, brokerUrl, topics, storageLevel,
+      clientId, username, password, cleanSession, qos, connectionTimeout,
+      keepAliveInterval, mqttVersion)
+  }
+
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  * Storage level of the data will be the default StorageLevel.MEMORY_AND_DISK_SER_2.
+  *
+  * @param jssc      JavaStreamingContext object
+  * @param brokerUrl Url of remote MQTT publisher
+  * @param topics    Array of topic names to subscribe to
+  */
   def createPairedStream(
-      jssc: JavaStreamingContext,
-      brokerUrl: String,
-      topics: Array[String]
-    ): JavaReceiverInputDStream[(String, String)] = {
+                          jssc: JavaStreamingContext,
+                          brokerUrl: String,
+                          topics: Array[String]
+                        ): JavaReceiverInputDStream[(String, String)] = {
     implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
     createPairedStream(jssc.ssc, brokerUrl, topics)
   }
 
-  /**
-   * Create an input stream that receives messages pushed by a MQTT publisher.
-   * @param jssc          JavaStreamingContext object
-   * @param brokerUrl     Url of remote MQTT publisher
-   * @param topics        Array of topic names to subscribe to
-   * @param storageLevel  RDD storage level.
-   */
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  * Storage level of the data will be the default StorageLevel.MEMORY_AND_DISK_SER_2.
+  *
+  * @param jssc      JavaStreamingContext object
+  * @param brokerUrl Url of remote MQTT publisher
+  * @param topics    Array of topic names to subscribe to
+  */
+  def createPairedByteArrayStream(
+                                   jssc: JavaStreamingContext,
+                                   brokerUrl: String,
+                                   topics: Array[String]
+                                 ): JavaReceiverInputDStream[(String, Array[Byte])] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedByteArrayStream(jssc.ssc, brokerUrl, topics)
+  }
+
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  *
+  * @param jssc         JavaStreamingContext object
+  * @param brokerUrl    Url of remote MQTT publisher
+  * @param topics       Array of topic names to subscribe to
+  * @param storageLevel RDD storage level.
+  */
   def createPairedStream(
-      jssc: JavaStreamingContext,
-      brokerUrl: String,
-      topics: Array[String],
-      storageLevel: StorageLevel
-    ): JavaReceiverInputDStream[(String, String)] = {
+                          jssc: JavaStreamingContext,
+                          brokerUrl: String,
+                          topics: Array[String],
+                          storageLevel: StorageLevel
+                        ): JavaReceiverInputDStream[(String, String)] = {
     implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
     createPairedStream(jssc.ssc, brokerUrl, topics, storageLevel)
   }
 
-  /**
-   * Create an input stream that receives messages pushed by a MQTT publisher.
-   * @param jssc               JavaStreamingContext object
-   * @param brokerUrl          Url of remote MQTT publisher
-   * @param topics             Array of topic names to subscribe to
-   * @param storageLevel       RDD storage level.
-   * @param clientId           ClientId to use for the mqtt connection
-   * @param username           Username for authentication to the mqtt publisher
-   * @param password           Password for authentication to the mqtt publisher
-   * @param cleanSession       Sets the mqtt cleanSession parameter
-   * @param qos                Quality of service to use for the topic subscription
-   * @param connectionTimeout  Connection timeout for the mqtt connection
-   * @param keepAliveInterval  Keepalive interal for the mqtt connection
-   * @param mqttVersion        Version to use for the mqtt connection
-   */
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+ *
+ * @param jssc         JavaStreamingContext object
+ * @param brokerUrl    Url of remote MQTT publisher
+ * @param topics       Array of topic names to subscribe to
+ * @param storageLevel RDD storage level.
+ */
+  def createPairedByteArrayStream(
+                                   jssc: JavaStreamingContext,
+                                   brokerUrl: String,
+                                   topics: Array[String],
+                                   storageLevel: StorageLevel
+                                 ): JavaReceiverInputDStream[(String, Array[Byte])] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedByteArrayStream(jssc.ssc, brokerUrl, topics, storageLevel)
+  }
+
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  *
+  * @param jssc              JavaStreamingContext object
+  * @param brokerUrl         Url of remote MQTT publisher
+  * @param topics            Array of topic names to subscribe to
+  * @param storageLevel      RDD storage level.
+  * @param clientId          ClientId to use for the mqtt connection
+  * @param username          Username for authentication to the mqtt publisher
+  * @param password          Password for authentication to the mqtt publisher
+  * @param cleanSession      Sets the mqtt cleanSession parameter
+  * @param qos               Quality of service to use for the topic subscription
+  * @param connectionTimeout Connection timeout for the mqtt connection
+  * @param keepAliveInterval Keepalive interal for the mqtt connection
+  * @param mqttVersion       Version to use for the mqtt connection
+  */
   def createPairedStream(
       jssc: JavaStreamingContext,
       brokerUrl: String,
@@ -317,20 +407,57 @@ object MQTTUtils {
         Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
   }
 
-  /**
-   * Create an input stream that receives messages pushed by a MQTT publisher.
-   * @param jssc               JavaStreamingContext object
-   * @param brokerUrl          Url of remote MQTT publisher
-   * @param topics             Array of topic names to subscribe to
-   * @param clientId           ClientId to use for the mqtt connection
-   * @param username           Username for authentication to the mqtt publisher
-   * @param password           Password for authentication to the mqtt publisher
-   * @param cleanSession       Sets the mqtt cleanSession parameter
-   * @param qos                Quality of service to use for the topic subscription
-   * @param connectionTimeout  Connection timeout for the mqtt connection
-   * @param keepAliveInterval  Keepalive interal for the mqtt connection
-   * @param mqttVersion        Version to use for the mqtt connection
-   */
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  *
+  * @param jssc              JavaStreamingContext object
+  * @param brokerUrl         Url of remote MQTT publisher
+  * @param topics            Array of topic names to subscribe to
+  * @param storageLevel      RDD storage level.
+  * @param clientId          ClientId to use for the mqtt connection
+  * @param username          Username for authentication to the mqtt publisher
+  * @param password          Password for authentication to the mqtt publisher
+  * @param cleanSession      Sets the mqtt cleanSession parameter
+  * @param qos               Quality of service to use for the topic subscription
+  * @param connectionTimeout Connection timeout for the mqtt connection
+  * @param keepAliveInterval Keepalive interal for the mqtt connection
+  * @param mqttVersion       Version to use for the mqtt connection
+  */
+  def createPairedByteArrayStream(
+                                   jssc: JavaStreamingContext,
+                                   brokerUrl: String,
+                                   topics: Array[String],
+                                   storageLevel: StorageLevel,
+                                   clientId: String,
+                                   username: String,
+                                   password: String,
+                                   cleanSession: Boolean,
+                                   qos: Int,
+                                   connectionTimeout: Int,
+                                   keepAliveInterval: Int,
+                                   mqttVersion: Int
+                                 ): JavaReceiverInputDStream[(String, Array[Byte])] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedByteArrayStream(jssc.ssc, brokerUrl, topics, storageLevel, Option(clientId),
+      Option(username), Option(password), Option(cleanSession), Option(qos),
+      Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
+  }
+
+/**
+* Create an input stream that receives messages pushed by a MQTT publisher.
+  *
+  * @param jssc              JavaStreamingContext object
+  * @param brokerUrl         Url of remote MQTT publisher
+  * @param topics            Array of topic names to subscribe to
+  * @param clientId          ClientId to use for the mqtt connection
+  * @param username          Username for authentication to the mqtt publisher
+  * @param password          Password for authentication to the mqtt publisher
+  * @param cleanSession      Sets the mqtt cleanSession parameter
+  * @param qos               Quality of service to use for the topic subscription
+  * @param connectionTimeout Connection timeout for the mqtt connection
+  * @param keepAliveInterval Keepalive interal for the mqtt connection
+  * @param mqttVersion       Version to use for the mqtt connection
+  */
   def createPairedStream(
       jssc: JavaStreamingContext,
       brokerUrl: String,
@@ -346,20 +473,56 @@ object MQTTUtils {
     ): JavaReceiverInputDStream[(String, String)] = {
     implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
     createPairedStream(jssc.ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2,
-        Option(clientId), Option(username), Option(password), Option(cleanSession), Option(qos),
-        Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
+      Option(clientId), Option(username), Option(password), Option(cleanSession), Option(qos),
+      Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
   }
 
-  /**
-   * Create an input stream that receives messages pushed by a MQTT publisher.
-   * @param jssc               JavaStreamingContext object
-   * @param brokerUrl          Url of remote MQTT publisher
-   * @param topics             Array of topic names to subscribe to
-   * @param clientId           ClientId to use for the mqtt connection
-   * @param username           Username for authentication to the mqtt publisher
-   * @param password           Password for authentication to the mqtt publisher
-   * @param cleanSession       Sets the mqtt cleanSession parameter
-   */
+/**
+ * Create an input stream that receives messages pushed by a MQTT publisher.
+ *
+ * @param jssc              JavaStreamingContext object
+ * @param brokerUrl         Url of remote MQTT publisher
+ * @param topics            Array of topic names to subscribe to
+ * @param clientId          ClientId to use for the mqtt connection
+ * @param username          Username for authentication to the mqtt publisher
+ * @param password          Password for authentication to the mqtt publisher
+ * @param cleanSession      Sets the mqtt cleanSession parameter
+ * @param qos               Quality of service to use for the topic subscription
+ * @param connectionTimeout Connection timeout for the mqtt connection
+ * @param keepAliveInterval Keepalive interal for the mqtt connection
+ * @param mqttVersion       Version to use for the mqtt connection
+ */
+  def createPairedByteArrayStream(
+                                   jssc: JavaStreamingContext,
+                                   brokerUrl: String,
+                                   topics: Array[String],
+                                   clientId: String,
+                                   username: String,
+                                   password: String,
+                                   cleanSession: Boolean,
+                                   qos: Int,
+                                   connectionTimeout: Int,
+                                   keepAliveInterval: Int,
+                                   mqttVersion: Int
+                                 ): JavaReceiverInputDStream[(String, Array[Byte])] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedByteArrayStream(jssc.ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2,
+      Option(clientId), Option(username), Option(password), Option(cleanSession), Option(qos),
+      Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
+  }
+
+
+/**
+ * Create an input stream that receives messages pushed by a MQTT publisher.
+ *
+ * @param jssc         JavaStreamingContext object
+ * @param brokerUrl    Url of remote MQTT publisher
+ * @param topics       Array of topic names to subscribe to
+ * @param clientId     ClientId to use for the mqtt connection
+ * @param username     Username for authentication to the mqtt publisher
+ * @param password     Password for authentication to the mqtt publisher
+ * @param cleanSession Sets the mqtt cleanSession parameter
+ */
   def createPairedStream(
       jssc: JavaStreamingContext,
       brokerUrl: String,
@@ -368,12 +531,40 @@ object MQTTUtils {
       username: String,
       password: String,
       cleanSession: Boolean
-    ): JavaReceiverInputDStream[(String, String)] = {
+      ): JavaReceiverInputDStream[(String, String)] = {
     implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
     createPairedStream(jssc.ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2,
-        Option(clientId), Option(username), Option(password), Option(cleanSession), None,
-        None, None, None)
+      Option(clientId), Option(username), Option(password), Option(cleanSession), None,
+      None, None, None)
   }
+
+
+/**
+ * Create an input stream that receives messages pushed by a MQTT publisher.
+ *
+ * @param jssc         JavaStreamingContext object
+ * @param brokerUrl    Url of remote MQTT publisher
+ * @param topics       Array of topic names to subscribe to
+ * @param clientId     ClientId to use for the mqtt connection
+ * @param username     Username for authentication to the mqtt publisher
+ * @param password     Password for authentication to the mqtt publisher
+ * @param cleanSession Sets the mqtt cleanSession parameter
+ */
+  def createPairedByteArrayStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      clientId: String,
+      username: String,
+      password: String,
+      cleanSession: Boolean
+      ): JavaReceiverInputDStream[(String, Array[Byte])] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedByteArrayStream(jssc.ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2,
+      Option(clientId), Option(username), Option(password), Option(cleanSession), None,
+      None, None, None)
+  }
+
 }
 
 /**
@@ -398,4 +589,13 @@ private[mqtt] class MQTTUtilsPythonHelper {
     ): JavaDStream[(String, String)] = {
     MQTTUtils.createPairedStream(jssc, brokerUrl, topics, storageLevel)
   }
+
+  def createPairedByteArrayStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      storageLevel: StorageLevel
+      ): JavaDStream[(String, Array[Byte])] = {
+    MQTTUtils.createPairedByteArrayStream(jssc, brokerUrl, topics, storageLevel)
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java b/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
index d320595..e30d187 100644
--- a/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
+++ b/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
@@ -49,8 +49,26 @@ public class JavaMQTTStreamSuite extends LocalJavaStreamingContext {
       brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2(), "testid", "user",
       "password", true, 1, 10, 30, 3);
     JavaReceiverInputDStream<Tuple2<String, String>> test9 = MQTTUtils.createPairedStream(ssc,
-      brokerUrl, topics, "testid", "user", "password", true, 1, 10, 30, 3);
+      brokerUrl, topics, "testid", "user", "password", true, 1,
+      10, 30, 3);
     JavaReceiverInputDStream<Tuple2<String, String>> test10 = MQTTUtils.createPairedStream(ssc,
       brokerUrl, topics, "testid", "user", "password", true);
+    JavaReceiverInputDStream<Tuple2<String, byte[]>> test11 =
+            MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topics);
+    JavaReceiverInputDStream<Tuple2<String, byte[]>> test12 =
+            MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topics,
+            StorageLevel.MEMORY_AND_DISK_SER_2());
+    JavaReceiverInputDStream<Tuple2<String, byte[]>> test13 =
+            MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topics,
+            StorageLevel.MEMORY_AND_DISK_SER_2(), "testid", "user",
+            "password", true, 1, 10, 30, 3);
+    JavaReceiverInputDStream<Tuple2<String, byte[]>> test14 =
+            MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topics,
+            "testid", "user", "password", true,
+             1, 10, 30, 3);
+    JavaReceiverInputDStream<Tuple2<String, byte[]>> test15 =
+            MQTTUtils.createPairedByteArrayStream(ssc, brokerUrl, topics, "testid",
+            "user", "password", true);
+
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e3d9e696/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
index f1d9a20..6ef551b 100644
--- a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
+++ b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
@@ -101,4 +101,30 @@ class MQTTStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfter
     }
     ssc.stop()
   }
+
+  test("mqtt input stream3") {
+    val sendMessage1 = "MQTT demo for spark streaming1"
+    val sendMessage2 = "MQTT demo for spark streaming2"
+    val receiveStream2 = MQTTUtils.createPairedByteArrayStream(ssc,
+      "tcp://" + mqttTestUtils.brokerUri, topics, StorageLevel.MEMORY_ONLY)
+
+    @volatile var receiveMessage: List[String] = List()
+    receiveStream2.foreachRDD { rdd =>
+      if (rdd.collect.length > 0) {
+        receiveMessage = receiveMessage ::: List(new String(rdd.first()._2))
+        receiveMessage
+      }
+    }
+
+    ssc.start()
+
+    // Retry it because we don't know when the receiver will start.
+    eventually(timeout(10000 milliseconds), interval(100 milliseconds)) {
+      mqttTestUtils.publishData(topics(0), sendMessage1)
+      mqttTestUtils.publishData(topics(1), sendMessage2)
+      assert(receiveMessage.contains(sendMessage1)||receiveMessage.contains(sendMessage2))
+    }
+    ssc.stop()
+  }
+
 }
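
The hunks above round out the byte-array variant of the paired MQTT stream: each record is a (topic, payload) pair with the payload kept as raw bytes instead of being converted to a String. A minimal usage sketch built only on the signatures exercised in the tests above (the broker URL, topic names and application name are placeholders):

    import org.apache.spark.SparkConf
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.mqtt.MQTTUtils

    object PairedByteArrayStreamExample {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[2]").setAppName("mqtt-byte-array-example")
        val ssc = new StreamingContext(conf, Seconds(1))
        // Each record is (topic, raw payload bytes), so binary MQTT payloads are not
        // forced through a String round-trip.
        val stream = MQTTUtils.createPairedByteArrayStream(
          ssc, "tcp://localhost:1883", Array("topicA", "topicB"), StorageLevel.MEMORY_ONLY)
        stream.map { case (topic, payload) => (topic, payload.length) }.print()
        ssc.start()
        ssc.awaitTermination()
      }
    }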


[40/50] [abbrv] incubator-livy-website git commit: [MINOR] Add checkpoint directory to git ignore configuration

Posted by lr...@apache.org.
[MINOR] Add checkpoint directory to git ignore configuration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/38c15782
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/38c15782
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/38c15782

Branch: refs/heads/master
Commit: 38c15782d12e7a4bc20d170624f4d36b4f0ed6e4
Parents: 5661326
Author: Luciano Resende <lr...@apache.org>
Authored: Tue Jun 6 21:10:27 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Tue Jun 6 21:10:27 2017 -0700

----------------------------------------------------------------------
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/38c15782/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index fb6d3b7..2aaac58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ target/
 .checkstyle
 .fbExcludeFilterFile
 dependency-reduced-pom.xml
+checkpoint


[07/50] [abbrv] incubator-livy-website git commit: [BAHIR-64] add Akka streaming test (send/receive)

Posted by lr...@apache.org.
[BAHIR-64] add Akka streaming test (send/receive)

This PR adds the test suite AkkaStreamSuite.scala to the
streaming-akka connector to verify that data is sent and
received correctly.

Closes #24


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/d43dad21
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/d43dad21
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/d43dad21

Branch: refs/heads/master
Commit: d43dad21963d2ba338acc44d6233ff020cef7d38
Parents: 50ecf20
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Wed Sep 28 12:41:35 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Nov 17 13:05:16 2016 +0100

----------------------------------------------------------------------
 NOTICE                                          |   2 +-
 .../spark/streaming/akka/ActorReceiver.scala    |   1 +
 .../spark/streaming/akka/AkkaStreamSuite.scala  | 106 +++++++++++++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d43dad21/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 8bf7751..7067f1e 100644
--- a/NOTICE
+++ b/NOTICE
@@ -2,4 +2,4 @@ Apache Bahir
 Copyright (c) 2016 The Apache Software Foundation.
 
 This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
\ No newline at end of file
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d43dad21/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
index e3be880..d30e380 100644
--- a/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
+++ b/streaming-akka/src/main/scala/org/apache/spark/streaming/akka/ActorReceiver.scala
@@ -65,6 +65,7 @@ object ActorReceiver {
     val akkaConf = ConfigFactory.parseString(
       s"""akka.actor.provider = "akka.remote.RemoteActorRefProvider"
          |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
+         |akka.remote.netty.tcp.port = "0"
          |""".stripMargin)
     ActorSystem(uniqueSystemName, akkaConf)
   }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/d43dad21/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaStreamSuite.scala b/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaStreamSuite.scala
new file mode 100644
index 0000000..e52bf0e
--- /dev/null
+++ b/streaming-akka/src/test/scala/org/apache/spark/streaming/akka/AkkaStreamSuite.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.akka
+
+import java.util.concurrent.ConcurrentLinkedQueue
+
+import scala.collection.JavaConverters._
+import scala.concurrent.duration._
+
+import akka.actor._
+import com.typesafe.config.ConfigFactory
+import org.scalatest.BeforeAndAfter
+import org.scalatest.concurrent.Eventually
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.streaming.{Milliseconds, StreamingContext}
+
+class AkkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfter {
+
+  private var ssc: StreamingContext = _
+
+  private var actorSystem: ActorSystem = _
+
+  after {
+    if (ssc != null) {
+      ssc.stop()
+      ssc = null
+    }
+    if (actorSystem != null) {
+      actorSystem.shutdown()
+      actorSystem.awaitTermination(30.seconds)
+      actorSystem = null
+    }
+  }
+
+  test("actor input stream") {
+    val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName)
+    ssc = new StreamingContext(sparkConf, Milliseconds(500))
+
+    // we set the TCP port to "0" to have the port chosen automatically for the Feeder actor and
+    // the Receiver actor will "pick it up" from the Feeder URI when it subscribes to the Feeder
+    // actor (http://doc.akka.io/docs/akka/2.3.11/scala/remoting.html)
+    val akkaConf = ConfigFactory.parseMap(
+      Map(
+        "akka.actor.provider" -> "akka.remote.RemoteActorRefProvider",
+        "akka.remote.netty.tcp.transport-class" -> "akka.remote.transport.netty.NettyTransport",
+        "akka.remote.netty.tcp.port" -> "0").
+        asJava)
+    actorSystem = ActorSystem("test", akkaConf)
+    actorSystem.actorOf(Props(classOf[FeederActor]), "FeederActor")
+    val feederUri =
+      actorSystem.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress + "/user/FeederActor"
+
+    val actorStream =
+      AkkaUtils.createStream[String](ssc, Props(classOf[TestActorReceiver], feederUri),
+        "TestActorReceiver")
+    val result = new ConcurrentLinkedQueue[String]
+    actorStream.foreachRDD { rdd =>
+      rdd.collect().foreach(result.add)
+    }
+    ssc.start()
+
+    eventually(timeout(10.seconds), interval(10.milliseconds)) {
+      assert((1 to 10).map(_.toString) === result.asScala.toList)
+    }
+  }
+}
+
+case class SubscribeReceiver(receiverActor: ActorRef)
+
+class FeederActor extends Actor {
+
+  def receive: Receive = {
+    case SubscribeReceiver(receiverActor: ActorRef) =>
+      (1 to 10).foreach(i => receiverActor ! i.toString())
+  }
+}
+
+class TestActorReceiver(uriOfPublisher: String) extends ActorReceiver {
+
+  lazy private val remotePublisher = context.actorSelection(uriOfPublisher)
+
+  override def preStart(): Unit = {
+    remotePublisher ! SubscribeReceiver(self)
+  }
+
+  def receive: PartialFunction[Any, Unit] = {
+    case msg: String => store(msg)
+  }
+
+}


[10/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/25c1ca22
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/25c1ca22
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/25c1ca22

Branch: refs/heads/master
Commit: 25c1ca22bc64064c219cc5c56fc64a9d699e296b
Parents: 1c411ea
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Dec 3 09:35:49 2016 -0800
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Dec 3 09:35:49 2016 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 713a95c..186dafd 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.0.2</version>
+        <version>2.1.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index cc531d1..a7ae91e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.0.2</version>
+  <version>2.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.0.2-rc1</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 118a98f..678aeee 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index a7f2670..b108502 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 5c0f1a6..8cdba98 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 608efa0..7647ba8 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/25c1ca22/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 07dce62..f9ae3bc 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[13/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.0.2-rc2

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.0.2-rc2


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/dd823638
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/dd823638
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/dd823638

Branch: refs/heads/master
Commit: dd82363878a6bb4a19aade8010c01ddfbf254aff
Parents: 461a85c
Author: Luciano Resende <lr...@apache.org>
Authored: Tue Jan 10 18:21:53 2017 -0800
Committer: Luciano Resende <lr...@apache.org>
Committed: Tue Jan 10 18:21:53 2017 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 186dafd..713a95c 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.0.2</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a7ae91e..8ba1cc1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.0.2-rc2</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 678aeee..118a98f 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index b108502..a7f2670 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 8cdba98..5c0f1a6 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 7647ba8..608efa0 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dd823638/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index f9ae3bc..07dce62 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[02/50] [abbrv] incubator-livy-website git commit: [BAHIR-62] Prepare release based on Apache Spark 2.0.1

Posted by lr...@apache.org.
[BAHIR-62] Prepare release based on Apache Spark 2.0.1


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/cc9cf1ba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/cc9cf1ba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/cc9cf1ba

Branch: refs/heads/master
Commit: cc9cf1ba2097fad00f8ff173434bfdabf4e3f10c
Parents: 415576b
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Oct 15 09:49:53 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Oct 15 09:49:53 2016 -0700

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/cc9cf1ba/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index cf58de1..65e6462 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,7 +96,7 @@
     <log4j.version>1.2.17</log4j.version>
 
     <!-- Spark version -->
-    <spark.version>2.1.0-SNAPSHOT</spark.version>
+    <spark.version>2.0.1</spark.version>
 
     <!-- Streaming Akka connector -->
     <akka.group>com.typesafe.akka</akka.group>


[34/50] [abbrv] incubator-livy-website git commit: [MINOR] Tweaking assembly naming

Posted by lr...@apache.org.
[MINOR] Tweaking assembly naming


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/abfdc706
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/abfdc706
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/abfdc706

Branch: refs/heads/master
Commit: abfdc706f674f08b64b980ef98f0c467ac719561
Parents: b328233
Author: Luciano Resende <lr...@apache.org>
Authored: Thu Apr 13 12:05:23 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Apr 13 12:05:23 2017 -0700

----------------------------------------------------------------------
 distribution/pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/abfdc706/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 4ff93fc..77104d1 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -26,9 +26,9 @@
     </parent>
 
     <groupId>org.apache.bahir</groupId>
-    <artifactId>assembly_2.11</artifactId>
+    <artifactId>bahir-spark-distribution_2.11</artifactId>
     <packaging>pom</packaging>
-    <name>Apache Bahir - Release Distributions</name>
+    <name>Apache Bahir - Spark Extensions Distribution</name>
     <url>http://bahir.apache.org/</url>
 
     <dependencies>


[28/50] [abbrv] incubator-livy-website git commit: [MINOR] Re-add .gitignore and .gitattributes

Posted by lr...@apache.org.
[MINOR] Re-add .gitignore and .gitattributes

The two files were accidentally removed in commit 826545c


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/2ebfd0b6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/2ebfd0b6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/2ebfd0b6

Branch: refs/heads/master
Commit: 2ebfd0b6af165d8d68158bb5d552692823872f25
Parents: 826545c
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Mon Mar 27 23:23:35 2017 -0700
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Mon Mar 27 23:23:35 2017 -0700

----------------------------------------------------------------------
 .gitattributes | 13 +++++++++++++
 .gitignore     | 24 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2ebfd0b6/.gitattributes
----------------------------------------------------------------------
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a8edefd
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,13 @@
+# Set the default behavior to have all files normalized to Unix-style
+# line endings upon check-in.
+* text=auto
+
+# Declare files that will always have CRLF line endings on checkout.
+*.bat text eol=crlf
+
+# Denote all files that are truly binary and should not be modified.
+*.dll binary
+*.exp binary
+*.lib binary
+*.pdb binary
+*.exe binary

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2ebfd0b6/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fb6d3b7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Mac
+.DS_Store
+
+# Eclipse
+.classpath
+.project
+.settings/
+target/
+
+# Intellij
+.idea/
+.idea_modules/
+*.iml
+*.iws
+*.class
+*.log
+
+# Python
+*.pyc
+
+# Others
+.checkstyle
+.fbExcludeFilterFile
+dependency-reduced-pom.xml


[12/50] [abbrv] incubator-livy-website git commit: [BAHIR-83] temporarily ignore flaky test case "Recovering offset from the last processed offset." in BasicMQTTSourceSuite

Posted by lr...@apache.org.
[BAHIR-83] temporarily ignore flaky test case "Recovering offset from the last processed offset." in BasicMQTTSourceSuite


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/461a85ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/461a85ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/461a85ce

Branch: refs/heads/master
Commit: 461a85ce7b0885b37a1bad2d42294007e4a91152
Parents: eb9136f
Author: Christian Kadner <ck...@apache.org>
Authored: Tue Jan 10 11:06:50 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Tue Jan 10 11:06:50 2017 -0800

----------------------------------------------------------------------
 .../apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/461a85ce/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index 4c1c75f..111a44b 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -143,7 +143,8 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
     }
   }
 
-  test("Recovering offset from the last processed offset.") {
+  // TODO: reinstate this test after fixing BAHIR-83
+  ignore("Recovering offset from the last processed offset.") {
     val sendMessage = "MQTT is a message queue."
 
     import scala.concurrent.ExecutionContext.Implicits.global
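
The change swaps ScalaTest's test(...) for ignore(...), which keeps the flaky case compiled and reported as ignored without running it until BAHIR-83 is resolved. A small illustrative sketch of the pattern (the suite name and assertions below are made up, not taken from the Bahir code base):

    import org.scalatest.FunSuite

    class ExampleSuite extends FunSuite {
      // Reported as ignored in the test summary; the body is never executed.
      ignore("Recovering offset from the last processed offset.") {
        fail("flaky behaviour tracked separately")
      }

      test("a stable case that still runs") {
        assert(1 + 1 == 2)
      }
    }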


[09/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.0.2-rc1

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.0.2-rc1


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/1c411ea2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/1c411ea2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/1c411ea2

Branch: refs/heads/master
Commit: 1c411ea22e60e3f7b3e93fdbc6e2351cda816c90
Parents: 08aa06c
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Dec 3 09:35:35 2016 -0800
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Dec 3 09:35:35 2016 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 186dafd..713a95c 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.0.2</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a7ae91e..cc531d1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.0.2-rc1</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 678aeee..118a98f 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index b108502..a7f2670 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 8cdba98..5c0f1a6 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 7647ba8..608efa0 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/1c411ea2/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index f9ae3bc..07dce62 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[19/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.0.2-rc3

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.0.2-rc3


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/477ca1b5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/477ca1b5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/477ca1b5

Branch: refs/heads/master
Commit: 477ca1b556d2486b8c9ddbfb7a327ed8245066c4
Parents: 8ad556e
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Fri Jan 20 16:43:42 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Jan 20 16:43:42 2017 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 186dafd..713a95c 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.0.2</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e7cb338..fe2fd70 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.0-SNAPSHOT</version>
+  <version>2.0.2</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.0.2-rc3</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 678aeee..118a98f 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index b108502..a7f2670 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 8cdba98..5c0f1a6 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 7647ba8..608efa0 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/477ca1b5/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index f9ae3bc..07dce62 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.2</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[36/50] [abbrv] incubator-livy-website git commit: [MINOR] Update Spark dependency to release 2.1.1

Posted by lr...@apache.org.
[MINOR] Update Spark dependency to release 2.1.1


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/68ed2d44
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/68ed2d44
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/68ed2d44

Branch: refs/heads/master
Commit: 68ed2d448c91ca2a79697649d8bae7b94c3a05bc
Parents: fd4c35f
Author: Luciano Resende <lr...@apache.org>
Authored: Thu May 18 00:42:17 2017 -0400
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu May 18 00:42:17 2017 -0400

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/68ed2d44/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 65129cd..f76aac5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -98,7 +98,7 @@
     <log4j.version>1.2.17</log4j.version>
 
     <!-- Spark version -->
-    <spark.version>2.1.0</spark.version>
+    <spark.version>2.1.1</spark.version>
 
     <!-- Streaming Akka connector -->
     <akka.group>com.typesafe.akka</akka.group>


[38/50] [abbrv] incubator-livy-website git commit: [BAHIR-120] Akka SQL Streaming build fails with Apache Spark 2.1.1

Posted by lr...@apache.org.
[BAHIR-120] Akka SQL Streaming build fails with Apache Spark 2.1.1

Closes #44.


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/2a430765
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/2a430765
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/2a430765

Branch: refs/heads/master
Commit: 2a43076579dd247db4afa01e2dc0d1176c3eb4a1
Parents: 86ded93
Author: Subhobrata Dey <sb...@gmail.com>
Authored: Tue May 30 23:43:39 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed May 31 09:57:15 2017 -0700

----------------------------------------------------------------------
 .../apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/2a430765/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
index a04dc66..5e9b86e 100644
--- a/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
+++ b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
@@ -40,11 +40,10 @@ class AkkaStreamSourceSuite extends SparkFunSuite with BeforeAndAfter {
   private val conf = new SparkConf().setMaster("local[4]").setAppName("AkkaStreamSourceSuite")
   protected val spark = SparkSession.builder().config(conf).getOrCreate()
 
-  akkaTestUtils = new AkkaTestUtils
-  akkaTestUtils.setup()
-
   before {
     tempDir.mkdirs()
+    akkaTestUtils = new AkkaTestUtils
+    akkaTestUtils.setup()
   }
 
   after {
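
The fix moves the AkkaTestUtils construction and setup out of the suite body and into the before block, so the Akka fixture is initialized per test rather than during suite construction. A generic sketch of that ScalaTest fixture pattern (the StringBuilder merely stands in for a heavier fixture such as AkkaTestUtils):

    import org.scalatest.{BeforeAndAfter, FunSuite}

    class FixtureLifecycleSuite extends FunSuite with BeforeAndAfter {
      // Stand-in for a heavier fixture such as AkkaTestUtils.
      private var fixture: StringBuilder = _

      before {
        fixture = new StringBuilder("ready")   // initialized before each test, not at construction
      }

      after {
        fixture = null                         // torn down after each test
      }

      test("the fixture is available inside the test body") {
        assert(fixture.toString == "ready")
      }
    }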


[17/50] [abbrv] incubator-livy-website git commit: [BAHIR-84] Suppress Parquet-MR build log messages

Posted by lr...@apache.org.
[BAHIR-84] Suppress Parquet-MR build log messages

Since Parquet-MR (1.7.0) uses java.util.logging (not Log4j), we
need to add a logging.properties file and reference it in the
configuration of the maven-surefire-plugin and scalatest-maven-plugin.
Since Parquet-MR logs everything to System.out regardless of the file
handler settings, we raise its logger threshold to SEVERE (the
java.util.logging equivalent of ERROR).

https://github.com/Parquet/parquet-mr/issues/390
https://github.com/Parquet/parquet-mr/issues/425

Closes #33


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/bce9cd15
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/bce9cd15
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/bce9cd15

Branch: refs/heads/master
Commit: bce9cd15989e648be03468c6a5b848ee8193df4d
Parents: 560a799
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Thu Jan 12 00:07:02 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Thu Jan 19 19:14:30 2017 -0800

----------------------------------------------------------------------
 pom.xml                                         |  2 +
 .../src/test/resources/logging.properties       | 70 ++++++++++++++++++++
 2 files changed, 72 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bce9cd15/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a7ae91e..e7cb338 100644
--- a/pom.xml
+++ b/pom.xml
@@ -565,6 +565,7 @@
             </environmentVariables>
             <systemProperties>
               <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+              <java.util.logging.config.file>${basedir}/src/test/resources/logging.properties</java.util.logging.config.file>
               <derby.system.durability>test</derby.system.durability>
               <java.awt.headless>true</java.awt.headless>
               <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
@@ -614,6 +615,7 @@
             </environmentVariables>
             <systemProperties>
               <log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
+              <java.util.logging.config.file>${basedir}/src/test/resources/logging.properties</java.util.logging.config.file>
               <derby.system.durability>test</derby.system.durability>
               <java.awt.headless>true</java.awt.headless>
               <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bce9cd15/sql-streaming-mqtt/src/test/resources/logging.properties
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/test/resources/logging.properties b/sql-streaming-mqtt/src/test/resources/logging.properties
new file mode 100644
index 0000000..dd2cfc5
--- /dev/null
+++ b/sql-streaming-mqtt/src/test/resources/logging.properties
@@ -0,0 +1,70 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+############################################################
+# Global properties
+############################################################
+
+# "handlers" specifies a comma separated list of log Handler
+# classes.  These handlers will be installed during VM startup.
+# Note that these classes must be on the system classpath.
+# By default we only configure a ConsoleHandler, which will only
+# show messages at the INFO and above levels.
+handlers = java.util.logging.ConsoleHandler
+
+# To also add the FileHandler, use the following line instead.
+#handlers = java.util.logging.FileHandler, java.util.logging.ConsoleHandler
+
+# Default global logging level.
+# This specifies which kinds of events are logged across
+# all loggers.  For any given facility this global level
+# can be overriden by a facility specific level
+# Note that the ConsoleHandler also has a separate level
+# setting to limit messages printed to the console.
+.level = INFO
+
+############################################################
+# Handler specific properties.
+# Describes specific configuration info for Handlers.
+############################################################
+
+# Log file output is in target directory.
+java.util.logging.FileHandler.pattern = target/unit-tests-java-%u.log
+java.util.logging.FileHandler.limit = 50000
+java.util.logging.FileHandler.count = 1
+java.util.logging.FileHandler.formatter = java.util.logging.XMLFormatter
+
+# Limit the message that are printed on the console to WARNING and above.
+java.util.logging.ConsoleHandler.level = WARNING
+java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
+
+# Example to customize the SimpleFormatter output format
+# to print one-line log message like this:
+#     <level>: <log message> [<date/time>]
+#
+# java.util.logging.SimpleFormatter.format=%4$s: %5$s [%1$tc]%n
+
+############################################################
+# Facility specific properties.
+# Provides extra control for each logger.
+############################################################
+
+# [BAHIR-] don't flood build logs with superfluous Parquet INFO messages
+# they should be written to a file via FileHandler but they end up in the
+# build log anyhow irrespective of the ConsoleHandler log level
+# also see https://github.com/Parquet/parquet-mr/issues/425
+org.apache.parquet.hadoop.level=SEVERE
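
For context: java.util.logging reads the file named by the java.util.logging.config.file system property (set for surefire and scalatest in the pom.xml hunk above), and the per-package line org.apache.parquet.hadoop.level=SEVERE drops anything below SEVERE from those loggers. A small sketch of the effect (hypothetical object name; run with -Djava.util.logging.config.file=src/test/resources/logging.properties):

    import java.util.logging.Logger

    object JulThresholdCheck {
      def main(args: Array[String]): Unit = {
        val parquetLogger = Logger.getLogger("org.apache.parquet.hadoop")
        // Dropped: INFO is below the SEVERE level configured for org.apache.parquet.hadoop.
        parquetLogger.info("Parquet INFO chatter that would otherwise flood the build log")
        // Printed: SEVERE passes both the logger level and the console handler level.
        parquetLogger.severe("genuine Parquet error")
      }
    }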


[42/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.1.1-rc1

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.1.1-rc1


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/c5180d80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/c5180d80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/c5180d80

Branch: refs/heads/master
Commit: c5180d80aaab15ef07e39d6397ee4ac8dccd937a
Parents: ba68b35
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 20:18:43 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 20:18:43 2017 -0700

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-cloudant/pom.xml       | 2 +-
 sql-streaming-akka/pom.xml | 2 +-
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-pubsub/pom.xml   | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 77104d1..f617db7 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.2.0-SNAPSHOT</version>
+        <version>2.1.1</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 81f2e28..32d0b8a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.2.0-SNAPSHOT</version>
+  <version>2.1.1</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.1.1-rc1</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/sql-cloudant/pom.xml
----------------------------------------------------------------------
diff --git a/sql-cloudant/pom.xml b/sql-cloudant/pom.xml
index 5860033..bfd0571 100644
--- a/sql-cloudant/pom.xml
+++ b/sql-cloudant/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/sql-streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/pom.xml b/sql-streaming-akka/pom.xml
index 4d7040b..9e134d5 100644
--- a/sql-streaming-akka/pom.xml
+++ b/sql-streaming-akka/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.2.0-SNAPSHOT</version>
+        <version>2.1.1</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 4a01ef5..20518b0 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 36ce385..2e95ac0 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 2935f51..b5b9ac3 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/streaming-pubsub/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-pubsub/pom.xml b/streaming-pubsub/pom.xml
index c3da90f..1458a19 100644
--- a/streaming-pubsub/pom.xml
+++ b/streaming-pubsub/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>bahir-parent_2.11</artifactId>
     <groupId>org.apache.bahir</groupId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 949e4b3..8004aa2 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/c5180d80/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index db6616d..eb93b7b 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.2.0-SNAPSHOT</version>
+    <version>2.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[15/50] [abbrv] incubator-livy-website git commit: [BAHIR-83] ignore test case "Send and receive 100 messages."

Posted by lr...@apache.org.
[BAHIR-83] ignore test case "Send and receive 100 messages."


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/b7cb52ba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/b7cb52ba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/b7cb52ba

Branch: refs/heads/master
Commit: b7cb52bab5117910d7c69a41d738dd670447fd13
Parents: 2aad4e0
Author: Christian Kadner <ck...@apache.org>
Authored: Wed Jan 18 00:00:28 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Wed Jan 18 00:00:28 2017 -0800

----------------------------------------------------------------------
 .../apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/b7cb52ba/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
index 111a44b..f9a4bed 100644
--- a/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
+++ b/sql-streaming-mqtt/src/test/scala/org/apache/bahir/sql/streaming/mqtt/MQTTStreamSourceSuite.scala
@@ -99,7 +99,8 @@ class BasicMQTTSourceSuite extends MQTTStreamSourceSuite {
     assert(resultBuffer.head == sendMessage)
   }
 
-  test("Send and receive 100 messages.") {
+  // TODO: reinstate this test after fixing BAHIR-83
+  ignore("Send and receive 100 messages.") {
 
     val sendMessage = "MQTT is a message queue."
 


[37/50] [abbrv] incubator-livy-website git commit: [BAHIR-117] Expand filtering options for TwitterInputDStream

Posted by lr...@apache.org.
[BAHIR-117] Expand filtering options for TwitterInputDStream

Adds a new method to TwitterUtils that enables users to pass
an arbitrary FilterQuery down to the TwitterReceiver.

This enables use cases such as receiving Tweets based on location,
handle, and so on. Previously, users could only receive Tweets
matching disjunctive keyword queries.

Closes #43.
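
For reference, a minimal usage sketch of the new API, patterned after the
TwitterLocations example added in this commit. The app name, master setting,
and bounding-box coordinates are illustrative, and the twitter4j.oauth.*
system properties are assumed to already be set:

    import twitter4j.FilterQuery

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.twitter.TwitterUtils

    val ssc = new StreamingContext(
      new SparkConf().setAppName("FilteredTweets").setMaster("local[2]"), Seconds(2))

    // Bounding box (south-west and north-east corners) roughly covering New York City
    val nyc = new FilterQuery().locations(Array(-74.0, 40.0), Array(-73.0, 41.0))

    // Pass an arbitrary FilterQuery down to the TwitterReceiver
    TwitterUtils.createFilteredStream(ssc, None, Some(nyc))
      .map(_.getText)
      .print()

    ssc.start()
    ssc.awaitTermination()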


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/86ded930
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/86ded930
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/86ded930

Branch: refs/heads/master
Commit: 86ded930e4af769e8191c8f415fe48193dd4914b
Parents: 68ed2d4
Author: Clemens Wolff <cl...@microsoft.com>
Authored: Thu May 4 12:52:54 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed May 24 09:42:53 2017 -0700

----------------------------------------------------------------------
 .../streaming/akka/JavaActorWordCount.java      |  2 +-
 .../streaming/twitter/TwitterLocations.scala    | 92 ++++++++++++++++++++
 .../streaming/twitter/TwitterInputDStream.scala | 12 ++-
 .../spark/streaming/twitter/TwitterUtils.scala  | 46 +++++++++-
 .../twitter/JavaTwitterStreamSuite.java         |  4 +
 .../streaming/twitter/TwitterStreamSuite.scala  |  5 +-
 6 files changed, 150 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/86ded930/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
----------------------------------------------------------------------
diff --git a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
index 740f9f8..abc1f70 100644
--- a/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
+++ b/streaming-akka/examples/src/main/java/org/apache/spark/examples/streaming/akka/JavaActorWordCount.java
@@ -49,7 +49,7 @@ class JavaSampleActorReceiver<T> extends JavaActorReceiver {
 
   private final String urlOfPublisher;
 
-  public JavaSampleActorReceiver(String urlOfPublisher) {
+  JavaSampleActorReceiver(String urlOfPublisher) {
     this.urlOfPublisher = urlOfPublisher;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/86ded930/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterLocations.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterLocations.scala b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterLocations.scala
new file mode 100644
index 0000000..00859fe
--- /dev/null
+++ b/streaming-twitter/examples/src/main/scala/org/apache/spark/examples/streaming/twitter/TwitterLocations.scala
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming.twitter
+
+import org.apache.log4j.{Level, Logger}
+import twitter4j.FilterQuery
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.twitter._
+
+/**
+ * Illustrates the use of custom filter queries to get Tweets from one or more locations.
+ */
+object TwitterLocations {
+  def main(args: Array[String]) {
+    if (args.length < 4 || args.length % 4 != 0) {
+      System.err.println("Usage: TwitterLocations <consumer key> <consumer secret> " +
+        "<access token> <access token secret> " +
+        "[<latitude-south-west> <longitude-south-west>" +
+        " <latitude-north-east> <longitude-north-east> ...]")
+      System.exit(1)
+    }
+
+    // Set logging level if log4j not configured (override by adding log4j.properties to classpath)
+    if (!Logger.getRootLogger.getAllAppenders.hasMoreElements) {
+      Logger.getRootLogger.setLevel(Level.WARN)
+    }
+
+    // Set the system properties so that Twitter4j library used by twitter stream
+    // can use them to generate OAuth credentials
+    val Array(consumerKey, consumerSecret, accessToken, accessTokenSecret) = args.take(4)
+    System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
+    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
+    System.setProperty("twitter4j.oauth.accessToken", accessToken)
+    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret)
+
+    // Get bounding boxes of locations for which to retrieve Tweets from command line
+    val locationArgs = args.takeRight(args.length - 4)
+    val boundingBoxes = if (locationArgs.length == 0) {
+      System.out.println("No location bounding boxes specified, using defaults for New York City")
+      val nycSouthWest = Array(-74.0, 40.0)
+      val nycNorthEast = Array(-73.0, 41.0)
+      Array(nycSouthWest, nycNorthEast)
+    } else {
+      locationArgs.map(_.toDouble).sliding(2, 2).toArray
+    }
+
+    val sparkConf = new SparkConf().setAppName("TwitterLocations")
+
+    // check Spark configuration for master URL, set it to local if not configured
+    if (!sparkConf.contains("spark.master")) {
+      sparkConf.setMaster("local[2]")
+    }
+
+    val ssc = new StreamingContext(sparkConf, Seconds(2))
+    val locationsQuery = new FilterQuery().locations(boundingBoxes : _*)
+
+    // Print Tweets from the specified coordinates
+    // This includes Tweets geo-tagged in the bounding box defined by the coordinates
+    // As well as Tweets tagged in places inside of the bounding box
+    TwitterUtils.createFilteredStream(ssc, None, Some(locationsQuery))
+      .map(tweet => {
+        val latitude = Option(tweet.getGeoLocation).map(l => s"${l.getLatitude},${l.getLongitude}")
+        val place = Option(tweet.getPlace).map(_.getName)
+        val location = latitude.getOrElse(place.getOrElse("(no location)"))
+        val text = tweet.getText.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
+        s"$location\t$text"
+      })
+      .print()
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/86ded930/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index bd23a12..81ce60d 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -41,7 +41,7 @@ private[streaming]
 class TwitterInputDStream(
     _ssc: StreamingContext,
     twitterAuth: Option[Authorization],
-    filters: Seq[String],
+    query: Option[FilterQuery],
     storageLevel: StorageLevel
   ) extends ReceiverInputDStream[Status](_ssc)  {
 
@@ -52,14 +52,14 @@ class TwitterInputDStream(
   private val authorization = twitterAuth.getOrElse(createOAuthAuthorization())
 
   override def getReceiver(): Receiver[Status] = {
-    new TwitterReceiver(authorization, filters, storageLevel)
+    new TwitterReceiver(authorization, query, storageLevel)
   }
 }
 
 private[streaming]
 class TwitterReceiver(
     twitterAuth: Authorization,
-    filters: Seq[String],
+    query: Option[FilterQuery],
     storageLevel: StorageLevel
   ) extends Receiver[Status](storageLevel) with Logging {
 
@@ -85,10 +85,8 @@ class TwitterReceiver(
         }
       })
 
-      val query = new FilterQuery
-      if (filters.size > 0) {
-        query.track(filters.mkString(","))
-        newTwitterStream.filter(query)
+      if (query.isDefined) {
+        newTwitterStream.filter(query.get)
       } else {
         newTwitterStream.sample()
       }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/86ded930/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
index 9cb0106..b0e9b78 100644
--- a/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
+++ b/streaming-twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.streaming.twitter
 
-import twitter4j.Status
+import twitter4j.{FilterQuery, Status}
 import twitter4j.auth.Authorization
 
 import org.apache.spark.storage.StorageLevel
@@ -33,6 +33,25 @@ object TwitterUtils {
    *        authorization; this uses the system properties twitter4j.oauth.consumerKey,
    *        twitter4j.oauth.consumerSecret, twitter4j.oauth.accessToken and
    *        twitter4j.oauth.accessTokenSecret
+   * @param query A query to get only those tweets that match it
+   * @param storageLevel Storage level to use for storing the received objects
+   */
+  def createFilteredStream(
+      ssc: StreamingContext,
+      twitterAuth: Option[Authorization],
+      query: Option[FilterQuery] = None,
+      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2
+    ): ReceiverInputDStream[Status] = {
+    new TwitterInputDStream(ssc, twitterAuth, query, storageLevel)
+  }
+
+  /**
+   * Create an input stream that returns tweets received from Twitter.
+   * @param ssc         StreamingContext object
+   * @param twitterAuth Twitter4J authentication, or None to use Twitter4J's default OAuth
+   *        authorization; this uses the system properties twitter4j.oauth.consumerKey,
+   *        twitter4j.oauth.consumerSecret, twitter4j.oauth.accessToken and
+   *        twitter4j.oauth.accessTokenSecret
    * @param filters Set of filter strings to get only those tweets that match them
    * @param storageLevel Storage level to use for storing the received objects
    */
@@ -42,7 +61,11 @@ object TwitterUtils {
       filters: Seq[String] = Nil,
       storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2
     ): ReceiverInputDStream[Status] = {
-    new TwitterInputDStream(ssc, twitterAuth, filters, storageLevel)
+    val query = if (filters.nonEmpty) {
+      Some(new FilterQuery().track(filters.mkString(",")))
+    } else None
+
+    createFilteredStream(ssc, twitterAuth, query, storageLevel)
   }
 
   /**
@@ -129,4 +152,23 @@ object TwitterUtils {
     ): JavaReceiverInputDStream[Status] = {
     createStream(jssc.ssc, Some(twitterAuth), filters, storageLevel)
   }
+
+  /**
+   * Create an input stream that returns tweets received from Twitter.
+   * @param jssc         JavaStreamingContext object
+   * @param twitterAuth Twitter4J authentication, or None to use Twitter4J's default OAuth
+   *        authorization; this uses the system properties twitter4j.oauth.consumerKey,
+   *        twitter4j.oauth.consumerSecret, twitter4j.oauth.accessToken and
+   *        twitter4j.oauth.accessTokenSecret
+   * @param query A query to get only those tweets that match it
+   * @param storageLevel Storage level to use for storing the received objects
+   */
+  def createFilteredStream(
+      jssc: JavaStreamingContext,
+      twitterAuth: Authorization,
+      query: FilterQuery,
+      storageLevel: StorageLevel
+    ): JavaReceiverInputDStream[Status] = {
+    createFilteredStream(jssc.ssc, Some(twitterAuth), Some(query), storageLevel)
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/86ded930/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java b/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
index 26ec8af..e22e24e 100644
--- a/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
+++ b/streaming-twitter/src/test/java/org/apache/spark/streaming/twitter/JavaTwitterStreamSuite.java
@@ -18,6 +18,7 @@
 package org.apache.spark.streaming.twitter;
 
 import org.junit.Test;
+import twitter4j.FilterQuery;
 import twitter4j.Status;
 import twitter4j.auth.Authorization;
 import twitter4j.auth.NullAuthorization;
@@ -30,6 +31,7 @@ public class JavaTwitterStreamSuite extends LocalJavaStreamingContext {
   public void testTwitterStream() {
     String[] filters = { "filter1", "filter2" };
     Authorization auth = NullAuthorization.getInstance();
+    FilterQuery query = new FilterQuery().language("en,es");
 
     // tests the API, does not actually test data receiving
     JavaDStream<Status> test1 = TwitterUtils.createStream(ssc);
@@ -40,5 +42,7 @@ public class JavaTwitterStreamSuite extends LocalJavaStreamingContext {
     JavaDStream<Status> test5 = TwitterUtils.createStream(ssc, auth, filters);
     JavaDStream<Status> test6 = TwitterUtils.createStream(ssc,
       auth, filters, StorageLevel.MEMORY_AND_DISK_SER_2());
+    JavaDStream<Status> test7 = TwitterUtils.createFilteredStream(ssc,
+      auth, query, StorageLevel.MEMORY_AND_DISK_SER_2());
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/86ded930/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala b/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
index bd23831..3f1babd 100644
--- a/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
+++ b/streaming-twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.streaming.twitter
 
 import org.scalatest.BeforeAndAfter
-import twitter4j.Status
+import twitter4j.{FilterQuery, Status}
 import twitter4j.auth.{Authorization, NullAuthorization}
 
 import org.apache.spark.SparkFunSuite
@@ -38,6 +38,7 @@ class TwitterStreamSuite extends SparkFunSuite with BeforeAndAfter with Logging
   test("twitter input stream") {
     val ssc = new StreamingContext(master, framework, batchDuration)
     val filters = Seq("filter1", "filter2")
+    val query = new FilterQuery().language("fr,es")
     val authorization: Authorization = NullAuthorization.getInstance()
 
     // tests the API, does not actually test data receiving
@@ -52,6 +53,8 @@ class TwitterStreamSuite extends SparkFunSuite with BeforeAndAfter with Logging
       TwitterUtils.createStream(ssc, Some(authorization), filters)
     val test6: ReceiverInputDStream[Status] = TwitterUtils.createStream(
       ssc, Some(authorization), filters, StorageLevel.MEMORY_AND_DISK_SER_2)
+    val test7: ReceiverInputDStream[Status] = TwitterUtils.createFilteredStream(
+      ssc, Some(authorization), Some(query), StorageLevel.MEMORY_AND_DISK_SER_2)
 
     // Note that actually testing the data receiving is hard as authentication keys are
     // necessary for accessing Twitter live stream


[11/50] [abbrv] incubator-livy-website git commit: [MINOR] update NOTICE file for 2017

Posted by lr...@apache.org.
[MINOR] update NOTICE file for 2017


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/eb9136f8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/eb9136f8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/eb9136f8

Branch: refs/heads/master
Commit: eb9136f8bd7d923e4d75f1572d91763830ed6ab8
Parents: 25c1ca2
Author: Christian Kadner <ck...@apache.org>
Authored: Fri Jan 6 14:10:10 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Jan 6 14:10:10 2017 -0800

----------------------------------------------------------------------
 NOTICE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/eb9136f8/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 7067f1e..9465797 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
 Apache Bahir
-Copyright (c) 2016 The Apache Software Foundation.
+Copyright (c) 2016-2017 The Apache Software Foundation.
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).


[45/50] [abbrv] incubator-livy-website git commit: [BAHIR-88] Add release:prepare statement back to script

Posted by lr...@apache.org.
[BAHIR-88] Add release:prepare statement back to script


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/dcb4bbd2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/dcb4bbd2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/dcb4bbd2

Branch: refs/heads/master
Commit: dcb4bbd2e4d75bc0872ce32c159b03a1d0f90047
Parents: 6d9a4d7
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 21:32:39 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 21:32:39 2017 -0700

----------------------------------------------------------------------
 dev/release-build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/dcb4bbd2/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index 2cfcd27..0b8905d 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -243,7 +243,7 @@ if [[ "$RELEASE_PREPARE" == "true" ]]; then
     cd target/bahir
 
     # Build and prepare the release
-#$MVN $PUBLISH_PROFILES release:clean release:prepare $DRY_RUN -Darguments="-Dgpg.passphrase=\"$GPG_PASSPHRASE\" -DskipTests" -DreleaseVersion="$RELEASE_VERSION" -DdevelopmentVersion="$DEVELOPMENT_VERSION" -Dtag="$RELEASE_TAG"
+    $MVN $PUBLISH_PROFILES release:clean release:prepare $DRY_RUN -Darguments="-Dgpg.passphrase=\"$GPG_PASSPHRASE\" -DskipTests" -DreleaseVersion="$RELEASE_VERSION" -DdevelopmentVersion="$DEVELOPMENT_VERSION" -Dtag="$RELEASE_TAG"
 
     cd .. #exit bahir
 


[44/50] [abbrv] incubator-livy-website git commit: [BAHIR-88] Additional fixes to produce proper rc distribution

Posted by lr...@apache.org.
[BAHIR-88] Additional fixes to produce proper rc distribution


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/6d9a4d7a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/6d9a4d7a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/6d9a4d7a

Branch: refs/heads/master
Commit: 6d9a4d7ab0c1eff0bf63e91cec32b601c263f790
Parents: 5d8fc42
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 21:28:58 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 21:29:39 2017 -0700

----------------------------------------------------------------------
 dev/release-build.sh | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/6d9a4d7a/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index fdb1f37..2cfcd27 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -200,7 +200,7 @@ if [ -z "$RELEASE_TAG" ]; then
   RELEASE_TAG="v$RELEASE_VERSION-$RELEASE_RC"
 fi
 
-RELEASE_STAGING_LOCATION="https://dist.apache.org/repos/dist/dev/bahir/"
+RELEASE_STAGING_LOCATION="https://dist.apache.org/repos/dist/dev/bahir/bahir-spark"
 
 
 echo "  "
@@ -233,10 +233,6 @@ function checkout_code {
     git_hash=`git rev-parse --short HEAD`
     echo "Checked out Bahir git hash $git_hash"
 
-    git clean -d -f -x
-    #rm .gitignore
-    #rm -rf .git
-
     cd "$BASE_DIR" #return to base dir
 }
 
@@ -247,30 +243,31 @@ if [[ "$RELEASE_PREPARE" == "true" ]]; then
     cd target/bahir
 
     # Build and prepare the release
-    $MVN $PUBLISH_PROFILES release:clean release:prepare $DRY_RUN -Darguments="-Dgpg.passphrase=\"$GPG_PASSPHRASE\" -DskipTests" -DreleaseVersion="$RELEASE_VERSION" -DdevelopmentVersion="$DEVELOPMENT_VERSION" -Dtag="$RELEASE_TAG"
+#$MVN $PUBLISH_PROFILES release:clean release:prepare $DRY_RUN -Darguments="-Dgpg.passphrase=\"$GPG_PASSPHRASE\" -DskipTests" -DreleaseVersion="$RELEASE_VERSION" -DdevelopmentVersion="$DEVELOPMENT_VERSION" -Dtag="$RELEASE_TAG"
 
     cd .. #exit bahir
 
     if [ -z "$DRY_RUN" ]; then
-        svn co $RELEASE_STAGING_LOCATION svn-bahir
-        mkdir -p svn-bahir/$RELEASE_VERSION-$RELEASE_RC
-
         cd "$BASE_DIR/target/bahir"
         git checkout $RELEASE_TAG
         git clean -d -f -x
 
        $MVN $PUBLISH_PROFILES clean install -DskipTests -Darguments="-DskipTests"
 
+        cd "$BASE_DIR/target"
+        svn co $RELEASE_STAGING_LOCATION svn-bahir
+        mkdir -p svn-bahir/$RELEASE_VERSION-$RELEASE_RC
+
         cp bahir/distribution/target/*.tar.gz svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
         cp bahir/distribution/target/*.zip    svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
 
         cd svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
         rm -f *.asc
-        for i in *.zip *.tgz; do gpg --output $i.asc --detach-sig --armor $i; done
+        for i in *.zip *.tar.gz; do gpg --output $i.asc --detach-sig --armor $i; done
         rm -f *.md5
-        for i in *.zip *.tgz; do openssl md5 -hex $i | sed 's/MD5(\([^)]*\))= \([0-9a-f]*\)/\2 *\1/' > $i.md5; done
+        for i in *.zip *.tar.gz; do openssl md5 -hex $i | sed 's/MD5(\([^)]*\))= \([0-9a-f]*\)/\2 *\1/' > $i.md5; done
         rm -f *.sha
-        for i in *.zip *.tgz; do shasum $i > $i.sha; done
+        for i in *.zip *.tar.gz; do shasum $i > $i.sha; done
 
         cd .. #exit $RELEASE_VERSION-$RELEASE_RC/
 


[03/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.0.1-rc1

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.0.1-rc1


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/bd00d2e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/bd00d2e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/bd00d2e4

Branch: refs/heads/master
Commit: bd00d2e4aa8c4cc8837bcaa1d0d95d3e8997ac8c
Parents: cc9cf1b
Author: Luciano Resende <lr...@apache.org>
Authored: Sat Oct 15 09:56:27 2016 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Sat Oct 15 09:56:27 2016 -0700

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 186dafd..1689a26 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.0.1</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 65e6462..891dd71 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.0-SNAPSHOT</version>
+  <version>2.0.1</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.0.1-rc1</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 678aeee..a42406c 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index b108502..bbf638b 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 8cdba98..281a125 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 7647ba8..2429ee4 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/bd00d2e4/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index f9ae3bc..b2592d7 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.0.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[20/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/748d0569
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/748d0569
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/748d0569

Branch: refs/heads/master
Commit: 748d0569097ef70a44547a584a3e48ea0892d907
Parents: 477ca1b
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Fri Jan 20 16:43:56 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Jan 20 16:43:56 2017 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 713a95c..186dafd 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.0.2</version>
+        <version>2.1.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index fe2fd70..e7cb338 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.0.2</version>
+  <version>2.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.0.2-rc3</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 118a98f..678aeee 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index a7f2670..b108502 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 5c0f1a6..8cdba98 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 608efa0..7647ba8 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/748d0569/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index 07dce62..f9ae3bc 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.0.2</version>
+    <version>2.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[24/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare release v2.1.0-rc1

Posted by lr...@apache.org.
[maven-release-plugin] prepare release v2.1.0-rc1


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/e61cc3dd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/e61cc3dd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/e61cc3dd

Branch: refs/heads/master
Commit: e61cc3ddc7994361d85fe22aeb1fd973e106959e
Parents: f97f2df
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Fri Feb 3 20:21:39 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Fri Feb 3 20:21:39 2017 -0800

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 186dafd..c7fbff0 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.1.0</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0903858..210c608 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.0-SNAPSHOT</version>
+  <version>2.1.0</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>HEAD</tag>
+    <tag>v2.1.0-rc1</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 678aeee..28840e6 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index b108502..4510801 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index 8cdba98..e5fef2a 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 7647ba8..a33842d 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/e61cc3dd/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index f9ae3bc..bba10f0 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[31/50] [abbrv] incubator-livy-website git commit: [BAHIR-97] Akka as SQL Streaming datasource.

Posted by lr...@apache.org.
[BAHIR-97] Akka as SQL Streaming datasource.

Closes #38.


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/889de659
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/889de659
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/889de659

Branch: refs/heads/master
Commit: 889de659c33dd56bad7193a4b69e6d05d061a2fd
Parents: f0d9a84
Author: Subhobrata Dey <sb...@gmail.com>
Authored: Sun Mar 26 21:30:30 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Apr 6 08:05:09 2017 -0700

----------------------------------------------------------------------
 pom.xml                                         |   1 +
 sql-streaming-akka/README.md                    | 111 +++++++
 .../streaming/akka/JavaAkkaStreamWordCount.java |  95 ++++++
 .../streaming/akka/AkkaStreamWordCount.scala    |  72 +++++
 sql-streaming-akka/pom.xml                      | 120 ++++++++
 .../src/main/assembly/assembly.xml              |  44 +++
 .../sql/streaming/akka/AkkaStreamSource.scala   | 294 +++++++++++++++++++
 .../bahir/sql/streaming/akka/MessageStore.scala |  83 ++++++
 .../org/apache/bahir/utils/BahirUtils.scala     |  47 +++
 .../scala/org/apache/bahir/utils/Logging.scala  |  24 ++
 .../src/test/resources/feeder_actor.conf        |  34 +++
 .../src/test/resources/log4j.properties         |  27 ++
 .../streaming/akka/AkkaStreamSourceSuite.scala  | 191 ++++++++++++
 .../sql/streaming/akka/AkkaTestUtils.scala      |  93 ++++++
 14 files changed, 1236 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 73cac1f..65129cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -77,6 +77,7 @@
   <modules>
     <module>sql-cloudant</module>
     <module>streaming-akka</module>
+    <module>sql-streaming-akka</module>
     <module>streaming-mqtt</module>
     <module>sql-streaming-mqtt</module>
     <module>streaming-twitter</module>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/README.md
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/README.md b/sql-streaming-akka/README.md
new file mode 100644
index 0000000..b64a8e2
--- /dev/null
+++ b/sql-streaming-akka/README.md
@@ -0,0 +1,111 @@
+A library for reading data from Akka Actors using Spark SQL Streaming (or Structured Streaming).
+
+## Linking
+
+Using SBT:
+
+    libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-akka" % "2.2.0-SNAPSHOT"
+
+Using Maven:
+
+    <dependency>
+        <groupId>org.apache.bahir</groupId>
+        <artifactId>spark-sql-streaming-akka_2.11</artifactId>
+        <version>2.2.0-SNAPSHOT</version>
+    </dependency>
+
+This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
+For example, to include it when starting the spark shell:
+
+    $ bin/spark-shell --packages org.apache.bahir:spark-sql-streaming-akka_2.11:2.2.0-SNAPSHOT
+
+Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
+The `--packages` argument can also be used with `bin/spark-submit`.
+
+This library is compiled for Scala 2.11 only, and is intended to support Spark 2.0 onwards.
+
+## Examples
+
+A SQL stream can be created from the data stream received from an Akka Feeder actor using:
+
+        sqlContext.readStream
+                .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                .option("urlOfPublisher", "feederActorUri")
+                .load()
+                
+## Enable recovering from failures
+
+Setting a value for the option `persistenceDirPath` helps recovery after a restart by restoring the state from where it left off before the shutdown.
+                
+        sqlContext.readStream
+                .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                .option("urlOfPublisher", "feederActorUri")
+                .option("persistenceDirPath", "/path/to/localdir")
+                .load() 
+                       
+## Configuration options
+
+This source uses the [Akka Actor API](http://doc.akka.io/api/akka/2.4/akka/actor/Actor.html).
+
+* `urlOfPublisher` The URL of the Publisher or Feeder actor that the Receiver actor connects to. Set this to the TCP URL of the Publisher or Feeder actor.
+* `persistenceDirPath` Directory path used for storing incoming messages on disk so that they can be restored after a restart.
+
+### Scala API
+
+An example of using the Scala API to count words from an incoming message stream.
+
+        // Create DataFrame representing the stream of input lines from connection
+        // to publisher or feeder actor
+        val lines = spark.readStream
+                    .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                    .option("urlOfPublisher", urlOfPublisher)
+                    .load().as[(String, Timestamp)]
+    
+        // Split the lines into words
+        val words = lines.map(_._1).flatMap(_.split(" "))
+    
+        // Generate running word count
+        val wordCounts = words.groupBy("value").count()
+    
+        // Start running the query that prints the running counts to the console
+        val query = wordCounts.writeStream
+                    .outputMode("complete")
+                    .format("console")
+                    .start()
+    
+        query.awaitTermination()
+        
+Please see `AkkaStreamWordCount.scala` for the full example.
+   
+### Java API
+   
+An example of using the Java API to count words from an incoming message stream.
+   
+        // Create DataFrame representing the stream of input lines from connection
+        // to publisher or feeder actor
+        Dataset<String> lines = spark
+                                .readStream()
+                                .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                                .option("urlOfPublisher", urlOfPublisher)
+                                .load().select("value").as(Encoders.STRING());
+    
+        // Split the lines into words
+        Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
+          @Override
+          public Iterator<String> call(String s) throws Exception {
+            return Arrays.asList(s.split(" ")).iterator();
+          }
+        }, Encoders.STRING());
+    
+        // Generate running word count
+        Dataset<Row> wordCounts = words.groupBy("value").count();
+    
+        // Start running the query that prints the running counts to the console
+        StreamingQuery query = wordCounts.writeStream()
+                                .outputMode("complete")
+                                .format("console")
+                                .start();
+    
+        query.awaitTermination();   
+         
+Please see `JavaAkkaStreamWordCount.java` for the full example.

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/examples/src/main/java/org/apache/bahir/examples/sql/streaming/akka/JavaAkkaStreamWordCount.java
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/examples/src/main/java/org/apache/bahir/examples/sql/streaming/akka/JavaAkkaStreamWordCount.java b/sql-streaming-akka/examples/src/main/java/org/apache/bahir/examples/sql/streaming/akka/JavaAkkaStreamWordCount.java
new file mode 100644
index 0000000..59146ae
--- /dev/null
+++ b/sql-streaming-akka/examples/src/main/java/org/apache/bahir/examples/sql/streaming/akka/JavaAkkaStreamWordCount.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.examples.sql.streaming.akka;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.streaming.StreamingQuery;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from Akka Feeder Actor system.
+ *
+ * Usage: JavaAkkaStreamWordCount <urlOfPublisher>
+ * <urlOfPublisher> provides the uri of the publisher or feeder actor that Structured Streaming
+ * would connect to receive data.
+ *
+ * To run this on your local machine, a Feeder Actor System should be up and running.
+ *
+ */
+public final class JavaAkkaStreamWordCount {
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 1) {
+      System.err.println("Usage: JavaAkkaStreamWordCount <urlOfPublisher>");
+      System.exit(1);
+    }
+
+    if (!Logger.getRootLogger().getAllAppenders().hasMoreElements()) {
+      Logger.getRootLogger().setLevel(Level.WARN);
+    }
+
+    String urlOfPublisher = args[0];
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaAkkaStreamWordCount");
+
+    // check Spark configuration for master URL, set it to local if not configured
+    if (!sparkConf.contains("spark.master")) {
+      sparkConf.setMaster("local[4]");
+    }
+
+    SparkSession spark = SparkSession.builder()
+                          .config(sparkConf)
+                          .getOrCreate();
+
+    // Create DataFrame representing the stream of input lines from connection
+    // to publisher or feeder actor
+    Dataset<String> lines = spark
+                            .readStream()
+                            .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                            .option("urlOfPublisher", urlOfPublisher)
+                            .load().select("value").as(Encoders.STRING());
+
+    // Split the lines into words
+    Dataset<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
+      @Override
+      public Iterator<String> call(String s) throws Exception {
+        return Arrays.asList(s.split(" ")).iterator();
+      }
+    }, Encoders.STRING());
+
+    // Generate running word count
+    Dataset<Row> wordCounts = words.groupBy("value").count();
+
+    // Start running the query that prints the running counts to the console
+    StreamingQuery query = wordCounts.writeStream()
+                            .outputMode("complete")
+                            .format("console")
+                            .start();
+
+    query.awaitTermination();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/akka/AkkaStreamWordCount.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/akka/AkkaStreamWordCount.scala b/sql-streaming-akka/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/akka/AkkaStreamWordCount.scala
new file mode 100644
index 0000000..8c4185a
--- /dev/null
+++ b/sql-streaming-akka/examples/src/main/scala/org/apache/bahir/examples/sql/streaming/akka/AkkaStreamWordCount.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.examples.sql.streaming.akka
+
+import java.sql.Timestamp
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from Akka Feeder Actor system.
+ *
+ * Usage: AkkaStreamWordCount <urlOfPublisher>
+ * <urlOfPublisher> provides the uri of the publisher or feeder actor that Structured Streaming
+ * would connect to receive data.
+ *
+ * To run this on your local machine, a Feeder Actor System should be up and running.
+ *
+ */
+object AkkaStreamWordCount {
+  def main(args: Array[String]): Unit = {
+    if (args.length < 1) {
+      System.err.println("Usage: AkkaStreamWordCount <urlOfPublisher>") // scalastyle:off println
+      System.exit(1)
+    }
+
+    val urlOfPublisher = args(0)
+
+    val spark = SparkSession
+                .builder()
+                .appName("AkkaStreamWordCount")
+                .master("local[4]")
+                .getOrCreate()
+
+    import spark.implicits._
+
+    // Create DataFrame representing the stream of input lines from connection
+    // to publisher or feeder actor
+    val lines = spark.readStream
+                .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+                .option("urlOfPublisher", urlOfPublisher)
+                .load().as[(String, Timestamp)]
+
+    // Split the lines into words
+    val words = lines.map(_._1).flatMap(_.split(" "))
+
+    // Generate running word count
+    val wordCounts = words.groupBy("value").count()
+
+    // Start running the query that prints the running counts to the console
+    val query = wordCounts.writeStream
+                .outputMode("complete")
+                .format("console")
+                .start()
+
+    query.awaitTermination()
+  }
+}
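
The word count examples above assume a publisher/feeder actor system is already running at <urlOfPublisher>. The sketch below is a hypothetical stand-alone feeder, modelled on the AkkaTestUtils.FeederActor added later in this commit; the actor system name, host, port, and the FeederActorSketch object itself are illustrative assumptions, not part of the change. It is placed in the connector's package so it can reuse the SubscribeReceiver/UnsubscribeReceiver messages defined in AkkaStreamSource.scala.

    // Hypothetical feeder for local testing; lives in the connector package so that the
    // SubscribeReceiver / UnsubscribeReceiver messages from AkkaStreamSource.scala resolve
    // to the same classes on both ends of the remote connection.
    package org.apache.bahir.sql.streaming.akka

    import scala.collection.mutable

    import akka.actor.{Actor, ActorRef, ActorSystem, Props}
    import com.typesafe.config.ConfigFactory

    class FeederActorSketch extends Actor {
      private val receivers = new mutable.LinkedHashSet[ActorRef]()

      override def receive: Receive = {
        case SubscribeReceiver(ref)   => receivers += ref              // source registers itself
        case UnsubscribeReceiver(ref) => receivers -= ref
        case line: String             => receivers.foreach(_ ! line)   // fan text lines out
      }
    }

    object FeederActorSketch {
      def main(args: Array[String]): Unit = {
        // Remoting must be enabled so the Structured Streaming source can connect over
        // akka.tcp; the host name and port are illustrative.
        val conf = ConfigFactory.parseString(
          """akka.actor.provider = "akka.remote.RemoteActorRefProvider"
            |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
            |akka.remote.netty.tcp.hostname = "127.0.0.1"
            |akka.remote.netty.tcp.port = 2552
          """.stripMargin)
        val system = ActorSystem("feeder-actor-system", conf)
        val feeder = system.actorOf(Props(new FeederActorSketch), "feederActor")

        // Push a few lines; a running word count example would receive and count them.
        Seq("the quick brown fox", "jumps over the lazy dog").foreach(feeder ! _)
      }
    }

With such a feeder running, the examples above would be started with <urlOfPublisher> set to something like akka.tcp://feeder-actor-system@127.0.0.1:2552/user/feederActor (an illustrative address).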

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/pom.xml b/sql-streaming-akka/pom.xml
new file mode 100644
index 0000000..4d7040b
--- /dev/null
+++ b/sql-streaming-akka/pom.xml
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.bahir</groupId>
+        <artifactId>bahir-parent_2.11</artifactId>
+        <version>2.2.0-SNAPSHOT</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <groupId>org.apache.bahir</groupId>
+    <artifactId>spark-sql-streaming-akka_2.11</artifactId>
+    <properties>
+        <sbt.project.name>sql-streaming-akka</sbt.project.name>
+    </properties>
+    <packaging>jar</packaging>
+    <name>Apache Bahir - Spark SQL Streaming Akka</name>
+    <url>http://bahir.apache.org</url>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-tags_${scala.binary.version}</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${akka.group}</groupId>
+            <artifactId>akka-actor_${scala.binary.version}</artifactId>
+            <version>${akka.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>${akka.group}</groupId>
+            <artifactId>akka-remote_${scala.binary.version}</artifactId>
+            <version>${akka.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>${akka.group}</groupId>
+            <artifactId>akka-slf4j_${scala.binary.version}</artifactId>
+            <version>${akka.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.rocksdb</groupId>
+            <artifactId>rocksdbjni</artifactId>
+            <version>5.1.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+        <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-source-plugin</artifactId>
+            </plugin>
+
+            <!-- Assemble a jar with test dependencies for Python tests -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>test-jar-with-dependencies</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                        <configuration>
+                            <!-- Make sure the file path is same as the sbt build -->
+                            <finalName>spark-streaming-akka-test-${project.version}</finalName>
+                            <outputDirectory>${project.build.directory}/scala-${scala.binary.version}</outputDirectory>
+                            <appendAssemblyId>false</appendAssemblyId>
+                            <!-- Don't publish it since it's only for Python tests -->
+                            <attach>false</attach>
+                            <descriptors>
+                                <descriptor>src/main/assembly/assembly.xml</descriptor>
+                            </descriptors>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
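
For readers pulling this new module into their own build, the Maven coordinates declared above translate into the sbt dependency sketched below; this assumes Scala 2.11 and that the 2.2.0-SNAPSHOT artifact has been installed into the local repository (for example via mvn install), since snapshot artifacts are not published to Maven Central.

    // build.sbt sketch -- coordinates taken from the pom.xml above
    scalaVersion := "2.11.8"   // assumed; %% expands the artifact id to spark-sql-streaming-akka_2.11
    libraryDependencies += "org.apache.bahir" %% "spark-sql-streaming-akka" % "2.2.0-SNAPSHOT"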

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/main/assembly/assembly.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/main/assembly/assembly.xml b/sql-streaming-akka/src/main/assembly/assembly.xml
new file mode 100644
index 0000000..58a95a0
--- /dev/null
+++ b/sql-streaming-akka/src/main/assembly/assembly.xml
@@ -0,0 +1,44 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<assembly>
+    <id>test-jar-with-dependencies</id>
+    <formats>
+        <format>jar</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+
+    <fileSets>
+        <fileSet>
+            <directory>${project.build.directory}/scala-${scala.binary.version}/test-classes</directory>
+            <outputDirectory></outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useTransitiveDependencies>true</useTransitiveDependencies>
+            <scope>test</scope>
+            <unpack>true</unpack>
+            <excludes>
+                <exclude>org.apache.hadoop:*:jar</exclude>
+                <exclude>org.apache.zookeeper:*:jar</exclude>
+                <exclude>org.apache.avro:*:jar</exclude>
+            </excludes>
+        </dependencySet>
+    </dependencySets>
+
+</assembly>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSource.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSource.scala b/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSource.scala
new file mode 100644
index 0000000..96d892f
--- /dev/null
+++ b/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSource.scala
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.sql.streaming.akka
+
+import java.nio.ByteBuffer
+import java.sql.Timestamp
+import java.text.SimpleDateFormat
+import java.util.{Calendar, Objects}
+import java.util.concurrent.CountDownLatch
+import java.util.concurrent.atomic.AtomicInteger
+
+import scala.collection.concurrent.TrieMap
+import scala.collection.mutable.ArrayBuffer
+import scala.concurrent.duration._
+import scala.concurrent.Future
+import scala.language.postfixOps
+import scala.util.{Failure, Success, Try}
+
+import akka.actor._
+import akka.actor.SupervisorStrategy.{Escalate, Restart}
+import akka.pattern.ask
+import akka.util.Timeout
+import com.typesafe.config.ConfigFactory
+import org.rocksdb.{Options, RocksDB}
+
+import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
+import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
+import org.apache.spark.sql.types.{StringType, StructField, StructType, TimestampType}
+
+import org.apache.bahir.utils.Logging
+
+object AkkaStreamConstants {
+
+  val SCHEMA_DEFAULT = StructType(StructField("value", StringType)
+    :: StructField("timestamp", TimestampType) :: Nil)
+
+  val DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
+
+  val defaultSupervisorStrategy = OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange =
+    15 millis) {
+    case _: RuntimeException => Restart
+    case _: Exception => Escalate
+  }
+
+  val defaultActorSystemCreator: () => ActorSystem = () => {
+//    val uniqueSystemName = s"streaming-actor-system-${TaskContext.get().taskAttemptId()}"
+    val uniqueSystemName = s"streaming-actor-system"
+    val akkaConf = ConfigFactory.parseString(
+      s"""akka.actor.provider = "akka.remote.RemoteActorRefProvider"
+         |akka.remote.enabled-transports = ["akka.remote.netty.tcp"]
+         |akka.remote.netty.tcp.port = "0"
+         |akka.loggers.0 = "akka.event.slf4j.Slf4jLogger"
+         |akka.log-dead-letters-during-shutdown = "off"
+       """.stripMargin)
+    ActorSystem(uniqueSystemName, akkaConf)
+  }
+}
+
+case class SubscribeReceiver(receiverActor: ActorRef)
+case class UnsubscribeReceiver(receiverActor: ActorRef)
+
+case class Statistics(numberOfMsgs: Int,
+                      numberOfWorkers: Int,
+                      numberOfHiccups: Int,
+                      otherInfo: String)
+
+private[akka] sealed trait ActorReceiverData
+private[akka] case class SingleItemData(item: String) extends ActorReceiverData
+private[akka] case class AskStoreSingleItemData(item: String) extends ActorReceiverData
+private[akka] case class IteratorData(iterator: Iterator[String]) extends ActorReceiverData
+private[akka] case class ByteBufferData(bytes: ByteBuffer) extends ActorReceiverData
+private[akka] object Ack extends ActorReceiverData
+
+class AkkaStreamSource(urlOfPublisher: String,
+                       persistence: RocksDB, sqlContext: SQLContext,
+                       messageParser: String => (String, Timestamp))
+  extends Source with Logging {
+
+  override def schema: StructType = AkkaStreamConstants.SCHEMA_DEFAULT
+
+  private val store = new LocalMessageStore(persistence, sqlContext.sparkContext.getConf)
+
+  private val messages = new TrieMap[Int, (String, Timestamp)]()
+
+  private val initLock = new CountDownLatch(1)
+
+  private var offset = 0
+
+  private var actorSystem: ActorSystem = _
+  private var actorSupervisor: ActorRef = _
+
+  private def fetchLastProcessedOffset(): Int = {
+    Try(store.maxProcessedOffset) match {
+      case Success(x) =>
+        log.info(s"Recovering from last stored offset $x")
+        x
+      case Failure(e) => 0
+    }
+  }
+
+  initialize()
+  private def initialize(): Unit = {
+
+    class ActorReceiver(urlOfPublisher: String) extends Actor {
+
+      lazy private val remotePublisher = context.actorSelection(urlOfPublisher)
+
+      override def preStart(): Unit = remotePublisher ! SubscribeReceiver(context.self)
+
+      override def receive: PartialFunction[Any, Unit] = {
+        case msg: String => store(msg)
+      }
+
+      override def postStop(): Unit = remotePublisher ! UnsubscribeReceiver(context.self)
+
+      def store(iter: Iterator[String]) = {
+        context.parent ! IteratorData(iter)
+      }
+
+      def store(item: String) = {
+        context.parent ! SingleItemData(item)
+      }
+
+      def store(item: String, timeout: Timeout): Future[Unit] = {
+        context.parent.ask(AskStoreSingleItemData(item))(timeout).map(_ => ())(context.dispatcher)
+      }
+    }
+
+    class Supervisor extends Actor {
+      override val supervisorStrategy = AkkaStreamConstants.defaultSupervisorStrategy
+
+      private val props = Props(new ActorReceiver(urlOfPublisher))
+      private val name = "ActorReceiver"
+      private val worker = context.actorOf(props, name)
+      log.info("Started receiver actor at:" + worker.path)
+
+      private val n: AtomicInteger = new AtomicInteger(0)
+      private val hiccups: AtomicInteger = new AtomicInteger(0)
+
+      override def receive: PartialFunction[Any, Unit] = {
+
+        case data =>
+          initLock.await()
+          var temp = offset + 1
+
+          data match {
+            case IteratorData(iterator) =>
+              log.debug("received iterator")
+              iterator.asInstanceOf[Iterator[String]].foreach(record => {
+                messages.put(temp, messageParser(record.toString))
+                temp += 1
+              })
+
+            case SingleItemData(msg) =>
+              log.debug("received single")
+              messages.put(temp, messageParser(msg))
+              n.incrementAndGet()
+
+            case AskStoreSingleItemData(msg) =>
+              log.debug("received single sync")
+              messages.put(temp, messageParser(msg))
+              n.incrementAndGet()
+              sender() ! Ack
+
+            case ByteBufferData(bytes) =>
+              log.debug("received bytes")
+              messages.put(temp, messageParser(new String(bytes.array())))
+
+            case props: Props =>
+              val worker = context.actorOf(props)
+              log.info("Started receiver worker at:" + worker.path)
+              sender() ! worker
+
+            case (props: Props, name: String) =>
+              val worker = context.actorOf(props, name)
+              log.info("Started receiver worker at:" + worker.path)
+              sender() ! worker
+
+            case _: PossiblyHarmful => hiccups.incrementAndGet()
+
+            case _: Statistics =>
+              val workers = context.children
+              sender() ! Statistics(n.get(), workers.size, hiccups.get(), workers.mkString("\n"))
+          }
+          offset = temp
+      }
+    }
+
+    actorSystem = AkkaStreamConstants.defaultActorSystemCreator()
+    actorSupervisor = actorSystem.actorOf(Props(new Supervisor), "Supervisor")
+    offset = fetchLastProcessedOffset()
+    initLock.countDown()
+  }
+
+  override def stop(): Unit = {
+    actorSupervisor ! PoisonPill
+    Persistence.close()
+    actorSystem.shutdown()
+    actorSystem.awaitTermination()
+  }
+
+  override def getOffset: Option[Offset] = {
+    if (offset == 0) {
+      None
+    } else {
+      Some(LongOffset(offset))
+    }
+  }
+
+  override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
+    val startIndex = start.getOrElse(LongOffset(0L)).asInstanceOf[LongOffset].offset.toInt
+    val endIndex = end.asInstanceOf[LongOffset].offset.toInt
+    val data: ArrayBuffer[(String, Timestamp)] = ArrayBuffer.empty
+
+    ((startIndex + 1) to endIndex).foreach { id =>
+      val element: (String, Timestamp) = messages.getOrElse(id,
+        store.retrieve[(String, Timestamp)](id).orNull)
+
+      if (!Objects.isNull(element)) {
+        data += element
+        store.store(id, element)
+      }
+      messages.remove(id, element)
+    }
+    log.trace(s"Get Batch invoked, ${data.mkString}")
+    import sqlContext.implicits._
+    data.toDF("value", "timestamp")
+  }
+}
+
+class AkkaStreamSourceProvider extends StreamSourceProvider with DataSourceRegister with Logging {
+
+  override def sourceSchema(sqlContext: SQLContext, schema: Option[StructType],
+                            providerName: String, parameters: Map[String, String])
+  : (String, StructType) = ("akka", AkkaStreamConstants.SCHEMA_DEFAULT)
+
+  override def createSource(sqlContext: SQLContext, metadataPath: String,
+                            schema: Option[StructType], providerName: String,
+                            parameters: Map[String, String]): Source = {
+
+    def e(s: String) = new IllegalArgumentException(s)
+
+    val urlOfPublisher: String = parameters.getOrElse("urlOfPublisher", parameters.getOrElse("path",
+      throw e(
+        s"""Please provide url of Publisher actor by specifying path
+           | or .options("urlOfPublisher",...)""".stripMargin)))
+
+    val persistenceDirPath: String = parameters.getOrElse("persistenceDirPath",
+      System.getProperty("java.io.tmpdir"))
+
+    val messageParserWithTimestamp = (x: String) =>
+      (x, Timestamp.valueOf(AkkaStreamConstants.DATE_FORMAT.format(Calendar.getInstance().getTime)))
+
+    val persistence = Persistence.getOrCreatePersistenceInstance(persistenceDirPath)
+    new AkkaStreamSource(urlOfPublisher, persistence, sqlContext, messageParserWithTimestamp)
+  }
+
+  override def shortName(): String = "akka"
+}
+
+object Persistence {
+  var persistence: RocksDB = _
+
+  def getOrCreatePersistenceInstance(persistenceDirPath: String): RocksDB = {
+    if (Objects.isNull(persistence)) {
+      RocksDB.loadLibrary()
+      persistence = RocksDB.open(new Options().setCreateIfMissing(true), persistenceDirPath)
+    }
+    persistence
+  }
+
+  def close(): Unit = {
+    if (!Objects.isNull(persistence)) {
+      persistence.close()
+      persistence = null
+    }
+  }
+}
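
AkkaStreamSourceProvider above takes the publisher address through either the "urlOfPublisher" or the "path" option, and optionally a "persistenceDirPath" for its RocksDB-backed store (defaulting to java.io.tmpdir). A minimal sketch of wiring those options into a streaming query follows; the application name, publisher url, and persistence directory are placeholders.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .appName("AkkaSourceOptionsSketch")   // illustrative application name
      .master("local[2]")
      .getOrCreate()

    // The source produces rows with schema (value STRING, timestamp TIMESTAMP).
    val lines = spark.readStream
      .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
      .option("urlOfPublisher", "akka.tcp://feeder-actor-system@127.0.0.1:2552/user/feederActor")
      .option("persistenceDirPath", "/tmp/akka-source-persistence")   // assumed local directory
      .load()

    val query = lines.writeStream
      .outputMode("append")
      .format("console")
      .start()
    query.awaitTermination()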

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/MessageStore.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/MessageStore.scala b/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/MessageStore.scala
new file mode 100644
index 0000000..9babd82
--- /dev/null
+++ b/sql-streaming-akka/src/main/scala/org/apache/bahir/sql/streaming/akka/MessageStore.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.sql.streaming.akka
+
+import java.nio.ByteBuffer
+
+import scala.reflect.ClassTag
+
+import org.rocksdb.RocksDB
+
+import org.apache.spark.serializer.{JavaSerializer, Serializer, SerializerInstance}
+import org.apache.spark.SparkConf
+
+import org.apache.bahir.utils.Logging
+
+
+trait MessageStore {
+
+  def store[T: ClassTag](id: Int, message: T): Boolean
+
+  def retrieve[T: ClassTag](start: Int, end: Int): Seq[Option[T]]
+
+  def retrieve[T: ClassTag](id: Int): Option[T]
+
+  def maxProcessedOffset: Int
+}
+
+private[akka] class LocalMessageStore(val persistentStore: RocksDB,
+                                      val serializer: Serializer)
+  extends MessageStore with Logging {
+
+  val classLoader = Thread.currentThread().getContextClassLoader
+
+  def this(persistentStore: RocksDB, conf: SparkConf) =
+    this(persistentStore, new JavaSerializer(conf))
+
+  val serializerInstance: SerializerInstance = serializer.newInstance()
+
+  private def get(id: Int) = persistentStore.get(id.toString.getBytes)
+
+  override def maxProcessedOffset: Int = persistentStore.getLatestSequenceNumber.toInt
+
+  override def store[T: ClassTag](id: Int, message: T): Boolean = {
+    val bytes: Array[Byte] = serializerInstance.serialize(message).array()
+    try {
+      persistentStore.put(id.toString.getBytes(), bytes)
+      true
+    } catch {
+      case e: Exception => log.warn(s"Failed to store message Id: $id", e)
+        false
+    }
+  }
+
+  override def retrieve[T: ClassTag](start: Int, end: Int): Seq[Option[T]] = {
+    (start until end).map(x => retrieve(x))
+  }
+
+  override def retrieve[T: ClassTag](id: Int): Option[T] = {
+    val bytes = persistentStore.get(id.toString.getBytes)
+
+    if (bytes != null) {
+      Some(serializerInstance.deserialize(
+        ByteBuffer.wrap(bytes), classLoader))
+    } else {
+      None
+    }
+  }
+}
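
LocalMessageStore above keys each message by its offset in RocksDB and serializes it with Spark's JavaSerializer. A small round trip using only the classes added in this commit might look like the sketch below; the demo object and directory path are assumptions, and the code has to sit in the org.apache.bahir.sql.streaming.akka package because LocalMessageStore is private[akka].

    package org.apache.bahir.sql.streaming.akka

    import java.sql.Timestamp

    import org.apache.spark.SparkConf

    // Hypothetical demo object, not part of the commit.
    object MessageStoreRoundTrip {
      def main(args: Array[String]): Unit = {
        // Open (or create) the RocksDB instance backing the store; the path is an assumption.
        val db = Persistence.getOrCreatePersistenceInstance("/tmp/akka-message-store")
        val store = new LocalMessageStore(db, new SparkConf())

        // Persist a (value, timestamp) pair under offset 1, then read it back.
        store.store(1, ("hello bahir", new Timestamp(System.currentTimeMillis())))
        val restored = store.retrieve[(String, Timestamp)](1)
        println(restored)   // expected: Some((hello bahir, <timestamp>))

        Persistence.close()
      }
    }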

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/BahirUtils.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/BahirUtils.scala b/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/BahirUtils.scala
new file mode 100644
index 0000000..996a0a1
--- /dev/null
+++ b/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/BahirUtils.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.utils
+
+import java.io.{File, IOException}
+import java.nio.file.{Files, FileVisitResult, Path, SimpleFileVisitor}
+import java.nio.file.attribute.BasicFileAttributes
+
+object BahirUtils extends Logging {
+
+  def recursiveDeleteDir(dir: File): Path = {
+    Files.walkFileTree(dir.toPath, new SimpleFileVisitor[Path]() {
+      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
+        try {
+          Files.delete(file)
+        } catch {
+          case t: Throwable => log.warn("Failed to delete", t)
+        }
+        FileVisitResult.CONTINUE
+      }
+
+      override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
+        try {
+          Files.delete(dir)
+        } catch {
+          case t: Throwable => log.warn("Failed to delete", t)
+        }
+        FileVisitResult.CONTINUE
+      }
+    })
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/Logging.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/Logging.scala b/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/Logging.scala
new file mode 100644
index 0000000..776ed5a
--- /dev/null
+++ b/sql-streaming-akka/src/main/scala/org/apache/bahir/utils/Logging.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.utils
+
+import org.slf4j.LoggerFactory
+
+trait Logging {
+  final val log = LoggerFactory.getLogger(this.getClass.getName.stripSuffix("$"))
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/test/resources/feeder_actor.conf
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/test/resources/feeder_actor.conf b/sql-streaming-akka/src/test/resources/feeder_actor.conf
new file mode 100644
index 0000000..9ec210e
--- /dev/null
+++ b/sql-streaming-akka/src/test/resources/feeder_actor.conf
@@ -0,0 +1,34 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+akka {
+  loglevel = "INFO"
+  actor {
+    provider = "akka.remote.RemoteActorRefProvider"
+  }
+  remote {
+    enabled-transports = ["akka.remote.netty.tcp"]
+    netty.tcp {
+      hostname = "127.0.0.1"
+      port = 0
+    }
+    log-sent-messages = on
+    log-received-messages = on
+  }
+  loggers.0 = "akka.event.slf4j.Slf4jLogger"
+  log-dead-letters-during-shutdown = "off"
+}
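
This configuration enables Akka remoting on a random free port for the test feeder actor system and is consumed by AkkaTestUtils.setup() further below. A minimal sketch of loading it by hand, mirroring that setup code inside some test helper, would be:

    import java.io.File

    import akka.actor.ActorSystem
    import com.typesafe.config.ConfigFactory

    // Resolve feeder_actor.conf from the test classpath, exactly as AkkaTestUtils does.
    val configFile = getClass.getClassLoader.getResource("feeder_actor.conf").getFile
    val feederConf = ConfigFactory.parseFile(new File(configFile))

    // "feeder-actor-system" matches the system name used by AkkaTestUtils.
    val actorSystem = ActorSystem("feeder-actor-system", feederConf)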

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/test/resources/log4j.properties b/sql-streaming-akka/src/test/resources/log4j.properties
new file mode 100644
index 0000000..3706a6e
--- /dev/null
+++ b/sql-streaming-akka/src/test/resources/log4j.properties
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+log4j.rootCategory=INFO, file
+log4j.appender.file=org.apache.log4j.FileAppender
+log4j.appender.file.append=true
+log4j.appender.file.file=target/unit-tests.log
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+log4j.logger.org.spark_project.jetty=WARN

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
new file mode 100644
index 0000000..a04dc66
--- /dev/null
+++ b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaStreamSourceSuite.scala
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bahir.sql.streaming.akka
+
+import java.io.File
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.scalatest.BeforeAndAfter
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.sql.{DataFrame, SparkSession, SQLContext}
+import org.apache.spark.sql.execution.streaming.FileStreamSource.Timestamp
+import org.apache.spark.sql.execution.streaming.LongOffset
+
+import org.apache.bahir.utils.BahirUtils
+
+class AkkaStreamSourceSuite extends SparkFunSuite with BeforeAndAfter {
+
+  protected var akkaTestUtils: AkkaTestUtils = _
+  protected val tempDir: File =
+    new File(System.getProperty("java.io.tmpdir") + "/spark-akka-persistence")
+
+  private val conf = new SparkConf().setMaster("local[4]").setAppName("AkkaStreamSourceSuite")
+  protected val spark = SparkSession.builder().config(conf).getOrCreate()
+
+  akkaTestUtils = new AkkaTestUtils
+  akkaTestUtils.setup()
+
+  before {
+    tempDir.mkdirs()
+  }
+
+  after {
+    Persistence.close()
+    BahirUtils.recursiveDeleteDir(tempDir)
+  }
+
+  protected val tmpDir: String = tempDir.getAbsolutePath
+
+  protected def createStreamingDataframe(dir: String = tmpDir): (SQLContext, DataFrame) = {
+
+    val sqlContext: SQLContext = spark.sqlContext
+
+    sqlContext.setConf("spark.sql.streaming.checkpointLocation", dir + "/checkpoint")
+
+    val dataFrame: DataFrame =
+      sqlContext.readStream.format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
+        .option("urlOfPublisher", akkaTestUtils.getFeederActorUri())
+        .option("persistenceDirPath", dir + "/persistence").load()
+    (sqlContext, dataFrame)
+  }
+}
+
+class BasicAkkaSourceSuite extends AkkaStreamSourceSuite {
+
+  private def writeStreamResults(sqlContext: SQLContext, dataFrame: DataFrame,
+                                 waitDuration: Long): Boolean = {
+    import sqlContext.implicits._
+    dataFrame.as[(String, Timestamp)].writeStream.format("parquet")
+      .start(s"$tmpDir/parquet/t.parquet").awaitTermination(waitDuration)
+  }
+
+  private def readBackStreamingResults(sqlContext: SQLContext): mutable.Buffer[String] = {
+    import sqlContext.implicits._
+    val asList =
+      sqlContext.read.schema(AkkaStreamConstants.SCHEMA_DEFAULT)
+      .parquet(s"$tmpDir/parquet/t.parquet").as[(String, Timestamp)].map(_._1)
+      .collectAsList().asScala
+    asList
+  }
+
+  test("basic usage") {
+    val message = "Akka is a reactive framework"
+
+    akkaTestUtils.setMessage(message)
+    akkaTestUtils.setCountOfMessages(1)
+
+    val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
+
+    writeStreamResults(sqlContext, dataFrame, 10000)
+
+    val resultBuffer: mutable.Buffer[String] = readBackStreamingResults(sqlContext)
+
+    assert(resultBuffer.size === 1)
+    assert(resultBuffer.head === message)
+  }
+
+  test("Send and receive 100 messages.") {
+    val message = "Akka is a reactive framework"
+
+    akkaTestUtils.setMessage(message)
+    akkaTestUtils.setCountOfMessages(100)
+
+    val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
+
+    writeStreamResults(sqlContext, dataFrame, 10000)
+
+    val resultBuffer: mutable.Buffer[String] = readBackStreamingResults(sqlContext)
+
+    assert(resultBuffer.size === 100)
+    assert(resultBuffer.head === message)
+  }
+
+  test("params not provided") {
+    val persistenceDirPath = tempDir.getAbsolutePath + "/persistence"
+
+    val provider = new AkkaStreamSourceProvider
+    val sqlContext: SQLContext = spark.sqlContext
+
+    val parameters = Map("persistenceDirPath" -> persistenceDirPath)
+
+    intercept[IllegalArgumentException] {
+      provider.createSource(sqlContext, "", None, "", parameters)
+    }
+  }
+
+  test("Recovering offset from the last processed offset") {
+    val persistenceDirPath = tempDir.getAbsolutePath + "/persistence"
+    val message = "Akka is a reactive framework"
+
+    akkaTestUtils.setMessage(message)
+    akkaTestUtils.setCountOfMessages(100)
+
+    val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
+
+    writeStreamResults(sqlContext, dataFrame, 10000)
+
+    val provider = new AkkaStreamSourceProvider
+    val parameters = Map("urlOfPublisher" -> akkaTestUtils.getFeederActorUri(),
+      "persistenceDirPath" -> persistenceDirPath)
+
+    val offset: Long = provider.createSource(sqlContext, "", None, "", parameters)
+      .getOffset.get.asInstanceOf[LongOffset].offset
+    assert(offset === 100L)
+  }
+}
+
+class StressTestAkkaSource extends AkkaStreamSourceSuite {
+
+  // Run with -Xmx1024m
+  // Default allowed payload size sent to an akka actor is 128000 bytes.
+  test("Send & Receive messages of size 128000 bytes.") {
+
+    val freeMemory: Long = Runtime.getRuntime.freeMemory()
+
+    log.info(s"Available memory before test run is ${freeMemory / (1024 * 1024)}MB.")
+
+    val noOfMsgs = 124 * 1024
+
+    val messageBuilder = new mutable.StringBuilder()
+    for (i <- 0 until noOfMsgs) yield messageBuilder.append(((i % 26) + 65).toChar)
+
+    val message = messageBuilder.toString()
+
+    akkaTestUtils.setMessage(message)
+    akkaTestUtils.setCountOfMessages(1)
+
+    val (sqlContext: SQLContext, dataFrame: DataFrame) = createStreamingDataframe()
+
+    import sqlContext.implicits._
+
+    dataFrame.as[(String, Timestamp)].writeStream
+      .format("parquet")
+      .start(s"$tmpDir/parquet/t.parquet")
+      .awaitTermination(25000)
+
+    val outputMessage =
+      sqlContext.read.schema(AkkaStreamConstants.SCHEMA_DEFAULT)
+        .parquet(s"$tmpDir/parquet/t.parquet").as[(String, Timestamp)]
+        .map(_._1).head()
+
+    assert(outputMessage === message)
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/889de659/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaTestUtils.scala
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaTestUtils.scala b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaTestUtils.scala
new file mode 100644
index 0000000..9cbfc32
--- /dev/null
+++ b/sql-streaming-akka/src/test/scala/org/apache/bahir/sql/streaming/akka/AkkaTestUtils.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.bahir.sql.streaming.akka
+
+import java.io.File
+
+import scala.collection.mutable
+import scala.util.Random
+
+import akka.actor.{Actor, ActorRef, ActorSystem, ExtendedActorSystem, Props}
+import com.typesafe.config.{Config, ConfigFactory}
+
+import org.apache.bahir.utils.Logging
+
+class AkkaTestUtils extends Logging {
+  private val actorSystemName = "feeder-actor-system"
+  private var actorSystem: ActorSystem = _
+
+  private val feederActorName = "feederActor"
+
+  private var message: String = _
+  private var count = 1
+
+  def getFeederActorConfig(): Config = {
+    val configFile = getClass.getClassLoader
+                      .getResource("feeder_actor.conf").getFile
+    ConfigFactory.parseFile(new File(configFile))
+  }
+
+  def getFeederActorUri(): String =
+    s"${actorSystem.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress}" +
+      s"/user/$feederActorName"
+
+  class FeederActor extends Actor {
+
+    val rand = new Random()
+    val receivers = new mutable.LinkedHashSet[ActorRef]()
+
+    val sendMessageThread =
+      new Thread() {
+        override def run(): Unit = {
+          var counter = 0
+          while (counter < count) {
+//            Thread.sleep(500)
+            receivers.foreach(_ ! message)
+            counter += 1
+          }
+        }
+      }
+
+    override def receive: Receive = {
+      case SubscribeReceiver(receiverActor: ActorRef) =>
+        log.debug(s"received subscribe from ${receiverActor.toString}")
+        receivers += receiverActor
+        sendMessageThread.run()
+
+      case UnsubscribeReceiver(receiverActor: ActorRef) =>
+        log.debug(s"received unsubscribe from ${receiverActor.toString}")
+        receivers -= receiverActor
+    }
+  }
+
+  def setup(): Unit = {
+    val feederConf = getFeederActorConfig()
+
+    actorSystem = ActorSystem(actorSystemName, feederConf)
+    actorSystem.actorOf(Props(new FeederActor), feederActorName)
+  }
+
+  def shutdown(): Unit = {
+//    actorSystem.awaitTermination()
+    actorSystem.shutdown()
+  }
+
+  def setMessage(message: String): Unit = this.message = message
+  def setCountOfMessages(messageCount: Int): Unit = count = messageCount
+}


[27/50] [abbrv] incubator-livy-website git commit: [BAHIR-89] Multi topic API support for streaming MQTT

Posted by lr...@apache.org.
[BAHIR-89] Multi topic API support for streaming MQTT

New API which accepts an array of MQTT topics as input
and returns Tuple2<TopicName, Message> as output.

It helps consume from multiple MQTT topics with
efficient use of resources.

Closes #37.
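
A minimal Scala sketch of using the new paired API, with a placeholder broker url and topic names, follows; it mirrors the usage documented in streaming-mqtt/README.md below.

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.mqtt.MQTTUtils

    val conf = new SparkConf().setAppName("MQTTPairedSketch").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(2))

    // Subscribe to several topics at once; each record is (topicName, message).
    val topics = Array("sensors/temperature", "sensors/humidity")   // placeholder topic names
    val pairs = MQTTUtils.createPairedStream(ssc, "tcp://localhost:1883", topics)

    pairs.map { case (topic, msg) => s"$topic -> $msg" }.print()

    ssc.start()
    ssc.awaitTermination()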


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/826545cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/826545cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/826545cb

Branch: refs/heads/master
Commit: 826545cb8db4b89bbdb3927e53f555c0fa15771e
Parents: 8d46b39
Author: Anntinu <an...@gmail.com>
Authored: Mon Feb 27 07:37:07 2017 +0530
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Mar 23 14:32:04 2017 -0700

----------------------------------------------------------------------
 .gitattributes                                  |  13 --
 .gitignore                                      |  24 ---
 streaming-mqtt/README.md                        |   6 +-
 streaming-mqtt/python/mqtt.py                   |  17 ++
 .../streaming/mqtt/MQTTPairedInputDStream.scala | 142 +++++++++++++++
 .../apache/spark/streaming/mqtt/MQTTUtils.scala | 182 +++++++++++++++++++
 .../streaming/mqtt/JavaMQTTStreamSuite.java     |  15 +-
 .../spark/streaming/mqtt/MQTTStreamSuite.scala  |  25 +++
 8 files changed, 385 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/.gitattributes
----------------------------------------------------------------------
diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index a8edefd..0000000
--- a/.gitattributes
+++ /dev/null
@@ -1,13 +0,0 @@
-# Set the default behavior to have all files normalized to Unix-style
-# line endings upon check-in.
-* text=auto
-
-# Declare files that will always have CRLF line endings on checkout.
-*.bat text eol=crlf
-
-# Denote all files that are truly binary and should not be modified.
-*.dll binary
-*.exp binary
-*.lib binary
-*.pdb binary
-*.exe binary

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index fb6d3b7..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,24 +0,0 @@
-# Mac
-.DS_Store
-
-# Eclipse
-.classpath
-.project
-.settings/
-target/
-
-# Intellij
-.idea/
-.idea_modules/
-*.iml
-*.iws
-*.class
-*.log
-
-# Python
-*.pyc
-
-# Others
-.checkstyle
-.fbExcludeFilterFile
-dependency-reduced-pom.xml

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/streaming-mqtt/README.md
----------------------------------------------------------------------
diff --git a/streaming-mqtt/README.md b/streaming-mqtt/README.md
index 872375d..6b89136 100644
--- a/streaming-mqtt/README.md
+++ b/streaming-mqtt/README.md
@@ -32,6 +32,7 @@ This source uses the [Eclipse Paho Java Client](https://eclipse.org/paho/clients
  * `brokerUrl` A url MqttClient connects to. Set this as the url of the Mqtt Server. e.g. tcp://localhost:1883.
  * `storageLevel` By default it is used for storing incoming messages on disk.
  * `topic` Topic MqttClient subscribes to.
+ * `topics` List of topics MqttClient subscribes to.
 * `clientId` The clientId this client is associated with. Provide the same value to recover a stopped client.
  * `QoS` The maximum quality of service to subscribe each topic at. Messages published at a lower quality of service will be received at the published QoS. Messages published at a higher quality of service will be received using the QoS specified on the subscribe.
  * `username` Sets the user name to use for the connection to Mqtt Server. Do not set it, if server does not need this. Setting it empty will lead to errors.
@@ -50,11 +51,13 @@ You need to extend `ActorReceiver` so as to store received data into Spark using
 this actor can be configured to handle failures, etc.
 
     val lines = MQTTUtils.createStream(ssc, brokerUrl, topic)
+    val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topics)
 
 Additional mqtt connection options can be provided:
 
 ```Scala
 val lines = MQTTUtils.createStream(ssc, brokerUrl, topic, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
+val lines = MQTTUtils.createPairedStream(ssc, brokerUrl, topics, storageLevel, clientId, username, password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
 ```
 
 ### Java API
@@ -63,5 +66,6 @@ You need to extend `JavaActorReceiver` so as to store received data into Spark u
 this actor can be configured to handle failures, etc.
 
     JavaDStream<String> lines = MQTTUtils.createStream(jssc, brokerUrl, topic);
+    JavaReceiverInputDStream<Tuple2<String, String>> lines = MQTTUtils.createPairedStream(jssc, brokerUrl, topics);
 
-See end-to-end examples at [MQTT Examples](https://github.com/apache/bahir/tree/master/streaming-mqtt/examples)
\ No newline at end of file
+See end-to-end examples at [MQTT Examples](https://github.com/apache/bahir/tree/master/streaming-mqtt/examples)
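
The "Additional mqtt connection options" shown above map onto the fully parameterised createPairedStream overload added to MQTTUtils.scala later in this diff; a hedged sketch with placeholder values (ssc is a StreamingContext as in the previous sketch):

    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.mqtt.MQTTUtils

    val lines = MQTTUtils.createPairedStream(
      ssc,                                  // an existing StreamingContext
      "tcp://localhost:1883",               // brokerUrl (placeholder)
      Array("topicA", "topicB"),            // topics (placeholders)
      StorageLevel.MEMORY_AND_DISK_SER_2,   // storageLevel
      Some("bahir-doc-example"),            // clientId (placeholder)
      None,                                 // username
      None,                                 // password
      Some(true),                           // cleanSession
      Some(1),                              // qos
      Some(30),                             // connectionTimeout
      Some(60),                             // keepAliveInterval
      None)                                 // mqttVersion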

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/streaming-mqtt/python/mqtt.py
----------------------------------------------------------------------
diff --git a/streaming-mqtt/python/mqtt.py b/streaming-mqtt/python/mqtt.py
index c55b704..da00394 100644
--- a/streaming-mqtt/python/mqtt.py
+++ b/streaming-mqtt/python/mqtt.py
@@ -44,6 +44,23 @@ class MQTTUtils(object):
         return DStream(jstream, ssc, UTF8Deserializer())
 
     @staticmethod
+    def createPairedStream(ssc, brokerUrl, topics,
+                     storageLevel=StorageLevel.MEMORY_AND_DISK_2):
+        """
+        Create an input stream that pulls messages from a Mqtt Broker.
+
+        :param ssc:  StreamingContext object
+        :param brokerUrl:  Url of remote mqtt publisher
+        :param topics:  topic names to subscribe to
+        :param storageLevel:  RDD storage level.
+        :return: A DStream object
+        """
+        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
+        helper = MQTTUtils._get_helper(ssc._sc)
+        jstream = helper.createStream(ssc._jssc, brokerUrl, topics, jlevel)
+        return DStream(jstream, ssc, UTF8Deserializer())
+
+    @staticmethod
     def _get_helper(sc):
         try:
             return sc._jvm.org.apache.spark.streaming.mqtt.MQTTUtilsPythonHelper()

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
new file mode 100644
index 0000000..050777b
--- /dev/null
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTPairedInputDStream.scala
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.mqtt
+
+import java.nio.charset.StandardCharsets
+
+import org.eclipse.paho.client.mqttv3._
+import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.dstream._
+import org.apache.spark.streaming.receiver.Receiver
+
+/**
+ * Input stream that subscribes to messages from a Mqtt Broker.
+ * Uses eclipse paho as MqttClient http://www.eclipse.org/paho/
+ * @param brokerUrl          Url of remote mqtt publisher
+ * @param topics             Array of topic names to subscribe to
+ * @param storageLevel       RDD storage level.
+ * @param clientId           ClientId to use for the mqtt connection
+ * @param username           Username for authentication to the mqtt publisher
+ * @param password           Password for authentication to the mqtt publisher
+ * @param cleanSession       Sets the mqtt cleanSession parameter
+ * @param qos                Quality of service to use for the topic subscription
+ * @param connectionTimeout  Connection timeout for the mqtt connection
+ * @param keepAliveInterval  Keepalive interval for the mqtt connection
+ * @param mqttVersion        Version to use for the mqtt connection
+ */
+private[streaming] class MQTTPairedInputDStream(
+    _ssc: StreamingContext,
+    brokerUrl: String,
+    topics: Array[String],
+    storageLevel: StorageLevel,
+    clientId: Option[String] = None,
+    username: Option[String] = None,
+    password: Option[String] = None,
+    cleanSession: Option[Boolean] = None,
+    qos: Option[Int] = None,
+    connectionTimeout: Option[Int] = None,
+    keepAliveInterval: Option[Int] = None,
+    mqttVersion: Option[Int] = None) extends ReceiverInputDStream[(String, String)](_ssc) {
+
+  private[streaming] override def name: String = s"MQTT stream [$id]"
+
+  def getReceiver(): Receiver[(String, String)] = {
+    new MQTTPairReceiver(brokerUrl, topics, storageLevel, clientId, username,
+        password, cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
+  }
+}
+
+private[streaming] class MQTTPairReceiver(
+    brokerUrl: String,
+    topics: Array[String],
+    storageLevel: StorageLevel,
+    clientId: Option[String],
+    username: Option[String],
+    password: Option[String],
+    cleanSession: Option[Boolean],
+    qos: Option[Int],
+    connectionTimeout: Option[Int],
+    keepAliveInterval: Option[Int],
+    mqttVersion: Option[Int]) extends Receiver[(String, String)](storageLevel) {
+
+  def onStop() {
+
+  }
+
+  def onStart() {
+
+    // Set up persistence for messages
+    val persistence = new MemoryPersistence()
+
+    // Initializing Mqtt Client specifying brokerUrl, clientID and MqttClientPersistance
+    val client = new MqttClient(brokerUrl, clientId.getOrElse(MqttClient.generateClientId()),
+      persistence)
+
+    // Initialize mqtt parameters
+    val mqttConnectionOptions = new MqttConnectOptions()
+    if (username.isDefined && password.isDefined) {
+      mqttConnectionOptions.setUserName(username.get)
+      mqttConnectionOptions.setPassword(password.get.toCharArray)
+    }
+    mqttConnectionOptions.setCleanSession(cleanSession.getOrElse(true))
+    if (connectionTimeout.isDefined) {
+      mqttConnectionOptions.setConnectionTimeout(connectionTimeout.get)
+    }
+    if (keepAliveInterval.isDefined) {
+      mqttConnectionOptions.setKeepAliveInterval(keepAliveInterval.get)
+    }
+    if (mqttVersion.isDefined) {
+      mqttConnectionOptions.setMqttVersion(mqttVersion.get)
+    }
+
+    // Callback automatically triggers as and when new message arrives on specified topic
+    val callback = new MqttCallback() {
+
+      // Handles Mqtt message
+      override def messageArrived(topic: String, message: MqttMessage) {
+        store((topic, new String(message.getPayload(), StandardCharsets.UTF_8)))
+      }
+
+      override def deliveryComplete(token: IMqttDeliveryToken) {
+      }
+
+      override def connectionLost(cause: Throwable) {
+        restart("Connection lost ", cause)
+      }
+    }
+
+    // Set up callback for MqttClient. This needs to happen before
+    // connecting or subscribing, otherwise messages may be lost
+    client.setCallback(callback)
+
+    // Connect to MqttBroker
+    client.connect(mqttConnectionOptions)
+
+    // Subscribe to all of the requested Mqtt topics, using the same QoS for each
+    val qosArray = Array.fill[Int](topics.length)(qos.getOrElse(1))
+    client.subscribe(topics, qosArray)
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
index 7e2f5c7..0accb80 100644
--- a/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
+++ b/streaming-mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTUtils.scala
@@ -199,7 +199,181 @@ object MQTTUtils {
     createStream(jssc.ssc, brokerUrl, topic, StorageLevel.MEMORY_AND_DISK_SER_2, Option(clientId),
       Option(username), Option(password), Option(cleanSession), None, None, None, None)
   }
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * @param ssc           StreamingContext object
+   * @param brokerUrl     Url of remote MQTT publisher
+   * @param topics        Array of topic names to subscribe to
+   * @param storageLevel  RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
+   */
+  def createPairedStream(
+      ssc: StreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2
+    ): ReceiverInputDStream[(String, String)] = {
+    new MQTTPairedInputDStream(ssc, brokerUrl, topics, storageLevel)
+  }
+
 
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * @param ssc                StreamingContext object
+   * @param brokerUrl          Url of remote MQTT publisher
+   * @param topics             Array of topic names to subscribe to
+   * @param storageLevel       RDD storage level. Defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
+   * @param clientId           ClientId to use for the mqtt connection
+   * @param username           Username for authentication to the mqtt publisher
+   * @param password           Password for authentication to the mqtt publisher
+   * @param cleanSession       Sets the mqtt cleanSession parameter
+   * @param qos                Quality of service to use for the topic subscription
+   * @param connectionTimeout  Connection timeout for the mqtt connection
+   * @param keepAliveInterval  Keepalive interval for the mqtt connection
+   * @param mqttVersion        Version to use for the mqtt connection
+   */
+  def createPairedStream(
+      ssc: StreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      storageLevel: StorageLevel,
+      clientId: Option[String],
+      username: Option[String],
+      password: Option[String],
+      cleanSession: Option[Boolean],
+      qos: Option[Int],
+      connectionTimeout: Option[Int],
+      keepAliveInterval: Option[Int],
+      mqttVersion: Option[Int]
+    ): ReceiverInputDStream[(String, String)] = {
+    new MQTTPairedInputDStream(ssc, brokerUrl, topics, storageLevel, clientId, username, password,
+          cleanSession, qos, connectionTimeout, keepAliveInterval, mqttVersion)
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * Storage level of the data will be the default StorageLevel.MEMORY_AND_DISK_SER_2.
+   * @param jssc      JavaStreamingContext object
+   * @param brokerUrl Url of remote MQTT publisher
+   * @param topics    Array of topic names to subscribe to
+   */
+  def createPairedStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String]
+    ): JavaReceiverInputDStream[(String, String)] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedStream(jssc.ssc, brokerUrl, topics)
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * @param jssc          JavaStreamingContext object
+   * @param brokerUrl     Url of remote MQTT publisher
+   * @param topics        Array of topic names to subscribe to
+   * @param storageLevel  RDD storage level.
+   */
+  def createPairedStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      storageLevel: StorageLevel
+    ): JavaReceiverInputDStream[(String, String)] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedStream(jssc.ssc, brokerUrl, topics, storageLevel)
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * @param jssc               JavaStreamingContext object
+   * @param brokerUrl          Url of remote MQTT publisher
+   * @param topics             Array of topic names to subscribe to
+   * @param storageLevel       RDD storage level.
+   * @param clientId           ClientId to use for the mqtt connection
+   * @param username           Username for authentication to the mqtt publisher
+   * @param password           Password for authentication to the mqtt publisher
+   * @param cleanSession       Sets the mqtt cleanSession parameter
+   * @param qos                Quality of service to use for the topic subscription
+   * @param connectionTimeout  Connection timeout for the mqtt connection
+   * @param keepAliveInterval  Keepalive interval for the mqtt connection
+   * @param mqttVersion        Version to use for the mqtt connection
+   */
+  def createPairedStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      storageLevel: StorageLevel,
+      clientId: String,
+      username: String,
+      password: String,
+      cleanSession: Boolean,
+      qos: Int,
+      connectionTimeout: Int,
+      keepAliveInterval: Int,
+      mqttVersion: Int
+    ): JavaReceiverInputDStream[(String, String)] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedStream(jssc.ssc, brokerUrl, topics, storageLevel, Option(clientId),
+        Option(username), Option(password), Option(cleanSession), Option(qos),
+        Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * @param jssc               JavaStreamingContext object
+   * @param brokerUrl          Url of remote MQTT publisher
+   * @param topics             Array of topic names to subscribe to
+   * @param clientId           ClientId to use for the mqtt connection
+   * @param username           Username for authentication to the mqtt publisher
+   * @param password           Password for authentication to the mqtt publisher
+   * @param cleanSession       Sets the mqtt cleanSession parameter
+   * @param qos                Quality of service to use for the topic subscription
+   * @param connectionTimeout  Connection timeout for the mqtt connection
+   * @param keepAliveInterval  Keepalive interval for the mqtt connection
+   * @param mqttVersion        Version to use for the mqtt connection
+   */
+  def createPairedStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      clientId: String,
+      username: String,
+      password: String,
+      cleanSession: Boolean,
+      qos: Int,
+      connectionTimeout: Int,
+      keepAliveInterval: Int,
+      mqttVersion: Int
+    ): JavaReceiverInputDStream[(String, String)] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedStream(jssc.ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2,
+        Option(clientId), Option(username), Option(password), Option(cleanSession), Option(qos),
+        Option(connectionTimeout), Option(keepAliveInterval), Option(mqttVersion))
+  }
+
+  /**
+   * Create an input stream that receives messages pushed by a MQTT publisher.
+   * @param jssc               JavaStreamingContext object
+   * @param brokerUrl          Url of remote MQTT publisher
+   * @param topics             Array of topic names to subscribe to
+   * @param clientId           ClientId to use for the mqtt connection
+   * @param username           Username for authentication to the mqtt publisher
+   * @param password           Password for authentication to the mqtt publisher
+   * @param cleanSession       Sets the mqtt cleanSession parameter
+   */
+  def createPairedStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      clientId: String,
+      username: String,
+      password: String,
+      cleanSession: Boolean
+    ): JavaReceiverInputDStream[(String, String)] = {
+    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
+    createPairedStream(jssc.ssc, brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2,
+        Option(clientId), Option(username), Option(password), Option(cleanSession), None,
+        None, None, None)
+  }
 }
 
 /**
@@ -216,4 +390,12 @@ private[mqtt] class MQTTUtilsPythonHelper {
     ): JavaDStream[String] = {
     MQTTUtils.createStream(jssc, brokerUrl, topic, storageLevel)
   }
+  def createPairedStream(
+      jssc: JavaStreamingContext,
+      brokerUrl: String,
+      topics: Array[String],
+      storageLevel: StorageLevel
+    ): JavaDStream[(String, String)] = {
+    MQTTUtils.createPairedStream(jssc, brokerUrl, topics, storageLevel)
+  }
 }
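As an editorial aside (not part of this diff), here is a sketch of calling the fully parameterized Scala overload added above with authentication and connection tuning; the broker URL, credentials, topics, and numeric values are all placeholders.

```scala
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.mqtt.MQTTUtils

// Assumes an already-created StreamingContext `ssc`.
def authenticatedPairedStream(ssc: StreamingContext): ReceiverInputDStream[(String, String)] =
  MQTTUtils.createPairedStream(
    ssc,
    brokerUrl = "ssl://broker.example.com:8883",  // placeholder broker
    topics = Array("alerts/critical", "alerts/warning"),
    storageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
    clientId = Some("spark-mqtt-receiver"),
    username = Some("USERNAME"),                  // placeholder credentials
    password = Some("PASSWORD"),
    cleanSession = Some(false),                   // resume the broker-side session on reconnect
    qos = Some(1),                                // at-least-once delivery for each topic
    connectionTimeout = Some(10),
    keepAliveInterval = Some(30),
    mqttVersion = Some(4)                         // Paho's constant for MQTT 3.1.1
  )
```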

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java b/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
index 45332d9..d320595 100644
--- a/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
+++ b/streaming-mqtt/src/test/java/org/apache/spark/streaming/mqtt/JavaMQTTStreamSuite.java
@@ -18,16 +18,18 @@
 package org.apache.spark.streaming.mqtt;
 
 import org.apache.spark.storage.StorageLevel;
+import org.apache.spark.streaming.LocalJavaStreamingContext;
 import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
 import org.junit.Test;
 
-import org.apache.spark.streaming.LocalJavaStreamingContext;
+import scala.Tuple2;
 
 public class JavaMQTTStreamSuite extends LocalJavaStreamingContext {
   @Test
   public void testMQTTStream() {
     String brokerUrl = "abc";
     String topic = "def";
+    String[] topics = {"def1","def2"};
 
     // tests the API, does not actually test data receiving
     JavaReceiverInputDStream<String> test1 = MQTTUtils.createStream(ssc, brokerUrl, topic);
@@ -39,5 +41,16 @@ public class JavaMQTTStreamSuite extends LocalJavaStreamingContext {
       "testid", "user", "password", true, 1, 10, 30, 3);
     JavaReceiverInputDStream<String> test5 = MQTTUtils.createStream(ssc, brokerUrl, topic,
       "testid", "user", "password", true);
+    JavaReceiverInputDStream<Tuple2<String, String>> test6 = MQTTUtils.createPairedStream(ssc,
+      brokerUrl, topics);
+    JavaReceiverInputDStream<Tuple2<String, String>> test7 = MQTTUtils.createPairedStream(ssc,
+      brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2());
+    JavaReceiverInputDStream<Tuple2<String, String>> test8 = MQTTUtils.createPairedStream(ssc,
+      brokerUrl, topics, StorageLevel.MEMORY_AND_DISK_SER_2(), "testid", "user",
+      "password", true, 1, 10, 30, 3);
+    JavaReceiverInputDStream<Tuple2<String, String>> test9 = MQTTUtils.createPairedStream(ssc,
+      brokerUrl, topics, "testid", "user", "password", true, 1, 10, 30, 3);
+    JavaReceiverInputDStream<Tuple2<String, String>> test10 = MQTTUtils.createPairedStream(ssc,
+      brokerUrl, topics, "testid", "user", "password", true);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/826545cb/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
----------------------------------------------------------------------
diff --git a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
index fdcd18c..f1d9a20 100644
--- a/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
+++ b/streaming-mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
@@ -33,6 +33,7 @@ class MQTTStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfter
   private val master = "local[2]"
   private val framework = this.getClass.getSimpleName
   private val topic = "def"
+  private val topics = Array("def1", "def2")
 
   private var ssc: StreamingContext = _
   private var mqttTestUtils: MQTTTestUtils = _
@@ -76,4 +77,28 @@ class MQTTStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfter
     }
     ssc.stop()
   }
+  test("mqtt input stream2") {
+    val sendMessage1 = "MQTT demo for spark streaming1"
+    val sendMessage2 = "MQTT demo for spark streaming2"
+    val receiveStream2 = MQTTUtils.createPairedStream(ssc, "tcp://" + mqttTestUtils.brokerUri,
+        topics, StorageLevel.MEMORY_ONLY)
+
+    @volatile var receiveMessage: List[String] = List()
+    receiveStream2.foreachRDD { rdd =>
+      if (!rdd.isEmpty()) {
+        receiveMessage = receiveMessage ::: List(rdd.first()._2)
+      }
+    }
+
+    ssc.start()
+
+    // Retry it because we don't know when the receiver will start.
+    eventually(timeout(10000 milliseconds), interval(100 milliseconds)) {
+      mqttTestUtils.publishData(topics(0), sendMessage1)
+      mqttTestUtils.publishData(topics(1), sendMessage2)
+      assert(receiveMessage.contains(sendMessage1) || receiveMessage.contains(sendMessage2))
+    }
+    ssc.stop()
+  }
 }


[32/50] [abbrv] incubator-livy-website git commit: [BAHIR-101] Update sql-cloudant readme and python examples

Posted by lr...@apache.org.
[BAHIR-101] Update sql-cloudant readme and python examples

Closes #40.


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/561291bf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/561291bf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/561291bf

Branch: refs/heads/master
Commit: 561291bfc17f8eae97318b39ea9cc2d80680d5ce
Parents: 889de65
Author: Esteban Laver <em...@us.ibm.com>
Authored: Mon Apr 3 18:05:44 2017 -0400
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Apr 6 08:28:10 2017 -0700

----------------------------------------------------------------------
 sql-cloudant/README.md                          | 306 ++++++++-----------
 sql-cloudant/examples/python/CloudantApp.py     |   9 +-
 sql-cloudant/examples/python/CloudantDF.py      |   5 +-
 .../examples/python/CloudantDFOption.py         |   5 +-
 4 files changed, 143 insertions(+), 182 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/561291bf/sql-cloudant/README.md
----------------------------------------------------------------------
diff --git a/sql-cloudant/README.md b/sql-cloudant/README.md
index 98a1c85..eaa8893 100644
--- a/sql-cloudant/README.md
+++ b/sql-cloudant/README.md
@@ -1,24 +1,12 @@
-Spark Cloudant Connector
-================
+A library for reading data from Cloudant or CouchDB databases using Spark SQL and Spark Streaming. 
 
-Cloudant integration with Spark as Spark SQL external datasource, and Spark Streaming as a custom receiver. 
+[IBM® Cloudant®](https://cloudant.com) is a document-oriented DataBase as a Service (DBaaS). It stores data as documents 
+in JSON format. It's built with scalability, high availability, and durability in mind. It comes with a 
+wide variety of indexing options including map-reduce, Cloudant Query, full-text indexing, and 
+geospatial indexing. The replication capabilities make it easy to keep data in sync between database 
+clusters, desktop PCs, and mobile devices.
 
-
-##  Contents:
-0. [Linking](#Linking)
-1. [Implementation of RelationProvider](#implementation-of-relationProvider)
-2. [Implementation of Receiver](#implementation-of-Receiver)
-3. [Sample applications](#Sample-application)
-    1. [Using SQL In Python](#Using-SQL-In-Python)
-    2. [Using SQL In Scala](#Using-SQL-In-Scala)
-    3. [Using DataFrame In Python](#Using-DataFrame-In-Python)
-    4. [Using DataFrame In Scala](#Using-DataFrame-In-Scala)
-    5. [Using Streams In Scala](#Using-Streams-In-Scala)
-4. [Configuration Overview](#Configuration-Overview)
-5. [Known limitations and areas for improvement](#Known-limitations)
-
-
-<div id='Linking'/>
+[Apache CouchDB™](http://couchdb.apache.org) is open source database software that focuses on ease of use and having an architecture that "completely embraces the Web". It has a document-oriented NoSQL database architecture and is implemented in the concurrency-oriented language Erlang; it uses JSON to store data, JavaScript as its query language using MapReduce, and HTTP for an API.
 
 ## Linking
 
@@ -35,51 +23,96 @@ Using Maven:
     </dependency>
 
 This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
-For example, to include it when starting the spark shell:
 
     $ bin/spark-shell --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT
 
 Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
 The `--packages` argument can also be used with `bin/spark-submit`.
 
+Submit a job in Python:
+
+    spark-submit --master local[4] --jars <path to cloudant-spark.jar> <path to python script>
+
+Submit a job in Scala:
+
+    spark-submit --class "<your class>" --master local[4] --jars <path to cloudant-spark.jar> <path to your app jar>
+
 This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.
 
 
-<div id='implementation-of-relationProvider'/>
+## Configuration options
+
+The configuration is obtained in the following sequence:
 
-### Implementation of RelationProvider
+1. default in the Config, which is set in the application.conf
+2. key in the SparkConf, which is set in SparkConf
+3. key in the parameters, which is set in a dataframe or temporary table options
+4. "spark."+key in the SparkConf (these are treated as values passed in through spark-submit using the --conf option)
 
-[DefaultSource.scala](src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala) is a RelationProvider for loading data from Cloudant to Spark, and saving it back from Cloudant to Spark.  It has the following functionalities:
+Here each subsequent configuration overrides the previous one. Thus, configuration set using a DataFrame option overrides what has been set in SparkConf, and configuration passed to spark-submit using --conf takes precedence over any setting in the code.
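To make the precedence concrete, here is a brief editorial sketch in Scala (not from the original README): the same key is set in SparkConf and then overridden by a DataFrame option, while a `spark.`-prefixed value passed via `spark-submit --conf` would win over both. Host, credentials, and database names are placeholders.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("Cloudant config precedence sketch")
  .config("cloudant.host", "ACCOUNT.cloudant.com")   // level 2: SparkConf
  .config("cloudant.username", "USERNAME")
  .config("cloudant.password", "PASSWORD")
  .config("schemaSampleSize", "-1")                  // level 2: scan all docs for schema discovery
  .getOrCreate()

// Level 3: a DataFrame option overrides the SparkConf value, but only for this read.
val df = spark.read.format("org.apache.bahir.cloudant")
  .option("schemaSampleSize", "100")                 // sample 100 docs instead of all
  .load("n_airportcodemapping")

// Level 4: `spark-submit --conf spark.schemaSampleSize=1` would take precedence over both.
```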
 
-Functionality | Enablement 
---- | ---
-Table Option | database or path, search index, view 
-Scan Type | PrunedFilteredScan 
-Column Pruning | yes
-Predicates Push Down | _id or first predicate 
-Parallel Loading | yes, except with search index
-Insert-able | yes
- 
 
-<div id='implementation-of-Receiver'/>
+### Configuration in application.conf
+Default values are defined in [here](cloudant-spark-sql/src/main/resources/application.conf).
+
+### Configuration on SparkConf
 
-### Implementation of Receiver
+Name | Default | Meaning
+--- |:---:| ---
+cloudant.protocol|https|protocol to use to transfer data: http or https
+cloudant.host||cloudant host url
+cloudant.username||cloudant userid
+cloudant.password||cloudant password
+jsonstore.rdd.partitions|10|the number of partitions intended to be used to drive JsonStoreRDD loading query results in parallel. The actual number is calculated based on total rows returned and satisfying maxInPartition and minInPartition
+jsonstore.rdd.maxInPartition|-1|the max rows in a partition. -1 means unlimited
+jsonstore.rdd.minInPartition|10|the min rows in a partition.
+jsonstore.rdd.requestTimeout|900000| the request timeout in milliseconds
+bulkSize|200| the bulk save size
+schemaSampleSize| "-1" | the sample size for RDD schema discovery. 1 means we are using only first document for schema discovery; -1 means all documents; 0 will be treated as 1; any number N means min(N, total) docs 
+createDBOnSave|"false"| whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised. 
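A short editorial sketch (not part of this commit) of a few of the tuning keys above applied through the session builder, with `createDBOnSave` taking effect on a write; database names and values are illustrative.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("Cloudant tuning sketch")
  .config("cloudant.host", "ACCOUNT.cloudant.com")
  .config("cloudant.username", "USERNAME")
  .config("cloudant.password", "PASSWORD")
  .config("jsonstore.rdd.partitions", "20")          // allow up to 20 parallel load partitions
  .config("jsonstore.rdd.requestTimeout", "300000")  // 5-minute request timeout
  .config("bulkSize", "500")                         // larger bulk saves
  .config("createDBOnSave", "true")                  // create the target db on save if missing
  .getOrCreate()

// Read one database and save a filtered subset into a new one.
val flights = spark.read.format("org.apache.bahir.cloudant").load("n_flight")
flights.filter(flights("flightSegmentId") === "AA106")
  .write.format("org.apache.bahir.cloudant")
  .save("n_flight_aa106")                            // created because createDBOnSave is true
```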
 
-Spark Cloudant connector creates a discretized stream in Spark (Spark input DStream) out of Cloudant data sources. [CloudantReceiver.scala](src/main/scala/org/apache/bahir/cloudant/CloudantReceiver.scala) is a custom Receiver that converts `_changes` feed from a Cloudant database to DStream in Spark. This allows all sorts of processing on this streamed data including [using DataFrames and SQL operations on it](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala).
+### Configuration on Spark SQL Temporary Table or DataFrame
 
-**NOTE:** Since CloudantReceiver for Spark Streaming is based on `_changes` API, there are some limitations that application developers should be aware of. Firstly, results returned from `_changes` are partially ordered, and may not be presented in order in which documents were updated. Secondly, in case of shards' unavailability, you may see duplicates, changes that have been seen already. Thus, it is up to applications using Spark Streaming with CloudantReceiver to keep track of _changes they have processed and detect duplicates. 
+Besides all the configurations passed to a temporary table or dataframe through SparkConf, it is also possible to set the following configurations in temporary table or dataframe using OPTIONS: 
 
+Name | Default | Meaning
+--- |:---:| ---
+database||cloudant database name
+view||cloudant view w/o the database name. only used for load.
+index||cloudant search index w/o the database name. only used for load data with less than or equal to 200 results.
+path||cloudant: as database name if database is not present
+schemaSampleSize|"-1"| the sample size used to discover the schema for this temp table. -1 scans all documents
+bulkSize|200| the bulk save size
+createDBOnSave|"false"| whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised. 
 
-<div id='Sample-application'/>
+For fast loading, views are loaded without include_docs. Thus, a derived schema will always be: `{id, key, value}`, where `value` can be a compound field. An example of loading data from a view: 
 
-## Sample applications
+```python
+spark.sql(" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', view '_design/view/_view/AA0')")
 
-<div id='Using-SQL-In-Python'/>
+```
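Similarly, for the `index` option listed in the table above, here is a hedged Scala sketch (not from the original README) of loading from a Cloudant search index; it assumes a SparkSession `spark` already configured with the Cloudant host and credentials, and reuses the database and index names from the examples elsewhere in this README.

```scala
// Search-index loads work only for up to 200 results (per the table above).
val flightsFromIndex = spark.read.format("org.apache.bahir.cloudant")
  .option("database", "n_flight")
  .option("index", "_design/view/_search/n_flights")
  .load()

flightsFromIndex.printSchema()
flightsFromIndex.filter(flightsFromIndex("flightSegmentId") > "AA9").show()
```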
 
-### Using SQL In Python 
-	
-[CloudantApp.py](examples/python/CloudantApp.py)
+### Configuration on Cloudant Receiver for Spark Streaming
 
+Name | Default | Meaning
+--- |:---:| ---
+cloudant.host||cloudant host url
+cloudant.username||cloudant userid
+cloudant.password||cloudant password
+database||cloudant database name
+selector| all documents| a selector written in Cloudant Query syntax, specifying conditions for selecting documents. Only documents satisfying the selector's conditions will be retrieved from Cloudant and loaded into Spark.
+
+
+### Configuration in spark-submit using --conf option
+
+The configuration keys listed above can also be set using the `spark-submit --conf` option. When passing configuration in spark-submit, make sure to add the "spark." prefix to the keys.
+
+
+## Examples
+
+### Python API
+
+#### Using SQL In Python 
+	
 ```python
 spark = SparkSession\
     .builder\
@@ -90,21 +123,58 @@ spark = SparkSession\
     .getOrCreate()
 
 
-#### Loading temp table from Cloudant db
+# Loading temp table from Cloudant db
 spark.sql(" CREATE TEMPORARY TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
 airportData = spark.sql("SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id")
 airportData.printSchema()
 print 'Total # of rows in airportData: ' + str(airportData.count())
 for code in airportData.collect():
     print code._id
-```	
+```
 
-<div id='Using-SQL-In-Scala'/>
+See [CloudantApp.py](examples/python/CloudantApp.py) for examples.
 
-### Using SQL In Scala 
+Submit job example:
+```
+spark-submit  --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT --conf spark.cloudant.host=ACCOUNT.cloudant.com --conf spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD sql-cloudant/examples/python/CloudantApp.py
+```
 
+#### Using DataFrame In Python 
 
-[CloudantApp.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantApp.scala)
+```python
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using dataframes")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .config("jsonstore.rdd.partitions", 8)\
+    .getOrCreate()
+
+# ***1. Loading dataframe from Cloudant db
+df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
+df.cache() 
+df.printSchema()
+df.filter(df.airportName >= 'Moscow').select("_id",'airportName').show()
+df.filter(df._id >= 'CAA').select("_id",'airportName').show()	    
+```
+
+See [CloudantDF.py](examples/python/CloudantDF.py) for examples.
+	
+When performing multiple operations on a dataframe (select, filter, etc.),
+you should persist it. Otherwise, every operation on the dataframe will load the same data from Cloudant again.
+Persisting also speeds up computation. This statement persists the dataframe in memory: `df.cache()`. Alternatively, for large databases, persist to both memory and disk with:
+
+```python
+from pyspark import StorageLevel
+df.persist(storageLevel = StorageLevel(True, True, False, True, 1))
+```
+
+[Sample code](examples/python/CloudantDFOption.py) on using DataFrame option to define cloudant configuration
+
+### Scala API
+
+#### Using SQL In Scala 
 
 ```scala
 val spark = SparkSession
@@ -122,7 +192,7 @@ import spark.implicits._
 spark.sql(
     s"""
     |CREATE TEMPORARY TABLE airportTable
-    |USING org.apache.bahir.cloudant.spark
+    |USING org.apache.bahir.cloudant
     |OPTIONS ( database 'n_airportcodemapping')
     """.stripMargin)
 // create a dataframe
@@ -131,52 +201,17 @@ airportData.printSchema()
 println(s"Total # of rows in airportData: " + airportData.count())
 // convert dataframe to array of Rows, and process each row
 airportData.map(t => "code: " + t(0) + ",name:" + t(1)).collect().foreach(println)
-	
-```	
-
-
-<div id='Using-DataFrame-In-Python'/>	
-
-### Using DataFrame In Python 
-
-[CloudantDF.py](examples/python/CloudantDF.py). 
-
-```python	    
-spark = SparkSession\
-    .builder\
-    .appName("Cloudant Spark SQL Example in Python using dataframes")\
-    .config("cloudant.host","ACCOUNT.cloudant.com")\
-    .config("cloudant.username", "USERNAME")\
-    .config("cloudant.password","PASSWORD")\
-    .config("jsonstore.rdd.partitions", 8)\
-    .getOrCreate()
-
-#### Loading dataframe from Cloudant db
-df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
-df.cache() 
-df.printSchema()
-df.filter(df.airportName >= 'Moscow').select("_id",'airportName').show()
-df.filter(df._id >= 'CAA').select("_id",'airportName').show()	    
 ```
-	
-In case of doing multiple operations on a dataframe (select, filter etc.),
-you should persist a dataframe. Otherwise, every operation on a dataframe will load the same data from Cloudant again.
-Persisting will also speed up computation. This statement will persist an RDD in memory: `df.cache()`.  Alternatively for large dbs to persist in memory & disk, use: 
-
-```python
-from pyspark import StorageLevel
-df.persist(storageLevel = StorageLevel(True, True, False, True, 1))
-```	
+See [CloudantApp.scala](examples/scala/src/main/scala/mytest/spark/CloudantApp.scala) for examples.
 
-[Sample code on using DataFrame option to define cloudant configuration](examples/python/CloudantDFOption.py)
-
-<div id='Using-DataFrame-In-Scala'/>	
+Submit job example:
+```
+spark-submit --class org.apache.spark.examples.sql.cloudant.CloudantApp --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT --conf spark.cloudant.host=ACCOUNT.cloudant.com --conf spark.cloudant.username=USERNAME --conf spark.cloudant.password=PASSWORD  /path/to/spark-sql-cloudant_2.11-2.2.0-SNAPSHOT-tests.jar
+```
 
 ### Using DataFrame In Scala 
 
-[CloudantDF.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDF.scala)
-
-```	scala
+```scala
 val spark = SparkSession
       .builder()
       .appName("Cloudant Spark SQL Example with Dataframe")
@@ -199,15 +234,14 @@ val df2 = df.filter(df("flightSegmentId") === "AA106")
     .select("flightSegmentId","economyClassBaseCost")
 df2.show()
 df2.write.format("org.apache.bahir.cloudant").save("n_flight2")
-```	
+```
+
+See [CloudantDF.scala](examples/scala/src/main/scala/mytest/spark/CloudantDF.scala) for examples.
     
- [Sample code on using DataFrame option to define cloudant configuration](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDFOption.scala)
- 
+[Sample code](examples/scala/src/main/scala/mytest/spark/CloudantDFOption.scala) on using DataFrame option to define Cloudant configuration.
  
-<div id='Using-Streams-In-Scala'/>
  
 ### Using Streams In Scala 
-[CloudantStreaming.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala)
 
 ```scala
 val ssc = new StreamingContext(sparkConf, Seconds(10))
@@ -235,9 +269,14 @@ ssc.start()
 Thread.sleep(120000L)
 ssc.stop(true)
 	
-```	
+```
 
-By default, Spark Streaming will load all documents from a database. If you want to limit the loading to specific documents, use `selector` option of `CloudantReceiver` and specify your conditions ([CloudantStreamingSelector.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreamingSelector.scala)):
+See [CloudantStreaming.scala](examples/scala/src/main/scala/mytest/spark/CloudantStreaming.scala) for examples.
+
+By default, Spark Streaming will load all documents from a database. If you want to limit the loading to 
+specific documents, use `selector` option of `CloudantReceiver` and specify your conditions 
+(See [CloudantStreamingSelector.scala](examples/scala/src/main/scala/mytest/spark/CloudantStreamingSelector.scala)
+example for more details):
 
 ```scala
 val changes = ssc.receiverStream(new CloudantReceiver(Map(
@@ -247,78 +286,3 @@ val changes = ssc.receiverStream(new CloudantReceiver(Map(
   "database" -> "sales",
   "selector" -> "{\"month\":\"May\", \"rep\":\"John\"}")))
 ```
-
-
-<div id='Configuration-Overview'/>
-
-## Configuration Overview	
-
-The configuration is obtained in the following sequence:
-
-1. default in the Config, which is set in the application.conf
-2. key in the SparkConf, which is set in SparkConf
-3. key in the parameters, which is set in a dataframe or temporaty table options, or StreamReceiver
-4. "spark."+key in the SparkConf (as they are treated as the one passed in through spark-submit using --conf option)
-
-Here each subsequent configuration overrides the previous one. Thus, configuration set using DataFrame option overrides what has beens set in SparkConf. And configuration passed in spark-submit using --conf takes precedence over any setting in the code. When passing configuration in spark-submit, make sure adding "spark." as prefix to the keys.
-
-
-### Configuration in application.conf
-
-Default values are defined in [here](src/main/resources/application.conf)
-
-### Configuration on SparkConf
-
-Name | Default | Meaning
---- |:---:| ---
-cloudant.protocol|https|protocol to use to transfer data: http or https
-cloudant.host||cloudant host url
-cloudant.username||cloudant userid
-cloudant.password||cloudant password
-jsonstore.rdd.partitions|10|the number of partitions intent used to drive JsonStoreRDD loading query result in parallel. The actual number is calculated based on total rows returned and satisfying maxInPartition and minInPartition
-jsonstore.rdd.maxInPartition|-1|the max rows in a partition. -1 means unlimited
-jsonstore.rdd.minInPartition|10|the min rows in a partition.
-jsonstore.rdd.requestTimeout|900000| the request timeout in milliseconds
-bulkSize|200| the bulk save size
-schemaSampleSize| "-1" | the sample size for RDD schema discovery. 1 means we are using only first document for schema discovery; -1 means all documents; 0 will be treated as 1; any number N means min(N, total) docs 
-createDBOnSave|"false"| whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised. 
-
-
-###  Configuration on Spark SQL Temporary Table or DataFrame
-
-Besides overriding any SparkConf configuration, you can also set the following configurations at temporary table or dataframe level.
-
-Name | Default | Meaning
---- |:---:| ---
-database||cloudant database name
-view||cloudant view w/o the database name. only used for load.  
-index||cloudant search index w/o the database name. only used for load data with less than or equal to 200 results.
-path||cloudant: as database name if database is not present
-
-
-#### View Specific
-
-For fast loading, views are loaded without include_docs. Thus, a derived schema will always be: `{id, key, value}`, where `value `can be a compound field. An example of loading data from a view:
-
-```python
-spark.sql(" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', view '_design/view/_view/AA0')")
-
-```
-
-###  Configuration on Cloudant Receiver for Spark Streaming
-
-Besides overriding any SparkConf configuration, you can also set the following configurations at stream Receiver level
-
-Name | Default | Meaning
---- |:---:| ---
-database||cloudant database name
-selector| all documents| a selector written in Cloudant Query syntax, specifying conditions for selecting documents. Only documents satisfying the selector's conditions will be retrieved from Cloudant and loaded into Spark.
-
-
-<div id='Known-limitations'/>
-
-## Known limitations and areas for improvement
-
-* Loading data from Cloudant search index will work only for up to 200 results.
-		
-* Need to improve how number of partitions is determined for parallel loading

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/561291bf/sql-cloudant/examples/python/CloudantApp.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantApp.py b/sql-cloudant/examples/python/CloudantApp.py
index 029f39b..c403aeb 100644
--- a/sql-cloudant/examples/python/CloudantApp.py
+++ b/sql-cloudant/examples/python/CloudantApp.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pprint
 from pyspark.sql import SparkSession
 
 spark = SparkSession\
@@ -30,16 +29,16 @@ spark = SparkSession\
 spark.sql(" CREATE TEMPORARY TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
 airportData = spark.sql("SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id")
 airportData.printSchema()
-print 'Total # of rows in airportData: ' + str(airportData.count())
+print ('Total # of rows in airportData: ' + str(airportData.count()))
 for code in airportData.collect():
-    print code._id
+    print (code._id)
 
 
 # ***2. Loading temp table from Cloudant search index
-print 'About to test org.apache.bahir.cloudant for flight with index'
+print ('About to test org.apache.bahir.cloudant for flight with index')
 spark.sql(" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', index '_design/view/_search/n_flights')")
 flightData = spark.sql("SELECT flightSegmentId, scheduledDepartureTime FROM flightTable1 WHERE flightSegmentId >'AA9' AND flightSegmentId<'AA95'")
 flightData.printSchema()
 for code in flightData.collect():
-    print 'Flight {0} on {1}'.format(code.flightSegmentId, code.scheduledDepartureTime)
+    print ('Flight {0} on {1}'.format(code.flightSegmentId, code.scheduledDepartureTime))
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/561291bf/sql-cloudant/examples/python/CloudantDF.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantDF.py b/sql-cloudant/examples/python/CloudantDF.py
index c009e98..a8af0fa 100644
--- a/sql-cloudant/examples/python/CloudantDF.py
+++ b/sql-cloudant/examples/python/CloudantDF.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pprint
 from pyspark.sql import SparkSession
 
 # define cloudant related configuration
@@ -54,7 +53,7 @@ df2.write.save("n_flight2",  "org.apache.bahir.cloudant",
         bulkSize = "100", createDBOnSave="true") 
 total = df.filter(df.flightSegmentId >'AA9').select("flightSegmentId", 
         "scheduledDepartureTime").orderBy(df.flightSegmentId).count()
-print "Total", total, "flights from table"
+print ("Total", total, "flights from table")
 
 
 # ***3. Loading dataframe from a Cloudant search index
@@ -63,7 +62,7 @@ df = spark.read.load(format="org.apache.bahir.cloudant", database="n_flight",
 df.printSchema()
 total = df.filter(df.flightSegmentId >'AA9').select("flightSegmentId", 
         "scheduledDepartureTime").orderBy(df.flightSegmentId).count()
-print "Total", total, "flights from index"
+print ("Total", total, "flights from index")
 
 
 # ***4. Loading dataframe from a Cloudant view

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/561291bf/sql-cloudant/examples/python/CloudantDFOption.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantDFOption.py b/sql-cloudant/examples/python/CloudantDFOption.py
index c045532..a7f5e38 100644
--- a/sql-cloudant/examples/python/CloudantDFOption.py
+++ b/sql-cloudant/examples/python/CloudantDFOption.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pprint
 from pyspark.sql import SparkSession
 
 spark = SparkSession\
@@ -55,7 +54,7 @@ df.printSchema()
 total = df.filter(df.flightSegmentId >'AA9') \
     .select("flightSegmentId", "scheduledDepartureTime") \
     .orderBy(df.flightSegmentId).count()
-print "Total", total, "flights from table"
+print ("Total", total, "flights from table")
 
 
 # ***3. Loading dataframe from Cloudant search index
@@ -69,4 +68,4 @@ df.printSchema()
 total = df.filter(df.flightSegmentId >'AA9') \
     .select("flightSegmentId", "scheduledDepartureTime") \
     .orderBy(df.flightSegmentId).count()
-print "Total", total, "flights from index"
+print ("Total", total, "flights from index")


[30/50] [abbrv] incubator-livy-website git commit: [BAHIR-101] Spark SQL datasource for CouchDB/Cloudant

Posted by lr...@apache.org.
[BAHIR-101] Spark SQL datasource for CouchDB/Cloudant

Initial code supporting CouchDB/Cloudant as a Spark SQL
data source. The initial source contains the core connector,
examples, and basic documentation in the README.

Closes #39.


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/f0d9a84f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/f0d9a84f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/f0d9a84f

Branch: refs/heads/master
Commit: f0d9a84f76cb34a432e1d2db053d2471a8ab2ba4
Parents: 2ebfd0b
Author: Yang Lei <ge...@gmail.com>
Authored: Wed Mar 29 17:32:51 2017 -0400
Committer: Luciano Resende <lr...@apache.org>
Committed: Fri Mar 31 15:42:54 2017 -0700

----------------------------------------------------------------------
 README.md                                       |   1 +
 pom.xml                                         |  17 +-
 sql-cloudant/README.md                          | 324 +++++++++++++++++++
 sql-cloudant/examples/python/CloudantApp.py     |  45 +++
 sql-cloudant/examples/python/CloudantDF.py      |  75 +++++
 .../examples/python/CloudantDFOption.py         |  72 +++++
 .../examples/sql/cloudant/CloudantApp.scala     |  73 +++++
 .../examples/sql/cloudant/CloudantDF.scala      |  64 ++++
 .../sql/cloudant/CloudantDFOption.scala         |  71 ++++
 .../sql/cloudant/CloudantStreaming.scala        |  99 ++++++
 .../cloudant/CloudantStreamingSelector.scala    |  64 ++++
 sql-cloudant/pom.xml                            | 115 +++++++
 .../src/main/resources/application.conf         |  14 +
 sql-cloudant/src/main/resources/reference.conf  |   0
 .../apache/bahir/cloudant/CloudantConfig.scala  | 273 ++++++++++++++++
 .../bahir/cloudant/CloudantReceiver.scala       |  90 ++++++
 .../apache/bahir/cloudant/DefaultSource.scala   | 159 +++++++++
 .../bahir/cloudant/common/FilterUtil.scala      | 149 +++++++++
 .../common/JsonStoreConfigManager.scala         | 212 ++++++++++++
 .../cloudant/common/JsonStoreDataAccess.scala   | 272 ++++++++++++++++
 .../bahir/cloudant/common/JsonStoreRDD.scala    | 106 ++++++
 .../apache/bahir/cloudant/common/JsonUtil.scala |  42 +++
 22 files changed, 2336 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index ff8599b..ebbaea7 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ Currently, each submodule has its own README.md, with information on example usa
 * [Streaming Mqtt](https://github.com/apache/bahir/blob/master/streaming-mqtt/README.md)
 * [Streaming Zeromq](https://github.com/apache/bahir/blob/master/streaming-zeromq/README.md)
 * [Streaming Twitter](https://github.com/apache/bahir/blob/master/streaming-twitter/README.md)
+* [SQL Cloudant](sql-cloudant/README.md)
 
 Furthermore, to generate scaladocs for each module:
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index f9ee4a0..73cac1f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -75,6 +75,7 @@
   </mailingLists>
 
   <modules>
+    <module>sql-cloudant</module>
     <module>streaming-akka</module>
     <module>streaming-mqtt</module>
     <module>sql-streaming-mqtt</module>
@@ -162,7 +163,7 @@
       </snapshots>
     </pluginRepository>
   </pluginRepositories>
-  
+
   <dependencies>
     <!--
       This is a dummy dependency that is used to trigger the maven-shade plugin so that Spark's
@@ -290,6 +291,12 @@
       </dependency>
 
       <dependency>
+        <groupId>com.typesafe.play</groupId>
+        <artifactId>play-json_${scala.binary.version}</artifactId>
+        <version>2.5.9</version>
+      </dependency>
+
+      <dependency>
         <groupId>org.json4s</groupId>
         <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
         <version>3.2.11</version>
@@ -301,6 +308,12 @@
         <version>${jsr305.version}</version>
       </dependency>
 
+      <dependency>
+        <groupId>org.scalaj</groupId>
+        <artifactId>scalaj-http_${scala.binary.version}</artifactId>
+        <version>2.3.0</version>
+      </dependency>
+
       <!-- Scala Related Dependencies -->
       <dependency>
         <groupId>org.scala-lang</groupId>
@@ -450,6 +463,8 @@
               <exclude>**/README.md</exclude>
               <exclude>**/examples/data/*.txt</exclude>
               <exclude>**/*.iml</exclude>
+              <exclude>**/src/main/resources/application.conf</exclude>
+              <exclude>**/src/main/resources/reference.conf</exclude>
             </excludes>
           </configuration>
         </plugin>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/README.md
----------------------------------------------------------------------
diff --git a/sql-cloudant/README.md b/sql-cloudant/README.md
new file mode 100644
index 0000000..98a1c85
--- /dev/null
+++ b/sql-cloudant/README.md
@@ -0,0 +1,324 @@
+Spark Cloudant Connector
+================
+
+Cloudant integration with Spark as Spark SQL external datasource, and Spark Streaming as a custom receiver. 
+
+
+##  Contents:
+0. [Linking](#Linking)
+1. [Implementation of RelationProvider](#implementation-of-relationProvider)
+2. [Implementation of Receiver](#implementation-of-Receiver)
+3. [Sample applications](#Sample-application)
+    1. [Using SQL In Python](#Using-SQL-In-Python)
+    2. [Using SQL In Scala](#Using-SQL-In-Scala)
+    3. [Using DataFrame In Python](#Using-DataFrame-In-Python)
+    4. [Using DataFrame In Scala](#Using-DataFrame-In-Scala)
+    5. [Using Streams In Scala](#Using-Streams-In-Scala)
+4. [Configuration Overview](#Configuration-Overview)
+5. [Known limitations and areas for improvement](#Known-limitations)
+
+
+<div id='Linking'/>
+
+## Linking
+
+Using SBT:
+
+    libraryDependencies += "org.apache.bahir" %% "spark-sql-cloudant" % "2.2.0-SNAPSHOT"
+
+Using Maven:
+
+    <dependency>
+        <groupId>org.apache.bahir</groupId>
+        <artifactId>spark-sql-cloudant_2.11</artifactId>
+        <version>2.2.0-SNAPSHOT</version>
+    </dependency>
+
+This library can also be added to Spark jobs launched through `spark-shell` or `spark-submit` by using the `--packages` command line option.
+For example, to include it when starting the spark shell:
+
+    $ bin/spark-shell --packages org.apache.bahir:spark-sql-cloudant_2.11:2.2.0-SNAPSHOT
+
+Unlike using `--jars`, using `--packages` ensures that this library and its dependencies will be added to the classpath.
+The `--packages` argument can also be used with `bin/spark-submit`.
+
+This library is compiled for Scala 2.11 only, and intends to support Spark 2.0 onwards.
+
+
+<div id='implementation-of-relationProvider'/>
+
+### Implementation of RelationProvider
+
+[DefaultSource.scala](src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala) is a RelationProvider for loading data from Cloudant into Spark, and saving it back from Spark to Cloudant.  It has the following functionalities:
+
+Functionality | Enablement 
+--- | ---
+Table Option | database or path, search index, view 
+Scan Type | PrunedFilteredScan 
+Column Pruning | yes
+Predicates Push Down | _id or first predicate 
+Parallel Loading | yes, except with search index
+Insert-able | yes
+ 
+
+<div id='implementation-of-Receiver'/>
+
+### Implementation of Receiver
+
+Spark Cloudant connector creates a discretized stream in Spark (Spark input DStream) out of Cloudant data sources. [CloudantReceiver.scala](src/main/scala/org/apache/bahir/cloudant/CloudantReceiver.scala) is a custom Receiver that converts `_changes` feed from a Cloudant database to DStream in Spark. This allows all sorts of processing on this streamed data including [using DataFrames and SQL operations on it](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala).
+
+**NOTE:** Since CloudantReceiver for Spark Streaming is based on `_changes` API, there are some limitations that application developers should be aware of. Firstly, results returned from `_changes` are partially ordered, and may not be presented in order in which documents were updated. Secondly, in case of shards' unavailability, you may see duplicates, changes that have been seen already. Thus, it is up to applications using Spark Streaming with CloudantReceiver to keep track of _changes they have processed and detect duplicates. 
+
+
+<div id='Sample-application'/>
+
+## Sample applications
+
+<div id='Using-SQL-In-Python'/>
+
+### Using SQL In Python 
+	
+[CloudantApp.py](examples/python/CloudantApp.py)
+
+```python
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using temp tables")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .getOrCreate()
+
+
+#### Loading temp table from Cloudant db
+spark.sql(" CREATE TEMPORARY TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
+airportData = spark.sql("SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id")
+airportData.printSchema()
+print 'Total # of rows in airportData: ' + str(airportData.count())
+for code in airportData.collect():
+    print code._id
+```	
+
+<div id='Using-SQL-In-Scala'/>
+
+### Using SQL In Scala 
+
+
+[CloudantApp.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantApp.scala)
+
+```scala
+val spark = SparkSession
+      .builder()
+      .appName("Cloudant Spark SQL Example")
+      .config("cloudant.host","ACCOUNT.cloudant.com")
+      .config("cloudant.username", "USERNAME")
+      .config("cloudant.password","PASSWORD")
+      .getOrCreate()
+
+// For implicit conversions of Dataframe to RDDs
+import spark.implicits._
+    
+// create a temp table from Cloudant db and query it using sql syntax
+spark.sql(
+    s"""
+    |CREATE TEMPORARY TABLE airportTable
+    |USING org.apache.bahir.cloudant.spark
+    |OPTIONS ( database 'n_airportcodemapping')
+    """.stripMargin)
+// create a dataframe
+val airportData = spark.sql("SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id")
+airportData.printSchema()
+println(s"Total # of rows in airportData: " + airportData.count())
+// convert dataframe to array of Rows, and process each row
+airportData.map(t => "code: " + t(0) + ",name:" + t(1)).collect().foreach(println)
+	
+```	
+
+
+<div id='Using-DataFrame-In-Python'/>	
+
+### Using DataFrame In Python 
+
+[CloudantDF.py](examples/python/CloudantDF.py). 
+
+```python	    
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using dataframes")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .config("jsonstore.rdd.partitions", 8)\
+    .getOrCreate()
+
+#### Loading dataframe from Cloudant db
+df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
+df.cache() 
+df.printSchema()
+df.filter(df.airportName >= 'Moscow').select("_id",'airportName').show()
+df.filter(df._id >= 'CAA').select("_id",'airportName').show()	    
+```
+	
+In case of doing multiple operations on a dataframe (select, filter etc.),
+you should persist a dataframe. Otherwise, every operation on a dataframe will load the same data from Cloudant again.
+Persisting will also speed up computation. This statement will persist an RDD in memory: `df.cache()`.  Alternatively for large dbs to persist in memory & disk, use: 
+
+```python
+from pyspark import StorageLevel
+df.persist(storageLevel = StorageLevel(True, True, False, True, 1))
+```	
+
+[Sample code on using DataFrame option to define cloudant configuration](examples/python/CloudantDFOption.py)
+
+<div id='Using-DataFrame-In-Scala'/>	
+
+### Using DataFrame In Scala 
+
+[CloudantDF.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDF.scala)
+
+```	scala
+val spark = SparkSession
+      .builder()
+      .appName("Cloudant Spark SQL Example with Dataframe")
+      .config("cloudant.host","ACCOUNT.cloudant.com")
+      .config("cloudant.username", "USERNAME")
+      .config("cloudant.password","PASSWORD")
+      .config("createDBOnSave","true") // to create a db on save
+      .config("jsonstore.rdd.partitions", "20") // using 20 partitions
+      .getOrCreate()
+          
+// 1. Loading data from Cloudant db
+val df = spark.read.format("org.apache.bahir.cloudant").load("n_flight")
+// Caching df in memory to speed computations
+// and not to retrieve data from cloudant again
+df.cache() 
+df.printSchema()
+
+// 2. Saving dataframe to Cloudant db
+val df2 = df.filter(df("flightSegmentId") === "AA106")
+    .select("flightSegmentId","economyClassBaseCost")
+df2.show()
+df2.write.format("org.apache.bahir.cloudant").save("n_flight2")
+```	
+    
+ [Sample code on using DataFrame option to define cloudant configuration](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDFOption.scala)
+ 
+ 
+<div id='Using-Streams-In-Scala'/>
+ 
+### Using Streams In Scala 
+[CloudantStreaming.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala)
+
+```scala
+val ssc = new StreamingContext(sparkConf, Seconds(10))
+val changes = ssc.receiverStream(new CloudantReceiver(Map(
+  "cloudant.host" -> "ACCOUNT.cloudant.com",
+  "cloudant.username" -> "USERNAME",
+  "cloudant.password" -> "PASSWORD",
+  "database" -> "n_airportcodemapping")))
+
+changes.foreachRDD((rdd: RDD[String], time: Time) => {
+  // Get the singleton instance of SparkSession
+  val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
+
+  println(s"========= $time =========")
+  // Convert RDD[String] to DataFrame
+  val changesDataFrame = spark.read.json(rdd)
+  if (!changesDataFrame.schema.isEmpty) {
+    changesDataFrame.printSchema()
+    changesDataFrame.select("*").show()
+    ....
+  }
+})
+ssc.start()
+// run streaming for 120 secs
+Thread.sleep(120000L)
+ssc.stop(true)
+	
+```	
+
+By default, Spark Streaming will load all documents from a database. If you want to limit the loading to specific documents, use `selector` option of `CloudantReceiver` and specify your conditions ([CloudantStreamingSelector.scala](examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreamingSelector.scala)):
+
+```scala
+val changes = ssc.receiverStream(new CloudantReceiver(Map(
+  "cloudant.host" -> "ACCOUNT.cloudant.com",
+  "cloudant.username" -> "USERNAME",
+  "cloudant.password" -> "PASSWORD",
+  "database" -> "sales",
+  "selector" -> "{\"month\":\"May\", \"rep\":\"John\"}")))
+```
+
+
+<div id='Configuration-Overview'/>
+
+## Configuration Overview	
+
+The configuration is obtained in the following sequence:
+
+1. default in the Config, which is set in the application.conf
+2. key in the SparkConf, which is set in SparkConf
+3. key in the parameters, which is set in a dataframe or temporary table options, or StreamReceiver
+4. "spark."+key in the SparkConf (as they are treated as the one passed in through spark-submit using --conf option)
+
+Here each subsequent configuration overrides the previous one. Thus, configuration set using a DataFrame option overrides what has been set in SparkConf, and configuration passed in spark-submit using --conf takes precedence over any setting in the code. When passing configuration in spark-submit, make sure to add the "spark." prefix to the keys.
+
+
+### Configuration in application.conf
+
+Default values are defined in [here](src/main/resources/application.conf)
+
+### Configuration on SparkConf
+
+Name | Default | Meaning
+--- |:---:| ---
+cloudant.protocol|https|protocol to use to transfer data: http or https
+cloudant.host||cloudant host url
+cloudant.username||cloudant userid
+cloudant.password||cloudant password
+jsonstore.rdd.partitions|10|the number of partitions intent used to drive JsonStoreRDD loading query result in parallel. The actual number is calculated based on total rows returned and satisfying maxInPartition and minInPartition
+jsonstore.rdd.maxInPartition|-1|the max rows in a partition. -1 means unlimited
+jsonstore.rdd.minInPartition|10|the min rows in a partition.
+jsonstore.rdd.requestTimeout|900000| the request timeout in milliseconds
+bulkSize|200| the bulk save size
+schemaSampleSize| "-1" | the sample size for RDD schema discovery. 1 means we are using only first document for schema discovery; -1 means all documents; 0 will be treated as 1; any number N means min(N, total) docs 
+createDBOnSave|"false"| whether to create a new database during save operation. If false, a database should already exist. If true, a new database will be created. If true, and a database with a provided name already exists, an error will be raised. 
+
+
+###  Configuration on Spark SQL Temporary Table or DataFrame
+
+Besides overriding any SparkConf configuration, you can also set the following configurations at temporary table or dataframe level.
+
+Name | Default | Meaning
+--- |:---:| ---
+database||cloudant database name
+view||cloudant view w/o the database name. only used for load.  
+index||cloudant search index w/o the database name. only used for load data with less than or equal to 200 results.
+path||cloudant: as database name if database is not present
+
+
+#### View Specific
+
+For fast loading, views are loaded without include_docs. Thus, a derived schema will always be: `{id, key, value}`, where `value `can be a compound field. An example of loading data from a view:
+
+```python
+spark.sql(" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', view '_design/view/_view/AA0')")
+
+```
+
+###  Configuration on Cloudant Receiver for Spark Streaming
+
+Besides overriding any SparkConf configuration, you can also set the following configurations at stream Receiver level
+
+Name | Default | Meaning
+--- |:---:| ---
+database||cloudant database name
+selector| all documents| a selector written in Cloudant Query syntax, specifying conditions for selecting documents. Only documents satisfying the selector's conditions will be retrieved from Cloudant and loaded into Spark.
+
+
+<div id='Known-limitations'/>
+
+## Known limitations and areas for improvement
+
+* Loading data from a Cloudant search index works only for up to 200 results.
+
+* Need to improve how the number of partitions is determined for parallel loading

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/python/CloudantApp.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantApp.py b/sql-cloudant/examples/python/CloudantApp.py
new file mode 100644
index 0000000..029f39b
--- /dev/null
+++ b/sql-cloudant/examples/python/CloudantApp.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pprint
+from pyspark.sql import SparkSession
+
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using temp tables")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .getOrCreate()
+
+
+# ***1. Loading temp table from Cloudant db
+spark.sql(" CREATE TEMPORARY TABLE airportTable USING org.apache.bahir.cloudant OPTIONS ( database 'n_airportcodemapping')")
+airportData = spark.sql("SELECT _id, airportName FROM airportTable WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id")
+airportData.printSchema()
+print 'Total # of rows in airportData: ' + str(airportData.count())
+for code in airportData.collect():
+    print code._id
+
+
+# ***2. Loading temp table from Cloudant search index
+print 'About to test org.apache.bahir.cloudant for flight with index'
+spark.sql(" CREATE TEMPORARY TABLE flightTable1 USING org.apache.bahir.cloudant OPTIONS ( database 'n_flight', index '_design/view/_search/n_flights')")
+flightData = spark.sql("SELECT flightSegmentId, scheduledDepartureTime FROM flightTable1 WHERE flightSegmentId >'AA9' AND flightSegmentId<'AA95'")
+flightData.printSchema()
+for code in flightData.collect():
+    print 'Flight {0} on {1}'.format(code.flightSegmentId, code.scheduledDepartureTime)
+

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/python/CloudantDF.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantDF.py b/sql-cloudant/examples/python/CloudantDF.py
new file mode 100644
index 0000000..c009e98
--- /dev/null
+++ b/sql-cloudant/examples/python/CloudantDF.py
@@ -0,0 +1,75 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pprint
+from pyspark.sql import SparkSession
+
+# define cloudant related configuration
+# set protocol to http if needed, default value=https
+# config("cloudant.protocol","http")
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using dataframes")\
+    .config("cloudant.host","ACCOUNT.cloudant.com")\
+    .config("cloudant.username", "USERNAME")\
+    .config("cloudant.password","PASSWORD")\
+    .config("jsonstore.rdd.partitions", 8)\
+    .getOrCreate()
+
+
+# ***1. Loading dataframe from Cloudant db
+df = spark.read.load("n_airportcodemapping", "org.apache.bahir.cloudant")
+# In case of doing multiple operations on a dataframe (select, filter etc.)
+# you should persist the dataframe.
+# Otherwise, every operation on the dataframe will load the same data from Cloudant again.
+# Persisting will also speed up computation.
+df.cache() # persisting in memory
+# alternatively for large dbs to persist in memory & disk:
+# from pyspark import StorageLevel
+# df.persist(storageLevel = StorageLevel(True, True, False, True, 1)) 
+df.printSchema()
+df.filter(df.airportName >= 'Moscow').select("_id",'airportName').show()
+df.filter(df._id >= 'CAA').select("_id",'airportName').show()
+
+
+# ***2. Saving a dataframe to Cloudant db
+df = spark.read.load(format="org.apache.bahir.cloudant", database="n_flight")
+df.printSchema()
+df2 = df.filter(df.flightSegmentId=='AA106')\
+    .select("flightSegmentId", "economyClassBaseCost")
+df2.write.save("n_flight2",  "org.apache.bahir.cloudant",
+        bulkSize = "100", createDBOnSave="true") 
+total = df.filter(df.flightSegmentId >'AA9').select("flightSegmentId", 
+        "scheduledDepartureTime").orderBy(df.flightSegmentId).count()
+print "Total", total, "flights from table"
+
+
+# ***3. Loading dataframe from a Cloudant search index
+df = spark.read.load(format="org.apache.bahir.cloudant", database="n_flight", 
+        index="_design/view/_search/n_flights")
+df.printSchema()
+total = df.filter(df.flightSegmentId >'AA9').select("flightSegmentId", 
+        "scheduledDepartureTime").orderBy(df.flightSegmentId).count()
+print "Total", total, "flights from index"
+
+
+# ***4. Loading dataframe from a Cloudant view
+df = spark.read.load(format="org.apache.bahir.cloudant", path="n_flight", 
+        view="_design/view/_view/AA0", schemaSampleSize="20")
+# schema for view will always be: _id, key, value
+# where value can be a complex field
+df.printSchema()
+df.show()

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/python/CloudantDFOption.py
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/python/CloudantDFOption.py b/sql-cloudant/examples/python/CloudantDFOption.py
new file mode 100644
index 0000000..c045532
--- /dev/null
+++ b/sql-cloudant/examples/python/CloudantDFOption.py
@@ -0,0 +1,72 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pprint
+from pyspark.sql import SparkSession
+
+spark = SparkSession\
+    .builder\
+    .appName("Cloudant Spark SQL Example in Python using dataframes with options")\
+    .getOrCreate()
+
+cloudant_host = "ACCOUNT.cloudant.com"
+cloudant_username = "USERNAME"
+cloudant_password = "PASSWORD"
+
+# ***1. Loading dataframe from Cloudant db
+df = spark.read.format("org.apache.bahir.cloudant") \
+    .option("cloudant.host", cloudant_host) \
+    .option("cloudant.username", cloudant_username) \
+    .option("cloudant.password", cloudant_password) \
+    .load("n_airportcodemapping")
+df.cache() # persisting in memory
+df.printSchema()
+df.filter(df._id >= 'CAA').select("_id",'airportName').show()
+
+
+# ***2. Saving dataframe to Cloudant db
+df.filter(df._id >= 'CAA').select("_id",'airportName') \
+    .write.format("org.apache.bahir.cloudant") \
+    .option("cloudant.host", cloudant_host) \
+    .option("cloudant.username", cloudant_username) \
+    .option("cloudant.password",cloudant_password) \
+    .option("bulkSize","100") \
+    .option("createDBOnSave", "true") \
+    .save("airportcodemapping_df")
+df = spark.read.format("org.apache.bahir.cloudant") \
+    .option("cloudant.host", cloudant_host) \
+    .option("cloudant.username", cloudant_username) \
+    .option("cloudant.password", cloudant_password) \
+    .load("n_flight")
+df.printSchema()
+total = df.filter(df.flightSegmentId >'AA9') \
+    .select("flightSegmentId", "scheduledDepartureTime") \
+    .orderBy(df.flightSegmentId).count()
+print "Total", total, "flights from table"
+
+
+# ***3. Loading dataframe from Cloudant search index
+df = spark.read.format("org.apache.bahir.cloudant") \
+    .option("cloudant.host",cloudant_host) \
+    .option("cloudant.username",cloudant_username) \
+    .option("cloudant.password",cloudant_password) \
+    .option("index","_design/view/_search/n_flights").load("n_flight")
+df.printSchema()
+
+total = df.filter(df.flightSegmentId >'AA9') \
+    .select("flightSegmentId", "scheduledDepartureTime") \
+    .orderBy(df.flightSegmentId).count()
+print "Total", total, "flights from index"

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantApp.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantApp.scala b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantApp.scala
new file mode 100644
index 0000000..d3e5ecc
--- /dev/null
+++ b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantApp.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.sql.cloudant
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.SQLContext
+
+object CloudantApp {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder()
+      .appName("Cloudant Spark SQL Example")
+      .config("cloudant.host", "ACCOUNT.cloudant.com")
+      .config("cloudant.username", "USERNAME")
+      .config("cloudant.password", "PASSWORD")
+      .getOrCreate()
+
+    // For implicit conversions (e.g. Encoders for the Dataset operations below)
+    import spark.implicits._
+
+    // create a temp table from Cloudant db and query it using sql syntax
+    spark.sql(
+        s"""
+        |CREATE TEMPORARY TABLE airportTable
+        |USING org.apache.bahir.cloudant
+        |OPTIONS ( database 'n_airportcodemapping')
+        """.stripMargin)
+    // create a dataframe
+    val airportData = spark.sql(
+        s"""
+        |SELECT _id, airportName
+        |FROM airportTable
+        |WHERE _id >= 'CAA' AND _id <= 'GAA' ORDER BY _id
+        """.stripMargin)
+    airportData.printSchema()
+    println(s"Total # of rows in airportData: " + airportData.count()) // scalastyle:ignore
+    // convert dataframe to array of Rows, and process each row
+    airportData.map(t => "code: " + t(0) + ",name:" + t(1)).collect().foreach(println) // scalastyle:ignore
+
+    // create a temp table from Cloudant index  and query it using sql syntax
+    spark.sql(
+        s"""
+        |CREATE TEMPORARY TABLE flightTable
+        |USING org.apache.bahir.cloudant
+        |OPTIONS (database 'n_flight', index '_design/view/_search/n_flights')
+        """.stripMargin)
+    val flightData = spark.sql(
+        s"""
+        |SELECT flightSegmentId, scheduledDepartureTime
+        |FROM flightTable
+        |WHERE flightSegmentId >'AA9' AND flightSegmentId<'AA95'
+        """.stripMargin)
+    flightData.printSchema()
+    flightData.map(t => "flightSegmentId: " + t(0) + ", scheduledDepartureTime: " + t(1))
+                   .collect().foreach(println) // scalastyle:ignore
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDF.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDF.scala b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDF.scala
new file mode 100644
index 0000000..d97b688
--- /dev/null
+++ b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDF.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.sql.cloudant
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.storage.StorageLevel
+
+object CloudantDF{
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder()
+      .appName("Cloudant Spark SQL Example with Dataframe")
+      .config("cloudant.host", "ACCOUNT.cloudant.com")
+      .config("cloudant.username", "USERNAME")
+      .config("cloudant.password", "PASSWORD")
+      .config("createDBOnSave", "true") // to create a db on save
+      .config("jsonstore.rdd.partitions", "20") // using 20 partitions
+      .getOrCreate()
+
+    // 1. Loading data from Cloudant db
+    val df = spark.read.format("org.apache.bahir.cloudant").load("n_flight")
+    // Caching df in memory to speed computations
+    // and not to retrieve data from cloudant again
+    df.cache()
+    df.printSchema()
+
+    // 2. Saving dataframe to Cloudant db
+    val df2 = df.filter(df("flightSegmentId") === "AA106")
+        .select("flightSegmentId", "economyClassBaseCost")
+    df2.show()
+    df2.write.format("org.apache.bahir.cloudant").save("n_flight2")
+
+    // 3. Loading data from Cloudant search index
+    val df3 = spark.read.format("org.apache.bahir.cloudant")
+      .option("index", "_design/view/_search/n_flights").load("n_flight")
+    val total = df3.filter(df3("flightSegmentId") >"AA9")
+      .select("flightSegmentId", "scheduledDepartureTime")
+      .orderBy(df3("flightSegmentId")).count()
+    println(s"Total $total flights from index") // scalastyle:ignore
+
+    // 4. Loading data from view
+    val df4 = spark.read.format("org.apache.bahir.cloudant")
+      .option("view", "_design/view/_view/AA0").load("n_flight")
+    df4.printSchema()
+    df4.show()
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDFOption.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDFOption.scala b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDFOption.scala
new file mode 100644
index 0000000..164ca21
--- /dev/null
+++ b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantDFOption.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.sql.cloudant
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.storage.StorageLevel
+
+object CloudantDFOption{
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder()
+      .appName("Cloudant Spark SQL Example with Dataframe using Option")
+      .getOrCreate()
+
+    val cloudantHost = "ACCOUNT.cloudant.com"
+    val cloudantUser = "USERNAME"
+    val cloudantPassword = "PASSWORD"
+
+    // 1. Loading data from Cloudant db
+    val df = spark.read.format("org.apache.bahir.cloudant")
+      .option("cloudant.host", cloudantHost)
+      .option("cloudant.username", cloudantUser)
+      .option("cloudant.password", cloudantPassword)
+      .load("n_airportcodemapping")
+
+    df.cache()
+    df.printSchema()
+    df.filter(df("_id") >= "CAA").select("_id", "airportName").show()
+
+    // 2. Saving dataframe to Cloudant db
+    // To create a Cloudant db during save set the option createDBOnSave=true
+    df.filter(df("_id") >= "CAA")
+      .select("_id", "airportName")
+      .write.format("org.apache.bahir.cloudant")
+      .option("cloudant.host", cloudantHost)
+      .option("cloudant.username", cloudantUser)
+      .option("cloudant.password", cloudantPassword)
+      .option("createDBOnSave", "true")
+      .save("airportcodemapping_df")
+
+    // 3. Loading data from Cloudant search index
+    val df2 = spark.read.format("org.apache.bahir.cloudant")
+      .option("index", "_design/view/_search/n_flights")
+      .option("cloudant.host", cloudantHost)
+      .option("cloudant.username", cloudantUser)
+      .option("cloudant.password", cloudantPassword)
+      .load("n_flight")
+    val total2 = df2.filter(df2("flightSegmentId") >"AA9")
+      .select("flightSegmentId", "scheduledDepartureTime")
+      .orderBy(df2("flightSegmentId"))
+      .count()
+    println(s"Total $total2 flights from index")// scalastyle:ignore
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala
new file mode 100644
index 0000000..a1de696
--- /dev/null
+++ b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreaming.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.sql.cloudant
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.streaming.{ Seconds, StreamingContext, Time }
+import org.apache.spark.streaming.scheduler.{ StreamingListener, StreamingListenerReceiverError}
+
+import org.apache.bahir.cloudant.CloudantReceiver
+
+object CloudantStreaming {
+  def main(args: Array[String]) {
+    val sparkConf = new SparkConf().setAppName("Cloudant Spark SQL External Datasource in Scala")
+    // Create the context with a 10 seconds batch size
+    val ssc = new StreamingContext(sparkConf, Seconds(10))
+
+    val changes = ssc.receiverStream(new CloudantReceiver(sparkConf, Map(
+      "cloudant.host" -> "ACCOUNT.cloudant.com",
+      "cloudant.username" -> "USERNAME",
+      "cloudant.password" -> "PASSWORD",
+      "database" -> "n_airportcodemapping")))
+
+    changes.foreachRDD((rdd: RDD[String], time: Time) => {
+      // Get the singleton instance of SparkSession
+      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
+
+      println(s"========= $time =========")// scalastyle:ignore
+      // Convert RDD[String] to DataFrame
+      val changesDataFrame = spark.read.json(rdd)
+      if (!changesDataFrame.schema.isEmpty) {
+        changesDataFrame.printSchema()
+        changesDataFrame.select("*").show()
+
+        var hasDelRecord = false
+        var hasAirportNameField = false
+        for (field <- changesDataFrame.schema.fieldNames) {
+          if ("_deleted".equals(field)) {
+            hasDelRecord = true
+          }
+          if ("airportName".equals(field)) {
+            hasAirportNameField = true
+          }
+        }
+        if (hasDelRecord) {
+          changesDataFrame.filter(changesDataFrame("_deleted")).select("*").show()
+        }
+
+        if (hasAirportNameField) {
+          changesDataFrame.filter(changesDataFrame("airportName") >= "Paris").select("*").show()
+          changesDataFrame.registerTempTable("airportcodemapping")
+          val airportCountsDataFrame =
+            spark.sql(
+                s"""
+                |select airportName, count(*) as total
+                |from airportcodemapping
+                |group by airportName
+                """.stripMargin)
+          airportCountsDataFrame.show()
+        }
+      }
+
+    })
+    ssc.start()
+    // run streaming for 120 secs
+    Thread.sleep(120000L)
+    ssc.stop(true)
+  }
+}
+
+/** Lazily instantiated singleton instance of SparkSession */
+object SparkSessionSingleton {
+  @transient  private var instance: SparkSession = _
+  def getInstance(sparkConf: SparkConf): SparkSession = {
+    if (instance == null) {
+      instance = SparkSession
+        .builder
+        .config(sparkConf)
+        .getOrCreate()
+    }
+    instance
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreamingSelector.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreamingSelector.scala b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreamingSelector.scala
new file mode 100644
index 0000000..51d939a
--- /dev/null
+++ b/sql-cloudant/examples/src/main/scala/org/apache/spark/examples/sql/cloudant/CloudantStreamingSelector.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.sql.cloudant
+
+import java.util.concurrent.atomic.AtomicLong
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.streaming.{ Seconds, StreamingContext, Time }
+
+import org.apache.bahir.cloudant.CloudantReceiver
+
+object CloudantStreamingSelector {
+  def main(args: Array[String]) {
+    val sparkConf = new SparkConf().setAppName("Cloudant Spark SQL External Datasource in Scala")
+
+    // Create the context with a 10 seconds batch size
+    val ssc = new StreamingContext(sparkConf, Seconds(10))
+    val curTotalAmount = new AtomicLong(0)
+    val curSalesCount = new AtomicLong(0)
+    var batchAmount = 0L
+
+    val changes = ssc.receiverStream(new CloudantReceiver(sparkConf, Map(
+      "cloudant.host" -> "ACCOUNT.cloudant.com",
+      "cloudant.username" -> "USERNAME",
+      "cloudant.password" -> "PASSWORD",
+      "database" -> "sales",
+      "selector" -> "{\"month\":\"May\", \"rep\":\"John\"}")))
+
+    changes.foreachRDD((rdd: RDD[String], time: Time) => {
+      // Get the singleton instance of SQLContext
+      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
+      println(s"========= $time =========") // scalastyle:ignore
+      val changesDataFrame = spark.read.json(rdd)
+      if (!changesDataFrame.schema.isEmpty) {
+        changesDataFrame.select("*").show()
+        batchAmount = changesDataFrame.groupBy().sum("amount").collect()(0).getLong(0)
+        curSalesCount.getAndAdd(changesDataFrame.count())
+        curTotalAmount.getAndAdd(batchAmount)
+        println("Current sales count:" + curSalesCount)// scalastyle:ignore
+        println("Current total amount:" + curTotalAmount)// scalastyle:ignore
+        }
+    })
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/pom.xml
----------------------------------------------------------------------
diff --git a/sql-cloudant/pom.xml b/sql-cloudant/pom.xml
new file mode 100644
index 0000000..5860033
--- /dev/null
+++ b/sql-cloudant/pom.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.bahir</groupId>
+    <artifactId>bahir-parent_2.11</artifactId>
+    <version>2.2.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.bahir</groupId>
+  <artifactId>spark-sql-cloudant_2.11</artifactId>
+  <properties>
+    <sbt.project.name>spark-sql-cloudant</sbt.project.name>
+  </properties>
+  <packaging>jar</packaging>
+  <name>Apache Bahir - Spark SQL Cloudant DataSource</name>
+  <url>http://bahir.apache.org/</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.typesafe.play</groupId>
+      <artifactId>play-json_${scala.binary.version}</artifactId>
+      <!--version>2.4.8</version-->
+    </dependency>
+    <!--dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-annotations</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.datatype</groupId>
+      <artifactId>jackson-datatype-jdk8</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.datatype</groupId>
+      <artifactId>jackson-datatype-jsr310</artifactId>
+      <version>2.5.4</version>
+    </dependency-->
+    <dependency>
+      <groupId>org.scalaj</groupId>
+      <artifactId>scalaj-http_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-tags_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+      </resource>
+    </resources>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/resources/application.conf
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/resources/application.conf b/sql-cloudant/src/main/resources/application.conf
new file mode 100644
index 0000000..2d8b236
--- /dev/null
+++ b/sql-cloudant/src/main/resources/application.conf
@@ -0,0 +1,14 @@
+spark-sql {
+    bulkSize = 200
+    schemaSampleSize = -1
+    createDBOnSave = false
+    jsonstore.rdd = {
+        partitions = 10
+        maxInPartition = -1
+        minInPartition = 10
+        requestTimeout = 900000
+    }
+    cloudant = {
+        protocol = https
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/resources/reference.conf
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/resources/reference.conf b/sql-cloudant/src/main/resources/reference.conf
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
new file mode 100644
index 0000000..ac14f4b
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant
+
+import java.net.URLEncoder
+
+import play.api.libs.json.JsArray
+import play.api.libs.json.Json
+import play.api.libs.json.JsValue
+
+import org.apache.bahir.cloudant.common._
+
+/*
+* Only allow one field pushdown for now,
+* as the filters provided do not indicate how they are combined (AND vs. OR)
+*/
+
+class CloudantConfig(val protocol: String, val host: String,
+    val dbName: String, val indexName: String = null, val viewName: String = null)
+    (implicit val username: String, val password: String,
+    val partitions: Int, val maxInPartition: Int, val minInPartition: Int,
+    val requestTimeout: Long, val bulkSize: Int, val schemaSampleSize: Int,
+    val createDBOnSave: Boolean, val selector: String)
+    extends Serializable{
+
+  private val SCHEMA_FOR_ALL_DOCS_NUM = -1
+  private lazy val dbUrl = {protocol + "://" + host + "/" + dbName}
+
+  val pkField = "_id"
+  val defaultIndex = "_all_docs" // "_changes" does not work for partition
+  val default_filter: String = "*:*"
+
+  def getChangesUrl(): String = {
+    dbUrl + "/_changes?include_docs=true&feed=normal"
+  }
+
+  def getContinuousChangesUrl(): String = {
+    var url = dbUrl + "/_changes?include_docs=true&feed=continuous&heartbeat=3000"
+    if (selector != null) {
+      url = url + "&filter=_selector"
+    }
+    url
+  }
+
+  def getSelector() : String = {
+    selector
+  }
+
+  def getDbUrl(): String = {
+    dbUrl
+  }
+
+  def getLastUrl(skip: Int): String = {
+    if (skip ==0 ) null
+    else s"$dbUrl/$defaultIndex?limit=$skip"
+  }
+
+  def getSchemaSampleSize(): Int = {
+    schemaSampleSize
+  }
+
+  def getCreateDBonSave(): Boolean = {
+    createDBOnSave
+  }
+
+  def getLastNum(result: JsValue): JsValue = (result \ "last_seq").get
+
+  def getTotalUrl(url: String): String = {
+    if (url.contains('?')) {
+      url + "&limit=1"
+    } else {
+      url + "?limit=1"
+    }
+  }
+
+  def getDbname(): String = {
+    dbName
+  }
+
+  def allowPartition(): Boolean = {indexName==null}
+
+  def getOneUrl(): String = {
+    dbUrl + "/_all_docs?limit=1&include_docs=true"
+  }
+
+  def getOneUrlExcludeDDoc1(): String = {
+    dbUrl + "/_all_docs?endkey=%22_design/%22&limit=1&include_docs=true"
+  }
+
+  def getOneUrlExcludeDDoc2(): String = {
+    dbUrl + "/_all_docs?startkey=%22_design0/%22&limit=1&include_docs=true"
+  }
+
+  def getAllDocsUrlExcludeDDoc(limit: Int): String = {
+    if (viewName == null) {
+      dbUrl + "/_all_docs?startkey=%22_design0/%22&limit=" + limit + "&include_docs=true"
+    } else {
+      dbUrl + "/" + viewName + "?limit=1"
+    }
+  }
+
+  def getAllDocsUrl(limit: Int): String = {
+    if (viewName == null) {
+      if (limit == SCHEMA_FOR_ALL_DOCS_NUM) {
+        dbUrl + "/_all_docs?include_docs=true"
+      } else {
+        dbUrl + "/_all_docs?limit=" + limit + "&include_docs=true"
+      }
+    } else {
+      if (limit == JsonStoreConfigManager.SCHEMA_FOR_ALL_DOCS_NUM) {
+        dbUrl + "/" + viewName
+      } else {
+        dbUrl + "/" + viewName + "?limit=" + limit
+      }
+    }
+  }
+
+  def getRangeUrl(field: String = null, start: Any = null,
+      startInclusive: Boolean = false, end: Any = null,
+      endInclusive: Boolean = false,
+      includeDoc: Boolean = true): (String, Boolean) = {
+    val (url: String, pusheddown: Boolean) =
+      calculate(field, start, startInclusive, end, endInclusive)
+    if (includeDoc) {
+      if (url.indexOf('?') > 0) {
+        (url + "&include_docs=true", pusheddown)
+      } else {
+        (url + "?include_docs=true", pusheddown)
+      }
+    } else {
+      (url, pusheddown)
+    }
+  }
+
+  private def calculate(field: String, start: Any, startInclusive: Boolean,
+      end: Any, endInclusive: Boolean): (String, Boolean) = {
+    if (field != null && field.equals(pkField)) {
+      var condition = ""
+      if (start != null && end != null && start.equals(end)) {
+        condition += "?key=%22" + URLEncoder.encode(start.toString(), "UTF-8") + "%22"
+      } else {
+        if (start != null) {
+          condition += "?startkey=%22" + URLEncoder.encode(
+              start.toString(), "UTF-8") + "%22"
+        }
+        if (end != null) {
+          if (start != null) {
+            condition += "&"
+          } else {
+            condition += "?"
+          }
+          condition += "endkey=%22" + URLEncoder.encode(end.toString(), "UTF-8") + "%22"
+        }
+      }
+      (dbUrl + "/_all_docs" + condition, true)
+    } else if (indexName!=null) {
+      //  push down to indexName
+      val condition = calculateCondition(field, start, startInclusive,
+        end, endInclusive)
+      (dbUrl + "/" + indexName + "?q=" + condition, true)
+    } else if (viewName != null) {
+      (dbUrl + "/" + viewName, true)
+    } else {
+      (s"$dbUrl/$defaultIndex", false)
+    }
+
+  }
+
+  def calculateCondition(field: String, min: Any, minInclusive: Boolean = false,
+        max: Any, maxInclusive: Boolean = false) : String = {
+    if (field != null && (min != null || max!= null)) {
+      var condition = field + ":"
+      if (min!=null && max!=null && min.equals(max)) {
+        condition += min
+      } else {
+        if (minInclusive) {
+          condition+="["
+        } else {
+          condition +="{"
+        }
+        if (min!=null) {
+          condition += min
+        } else {
+          condition+="*"
+        }
+        condition+=" TO "
+        if (max !=null) {
+          condition += max
+        } else {
+          condition += "*"
+        }
+        if (maxInclusive) {
+          condition+="]"
+        } else {
+          condition +="}"
+        }
+      }
+      URLEncoder.encode(condition, "UTF-8")
+    } else {
+      default_filter
+    }
+  }
+
+  def getSubSetUrl (url: String, skip: Int, limit: Int)
+      (implicit convertSkip: (Int) => String): String = {
+    val suffix = {
+      if (url.indexOf("_all_docs")>0) "include_docs=true&limit=" +
+        limit + "&skip=" + skip
+      else if (url.indexOf("_changes")>0) "include_docs=true&limit=" +
+          limit + "&since=" + convertSkip(skip)
+      else if (viewName != null) {
+        "limit=" + limit + "&skip=" + skip
+      } else {
+        "include_docs=true&limit=" + limit
+      } // TODO Index query does not support subset query. Should disable Partitioned loading?
+    }
+    if (url.indexOf('?') > 0) {
+      url + "&" + suffix
+    }
+    else {
+      url + "?" + suffix
+    }
+  }
+
+  def getTotalRows(result: JsValue): Int = {
+    val tr = (result \ "total_rows").asOpt[Int]
+    tr match {
+      case None =>
+        (result \ "pending").as[Int] + 1
+      case Some(tr2) =>
+        tr2
+    }
+  }
+
+  def getRows(result: JsValue): Seq[JsValue] = {
+    if (viewName == null) {
+      ((result \ "rows").as[JsArray]).value.map(row => (row \ "doc").get)
+    } else {
+      ((result \ "rows").as[JsArray]).value.map(row => row)
+    }
+  }
+
+  def getBulkPostUrl(): String = {
+    dbUrl + "/_bulk_docs"
+  }
+
+  def getBulkRows(rows: List[String]): String = {
+    val docs = rows.map { x => Json.parse(x) }
+    Json.stringify(Json.obj("docs" -> Json.toJson(docs.toSeq)))
+  }
+
+  def getConflictErrStr(): String = {
+    """"error":"conflict""""
+  }
+
+  def getForbiddenErrStr(): String = {
+    """"error":"forbidden""""
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantReceiver.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantReceiver.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantReceiver.scala
new file mode 100644
index 0000000..0446660
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantReceiver.scala
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant
+
+// scalastyle:off
+import scalaj.http._
+
+import play.api.libs.json.Json
+
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.receiver.Receiver
+import org.apache.spark.SparkConf
+
+import org.apache.bahir.cloudant.common._
+// scalastyle:on
+
+class CloudantReceiver(sparkConf: SparkConf, cloudantParams: Map[String, String])
+    extends Receiver[String](StorageLevel.MEMORY_AND_DISK) {
+  lazy val config: CloudantConfig = {
+    JsonStoreConfigManager.getConfig(sparkConf, cloudantParams)
+      .asInstanceOf[CloudantConfig]
+  }
+
+  def onStart() {
+    // Start the thread that receives data over a connection
+    new Thread("Cloudant Receiver") {
+      override def run() { receive() }
+    }.start()
+  }
+
+  private def receive(): Unit = {
+    val url = config.getContinuousChangesUrl()
+    val selector: String = if (config.getSelector() != null) {
+      "{\"selector\":" + config.getSelector() + "}"
+    } else {
+      "{}"
+    }
+
+    val clRequest: HttpRequest = config.username match {
+      case null =>
+        Http(url)
+          .postData(selector)
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = 0)
+          .header("Content-Type", "application/json")
+          .header("User-Agent", "spark-cloudant")
+      case _ =>
+        Http(url)
+          .postData(selector)
+          .timeout(connTimeoutMs = 1000, readTimeoutMs = 0)
+          .header("Content-Type", "application/json")
+          .header("User-Agent", "spark-cloudant")
+          .auth(config.username, config.password)
+    }
+
+    clRequest.exec((code, headers, is) => {
+      if (code == 200) {
+        scala.io.Source.fromInputStream(is, "utf-8").getLines().foreach(line => {
+          if (line.length() > 0) {
+            val json = Json.parse(line)
+            val jsonDoc = (json \ "doc").get
+            val doc = Json.stringify(jsonDoc)
+            store(doc)
+          }
+        })
+      } else {
+        val status = headers.getOrElse("Status", IndexedSeq.empty)
+        val errorMsg = "Error retrieving _changes feed " + config.getDbname() + ": " + status(0)
+        reportError(errorMsg, new RuntimeException(errorMsg))
+      }
+    })
+  }
+
+  def onStop(): Unit = {
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
new file mode 100644
index 0000000..4c973f7
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/DefaultSource.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant
+
+import org.slf4j.LoggerFactory
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql._
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.types._
+
+import org.apache.bahir.cloudant.common.{FilterInterpreter, JsonStoreDataAccess, JsonStoreRDD, _}
+
+case class CloudantReadWriteRelation (config: CloudantConfig,
+                                      schema: StructType,
+                                      allDocsDF: DataFrame = null)
+                      (@transient val sqlContext: SQLContext)
+  extends BaseRelation with PrunedFilteredScan  with InsertableRelation {
+
+   @transient lazy val dataAccess = {new JsonStoreDataAccess(config)}
+
+    implicit lazy val logger = LoggerFactory.getLogger(getClass)
+
+    def buildScan(requiredColumns: Array[String],
+                filters: Array[Filter]): RDD[Row] = {
+      val colsLength = requiredColumns.length
+
+      if (allDocsDF != null) {
+        if (colsLength == 0) {
+          allDocsDF.select().rdd
+        } else if (colsLength == 1) {
+          allDocsDF.select(requiredColumns(0)).rdd
+        } else {
+          val colsExceptCol0 = for (i <- 1 until colsLength) yield requiredColumns(i)
+          allDocsDF.select(requiredColumns(0), colsExceptCol0: _*).rdd
+        }
+      } else {
+        val filterInterpreter = new FilterInterpreter(filters)
+        var searchField: String = {
+          if (filterInterpreter.containsFiltersFor(config.pkField)) {
+            config.pkField
+          } else {
+            filterInterpreter.firstField
+          }
+        }
+
+        val (min, minInclusive, max, maxInclusive) = filterInterpreter.getInfo(searchField)
+        implicit val columns = requiredColumns
+        val (url: String, pusheddown: Boolean) = config.getRangeUrl(searchField,
+            min, minInclusive, max, maxInclusive, false)
+        if (!pusheddown) searchField = null
+        implicit val attrToFilters = filterInterpreter.getFiltersForPostProcess(searchField)
+
+        val cloudantRDD = new JsonStoreRDD(sqlContext.sparkContext, config, url)
+        val df = sqlContext.read.json(cloudantRDD)
+        if (colsLength > 1) {
+          val colsExceptCol0 = for (i <- 1 until colsLength) yield requiredColumns(i)
+          df.select(requiredColumns(0), colsExceptCol0: _*).rdd
+        } else {
+          df.rdd
+        }
+      }
+    }
+
+
+  def insert(data: DataFrame, overwrite: Boolean): Unit = {
+      if (config.getCreateDBonSave()) {
+        dataAccess.createDB()
+      }
+      if (data.count() == 0) {
+        logger.warn(("Database " + config.getDbname() +
+          ": nothing was saved because the number of records was 0!"))
+      } else {
+        val result = data.toJSON.foreachPartition { x =>
+          val list = x.toList // Has to pass as List, Iterator results in 0 data
+          dataAccess.saveAll(list)
+        }
+      }
+    }
+}
+
+class DefaultSource extends RelationProvider
+  with CreatableRelationProvider
+  with SchemaRelationProvider {
+
+  val logger = LoggerFactory.getLogger(getClass)
+
+  def createRelation(sqlContext: SQLContext,
+                     parameters: Map[String, String]): CloudantReadWriteRelation = {
+      create(sqlContext, parameters, null)
+    }
+
+    private def create(sqlContext: SQLContext,
+                       parameters: Map[String, String],
+                       inSchema: StructType) = {
+
+      val config: CloudantConfig = JsonStoreConfigManager.getConfig(sqlContext, parameters)
+
+      var allDocsDF: DataFrame = null
+
+      val schema: StructType = {
+        if (inSchema != null) {
+          inSchema
+        } else {
+          val df = if (config.getSchemaSampleSize() ==
+            JsonStoreConfigManager.SCHEMA_FOR_ALL_DOCS_NUM &&
+            config.viewName == null
+            && config.indexName == null) {
+            val filterInterpreter = new FilterInterpreter(null)
+            var searchField = null
+            val (min, minInclusive, max, maxInclusive) =
+                filterInterpreter.getInfo(searchField)
+            val (url: String, pusheddown: Boolean) = config.getRangeUrl(searchField,
+                min, minInclusive, max, maxInclusive, false)
+            val cloudantRDD = new JsonStoreRDD(sqlContext.sparkContext, config, url)
+            allDocsDF = sqlContext.read.json(cloudantRDD)
+            allDocsDF
+          } else {
+            val dataAccess = new JsonStoreDataAccess(config)
+            val aRDD = sqlContext.sparkContext.parallelize(
+                dataAccess.getMany(config.getSchemaSampleSize()))
+            sqlContext.read.json(aRDD)
+          }
+          df.schema
+        }
+      }
+      CloudantReadWriteRelation(config, schema, allDocsDF)(sqlContext)
+    }
+
+    def createRelation(sqlContext: SQLContext,
+                       mode: SaveMode,
+                       parameters: Map[String, String],
+                       data: DataFrame): CloudantReadWriteRelation = {
+      val relation = create(sqlContext, parameters, data.schema)
+      relation.insert(data, mode==SaveMode.Overwrite)
+      relation
+    }
+
+    def createRelation(sqlContext: SQLContext,
+                       parameters: Map[String, String],
+                       schema: StructType): CloudantReadWriteRelation = {
+      create(sqlContext, parameters, schema)
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/FilterUtil.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/FilterUtil.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/FilterUtil.scala
new file mode 100644
index 0000000..12cd81c
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/FilterUtil.scala
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant.common
+
+import org.slf4j.LoggerFactory
+import play.api.libs.json.{JsString, JsValue}
+
+import org.apache.spark.sql.sources._
+
+
+/**
+ * Only handles the following filter condition
+ * 1. EqualTo,GreaterThan,LessThan,GreaterThanOrEqual,LessThanOrEqual,In
+ * 2. recursive AND of (filters in 1 and AND). Issue: Spark 1.3.0 does not return
+ *    an AND filter; it returns 2 separate filters instead
+ */
+class FilterInterpreter(origFilters: Array[Filter]) {
+
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  lazy val firstField = {
+    if (origFilters.length > 0) getFilterAttribute(origFilters(0))
+    else null
+  }
+
+  private lazy val filtersByAttr = {
+    origFilters
+      .filter(f => getFilterAttribute(f) != null)
+      .map(f => (getFilterAttribute(f), f))
+      .groupBy(attrFilter => attrFilter._1)
+      .mapValues(a => a.map(p => p._2))
+  }
+
+  private def getFilterAttribute(f: Filter): String = {
+    val result = f match {
+      case EqualTo(attr, v) => attr
+      case GreaterThan(attr, v) => attr
+      case LessThan(attr, v) => attr
+      case GreaterThanOrEqual(attr, v) => attr
+      case LessThanOrEqual(attr, v) => attr
+      case In(attr, v) => attr
+      case IsNotNull(attr) => attr
+      case IsNull(attr) => attr
+      case _ => null
+    }
+    result
+  }
+
+  def containsFiltersFor(key: String): Boolean = {
+    filtersByAttr.contains(key)
+  }
+
+  private lazy val analyzedFilters = {
+    filtersByAttr.map(m => m._1 -> analyze(m._2))
+  }
+
+  private def analyze(filters: Array[Filter]): (Any, Boolean, Any, Boolean, Array[Filter]) = {
+
+    var min: Any = null
+    var minInclusive: Boolean = false
+    var max: Any = null
+    var maxInclusive: Boolean = false
+    var others: Array[Filter] = Array[Filter]()
+
+    def evaluate(filter: Filter) {
+      filter match {
+        case GreaterThanOrEqual(attr, v) => min = v; minInclusive = true
+        case LessThanOrEqual(attr, v) => max = v; maxInclusive = true
+        case EqualTo(attr, v) => min = v; max = v
+        case GreaterThan(attr, v) => min = v
+        case LessThan(attr, v) => max = v
+        case _ => others = others :+ filter
+      }
+    }
+
+    filters.map(f => evaluate(f))
+
+    logger.info(s"Calculated range info: min=$min," +
+      s" minInclusive=$minInclusive," +
+      s"max=$max," +
+      s"maxInclusive=$maxInclusive," +
+      s"others=$others")
+    (min, minInclusive, max, maxInclusive, others)
+  }
+
+  def getInfo(field: String): (Any, Boolean, Any, Boolean) = {
+    if (field == null) (null, false, null, false)
+    else {
+      val data = analyzedFilters.getOrElse(field, (null, false, null, false, null))
+      (data._1, data._2, data._3, data._4)
+    }
+  }
+
+  def getFiltersForPostProcess(pushdownField: String): Map[String, Array[Filter]] = {
+    filtersByAttr.map(f => {
+      if (f._1.equals(pushdownField)) f._1 -> analyzedFilters.get(pushdownField).get._5
+      else f._1 -> f._2
+    })
+  }
+}
+
+/**
+ * Checks that every attribute referenced by the given filters is present in a JSON row.
+ */
+class FilterUtil(filters: Map[String, Array[Filter]]) {
+  private val logger = LoggerFactory.getLogger(getClass)
+  def apply(implicit r: JsValue = null): Boolean = {
+    if (r == null) return true
+    val satisfied = filters.forall({
+      case (attr, filters) =>
+        val field = JsonUtil.getField(r, attr).getOrElse(null)
+        if (field == null) {
+          logger.debug(s"field $attr does not exist in row: $r")
+          false
+        } else {
+          true
+        }
+    })
+    satisfied
+  }
+}
+
+
+object FilterDDocs {
+  def filter(row: JsValue): Boolean = {
+    if (row == null) return true
+    val id : String = JsonUtil.getField(row, "_id").
+        getOrElse(null).as[JsString].value
+    if (id.startsWith("_design")) {
+      false
+    } else {
+      true
+    }
+  }
+}
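
For context, a minimal usage sketch of the classes added above. The attribute names and values are illustrative only and are not part of the commit:

    import org.apache.spark.sql.sources.{EqualTo, Filter, GreaterThanOrEqual, LessThan}

    // Hypothetical pushed-down filters: a range on "age" plus an equality on "state".
    val filters: Array[Filter] = Array(
      GreaterThanOrEqual("age", 18),   // becomes the inclusive lower bound
      LessThan("age", 65),             // becomes the exclusive upper bound
      EqualTo("state", "CA"))          // grouped under its own attribute

    val interpreter = new FilterInterpreter(filters)
    interpreter.firstField                      // "age", the attribute of the first filter
    interpreter.containsFiltersFor("age")       // true
    interpreter.getInfo("age")                  // (18, true, 65, false): min, minInclusive, max, maxInclusive
    interpreter.getFiltersForPostProcess("age") // remaining filters per attribute, minus the pushed-down range

    // FilterDDocs drops Cloudant design documents by their "_id" prefix.
    FilterDDocs.filter(play.api.libs.json.Json.obj("_id" -> "_design/views"))  // false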

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/f0d9a84f/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
----------------------------------------------------------------------
diff --git a/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
new file mode 100644
index 0000000..92192bb
--- /dev/null
+++ b/sql-cloudant/src/main/scala/org/apache/bahir/cloudant/common/JsonStoreConfigManager.scala
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bahir.cloudant.common
+
+import com.typesafe.config.ConfigFactory
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkConf
+
+import org.apache.bahir.cloudant.CloudantConfig
+
+object JsonStoreConfigManager {
+  val CLOUDANT_CONNECTOR_VERSION = "2.0.0"
+  val SCHEMA_FOR_ALL_DOCS_NUM = -1
+
+  private val CLOUDANT_HOST_CONFIG = "cloudant.host"
+  private val CLOUDANT_USERNAME_CONFIG = "cloudant.username"
+  private val CLOUDANT_PASSWORD_CONFIG = "cloudant.password"
+  private val CLOUDANT_PROTOCOL_CONFIG = "cloudant.protocol"
+
+
+  private val PARTITION_CONFIG = "jsonstore.rdd.partitions"
+  private val MAX_IN_PARTITION_CONFIG = "jsonstore.rdd.maxInPartition"
+  private val MIN_IN_PARTITION_CONFIG = "jsonstore.rdd.minInPartition"
+  private val REQUEST_TIMEOUT_CONFIG = "jsonstore.rdd.requestTimeout"
+  private val BULK_SIZE_CONFIG = "bulkSize"
+  private val SCHEMA_SAMPLE_SIZE_CONFIG = "schemaSampleSize"
+  private val CREATE_DB_ON_SAVE = "createDBOnSave"
+
+
+  private val configFactory = ConfigFactory.load()
+
+  private val ROOT_CONFIG_NAME = "spark-sql"
+  private val rootConfig = configFactory.getConfig(ROOT_CONFIG_NAME)
+
+
+  /**
+   * Configuration lookup order, highest precedence first:
+   * 1. "spark." + key in the SparkConf
+   *    (treated as values passed in through spark-submit)
+   * 2. key in the parameters, i.e. set as a DataFrame option
+   * 3. key in the SparkConf, i.e. set directly on the SparkConf
+   * 4. default in the Config, i.e. set in application.conf
+   */
+
+
+  private def getInt(sparkConf: SparkConf, parameters: Map[String, String],
+      key: String) : Int = {
+    val valueS = parameters.getOrElse(key, null)
+    if (sparkConf != null) {
+      val default = {
+        if (valueS == null) {
+          sparkConf.getInt(key, rootConfig.getInt(key))
+        } else {
+          valueS.toInt
+        }
+      }
+      sparkConf.getInt(s"spark.$key", default)
+    } else {
+      if (valueS == null) {
+        rootConfig.getInt(key)
+      } else {
+        valueS.toInt
+      }
+    }
+  }
+
+  private def getLong(sparkConf: SparkConf, parameters: Map[String, String],
+      key: String) : Long = {
+    val valueS = parameters.getOrElse(key, null)
+    if (sparkConf != null) {
+      val default = {
+        if (valueS == null) {
+          sparkConf.getLong(key, rootConfig.getLong(key))
+        } else {
+          valueS.toLong
+        }
+      }
+      sparkConf.getLong(s"spark.$key", default)
+    } else {
+      if (valueS == null) rootConfig.getLong(key) else valueS.toLong
+    }
+  }
+
+  private def getString(sparkConf: SparkConf, parameters: Map[String, String],
+      key: String) : String = {
+    val defaultInConfig = if (rootConfig.hasPath(key)) rootConfig.getString(key) else null
+    val valueS = parameters.getOrElse(key, null)
+    if (sparkConf != null) {
+      val default = {
+        if (valueS == null) {
+          sparkConf.get(key, defaultInConfig)
+        } else {
+          valueS
+        }
+      }
+      sparkConf.get(s"spark.$key", default)
+    } else {
+      if (valueS == null) defaultInConfig else valueS
+    }
+  }
+
+  private def getBool(sparkConf: SparkConf, parameters: Map[String, String],
+      key: String) : Boolean = {
+    val valueS = parameters.getOrElse(key, null)
+    if (sparkConf != null) {
+      val default = {
+        if (valueS == null) {
+          sparkConf.getBoolean(key, rootConfig.getBoolean(key))
+        } else {
+          valueS.toBoolean
+        }
+      }
+      sparkConf.getBoolean(s"spark.$key", default)
+    } else
+    if (valueS == null) {
+      rootConfig.getBoolean(key)
+    } else {
+      valueS.toBoolean
+    }
+  }
+
+
+
+  def getConfig(context: SQLContext, parameters: Map[String, String]): CloudantConfig = {
+
+    val sparkConf = context.sparkContext.getConf
+
+    implicit val total = getInt(sparkConf, parameters, PARTITION_CONFIG)
+    implicit val max = getInt(sparkConf, parameters, MAX_IN_PARTITION_CONFIG)
+    implicit val min = getInt(sparkConf, parameters, MIN_IN_PARTITION_CONFIG)
+    implicit val requestTimeout = getLong(sparkConf, parameters, REQUEST_TIMEOUT_CONFIG)
+    implicit val bulkSize = getInt(sparkConf, parameters, BULK_SIZE_CONFIG)
+    implicit val schemaSampleSize = getInt(sparkConf, parameters, SCHEMA_SAMPLE_SIZE_CONFIG)
+    implicit val createDBOnSave = getBool(sparkConf, parameters, CREATE_DB_ON_SAVE)
+
+    val dbName = parameters.getOrElse("database", parameters.getOrElse("path", null))
+    val indexName = parameters.getOrElse("index", null)
+    val viewName = parameters.getOrElse("view", null)
+
+    // FIXME: Add logger
+    // scalastyle:off println
+    println(s"Use connectorVersion=$CLOUDANT_CONNECTOR_VERSION, dbName=$dbName, " +
+        s"indexName=$indexName, viewName=$viewName," +
+        s"$PARTITION_CONFIG=$total, $MAX_IN_PARTITION_CONFIG=$max," +
+        s"$MIN_IN_PARTITION_CONFIG=$min, $REQUEST_TIMEOUT_CONFIG=$requestTimeout," +
+        s"$BULK_SIZE_CONFIG=$bulkSize, $SCHEMA_SAMPLE_SIZE_CONFIG=$schemaSampleSize")
+    // scalastyle:on println
+
+    val protocol = getString(sparkConf, parameters, CLOUDANT_PROTOCOL_CONFIG)
+    val host = getString(sparkConf, parameters, CLOUDANT_HOST_CONFIG)
+    val user = getString(sparkConf, parameters, CLOUDANT_USERNAME_CONFIG)
+    val passwd = getString(sparkConf, parameters, CLOUDANT_PASSWORD_CONFIG)
+    val selector = getString(sparkConf, parameters, "selector")
+
+    if (host != null) {
+      new CloudantConfig(protocol, host, dbName, indexName,
+        viewName) (user, passwd, total, max, min, requestTimeout, bulkSize,
+        schemaSampleSize, createDBOnSave, selector)
+    } else {
+      throw new RuntimeException("Spark configuration is invalid! " +
+        "Please make sure to supply required values for cloudant.host.")
+    }
+  }
+
+  def getConfig(sparkConf: SparkConf, parameters: Map[String, String]): CloudantConfig = {
+
+    implicit val total = getInt(sparkConf, parameters, PARTITION_CONFIG)
+    implicit val max = getInt(sparkConf, parameters, MAX_IN_PARTITION_CONFIG)
+    implicit val min = getInt(sparkConf, parameters, MIN_IN_PARTITION_CONFIG)
+    implicit val requestTimeout = getLong(sparkConf, parameters, REQUEST_TIMEOUT_CONFIG)
+    implicit val bulkSize = getInt(sparkConf, parameters, BULK_SIZE_CONFIG)
+    implicit val schemaSampleSize = getInt(sparkConf, parameters, SCHEMA_SAMPLE_SIZE_CONFIG)
+    implicit val createDBOnSave = getBool(sparkConf, parameters, CREATE_DB_ON_SAVE)
+
+    val dbName = parameters.getOrElse("database", null)
+
+    // scalastyle:off println
+    println(s"Use connectorVersion=$CLOUDANT_CONNECTOR_VERSION, dbName=$dbName, " +
+      s"$REQUEST_TIMEOUT_CONFIG=$requestTimeout")
+    // scalastyle:on println
+
+    val protocol = getString(sparkConf, parameters, CLOUDANT_PROTOCOL_CONFIG)
+    val host = getString(sparkConf, parameters, CLOUDANT_HOST_CONFIG)
+    val user = getString(sparkConf, parameters, CLOUDANT_USERNAME_CONFIG)
+    val passwd = getString(sparkConf, parameters, CLOUDANT_PASSWORD_CONFIG)
+    val selector = getString(sparkConf, parameters, "selector")
+
+    if (host != null) {
+      new CloudantConfig(protocol, host, dbName)(user, passwd,
+        total, max, min, requestTimeout, bulkSize,
+        schemaSampleSize, createDBOnSave, selector)
+    } else {
+      throw new RuntimeException("Cloudant parameters are invalid!" +
+          "Please make sure to supply required values for cloudant.host.")
+    }
+  }
+}
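
To make the lookup order described in the scaladoc concrete, here is a sketch of how one key can be supplied at several levels. The key and option names come from the constants above; the host, database, and values are placeholders, and the sketch assumes the module's application.conf supplies the remaining defaults:

    import org.apache.spark.SparkConf

    val sparkConf = new SparkConf()
      .set("jsonstore.rdd.partitions", "8")           // level 3: plain SparkConf entry
      .set("spark.jsonstore.rdd.partitions", "16")    // level 1: highest precedence

    val parameters = Map(
      "cloudant.host" -> "account.cloudant.com",      // placeholder host
      "database" -> "testdb",                         // placeholder database name
      "jsonstore.rdd.partitions" -> "12")             // level 2: DataFrame option

    // Resolves jsonstore.rdd.partitions to 16; without the "spark."-prefixed entry
    // it would be 12, without the option 8, and with none of them set it falls
    // back to the application.conf default (level 4).
    val config = JsonStoreConfigManager.getConfig(sparkConf, parameters)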


[43/50] [abbrv] incubator-livy-website git commit: [maven-release-plugin] prepare for next development iteration

Posted by lr...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/5d8fc42b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/5d8fc42b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/5d8fc42b

Branch: refs/heads/master
Commit: 5d8fc42b556051721c9ebab110d7f13e9c46c0ac
Parents: c5180d8
Author: Luciano Resende <lr...@apache.org>
Authored: Wed Jun 7 20:18:58 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Wed Jun 7 20:18:58 2017 -0700

----------------------------------------------------------------------
 distribution/pom.xml       | 2 +-
 pom.xml                    | 4 ++--
 sql-cloudant/pom.xml       | 2 +-
 sql-streaming-akka/pom.xml | 2 +-
 sql-streaming-mqtt/pom.xml | 2 +-
 streaming-akka/pom.xml     | 2 +-
 streaming-mqtt/pom.xml     | 2 +-
 streaming-pubsub/pom.xml   | 2 +-
 streaming-twitter/pom.xml  | 2 +-
 streaming-zeromq/pom.xml   | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index f617db7..77104d1 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.1</version>
+        <version>2.2.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 32d0b8a..81f2e28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   </parent>
   <groupId>org.apache.bahir</groupId>
   <artifactId>bahir-parent_2.11</artifactId>
-  <version>2.1.1</version>
+  <version>2.2.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Bahir - Parent POM</name>
   <url>http://bahir.apache.org/</url>
@@ -40,7 +40,7 @@
     <connection>scm:git:git@github.com:apache/bahir.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/bahir.git</developerConnection>
     <url>scm:git:git@github.com:apache/bahir.git</url>
-    <tag>v2.1.1-rc1</tag>
+    <tag>HEAD</tag>
   </scm>
   <issueManagement>
     <system>JIRA</system>

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/sql-cloudant/pom.xml
----------------------------------------------------------------------
diff --git a/sql-cloudant/pom.xml b/sql-cloudant/pom.xml
index bfd0571..5860033 100644
--- a/sql-cloudant/pom.xml
+++ b/sql-cloudant/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/sql-streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-akka/pom.xml b/sql-streaming-akka/pom.xml
index 9e134d5..4d7040b 100644
--- a/sql-streaming-akka/pom.xml
+++ b/sql-streaming-akka/pom.xml
@@ -21,7 +21,7 @@
     <parent>
         <groupId>org.apache.bahir</groupId>
         <artifactId>bahir-parent_2.11</artifactId>
-        <version>2.1.1</version>
+        <version>2.2.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/sql-streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sql-streaming-mqtt/pom.xml b/sql-streaming-mqtt/pom.xml
index 20518b0..4a01ef5 100644
--- a/sql-streaming-mqtt/pom.xml
+++ b/sql-streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/streaming-akka/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-akka/pom.xml b/streaming-akka/pom.xml
index 2e95ac0..36ce385 100644
--- a/streaming-akka/pom.xml
+++ b/streaming-akka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/streaming-mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-mqtt/pom.xml b/streaming-mqtt/pom.xml
index b5b9ac3..2935f51 100644
--- a/streaming-mqtt/pom.xml
+++ b/streaming-mqtt/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/streaming-pubsub/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-pubsub/pom.xml b/streaming-pubsub/pom.xml
index 1458a19..c3da90f 100644
--- a/streaming-pubsub/pom.xml
+++ b/streaming-pubsub/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <artifactId>bahir-parent_2.11</artifactId>
     <groupId>org.apache.bahir</groupId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/streaming-twitter/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-twitter/pom.xml b/streaming-twitter/pom.xml
index 8004aa2..949e4b3 100644
--- a/streaming-twitter/pom.xml
+++ b/streaming-twitter/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/5d8fc42b/streaming-zeromq/pom.xml
----------------------------------------------------------------------
diff --git a/streaming-zeromq/pom.xml b/streaming-zeromq/pom.xml
index eb93b7b..db6616d 100644
--- a/streaming-zeromq/pom.xml
+++ b/streaming-zeromq/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.bahir</groupId>
     <artifactId>bahir-parent_2.11</artifactId>
-    <version>2.1.1</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[33/50] [abbrv] incubator-livy-website git commit: [MINOR] Update supported signatures

Posted by lr...@apache.org.
[MINOR] Update supported signatures


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/b328233a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/b328233a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/b328233a

Branch: refs/heads/master
Commit: b328233a651fa093062eede4d3348ac61ee58cdf
Parents: 561291b
Author: Luciano Resende <lr...@apache.org>
Authored: Thu Apr 13 11:49:36 2017 -0700
Committer: Luciano Resende <lr...@apache.org>
Committed: Thu Apr 13 11:49:36 2017 -0700

----------------------------------------------------------------------
 dev/release-build.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/b328233a/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index 509bede..b207b93 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -258,8 +258,12 @@ if [[ "$RELEASE_PREPARE" == "true" ]]; then
         cp bahir/distribution/target/*.zip    svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
 
         cd svn-bahir/$RELEASE_VERSION-$RELEASE_RC/
-        for i in *.zip *.gz; do gpg --output $i.asc --detach-sig --armor $i; done
-        for i in *.zip *.gz; do openssl md5 -hex $i | sed 's/MD5(\([^)]*\))= \([0-9a-f]*\)/\2 *\1/' > $i.md5; done
+        rm -f *.asc
+        for i in *.zip *.tgz; do gpg --output $i.asc --detach-sig --armor $i; done
+        rm -f *.md5
+        for i in *.zip *.tgz; do openssl md5 -hex $i | sed 's/MD5(\([^)]*\))= \([0-9a-f]*\)/\2 *\1/' > $i.md5; done
+        rm -f *.sha
+        for i in *.zip *.tgz; do shasum $i > $i.sha; done
 
         cd .. #exit $RELEASE_VERSION-$RELEASE_RC/
 


[18/50] [abbrv] incubator-livy-website git commit: [MINOR] Prompt for PGP passphrase when not exported

Posted by lr...@apache.org.
[MINOR] Prompt for PGP passphrase when not exported

The Release plugin forks the current Maven session but does not
propagate the system properties to the forked Maven session.
Therefore we need to pass the "-Dgpg.passphrase" wrapped inside
the "-Darguments" parameter.

http://maven.apache.org/plugins/maven-gpg-plugin/usage.html

Prompt for the GPG passphrase if the GPG_PASSPHRASE environment
variable was not exported.

Closes #34


Project: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/commit/8ad556e6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/tree/8ad556e6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-livy-website/diff/8ad556e6

Branch: refs/heads/master
Commit: 8ad556e63275cc42a15b3ff34c5e58eacdbfc6b1
Parents: bce9cd1
Author: Christian Kadner <ck...@us.ibm.com>
Authored: Tue Jan 17 12:26:52 2017 -0800
Committer: Christian Kadner <ck...@us.ibm.com>
Committed: Thu Jan 19 20:15:14 2017 -0800

----------------------------------------------------------------------
 dev/release-build.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-livy-website/blob/8ad556e6/dev/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/release-build.sh b/dev/release-build.sh
index 06762cf..509bede 100755
--- a/dev/release-build.sh
+++ b/dev/release-build.sh
@@ -143,12 +143,12 @@ while [ "${1+defined}" ]; do
 done
 
 
-for env in GPG_PASSPHRASE; do
-  if [ -z "${!env}" ]; then
-    echo "ERROR: $env must be set to run this script"
-    exit_with_usage
+if [[ -z "$GPG_PASSPHRASE" ]]; then
+    echo 'The environment variable GPG_PASSPHRASE is not set. Enter the passphrase to'
+    echo 'unlock the GPG signing key that will be used to sign the release!'
+    echo
+    stty -echo && printf "GPG passphrase: " && read GPG_PASSPHRASE && printf '\n' && stty echo
   fi
-done
 
 if [[ "$RELEASE_PREPARE" == "true" && -z "$RELEASE_VERSION" ]]; then
     echo "ERROR: --releaseVersion must be passed as an argument to run this script"
@@ -247,7 +247,7 @@ if [[ "$RELEASE_PREPARE" == "true" ]]; then
     cd target/bahir
 
     # Build and prepare the release
-    $MVN $PUBLISH_PROFILES release:clean release:prepare $DRY_RUN -Dgpg.passphrase="$GPG_PASSPHRASE" -DskipTests -DreleaseVersion="$RELEASE_VERSION" -DdevelopmentVersion="$DEVELOPMENT_VERSION" -Dtag="$RELEASE_TAG"
+    $MVN $PUBLISH_PROFILES release:clean release:prepare $DRY_RUN -Darguments="-Dgpg.passphrase=\"$GPG_PASSPHRASE\" -DskipTests" -DreleaseVersion="$RELEASE_VERSION" -DdevelopmentVersion="$DEVELOPMENT_VERSION" -Dtag="$RELEASE_TAG"
 
     cd .. #exit bahir