You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@predictionio.apache.org by do...@apache.org on 2017/03/20 17:57:18 UTC
[1/2] incubator-predictionio git commit: [PIO-30] Set up a cross
build for Spark 2.0 and Scala 2.11
Repository: incubator-predictionio
Updated Branches:
refs/heads/develop bb0032438 -> 00779c3d8
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-files/env-conf/pio-env.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/env-conf/pio-env.sh b/tests/docker-files/env-conf/pio-env.sh
index 0acf3a7..16f1fef 100644
--- a/tests/docker-files/env-conf/pio-env.sh
+++ b/tests/docker-files/env-conf/pio-env.sh
@@ -24,9 +24,9 @@
# you need to change these to fit your site.
# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
-SPARK_HOME=$SPARK_HOME
+# SPARK_HOME=$SPARK_HOME
-POSTGRES_JDBC_DRIVER=/drivers/postgresql-9.4-1204.jdbc41.jar
+POSTGRES_JDBC_DRIVER=/drivers/$PGSQL_JAR
MYSQL_JDBC_DRIVER=
# ES_CONF_DIR: You must configure this if you have advanced configuration for
@@ -88,7 +88,16 @@ PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
#PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio
PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch
PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http
-PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
+if [ ! -z "$PIO_ELASTICSEARCH_VERSION" ]; then
+ ES_MAJOR=`echo $PIO_ELASTICSEARCH_VERSION | awk -F. '{print $1}'`
+else
+ ES_MAJOR=1
+fi
+if [ "$ES_MAJOR" = "1" ]; then
+ PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300
+else
+ PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
+fi
#PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME
# Local File System Example
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-files/init.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/init.sh b/tests/docker-files/init.sh
index 8dc08ea..fc12ffe 100755
--- a/tests/docker-files/init.sh
+++ b/tests/docker-files/init.sh
@@ -17,7 +17,7 @@
#
set -e
-export PYTHONPATH=/$PIO_HOME/tests:$PYTHONPATH
+export PYTHONPATH=$PIO_HOME/tests:$PYTHONPATH
echo "Sleeping $SLEEP_TIME seconds for all services to be ready..."
sleep $SLEEP_TIME
eval $@
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-files/set_build_profile.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/set_build_profile.sh b/tests/docker-files/set_build_profile.sh
new file mode 100755
index 0000000..141dd46
--- /dev/null
+++ b/tests/docker-files/set_build_profile.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Sets version of profile dependencies from sbt configuration.
+# eg. Run `source ./set_build_profile.sh scala-2.11`
+
+set -e
+
+if [[ "$#" -ne 1 ]]; then
+ echo "Usage: set_build_profile.sh <build-profile>"
+ exit 1
+fi
+
+set -a
+eval `$PIO_HOME/sbt/sbt --error 'set showSuccess := false' -Dbuild.profile=$1 printProfile | grep '.*_VERSION=.*'`
+set +a
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/pio_tests/engines/recommendation-engine/build.sbt
----------------------------------------------------------------------
diff --git a/tests/pio_tests/engines/recommendation-engine/build.sbt b/tests/pio_tests/engines/recommendation-engine/build.sbt
index 52e8742..13fe354 100644
--- a/tests/pio_tests/engines/recommendation-engine/build.sbt
+++ b/tests/pio_tests/engines/recommendation-engine/build.sbt
@@ -19,11 +19,13 @@ import AssemblyKeys._
assemblySettings
+scalaVersion in ThisBuild := sys.env.getOrElse("PIO_SCALA_VERSION", "2.10.5")
+
name := "template-scala-parallel-recommendation"
organization := "org.apache.predictionio"
libraryDependencies ++= Seq(
- "org.apache.predictionio" %% "apache-predictionio-core" % pioVersion.value % "provided",
- "org.apache.spark" %% "spark-core" % "1.3.0" % "provided",
- "org.apache.spark" %% "spark-mllib" % "1.3.0" % "provided")
+ "org.apache.predictionio" %% "apache-predictionio-core" % "0.11.0-SNAPSHOT" % "provided",
+ "org.apache.spark" %% "spark-core" % sys.env.getOrElse("PIO_SPARK_VERSION", "1.6.3") % "provided",
+ "org.apache.spark" %% "spark-mllib" % sys.env.getOrElse("PIO_SPARK_VERSION", "1.6.3") % "provided")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/pio_tests/engines/recommendation-engine/manifest.json
----------------------------------------------------------------------
diff --git a/tests/pio_tests/engines/recommendation-engine/manifest.json b/tests/pio_tests/engines/recommendation-engine/manifest.json
deleted file mode 100644
index 9c545ce..0000000
--- a/tests/pio_tests/engines/recommendation-engine/manifest.json
+++ /dev/null
@@ -1 +0,0 @@
-{"id":"yDBpzjz39AjIxlOAh8W4t3QSc75uPCuJ","version":"ee98ff9c009ef0d9fb1bc6b78750b83a0ceb37b2","name":"recommendation-engine","description":"pio-autogen-manifest","files":[],"engineFactory":""}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/pio_tests/engines/recommendation-engine/project/pio-build.sbt
----------------------------------------------------------------------
diff --git a/tests/pio_tests/engines/recommendation-engine/project/pio-build.sbt b/tests/pio_tests/engines/recommendation-engine/project/pio-build.sbt
deleted file mode 100644
index 8346a96..0000000
--- a/tests/pio_tests/engines/recommendation-engine/project/pio-build.sbt
+++ /dev/null
@@ -1 +0,0 @@
-addSbtPlugin("io.prediction" % "pio-build" % "0.9.0")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/pio_tests/scenarios/quickstart_test.py
----------------------------------------------------------------------
diff --git a/tests/pio_tests/scenarios/quickstart_test.py b/tests/pio_tests/scenarios/quickstart_test.py
index 1c5f422..ab7180d 100644
--- a/tests/pio_tests/scenarios/quickstart_test.py
+++ b/tests/pio_tests/scenarios/quickstart_test.py
@@ -89,8 +89,8 @@ class QuickStartTest(BaseTestCase):
self.app.build(engine_dir=engine_path)
self.log.info("Training...")
self.app.train(engine_dir=engine_path)
- self.log.info("Deploying and waiting 15s for it to start...")
- self.app.deploy(wait_time=15, engine_dir=engine_path)
+ self.log.info("Deploying and waiting 30s for it to start...")
+ self.app.deploy(wait_time=30, engine_dir=engine_path)
self.log.info("Sending a single query and checking results")
user_query = { "user": 1, "num": 4 }
@@ -153,8 +153,8 @@ class QuickStartTest(BaseTestCase):
self.app.build()
self.log.info("Training...")
self.app.train()
- self.log.info("Deploying and waiting 15s for it to start...")
- self.app.deploy(wait_time=15)
+ self.log.info("Deploying and waiting 35s for it to start...")
+ self.app.deploy(wait_time=35)
self.log.info("Testing pio commands outside of engine directory")
self.engine_dir_test()
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/pio_tests/utils.py
----------------------------------------------------------------------
diff --git a/tests/pio_tests/utils.py b/tests/pio_tests/utils.py
index e6c5b0b..05c8d1c 100644
--- a/tests/pio_tests/utils.py
+++ b/tests/pio_tests/utils.py
@@ -151,10 +151,11 @@ def import_events_batch(events, test_context, appid, channel=None):
try:
with open(file_path, 'w') as f:
f.write(contents)
- srun('pio import --appid {} --input {} {}'.format(
+ srun('pio import --appid {} --input {} {} -- {}'.format(
appid,
file_path,
- '--channel {}'.format(channel) if channel else ''))
+ '--channel {}'.format(channel) if channel else '',
+ '--conf spark.sql.warehouse.dir=file:///tmp/spark-warehouse'))
finally:
os.remove(file_path)
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/run_docker.sh
----------------------------------------------------------------------
diff --git a/tests/run_docker.sh b/tests/run_docker.sh
index 6257fa2..ad7e189 100755
--- a/tests/run_docker.sh
+++ b/tests/run_docker.sh
@@ -46,17 +46,10 @@ shift
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-if [ "$ES_VERSION" = "1" ]; then
- docker-compose -f $DIR/docker-compose-es1.yml run \
- -e PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$META \
- -e PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$EVENT \
- -e PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$MODEL \
- pio-testing $@
-else
- docker-compose -f $DIR/docker-compose.yml run \
- -e PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$META \
- -e PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$EVENT \
- -e PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$MODEL \
- pio-testing $@
-fi
+source $DIR/../conf/pio-vendors.sh
+docker-compose -f $DIR/docker-compose.yml run \
+ -e PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$META \
+ -e PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$EVENT \
+ -e PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$MODEL \
+ pio-testing $@
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/script.travis.sh
----------------------------------------------------------------------
diff --git a/tests/script.travis.sh b/tests/script.travis.sh
index db69413..1d4985e 100755
--- a/tests/script.travis.sh
+++ b/tests/script.travis.sh
@@ -17,9 +17,9 @@
#
if [[ $BUILD_TYPE == Unit ]]; then
- ./tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \
- /PredictionIO/tests/unit.sh
+ tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \
+ "/PredictionIO/tests/unit.sh"
else
- ./tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \
- python3 /PredictionIO/tests/pio_tests/tests.py
+ tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \
+ "python3 /PredictionIO/tests/pio_tests/tests.py"
fi
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/unit.sh
----------------------------------------------------------------------
diff --git a/tests/unit.sh b/tests/unit.sh
index 6382a70..1421dce 100755
--- a/tests/unit.sh
+++ b/tests/unit.sh
@@ -14,20 +14,31 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#
-# Run license check
pushd /PredictionIO
+# Run license check
./tests/check_license.sh
# Prepare pio environment variables
set -a
-source conf/pio-env.sh
+source ./conf/pio-env.sh
set +a
+source ./conf/pio-vendors.sh
# Run stylecheck
-sbt/sbt scalastyle
+sbt/sbt scalastyle \
+ -Dscala.version=$PIO_SCALA_VERSION \
+ -Dspark.version=$PIO_SPARK_VERSION \
+ -Dhadoop.version=$PIO_HADOOP_VERSION \
+ -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION
+
# Run all unit tests
-sbt/sbt test
+sbt/sbt dataJdbc/compile test storage/test \
+ -Dscala.version=$PIO_SCALA_VERSION \
+ -Dspark.version=$PIO_SPARK_VERSION \
+ -Dhadoop.version=$PIO_HADOOP_VERSION \
+ -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION
popd
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tools/build.sbt
----------------------------------------------------------------------
diff --git a/tools/build.sbt b/tools/build.sbt
index 57e7d96..483a591 100644
--- a/tools/build.sbt
+++ b/tools/build.sbt
@@ -15,27 +15,17 @@
* limitations under the License.
*/
+import PIOBuild._
import sbtassembly.AssemblyPlugin.autoImport._
name := "apache-predictionio-tools"
libraryDependencies ++= Seq(
- "com.github.scopt" %% "scopt" % "3.2.0",
- "io.spray" %% "spray-can" % "1.3.3",
- "io.spray" %% "spray-routing" % "1.3.3",
"me.lessis" % "semverfi_2.10" % "0.1.3",
- "org.apache.hadoop" % "hadoop-common" % "2.6.2",
- "org.apache.hadoop" % "hadoop-hdfs" % "2.6.2",
- "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
- "org.clapper" %% "grizzled-slf4j" % "1.0.2",
- "org.json4s" %% "json4s-native" % json4sVersion.value,
- "org.json4s" %% "json4s-ext" % json4sVersion.value,
- "org.scalaj" %% "scalaj-http" % "1.1.6",
- "org.spark-project.akka" %% "akka-actor" % "2.3.4-spark",
+ "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value,
"io.spray" %% "spray-testkit" % "1.3.3" % "test",
- "org.specs2" %% "specs2" % "2.3.13" % "test",
- "org.spark-project.akka" %% "akka-slf4j" % "2.3.4-spark")
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
dependencyOverrides += "org.slf4j" % "slf4j-log4j12" % "1.7.18"
@@ -49,12 +39,8 @@ assemblyMergeStrategy in assembly := {
excludedJars in assembly <<= (fullClasspath in assembly) map { cp =>
cp filter { _.data.getName match {
- case "asm-3.1.jar" => true
- case "commons-beanutils-1.7.0.jar" => true
case "reflectasm-1.10.1.jar" => true
- case "commons-beanutils-core-1.8.0.jar" => true
case "kryo-3.0.3.jar" => true
- case "slf4j-log4j12-1.7.5.jar" => true
case _ => false
}}
}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala b/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala
index bbe39a5..7e8fd30 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala
@@ -34,6 +34,7 @@ import spray.httpx.Json4sSupport
import spray.routing._
import scala.concurrent.ExecutionContext
+import scala.concurrent.duration.Duration
class AdminServiceActor(val commandClient: CommandClient)
extends HttpServiceActor {
@@ -151,7 +152,7 @@ object AdminServer {
}
object AdminRun {
- def main (args: Array[String]) {
+ def main (args: Array[String]) : Unit = {
AdminServer.createAdminServer(AdminServerConfig(
ip = "localhost",
port = 7071))
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala
index de09cab..c101d3f 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala
@@ -21,13 +21,13 @@ package org.apache.predictionio.tools.export
import org.apache.predictionio.controller.Utils
import org.apache.predictionio.data.storage.EventJson4sSupport
import org.apache.predictionio.data.storage.Storage
+import org.apache.predictionio.data.SparkVersionDependent
import org.apache.predictionio.tools.Runner
import org.apache.predictionio.workflow.WorkflowContext
import org.apache.predictionio.workflow.WorkflowUtils
import grizzled.slf4j.Logging
import org.apache.spark.sql.SaveMode
-import org.apache.spark.sql.SQLContext
import org.json4s.native.Serialization._
case class EventsToFileArgs(
@@ -91,14 +91,14 @@ object EventsToFile extends Logging {
mode = "Export",
batch = "App ID " + args.appId + channelStr,
executorEnv = Runner.envStringToMap(args.env))
- val sqlContext = new SQLContext(sc)
+ val sqlSession = SparkVersionDependent.sqlSession(sc)
val events = Storage.getPEvents()
val eventsRdd = events.find(appId = args.appId, channelId = channelId)(sc)
val jsonStringRdd = eventsRdd.map(write(_))
if (args.format == "json") {
jsonStringRdd.saveAsTextFile(args.outputPath)
} else {
- val jsonDf = sqlContext.read.json(jsonStringRdd)
+ val jsonDf = sqlSession.read.json(jsonStringRdd)
jsonDf.write.mode(SaveMode.ErrorIfExists).parquet(args.outputPath)
}
info(s"Events are exported to ${args.outputPath}/.")
[2/2] incubator-predictionio git commit: [PIO-30] Set up a cross
build for Spark 2.0 and Scala 2.11
Posted by do...@apache.org.
[PIO-30] Set up a cross build for Spark 2.0 and Scala 2.11
Closes #345
Closes #295
Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/00779c3d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/00779c3d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/00779c3d
Branch: refs/heads/develop
Commit: 00779c3d8e6a7b55306ddc29d4779875ffa3ccba
Parents: bb00324
Author: Chan Lee <ch...@gmail.com>
Authored: Mon Mar 20 10:49:24 2017 -0700
Committer: Donald Szeto <do...@apache.org>
Committed: Mon Mar 20 10:49:24 2017 -0700
----------------------------------------------------------------------
.travis.yml | 66 +++++-
bin/install.sh | 2 +-
build.sbt | 153 ++++++++++---
common/build.sbt | 9 +-
conf/pio-env.sh.template | 1 +
conf/pio-env.sh.travis | 5 +-
conf/pio-vendors.sh | 57 +++++
core/build.sbt | 17 +-
.../predictionio/workflow/CreateServer.scala | 3 +-
data/build.sbt | 17 +-
.../predictionio/data/api/EventServer.scala | 5 +-
.../predictionio/data/view/DataView.scala | 13 +-
.../predictionio/data/view/PBatchView.scala | 204 ++++++++++++++++++
.../data/SparkVersionDependent.scala | 30 +++
.../data/SparkVersionDependent.scala | 30 +++
e2/build.sbt | 4 +-
make-distribution.sh | 36 +---
project/Build.scala | 34 ---
project/PIOBuild.scala | 36 ++++
project/assembly.sbt | 2 +-
project/plugins.sbt | 4 +-
storage/elasticsearch/build.sbt | 30 +--
storage/elasticsearch1/build.sbt | 18 +-
storage/hbase/build.sbt | 14 +-
.../predictionio/data/view/PBatchView.scala | 212 -------------------
storage/hdfs/build.sbt | 20 +-
storage/jdbc/build.sbt | 16 +-
.../data/storage/jdbc/JDBCPEvents.scala | 26 ++-
storage/localfs/build.sbt | 16 +-
tests/Dockerfile | 23 +-
tests/Dockerfile-es1 | 57 -----
tests/README.md | 6 +
tests/after_script.travis.sh | 8 +-
tests/before_script.travis.sh | 2 +-
tests/build-docker.sh | 50 -----
tests/build_docker.sh | 57 +++++
tests/docker-compose-es1.yml | 38 ----
tests/docker-compose.yml | 2 +-
tests/docker-files/env-conf/pio-env-es1.sh | 103 ---------
tests/docker-files/env-conf/pio-env.sh | 15 +-
tests/docker-files/init.sh | 2 +-
tests/docker-files/set_build_profile.sh | 31 +++
.../engines/recommendation-engine/build.sbt | 8 +-
.../engines/recommendation-engine/manifest.json | 1 -
.../recommendation-engine/project/pio-build.sbt | 1 -
tests/pio_tests/scenarios/quickstart_test.py | 8 +-
tests/pio_tests/utils.py | 5 +-
tests/run_docker.sh | 19 +-
tests/script.travis.sh | 8 +-
tests/unit.sh | 19 +-
tools/build.sbt | 20 +-
.../predictionio/tools/admin/AdminAPI.scala | 3 +-
.../tools/export/EventsToFile.scala | 6 +-
53 files changed, 801 insertions(+), 771 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 634d286..8dcc2fa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,9 +29,6 @@ branches:
language: scala
-scala:
- - 2.10.5
-
jdk:
- oraclejdk8
@@ -44,12 +41,63 @@ cache: false
env:
matrix:
- - BUILD_TYPE=Unit METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
- - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
- - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=5
- - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=5
- - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=1
- - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=1
+ - BUILD_TYPE=Unit
+ METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+ PIO_SCALA_VERSION=2.10.6
+ PIO_SPARK_VERSION=1.6.3
+ - BUILD_TYPE=Integration
+ METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+ PIO_SCALA_VERSION=2.10.6
+ PIO_SPARK_VERSION=1.6.3
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+ PIO_SCALA_VERSION=2.10.6
+ PIO_SPARK_VERSION=1.6.3
+ PIO_ELASTICSEARCH_VERSION=5.2.2
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+ PIO_SCALA_VERSION=2.10.6
+ PIO_SPARK_VERSION=1.6.3
+ PIO_ELASTICSEARCH_VERSION=5.2.2
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+ PIO_SCALA_VERSION=2.10.6
+ PIO_SPARK_VERSION=1.6.3
+ PIO_ELASTICSEARCH_VERSION=1.7.3
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+ PIO_SCALA_VERSION=2.10.6
+ PIO_SPARK_VERSION=1.6.3
+ PIO_ELASTICSEARCH_VERSION=1.7.3
+
+ - BUILD_TYPE=Unit
+ METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ - BUILD_TYPE=Integration
+ METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ PIO_ELASTICSEARCH_VERSION=5.2.2
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ PIO_ELASTICSEARCH_VERSION=5.2.2
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ PIO_ELASTICSEARCH_VERSION=1.7.3
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ PIO_ELASTICSEARCH_VERSION=1.7.3
before_install:
- unset SBT_OPTS JVM_OPTS
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/bin/install.sh
----------------------------------------------------------------------
diff --git a/bin/install.sh b/bin/install.sh
index e485df9..7431b09 100755
--- a/bin/install.sh
+++ b/bin/install.sh
@@ -18,7 +18,7 @@
#
OS=`uname`
-SPARK_VERSION=1.6.2
+SPARK_VERSION=1.6.3
# Looks like support for Elasticsearch 2.0 will require 2.0 so deferring
ELASTICSEARCH_VERSION=1.7.5
HBASE_VERSION=1.2.2
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/build.sbt
----------------------------------------------------------------------
diff --git a/build.sbt b/build.sbt
index fa5ba29..fc47f6a 100644
--- a/build.sbt
+++ b/build.sbt
@@ -15,28 +15,73 @@
* limitations under the License.
*/
+import PIOBuild._
import UnidocKeys._
+lazy val scalaSparkDepsVersion = Map(
+ "2.10" -> Map(
+ "1.6" -> Map(
+ "akka" -> "2.3.15",
+ "hadoop" -> "2.6.5",
+ "json4s" -> "3.2.10"),
+ "2.0" -> Map(
+ "akka" -> "2.3.16",
+ "hadoop" -> "2.7.3",
+ "json4s" -> "3.2.11"),
+ "2.1" -> Map(
+ "akka" -> "2.3.16",
+ "hadoop" -> "2.7.3",
+ "json4s" -> "3.2.11")),
+ "2.11" -> Map(
+ "1.6" -> Map(
+ "akka" -> "2.3.15",
+ "hadoop" -> "2.6.5",
+ "json4s" -> "3.2.10"),
+ "2.0" -> Map(
+ "akka" -> "2.4.17",
+ "hadoop" -> "2.7.3",
+ "json4s" -> "3.2.11"),
+ "2.1" -> Map(
+ "akka" -> "2.4.17",
+ "hadoop" -> "2.7.3",
+ "json4s" -> "3.2.11")))
+
name := "apache-predictionio-parent"
version in ThisBuild := "0.11.0-SNAPSHOT"
organization in ThisBuild := "org.apache.predictionio"
-scalaVersion in ThisBuild := "2.10.5"
+scalaVersion in ThisBuild := sys.props.getOrElse("scala.version", "2.10.6")
+
+crossScalaVersions in ThisBuild := Seq("2.10.6", "2.11.8")
scalacOptions in ThisBuild ++= Seq("-deprecation", "-unchecked", "-feature")
scalacOptions in (ThisBuild, Test) ++= Seq("-Yrangepos")
-
fork in (ThisBuild, run) := true
javacOptions in (ThisBuild, compile) ++= Seq("-source", "1.7", "-target", "1.7",
"-Xlint:deprecation", "-Xlint:unchecked")
-json4sVersion in ThisBuild := "3.2.10"
+// Ignore differentiation of Spark patch levels
+sparkVersion in ThisBuild := sys.props.getOrElse("spark.version", "1.6.3")
+
+sparkBinaryVersion in ThisBuild := binaryVersion(sparkVersion.value)
+
+akkaVersion in ThisBuild := sys.props.getOrElse(
+ "akka.version",
+ scalaSparkDepsVersion(scalaBinaryVersion.value)(sparkBinaryVersion.value)("akka"))
+
+lazy val es = sys.props.getOrElse("elasticsearch.version", "1.7.6")
-sparkVersion in ThisBuild := "1.6.3"
+elasticsearchVersion in ThisBuild := es
+
+json4sVersion in ThisBuild := scalaSparkDepsVersion(scalaBinaryVersion.value)(sparkBinaryVersion.value)("json4s")
+
+hadoopVersion in ThisBuild := sys.props.getOrElse(
+ "hadoop.version",
+ scalaSparkDepsVersion(scalaBinaryVersion.value)(sparkBinaryVersion.value)("hadoop"))
val pioBuildInfoSettings = buildInfoSettings ++ Seq(
sourceGenerators in Compile <+= buildInfo,
@@ -45,65 +90,106 @@ val pioBuildInfoSettings = buildInfoSettings ++ Seq(
version,
scalaVersion,
sbtVersion,
- sparkVersion),
+ sparkVersion,
+ hadoopVersion),
buildInfoPackage := "org.apache.predictionio.core")
+// Used temporarily to modify genjavadoc version to "0.10" until unidoc updates it
+val genjavadocSettings: Seq[sbt.Def.Setting[_]] = Seq(
+ libraryDependencies += compilerPlugin("com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.10" cross CrossVersion.full),
+ scalacOptions <+= target map (t => "-P:genjavadoc:out=" + (t / "java")))
+
val conf = file("conf")
val commonSettings = Seq(
autoAPIMappings := true,
- unmanagedClasspath in Test += conf)
+ unmanagedClasspath in Test += conf,
+ unmanagedClasspath in Test += baseDirectory.value.getParentFile / s"storage/jdbc/target/scala-${scalaBinaryVersion.value}/classes")
-val common = (project in file("common")).
- settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
-
-val data = (project in file("data")).
- dependsOn(common).
- settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+val commonTestSettings = Seq(
+ libraryDependencies ++= Seq(
+ "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test",
+ "org.scalikejdbc" %% "scalikejdbc" % "2.3.5" % "test"))
val dataElasticsearch1 = (project in file("storage/elasticsearch1")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ settings(publishArtifact := false)
val dataElasticsearch = (project in file("storage/elasticsearch")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ settings(publishArtifact := false)
val dataHbase = (project in file("storage/hbase")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ settings(publishArtifact := false)
val dataHdfs = (project in file("storage/hdfs")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ settings(publishArtifact := false)
val dataJdbc = (project in file("storage/jdbc")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ settings(publishArtifact := false)
val dataLocalfs = (project in file("storage/localfs")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ settings(publishArtifact := false)
+
+val common = (project in file("common")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*).
+ disablePlugins(sbtassembly.AssemblyPlugin)
+
+val data = (project in file("data")).
+ dependsOn(common).
+ settings(commonSettings: _*).
+ settings(commonTestSettings: _*).
+ settings(genjavadocSettings: _*).
+ settings(unmanagedSourceDirectories in Compile +=
+ sourceDirectory.value / s"main/spark-${majorVersion(sparkVersion.value)}").
+ disablePlugins(sbtassembly.AssemblyPlugin)
val core = (project in file("core")).
dependsOn(data).
settings(commonSettings: _*).
+ settings(commonTestSettings: _*).
settings(genjavadocSettings: _*).
settings(pioBuildInfoSettings: _*).
- enablePlugins(SbtTwirl)
+ enablePlugins(SbtTwirl).
+ disablePlugins(sbtassembly.AssemblyPlugin)
val tools = (project in file("tools")).
dependsOn(core).
dependsOn(data).
settings(commonSettings: _*).
+ settings(commonTestSettings: _*).
settings(genjavadocSettings: _*).
- enablePlugins(SbtTwirl)
+ enablePlugins(SbtTwirl).
+ settings(publishArtifact := false)
val e2 = (project in file("e2")).
settings(commonSettings: _*).
- settings(genjavadocSettings: _*)
+ settings(genjavadocSettings: _*).
+ disablePlugins(sbtassembly.AssemblyPlugin)
+
+val dataEs = if (majorVersion(es) == 1) dataElasticsearch1 else dataElasticsearch
+
+val storageSubprojects = Seq(
+ dataEs,
+ dataHbase,
+ dataHdfs,
+ dataJdbc,
+ dataLocalfs)
+
+val storage = (project in file("storage"))
+ .aggregate(storageSubprojects map Project.projectToRef: _*)
+ .disablePlugins(sbtassembly.AssemblyPlugin)
val root = (project in file(".")).
settings(commonSettings: _*).
@@ -162,18 +248,8 @@ val root = (project in file(".")).
"docs/javadoc/javadoc-overview.html",
"-noqualifier",
"java.lang")).
- aggregate(
- common,
- core,
- data,
- dataElasticsearch1,
- dataElasticsearch,
- dataHbase,
- dataHdfs,
- dataJdbc,
- dataLocalfs,
- tools,
- e2)
+ aggregate(common, core, data, tools, e2).
+ disablePlugins(sbtassembly.AssemblyPlugin)
val pioUnidoc = taskKey[Unit]("Builds PredictionIO ScalaDoc")
@@ -232,3 +308,10 @@ parallelExecution := false
parallelExecution in Global := false
testOptions in Test += Tests.Argument("-oDF")
+
+printBuildInfo := {
+ println(s"PIO_SCALA_VERSION=${scalaVersion.value}")
+ println(s"PIO_SPARK_VERSION=${sparkVersion.value}")
+ println(s"PIO_ELASTICSEARCH_VERSION=${elasticsearchVersion.value}")
+ println(s"PIO_HADOOP_VERSION=${hadoopVersion.value}")
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/common/build.sbt
----------------------------------------------------------------------
diff --git a/common/build.sbt b/common/build.sbt
index e7050d4..5a0fba1 100644
--- a/common/build.sbt
+++ b/common/build.sbt
@@ -15,12 +15,15 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-common"
libraryDependencies ++= Seq(
- "io.spray" %% "spray-can" % "1.3.2",
- "io.spray" %% "spray-routing" % "1.3.2",
- "org.spark-project.akka" %% "akka-actor" % "2.3.4-spark"
+ "io.spray" %% "spray-can" % "1.3.3",
+ "io.spray" %% "spray-routing" % "1.3.3",
+ "com.typesafe.akka" %% "akka-actor" % akkaVersion.value,
+ "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value
)
pomExtra := childrenPomExtra.value
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/conf/pio-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template
index 0d76102..57185ba 100644
--- a/conf/pio-env.sh.template
+++ b/conf/pio-env.sh.template
@@ -24,6 +24,7 @@
# you need to change these to fit your site.
# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
+# SPARK_HOME=$PIO_HOME/vendors/spark-2.0.2-bin-hadoop2.7
SPARK_HOME=$PIO_HOME/vendors/spark-1.6.3-bin-hadoop2.6
POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-9.4-1204.jdbc41.jar
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/conf/pio-env.sh.travis
----------------------------------------------------------------------
diff --git a/conf/pio-env.sh.travis b/conf/pio-env.sh.travis
index 94b30cd..80e3332 100644
--- a/conf/pio-env.sh.travis
+++ b/conf/pio-env.sh.travis
@@ -24,7 +24,8 @@
# you need to change these to fit your site.
# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
-SPARK_HOME=$PIO_HOME/vendors/spark-1.3.0-bin-hadoop2.4
+# it is set up in script.travis.sh
+SPARK_HOME=$SPARK_HOME
# Filesystem paths where PredictionIO uses as block storage.
PIO_FS_BASEDIR=$HOME/.pio_store
@@ -41,7 +42,7 @@ PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs
PIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/models
PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
-PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0
+PIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME
# Storage Data Sources (pgsql)
PIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/conf/pio-vendors.sh
----------------------------------------------------------------------
diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh
new file mode 100644
index 0000000..830b576
--- /dev/null
+++ b/conf/pio-vendors.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# IMPORTANT: PIO_*_VERSION for dependencies must be set before invoking this script.
+# `source conf/set_build_profile.sh $BUILD_PROFILE` to get the proper versions
+
+if [ -z "$PIO_SCALA_VERSION" ]; then
+ PIO_SCALA_VERSION="2.10.6"
+fi
+
+if [ -z "$PIO_SPARK_VERSION" ]; then
+ PIO_SPARK_VERSION="1.6.3"
+fi
+
+if [ -z "$PIO_HADOOP_VERSION" ]; then
+ PIO_HADOOP_VERSION="2.6.5"
+fi
+
+if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then
+ PIO_ELASTICSEARCH_VERSION="1.7.3"
+fi
+
+ES_MAJOR=`echo $PIO_ELASTICSEARCH_VERSION | awk -F. '{print $1}'`
+
+if [ "$ES_MAJOR" = "1" ]; then
+ export ES_IMAGE="elasticsearch"
+ export ES_TAG="1"
+else
+ export ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch"
+ export ES_TAG="5.2.2"
+fi
+
+PGSQL_JAR=postgresql-9.4-1204.jdbc41.jar
+PGSQL_DOWNLOAD=https://jdbc.postgresql.org/download/${PGSQL_JAR}
+
+HADOOP_MAJOR=`echo $PIO_HADOOP_VERSION | awk -F. '{print $1 "." $2}'`
+SPARK_DIR=spark-${PIO_SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR}
+SPARK_ARCHIVE=${SPARK_DIR}.tgz
+SPARK_DOWNLOAD=http://d3kbcqa49mib13.cloudfront.net/${SPARK_ARCHIVE}
+# ELASTICSEARCH_DOWNLOAD
+# 1.x https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz
+# 5.x https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/core/build.sbt
----------------------------------------------------------------------
diff --git a/core/build.sbt b/core/build.sbt
index bfb8bf3..b95a957 100644
--- a/core/build.sbt
+++ b/core/build.sbt
@@ -15,32 +15,23 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-core"
libraryDependencies ++= Seq(
- "com.github.scopt" %% "scopt" % "3.3.0",
+ "com.github.scopt" %% "scopt" % "3.5.0",
"com.google.code.gson" % "gson" % "2.5",
- "com.google.guava" % "guava" % "18.0",
- "com.twitter" %% "chill" % "0.7.2"
- exclude("com.esotericsoftware.minlog", "minlog"),
"com.twitter" %% "chill-bijection" % "0.7.2",
"de.javakaffee" % "kryo-serializers" % "0.37",
- "commons-io" % "commons-io" % "2.4",
- "io.spray" %% "spray-can" % "1.3.3",
- "io.spray" %% "spray-routing" % "1.3.3",
"net.jodah" % "typetools" % "0.3.1",
"org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
- "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
- "org.clapper" %% "grizzled-slf4j" % "1.0.2",
- "org.json4s" %% "json4s-native" % json4sVersion.value,
"org.json4s" %% "json4s-ext" % json4sVersion.value,
"org.scalaj" %% "scalaj-http" % "1.1.6",
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
"org.slf4j" % "slf4j-log4j12" % "1.7.18",
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
"org.specs2" %% "specs2" % "2.3.13" % "test")
-//testOptions := Seq(Tests.Filter(s => Seq("Dev").exists(s.contains(_))))
-
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala b/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
index 6f274bc..31b7831 100644
--- a/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
+++ b/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
@@ -51,7 +51,6 @@ import spray.routing._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.concurrent.duration._
-import scala.concurrent.future
import scala.language.existentials
import scala.util.{Failure, Random, Success}
import scalaj.http.HttpOptions
@@ -548,7 +547,7 @@ class ServerActor[Q, P](
"prediction" -> prediction)) ++ queryPrId
// At this point args.accessKey should be Some(String).
val accessKey = args.accessKey.getOrElse("")
- val f: Future[Int] = future {
+ val f: Future[Int] = Future {
scalaj.http.Http(
s"http://${args.eventServerIp}:${args.eventServerPort}/" +
s"events.json?accessKey=$accessKey").postData(
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/build.sbt
----------------------------------------------------------------------
diff --git a/data/build.sbt b/data/build.sbt
index f5e95b5..ddd085d 100644
--- a/data/build.sbt
+++ b/data/build.sbt
@@ -15,28 +15,21 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-data"
libraryDependencies ++= Seq(
"com.github.nscala-time" %% "nscala-time" % "2.6.0",
- "commons-codec" % "commons-codec" % "1.9",
+ "com.google.guava" % "guava" % "19.0",
"io.spray" %% "spray-can" % "1.3.3",
"io.spray" %% "spray-routing" % "1.3.3",
- "io.spray" %% "spray-testkit" % "1.3.3" % "test",
- "mysql" % "mysql-connector-java" % "5.1.37" % "optional",
- "org.apache.hadoop" % "hadoop-common" % "2.6.2"
- exclude("javax.servlet", "servlet-api"),
- "org.apache.zookeeper" % "zookeeper" % "3.4.7"
- exclude("org.slf4j", "slf4j-api")
- exclude("org.slf4j", "slf4j-log4j12"),
- "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
"org.clapper" %% "grizzled-slf4j" % "1.0.2",
"org.json4s" %% "json4s-native" % json4sVersion.value,
- "org.json4s" %% "json4s-ext" % json4sVersion.value,
+ "org.scalikejdbc" %% "scalikejdbc" % "2.3.2",
+ "io.spray" %% "spray-testkit" % "1.3.3" % "test",
"org.scalatest" %% "scalatest" % "2.1.7" % "test",
- "org.slf4j" % "slf4j-log4j12" % "1.7.18",
- "org.spark-project.akka" %% "akka-actor" % "2.3.4-spark",
"org.specs2" %% "specs2" % "2.3.13" % "test")
parallelExecution in Test := false
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala b/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
index 648316e..b4392ff 100644
--- a/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
+++ b/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
@@ -48,8 +48,7 @@ import spray.httpx.Json4sSupport
import spray.routing._
import spray.routing.authentication.Authentication
-import scala.concurrent.ExecutionContext
-import scala.concurrent.Future
+import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Try, Success, Failure}
class EventServiceActor(
@@ -635,7 +634,7 @@ object EventServer {
}
object Run {
- def main(args: Array[String]) {
+ def main(args: Array[String]): Unit = {
EventServer.createEventServer(EventServerConfig(
ip = "0.0.0.0",
port = 7070))
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala b/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
index 4866b5d..1c47e10 100644
--- a/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
+++ b/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
@@ -20,6 +20,7 @@ package org.apache.predictionio.data.view
import org.apache.predictionio.annotation.Experimental
import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.SparkVersionDependent
import grizzled.slf4j.Logger
import org.apache.predictionio.data.store.PEventStore
@@ -27,7 +28,7 @@ import org.apache.predictionio.data.store.PEventStore
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SaveMode
-import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkContext
import org.joda.time.DateTime
import scala.reflect.ClassTag
@@ -64,11 +65,11 @@ object DataView {
untilTime: Option[DateTime] = None,
conversionFunction: Event => Option[E],
name: String = "",
- version: String = "")(sqlContext: SQLContext): DataFrame = {
+ version: String = "")(sc: SparkContext): DataFrame = {
@transient lazy val logger = Logger[this.type]
- val sc = sqlContext.sparkContext
+ val sqlSession = SparkVersionDependent.sqlSession(sc)
val beginTime = startTime match {
case Some(t) => t
@@ -85,7 +86,7 @@ object DataView {
val baseDir = s"${sys.env("PIO_FS_BASEDIR")}/view"
val fileName = s"$baseDir/$name-$appName-$hash.parquet"
try {
- sqlContext.read.parquet(fileName)
+ sqlSession.read.parquet(fileName)
} catch {
case e: java.io.FileNotFoundException =>
logger.info("Cached copy not found, reading from DB.")
@@ -96,11 +97,11 @@ object DataView {
startTime = startTime,
untilTime = Some(endTime))(sc)
.flatMap((e) => conversionFunction(e))
- import sqlContext.implicits._ // needed for RDD.toDF()
+ import sqlSession.implicits._ // needed for RDD.toDF()
val resultDF = result.toDF()
resultDF.write.mode(SaveMode.ErrorIfExists).parquet(fileName)
- sqlContext.read.parquet(fileName)
+ sqlSession.read.parquet(fileName)
case e: java.lang.RuntimeException =>
if (e.toString.contains("is not a Parquet file")) {
logger.error(s"$fileName does not contain a valid Parquet file. " +
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala b/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
new file mode 100644
index 0000000..cbdebce
--- /dev/null
+++ b/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.view
+
+import org.apache.predictionio.data.storage.{DataMap, Event, EventValidation, Storage}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.joda.time.DateTime
+import org.json4s.JValue
+
+
+// each JValue data associated with the time it is set
+private[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable
+
+private[predictionio] case class SetProp (
+ val fields: Map[String, PropTime],
+ // last set time. Note: fields could be empty with valid set time
+ val t: Long) extends Serializable {
+
+ def ++ (that: SetProp): SetProp = {
+ val commonKeys = fields.keySet.intersect(that.fields.keySet)
+
+ val common: Map[String, PropTime] = commonKeys.map { k =>
+ val thisData = this.fields(k)
+ val thatData = that.fields(k)
+ // only keep the value with latest time
+ val v = if (thisData.t > thatData.t) thisData else thatData
+ (k, v)
+ }.toMap
+
+ val combinedFields = common ++
+ (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
+
+ // keep the latest set time
+ val combinedT = if (this.t > that.t) this.t else that.t
+
+ SetProp(
+ fields = combinedFields,
+ t = combinedT
+ )
+ }
+}
+
+private[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {
+ def ++ (that: UnsetProp): UnsetProp = {
+ val commonKeys = fields.keySet.intersect(that.fields.keySet)
+
+ val common: Map[String, Long] = commonKeys.map { k =>
+ val thisData = this.fields(k)
+ val thatData = that.fields(k)
+ // only keep the value with latest time
+ val v = if (thisData > thatData) thisData else thatData
+ (k, v)
+ }.toMap
+
+ val combinedFields = common ++
+ (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
+
+ UnsetProp(
+ fields = combinedFields
+ )
+ }
+}
+
+private[predictionio] case class DeleteEntity (t: Long) extends Serializable {
+ def ++ (that: DeleteEntity): DeleteEntity = {
+ if (this.t > that.t) this else that
+ }
+}
+
+private[predictionio] case class EventOp (
+ val setProp: Option[SetProp] = None,
+ val unsetProp: Option[UnsetProp] = None,
+ val deleteEntity: Option[DeleteEntity] = None
+) extends Serializable {
+
+ def ++ (that: EventOp): EventOp = {
+ EventOp(
+ setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),
+ unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),
+ deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)
+ )
+ }
+
+ def toDataMap(): Option[DataMap] = {
+ setProp.flatMap { set =>
+
+ val unsetKeys: Set[String] = unsetProp.map( unset =>
+ unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet
+ ).getOrElse(Set())
+
+ val combinedFields = deleteEntity.map { delete =>
+ if (delete.t >= set.t) {
+ None
+ } else {
+ val deleteKeys: Set[String] = set.fields
+ .filter { case (k, PropTime(kv, t)) =>
+ (delete.t >= t)
+ }.keySet
+ Some(set.fields -- unsetKeys -- deleteKeys)
+ }
+ }.getOrElse{
+ Some(set.fields -- unsetKeys)
+ }
+
+ // Note: mapValues() doesn't return concrete Map and causes
+ // NotSerializableException issue. Use map(identity) to work around this.
+ // see https://issues.scala-lang.org/browse/SI-7005
+ combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))
+ }
+ }
+
+}
+
+private[predictionio] object EventOp {
+ def apply(e: Event): EventOp = {
+ val t = e.eventTime.getMillis
+ e.event match {
+ case "$set" => {
+ val fields = e.properties.fields.mapValues(jv =>
+ PropTime(jv, t)
+ ).map(identity)
+
+ EventOp(
+ setProp = Some(SetProp(fields = fields, t = t))
+ )
+ }
+ case "$unset" => {
+ val fields = e.properties.fields.mapValues(jv => t).map(identity)
+ EventOp(
+ unsetProp = Some(UnsetProp(fields = fields))
+ )
+ }
+ case "$delete" => {
+ EventOp(
+ deleteEntity = Some(DeleteEntity(t))
+ )
+ }
+ case _ => {
+ EventOp()
+ }
+ }
+ }
+}
+
+@deprecated("Use PEvents or PEventStore instead.", "0.9.2")
+class PBatchView(
+ val appId: Int,
+ val startTime: Option[DateTime],
+ val untilTime: Option[DateTime],
+ val sc: SparkContext) {
+
+ // NOTE: parallel Events DB interface
+ @transient lazy val eventsDb = Storage.getPEvents()
+
+ @transient lazy val _events: RDD[Event] =
+ eventsDb.getByAppIdAndTimeAndEntity(
+ appId = appId,
+ startTime = startTime,
+ untilTime = untilTime,
+ entityType = None,
+ entityId = None)(sc)
+
+ // TODO: change to use EventSeq?
+ @transient lazy val events: RDD[Event] = _events
+
+ def aggregateProperties(
+ entityType: String,
+ startTimeOpt: Option[DateTime] = None,
+ untilTimeOpt: Option[DateTime] = None
+ ): RDD[(String, DataMap)] = {
+
+ _events
+ .filter( e => ((e.entityType == entityType) &&
+ (EventValidation.isSpecialEvents(e.event))) )
+ .map( e => (e.entityId, EventOp(e) ))
+ .aggregateByKey[EventOp](EventOp())(
+ // within same partition
+ seqOp = { case (u, v) => u ++ v },
+ // across partition
+ combOp = { case (accu, u) => accu ++ u }
+ )
+ .mapValues(_.toDataMap)
+ .filter{ case (k, v) => v.isDefined }
+ .map{ case (k, v) => (k, v.get) }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala
----------------------------------------------------------------------
diff --git a/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala b/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala
new file mode 100644
index 0000000..0652e0b
--- /dev/null
+++ b/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SQLContext
+
+object SparkVersionDependent {
+
+ def sqlSession(sc: SparkContext): SQLContext = {
+ return new SQLContext(sc)
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala
----------------------------------------------------------------------
diff --git a/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala b/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala
new file mode 100644
index 0000000..3d07bdf
--- /dev/null
+++ b/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SparkSession
+
+object SparkVersionDependent {
+
+ def sqlSession(sc: SparkContext): SparkSession = {
+ SparkSession.builder().getOrCreate()
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/e2/build.sbt
----------------------------------------------------------------------
diff --git a/e2/build.sbt b/e2/build.sbt
index 50de5e5..0774a55 100644
--- a/e2/build.sbt
+++ b/e2/build.sbt
@@ -15,14 +15,14 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-e2"
parallelExecution in Test := false
libraryDependencies ++= Seq(
- "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.spark" %% "spark-mllib" % sparkVersion.value % "provided",
- "org.clapper" %% "grizzled-slf4j" % "1.0.2",
"org.scalatest" %% "scalatest" % "2.2.5" % "test")
pomExtra := childrenPomExtra.value
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index c360c0e..e92178f 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -21,15 +21,12 @@ set -e
usage ()
{
- echo "Usage: $0 [-h|--help] [--with-es=x]"
+ echo "Usage: $0 [-h|--help]"
echo ""
echo " -h|--help Show usage"
- echo ""
- echo " --with-es=1 Build distribution with Elasticsearch 1 support as default"
- echo " --with-es=5 Build distribution with Elasticsearch 5 support as default"
}
-ES_VERSION=1
+JAVA_PROPS=()
for i in "$@"
do
@@ -39,8 +36,8 @@ case $i in
shift
exit
;;
- --with-es=*)
- ES_VERSION="${i#*=}"
+ -D*)
+ JAVA_PROPS+=("$i")
shift
;;
*)
@@ -50,23 +47,19 @@ case $i in
esac
done
-if [ "$ES_VERSION" = "1" ] || [ "$ES_VERSION" = "5" ]
-then
- echo -e "\033[0;32mBuilding with Elasticsearch $ES_VERSION support as the default choice\033[0m"
-else
- usage
- exit 1
-fi
-
FWDIR="$(cd `dirname $0`; pwd)"
DISTDIR="${FWDIR}/dist"
-VERSION=$(grep version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | sed 's/"//g')
+VERSION=$(grep ^version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | sed 's/"//g')
echo "Building binary distribution for PredictionIO $VERSION..."
cd ${FWDIR}
-sbt/sbt common/publishLocal data/publishLocal core/publishLocal e2/publishLocal dataElasticsearch1/assembly dataElasticsearch/assembly dataHbase/assembly dataHdfs/assembly dataJdbc/assembly dataLocalfs/assembly tools/assembly
+set -x
+sbt/sbt "${JAVA_PROPS[@]}" clean
+sbt/sbt "${JAVA_PROPS[@]}" printBuildInfo
+sbt/sbt "${JAVA_PROPS[@]}" publishLocal assembly storage/assembly
+set +x
cd ${FWDIR}
rm -rf ${DISTDIR}
@@ -74,8 +67,8 @@ mkdir -p ${DISTDIR}/bin
mkdir -p ${DISTDIR}/conf
mkdir -p ${DISTDIR}/lib
mkdir -p ${DISTDIR}/lib/spark
-mkdir -p ${DISTDIR}/lib/extra
mkdir -p ${DISTDIR}/project
+
mkdir -p ${DISTDIR}/sbt
cp ${FWDIR}/bin/* ${DISTDIR}/bin || :
@@ -85,13 +78,6 @@ cp ${FWDIR}/sbt/sbt ${DISTDIR}/sbt
cp ${FWDIR}/assembly/*assembly*jar ${DISTDIR}/lib
cp ${FWDIR}/assembly/spark/*jar ${DISTDIR}/lib/spark
-if [ "$ES_VERSION" = "5" ]
-then
- mv ${DISTDIR}/lib/spark/pio-data-elasticsearch1-assembly-*.jar ${DISTDIR}/lib/extra
-else
- mv ${DISTDIR}/lib/spark/pio-data-elasticsearch-assembly-*.jar ${DISTDIR}/lib/extra
-fi
-
rm -f ${DISTDIR}/lib/*javadoc.jar
rm -f ${DISTDIR}/lib/*sources.jar
rm -f ${DISTDIR}/conf/pio-env.sh
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/Build.scala
----------------------------------------------------------------------
diff --git a/project/Build.scala b/project/Build.scala
deleted file mode 100644
index 885073a..0000000
--- a/project/Build.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import sbt._
-import Keys._
-
-object PIOBuild extends Build {
- val elasticsearchVersion = SettingKey[String](
- "elasticsearch-version",
- "The version of Elasticsearch used for building.")
- val json4sVersion = SettingKey[String](
- "json4s-version",
- "The version of JSON4S used for building.")
- val sparkVersion = SettingKey[String](
- "spark-version",
- "The version of Apache Spark used for building.")
- val childrenPomExtra = SettingKey[scala.xml.NodeSeq](
- "children-pom-extra",
- "Extra POM data for children projects.")
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/PIOBuild.scala
----------------------------------------------------------------------
diff --git a/project/PIOBuild.scala b/project/PIOBuild.scala
new file mode 100644
index 0000000..30fca65
--- /dev/null
+++ b/project/PIOBuild.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import sbt._
+
+object PIOBuild {
+ val elasticsearchVersion = settingKey[String]("The version of Elasticsearch used for building")
+ val json4sVersion = settingKey[String]("The version of JSON4S used for building")
+ val sparkVersion = settingKey[String]("The version of Apache Spark used for building")
+ val sparkBinaryVersion = settingKey[String]("The binary version of Apache Spark used for building")
+ val hadoopVersion = settingKey[String]("The version of Apache Hadoop used for building")
+ val akkaVersion = settingKey[String]("The version of Akka used for building")
+
+ val childrenPomExtra = settingKey[scala.xml.NodeSeq]("Extra POM data for children projects")
+ val elasticsearchSparkArtifact = settingKey[String]("Name of Elasticsearch-Spark artifact used for building")
+
+ def binaryVersion(versionString: String): String = versionString.split('.').take(2).mkString(".")
+ def majorVersion(versionString: String): Int = versionString.split('.')(0).toInt
+ def minorVersion(versionString: String): Int = versionString.split('.')(1).toInt
+
+ lazy val printBuildInfo = taskKey[Unit]("Print build information")
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/assembly.sbt
----------------------------------------------------------------------
diff --git a/project/assembly.sbt b/project/assembly.sbt
index 49085ee..39c1bb8 100644
--- a/project/assembly.sbt
+++ b/project/assembly.sbt
@@ -1 +1 @@
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/plugins.sbt
----------------------------------------------------------------------
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 3edaf67..2f21e00 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -2,7 +2,7 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.3.2")
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0")
-addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.0.3")
+addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.1.1")
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "1.1")
@@ -10,4 +10,4 @@ addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0")
resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.1.0")
+addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.3.5")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/elasticsearch/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt
index 7d9b2ad..52dc965 100644
--- a/storage/elasticsearch/build.sbt
+++ b/storage/elasticsearch/build.sbt
@@ -15,36 +15,26 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-data-elasticsearch"
-elasticsearchVersion := "5.2.1"
+elasticsearchSparkArtifact := (if (majorVersion(sparkVersion.value) == 2) "elasticsearch-spark-20" else "elasticsearch-spark-13")
libraryDependencies ++= Seq(
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
- "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
- "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
- "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
- "org.elasticsearch.client" % "rest" % elasticsearchVersion.value,
- "org.elasticsearch" %% "elasticsearch-spark-13" % elasticsearchVersion.value
- exclude("org.apache.spark", "spark-sql_2.10")
- exclude("org.apache.spark", "spark-streaming_2.10"),
- "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value,
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
- "org.specs2" %% "specs2" % "2.3.13" % "test")
+ "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
+ "org.elasticsearch.client" % "rest" % elasticsearchVersion.value,
+ "org.elasticsearch" %% elasticsearchSparkArtifact.value % elasticsearchVersion.value
+ exclude("org.apache.spark", "*"),
+ "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value,
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test")
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
- case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
- case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
- case x =>
- val oldStrategy = (assemblyMergeStrategy in assembly).value
- oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
assemblyShadeRules in assembly := Seq(
ShadeRule.rename("org.apache.http.**" -> "shadeio.data.http.@1").inAll
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/elasticsearch1/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/build.sbt b/storage/elasticsearch1/build.sbt
index dde7285..bb68b11 100644
--- a/storage/elasticsearch1/build.sbt
+++ b/storage/elasticsearch1/build.sbt
@@ -15,30 +15,20 @@
* limitations under the License.
*/
-name := "apache-predictionio-data-elasticsearch1"
+import PIOBuild._
-elasticsearchVersion := "1.7.3"
+name := "apache-predictionio-data-elasticsearch1"
libraryDependencies ++= Seq(
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
- "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
"org.elasticsearch" % "elasticsearch" % elasticsearchVersion.value,
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
- "org.specs2" %% "specs2" % "2.3.13" % "test")
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test")
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
- case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
- case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
- case x =>
- val oldStrategy = (assemblyMergeStrategy in assembly).value
- oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
// skip test in assembly
test in assembly := {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/hbase/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hbase/build.sbt b/storage/hbase/build.sbt
index 513e294..19aa126 100644
--- a/storage/hbase/build.sbt
+++ b/storage/hbase/build.sbt
@@ -15,11 +15,12 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-data-hbase"
libraryDependencies ++= Seq(
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
- "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
"org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.hbase" % "hbase-common" % "0.98.5-hadoop2",
"org.apache.hbase" % "hbase-client" % "0.98.5-hadoop2"
@@ -32,22 +33,13 @@ libraryDependencies ++= Seq(
exclude("org.mortbay.jetty", "servlet-api-2.5")
exclude("org.mortbay.jetty", "jsp-api-2.1")
exclude("org.mortbay.jetty", "jsp-2.1"),
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
"org.specs2" %% "specs2" % "2.3.13" % "test")
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
- case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
- case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
- case x =>
- val oldStrategy = (assemblyMergeStrategy in assembly).value
- oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
// skip test in assembly
test in assembly := {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
deleted file mode 100644
index b453820..0000000
--- a/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.view
-
-import org.apache.predictionio.data.storage.hbase.HBPEvents
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.EventValidation
-import org.apache.predictionio.data.storage.DataMap
-import org.apache.predictionio.data.storage.Storage
-
-import org.joda.time.DateTime
-
-import org.json4s.JValue
-
-import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
-import org.apache.spark.rdd.RDD
-
-
-// each JValue data associated with the time it is set
-private[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable
-
-private[predictionio] case class SetProp (
- val fields: Map[String, PropTime],
- // last set time. Note: fields could be empty with valid set time
- val t: Long) extends Serializable {
-
- def ++ (that: SetProp): SetProp = {
- val commonKeys = fields.keySet.intersect(that.fields.keySet)
-
- val common: Map[String, PropTime] = commonKeys.map { k =>
- val thisData = this.fields(k)
- val thatData = that.fields(k)
- // only keep the value with latest time
- val v = if (thisData.t > thatData.t) thisData else thatData
- (k, v)
- }.toMap
-
- val combinedFields = common ++
- (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
-
- // keep the latest set time
- val combinedT = if (this.t > that.t) this.t else that.t
-
- SetProp(
- fields = combinedFields,
- t = combinedT
- )
- }
-}
-
-private[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {
- def ++ (that: UnsetProp): UnsetProp = {
- val commonKeys = fields.keySet.intersect(that.fields.keySet)
-
- val common: Map[String, Long] = commonKeys.map { k =>
- val thisData = this.fields(k)
- val thatData = that.fields(k)
- // only keep the value with latest time
- val v = if (thisData > thatData) thisData else thatData
- (k, v)
- }.toMap
-
- val combinedFields = common ++
- (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
-
- UnsetProp(
- fields = combinedFields
- )
- }
-}
-
-private[predictionio] case class DeleteEntity (t: Long) extends Serializable {
- def ++ (that: DeleteEntity): DeleteEntity = {
- if (this.t > that.t) this else that
- }
-}
-
-private[predictionio] case class EventOp (
- val setProp: Option[SetProp] = None,
- val unsetProp: Option[UnsetProp] = None,
- val deleteEntity: Option[DeleteEntity] = None
-) extends Serializable {
-
- def ++ (that: EventOp): EventOp = {
- EventOp(
- setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),
- unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),
- deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)
- )
- }
-
- def toDataMap(): Option[DataMap] = {
- setProp.flatMap { set =>
-
- val unsetKeys: Set[String] = unsetProp.map( unset =>
- unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet
- ).getOrElse(Set())
-
- val combinedFields = deleteEntity.map { delete =>
- if (delete.t >= set.t) {
- None
- } else {
- val deleteKeys: Set[String] = set.fields
- .filter { case (k, PropTime(kv, t)) =>
- (delete.t >= t)
- }.keySet
- Some(set.fields -- unsetKeys -- deleteKeys)
- }
- }.getOrElse{
- Some(set.fields -- unsetKeys)
- }
-
- // Note: mapValues() doesn't return concrete Map and causes
- // NotSerializableException issue. Use map(identity) to work around this.
- // see https://issues.scala-lang.org/browse/SI-7005
- combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))
- }
- }
-
-}
-
-private[predictionio] object EventOp {
- def apply(e: Event): EventOp = {
- val t = e.eventTime.getMillis
- e.event match {
- case "$set" => {
- val fields = e.properties.fields.mapValues(jv =>
- PropTime(jv, t)
- ).map(identity)
-
- EventOp(
- setProp = Some(SetProp(fields = fields, t = t))
- )
- }
- case "$unset" => {
- val fields = e.properties.fields.mapValues(jv => t).map(identity)
- EventOp(
- unsetProp = Some(UnsetProp(fields = fields))
- )
- }
- case "$delete" => {
- EventOp(
- deleteEntity = Some(DeleteEntity(t))
- )
- }
- case _ => {
- EventOp()
- }
- }
- }
-}
-
-@deprecated("Use PEvents or PEventStore instead.", "0.9.2")
-class PBatchView(
- val appId: Int,
- val startTime: Option[DateTime],
- val untilTime: Option[DateTime],
- val sc: SparkContext) {
-
- // NOTE: parallel Events DB interface
- @transient lazy val eventsDb = Storage.getPEvents()
-
- @transient lazy val _events: RDD[Event] =
- eventsDb.getByAppIdAndTimeAndEntity(
- appId = appId,
- startTime = startTime,
- untilTime = untilTime,
- entityType = None,
- entityId = None)(sc)
-
- // TODO: change to use EventSeq?
- @transient lazy val events: RDD[Event] = _events
-
- def aggregateProperties(
- entityType: String,
- startTimeOpt: Option[DateTime] = None,
- untilTimeOpt: Option[DateTime] = None
- ): RDD[(String, DataMap)] = {
-
- _events
- .filter( e => ((e.entityType == entityType) &&
- (EventValidation.isSpecialEvents(e.event))) )
- .map( e => (e.entityId, EventOp(e) ))
- .aggregateByKey[EventOp](EventOp())(
- // within same partition
- seqOp = { case (u, v) => u ++ v },
- // across partition
- combOp = { case (accu, u) => accu ++ u }
- )
- .mapValues(_.toDataMap)
- .filter{ case (k, v) => v.isDefined }
- .map{ case (k, v) => (k, v.get) }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/hdfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hdfs/build.sbt b/storage/hdfs/build.sbt
index 7ddc86c..8bae2a8 100644
--- a/storage/hdfs/build.sbt
+++ b/storage/hdfs/build.sbt
@@ -15,27 +15,25 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-data-hdfs"
libraryDependencies ++= Seq(
- "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.hadoop" % "hadoop-common" % hadoopVersion.value
+ exclude("commons-beanutils", "*"),
"org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
- "org.specs2" %% "specs2" % "2.3.13" % "test")
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test")
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
-assemblyMergeStrategy in assembly := {
- case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
- case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
- case x =>
- val oldStrategy = (assemblyMergeStrategy in assembly).value
- oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(
+ includeScala = false,
+ excludedJars = (fullClasspath in assembly).value.filter {_.data.getName startsWith "apache-predictionio"})
// skip test in assembly
test in assembly := {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/jdbc/build.sbt
----------------------------------------------------------------------
diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt
index 2011722..dc98e21 100644
--- a/storage/jdbc/build.sbt
+++ b/storage/jdbc/build.sbt
@@ -15,30 +15,22 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-data-jdbc"
libraryDependencies ++= Seq(
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
- "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
- "org.postgresql" % "postgresql" % "9.4-1204-jdbc41",
"org.scalikejdbc" %% "scalikejdbc" % "2.3.5",
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test",
"org.specs2" %% "specs2" % "2.3.13" % "test")
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
- case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
- case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
- case x =>
- val oldStrategy = (assemblyMergeStrategy in assembly).value
- oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
// skip test in assembly
test in assembly := {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
index 2e6ee83..ff16d5d 100644
--- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
@@ -15,16 +15,17 @@
* limitations under the License.
*/
-
package org.apache.predictionio.data.storage.jdbc
import java.sql.{DriverManager, ResultSet}
import com.github.nscala_time.time.Imports._
-import org.apache.predictionio.data.storage.{DataMap, Event, PEvents, StorageClientConfig}
+import org.apache.predictionio.data.storage.{
+ DataMap, Event, PEvents, StorageClientConfig}
+import org.apache.predictionio.data.SparkVersionDependent
import org.apache.spark.SparkContext
import org.apache.spark.rdd.{JdbcRDD, RDD}
-import org.apache.spark.sql.{SQLContext, SaveMode}
+import org.apache.spark.sql.SaveMode
import org.json4s.JObject
import org.json4s.native.Serialization
import scalikejdbc._
@@ -32,6 +33,7 @@ import scalikejdbc._
/** JDBC implementation of [[PEvents]] */
class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents {
@transient private implicit lazy val formats = org.json4s.DefaultFormats
+
def find(
appId: Int,
channelId: Option[Int] = None,
@@ -42,6 +44,7 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
eventNames: Option[Seq[String]] = None,
targetEntityType: Option[Option[String]] = None,
targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {
+
val lower = startTime.map(_.getMillis).getOrElse(0.toLong)
/** Change the default upper bound from +100 to +1 year because MySQL's
* FROM_UNIXTIME(t) will return NULL if we use +100 years.
@@ -118,13 +121,12 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
}
def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
- val sqlContext = new SQLContext(sc)
-
- import sqlContext.implicits._
+ val sqlSession = SparkVersionDependent.sqlSession(sc)
+ import sqlSession.implicits._
val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
- val eventTableColumns = Seq[String](
+ val eventsColumnNamesInDF = Seq[String](
"id"
, "event"
, "entityType"
@@ -139,11 +141,16 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
, "creationTime"
, "creationTimeZone")
+ // Necessary for handling postgres "case-sensitivity"
+ val eventsColumnNamesInSQL = JDBCUtils.driverType(client) match {
+ case "postgresql" => eventsColumnNamesInDF.map(_.toLowerCase)
+ case _ => eventsColumnNamesInDF
+ }
val eventDF = events.map(x =>
Event(eventId = None, event = x.event, entityType = x.entityType,
entityId = x.entityId, targetEntityType = x.targetEntityType,
targetEntityId = x.targetEntityId, properties = x.properties,
- eventTime = x.eventTime, tags = x.tags, prId= x.prId,
+ eventTime = x.eventTime, tags = x.tags, prId = x.prId,
creationTime = x.eventTime)
)
.map { event =>
@@ -160,9 +167,8 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
, event.prId
, new java.sql.Timestamp(event.creationTime.getMillis)
, event.creationTime.getZone.getID)
- }.toDF(eventTableColumns:_*)
+ }.toDF(eventsColumnNamesInSQL:_*)
- // spark version 1.4.0 or higher
val prop = new java.util.Properties
prop.setProperty("user", config.properties("USERNAME"))
prop.setProperty("password", config.properties("PASSWORD"))
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/localfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/localfs/build.sbt b/storage/localfs/build.sbt
index f9e101d..f8c6b8e 100644
--- a/storage/localfs/build.sbt
+++ b/storage/localfs/build.sbt
@@ -15,27 +15,19 @@
* limitations under the License.
*/
+import PIOBuild._
+
name := "apache-predictionio-data-localfs"
libraryDependencies ++= Seq(
"org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
- "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
- "org.scalatest" %% "scalatest" % "2.1.7" % "test",
- "org.specs2" %% "specs2" % "2.3.13" % "test")
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test")
parallelExecution in Test := false
pomExtra := childrenPomExtra.value
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
- case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
- case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
- case x =>
- val oldStrategy = (assemblyMergeStrategy in assembly).value
- oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
// skip test in assembly
test in assembly := {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/Dockerfile
----------------------------------------------------------------------
diff --git a/tests/Dockerfile b/tests/Dockerfile
index 94f5688..619bdf8 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -17,16 +17,25 @@
FROM predictionio/pio
-ENV SPARK_VERSION 1.6.3
-ENV ELASTICSEARCH_VERSION 5.2.1
-ENV HBASE_VERSION 1.0.0
+ARG SPARK_ARCHIVE
+ARG SPARK_DIR
+ARG PGSQL_JAR
+ARG PIO_SCALA_VERSION
+ARG PIO_SPARK_VERSION
+ARG PIO_ELASTICSEARCH_VERSION
+
+ENV PIO_SCALA_VERSION=$PIO_SCALA_VERSION
+ENV PIO_SPARK_VERSION=$PIO_SPARK_VERSION
+ENV PIO_ELASTICSEARCH_VERSION=$PIO_ELASTICSEARCH_VERSION
+
+ENV PGSQL_JAR=$PGSQL_JAR
-ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors
# WORKAROUND: es-hadoop stops on RDD#take(1)
-RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf
-ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6
+ADD docker-files/${SPARK_ARCHIVE} /vendors
+RUN echo "spark.locality.wait.node 0s" > /vendors/${SPARK_DIR}/conf/spark-defaults.conf
+ENV SPARK_HOME /vendors/${SPARK_DIR}
-COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar
+COPY docker-files/${PGSQL_JAR} /drivers/${PGSQL_JAR}
COPY docker-files/init.sh init.sh
COPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml
COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/Dockerfile-es1
----------------------------------------------------------------------
diff --git a/tests/Dockerfile-es1 b/tests/Dockerfile-es1
deleted file mode 100644
index 73d4c36..0000000
--- a/tests/Dockerfile-es1
+++ /dev/null
@@ -1,57 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-FROM predictionio/pio
-
-ENV SPARK_VERSION 1.6.3
-ENV ELASTICSEARCH_VERSION 1.7
-ENV HBASE_VERSION 1.0.0
-
-ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors
-# WORKAROUND: es-hadoop stops on RDD#take(1)
-RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf
-ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6
-
-COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar
-COPY docker-files/init.sh init.sh
-COPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml
-COPY docker-files/env-conf/pio-env-es1.sh ${PIO_HOME}/conf/pio-env.sh
-COPY docker-files/pgpass /root/.pgpass
-RUN chmod 600 /root/.pgpass
-
-# Python
-RUN pip install python-dateutil
-RUN pip install pytz
-
-# Default repositories setup
-ENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL
-ENV PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE PGSQL
-ENV PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE PGSQL
-
-# JVM settings
-ENV JVM_OPTS '-Dfile.encoding=UTF8 -Xms2048M -Xmx2048M -Xss8M -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=256M'
-
-# Expose relevant ports
-# pio engine
-EXPOSE 8000
-# eventserver
-EXPOSE 7070
-
-ENV SLEEP_TIME 30
-
-ENTRYPOINT ["/init.sh"]
-CMD 'bash'
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/README.md
----------------------------------------------------------------------
diff --git a/tests/README.md b/tests/README.md
index 95b3fdf..236d168 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -34,6 +34,12 @@ To download the image run:
$ docker pull predictionio/pio-testing
```
+To build the image use the script:
+```
+$ tests/docker-build.sh <image_name>
+```
+This is necessary to infer proper versions of dependencies e.g. Spark to be included in the image.
+
The most convenient way to make use of it is to execute ***run_docker.sh*** script passing it the configuration, the path to PredictionIO's repository with archived snapshot and the command to run. When no command is provided it opens a bash shell inside the docker image. Example of usage:
```sh
$ ./run_docker.sh ELASTICSEARCH HBASE LOCALFS \
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/after_script.travis.sh
----------------------------------------------------------------------
diff --git a/tests/after_script.travis.sh b/tests/after_script.travis.sh
index 8cbe2c0..e9d5792 100755
--- a/tests/after_script.travis.sh
+++ b/tests/after_script.travis.sh
@@ -18,12 +18,10 @@
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source $DIR/../conf/pio-vendors.sh
+
# Print a summary of containers used
docker ps -a
# Clean up used containers
-if [ "$ES_VERSION" = "1" ]; then
- docker-compose -f $DIR/docker-compose-es1.yml down
-else
- docker-compose -f $DIR/docker-compose.yml down
-fi
+docker-compose -f $DIR/docker-compose.yml down
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/before_script.travis.sh
----------------------------------------------------------------------
diff --git a/tests/before_script.travis.sh b/tests/before_script.travis.sh
index 9ec7a4a..5889a60 100755
--- a/tests/before_script.travis.sh
+++ b/tests/before_script.travis.sh
@@ -18,4 +18,4 @@
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-$DIR/build-docker.sh
+$DIR/build_docker.sh
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/build-docker.sh
----------------------------------------------------------------------
diff --git a/tests/build-docker.sh b/tests/build-docker.sh
deleted file mode 100755
index ed43715..0000000
--- a/tests/build-docker.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash -ex
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-if [ ! -f $DIR/docker-files/spark-1.6.3-bin-hadoop2.6.tgz ]; then
- wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz
- mv spark-1.6.3-bin-hadoop2.6.tgz $DIR/docker-files/
-fi
-
-if [ ! -f $DIR/docker-files/postgresql-9.4-1204.jdbc41.jar ]; then
- wget https://jdbc.postgresql.org/download/postgresql-9.4-1204.jdbc41.jar
- mv postgresql-9.4-1204.jdbc41.jar $DIR/docker-files/
-fi
-
-docker pull predictionio/pio-testing-base
-pushd $DIR/..
-if [ -z "$ES_VERSION" ]; then
- ./make-distribution.sh
-else
- ./make-distribution.sh --with-es=$ES_VERSION
-fi
-sbt/sbt clean
-mkdir assembly
-cp dist/lib/*.jar assembly/
-mkdir -p lib/spark
-cp dist/lib/spark/*.jar lib/spark
-rm *.tar.gz
-docker build -t predictionio/pio .
-popd
-
-if [ "$ES_VERSION" = "1" ]; then
- docker build -t predictionio/pio-testing-es1 -f $DIR/Dockerfile-es1 $DIR
-else
- docker build -t predictionio/pio-testing $DIR
-fi
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/build_docker.sh
----------------------------------------------------------------------
diff --git a/tests/build_docker.sh b/tests/build_docker.sh
new file mode 100755
index 0000000..af30cb8
--- /dev/null
+++ b/tests/build_docker.sh
@@ -0,0 +1,57 @@
+#!/bin/bash -ex
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+docker pull predictionio/pio-testing-base
+
+pushd $DIR/..
+
+source conf/pio-vendors.sh
+if [ ! -f $DIR/docker-files/${PGSQL_JAR} ]; then
+ wget $PGSQL_DOWNLOAD
+ mv ${PGSQL_JAR} $DIR/docker-files/
+fi
+if [ ! -f $DIR/docker-files/${SPARK_ARCHIVE} ]; then
+ wget $SPARK_DOWNLOAD
+ mv $SPARK_ARCHIVE $DIR/docker-files/
+fi
+
+./make-distribution.sh \
+ -Dscala.version=$PIO_SCALA_VERSION \
+ -Dspark.version=$PIO_SPARK_VERSION \
+ -Dhadoop.version=$PIO_HADOOP_VERSION \
+ -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION
+sbt/sbt clean storage/clean
+rm -rf assembly
+mkdir assembly
+cp dist/lib/*.jar assembly/
+rm -rf lib/spark
+mkdir -p lib/spark
+cp dist/lib/spark/*.jar lib/spark
+rm *.tar.gz
+docker build -t predictionio/pio .
+popd
+
+docker build -t predictionio/pio-testing $DIR \
+ --build-arg SPARK_ARCHIVE=$SPARK_ARCHIVE \
+ --build-arg SPARK_DIR=$SPARK_DIR \
+ --build-arg PGSQL_JAR=$PGSQL_JAR \
+ --build-arg PIO_SCALA_VERSION=$PIO_SCALA_VERSION \
+ --build-arg PIO_SPARK_VERSION=$PIO_SPARK_VERSION \
+ --build-arg PIO_ELASTICSEARCH_VERSION=$PIO_ELASTICSEARCH_VERSION
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-compose-es1.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose-es1.yml b/tests/docker-compose-es1.yml
deleted file mode 100644
index 381f41c..0000000
--- a/tests/docker-compose-es1.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-version: "2"
-services:
- elasticsearch:
- image: elasticsearch:1
- environment:
- - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
- hbase:
- image: harisekhon/hbase:1.0
- postgres:
- image: postgres:9
- environment:
- POSTGRES_USER: pio
- POSTGRES_PASSWORD: pio
- POSTGRES_INITDB_ARGS: --encoding=UTF8
- pio-testing:
- image: predictionio/pio-testing-es1:latest
- depends_on:
- - elasticsearch
- - hbase
- - postgres
- volumes:
- - ~/.ivy2:/root/.ivy2
- - ~/.sbt:/root/.sbt
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-compose.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index 06126a5..ac1d91d 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -16,7 +16,7 @@
version: "2"
services:
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:5.2.2
+ image: ${ES_IMAGE}:${ES_TAG}
environment:
- xpack.security.enabled=false
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-files/env-conf/pio-env-es1.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/env-conf/pio-env-es1.sh b/tests/docker-files/env-conf/pio-env-es1.sh
deleted file mode 100644
index e1076ba..0000000
--- a/tests/docker-files/env-conf/pio-env-es1.sh
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copy this file as pio-env.sh and edit it for your site's configuration.
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# PredictionIO Main Configuration
-#
-# This section controls core behavior of PredictionIO. It is very likely that
-# you need to change these to fit your site.
-
-# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
-SPARK_HOME=$SPARK_HOME
-
-POSTGRES_JDBC_DRIVER=/drivers/postgresql-9.4-1204.jdbc41.jar
-MYSQL_JDBC_DRIVER=
-
-# ES_CONF_DIR: You must configure this if you have advanced configuration for
-# your Elasticsearch setup.
-# ES_CONF_DIR=/opt/elasticsearch
-
-# HADOOP_CONF_DIR: You must configure this if you intend to run PredictionIO
-# with Hadoop 2.
-# HADOOP_CONF_DIR=/opt/hadoop
-
-# HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO
-# with HBase on a remote cluster.
-HBASE_CONF_DIR=$PIO_HOME/conf
-
-# Filesystem paths where PredictionIO uses as block storage.
-PIO_FS_BASEDIR=$HOME/.pio_store
-PIO_FS_ENGINESDIR=$PIO_FS_BASEDIR/engines
-PIO_FS_TMPDIR=$PIO_FS_BASEDIR/tmp
-
-# PredictionIO Storage Configuration
-#
-# This section controls programs that make use of PredictionIO's built-in
-# storage facilities. Default values are shown below.
-#
-# For more information on storage configuration please refer to
-# https://docs.prediction.io/system/anotherdatastore/
-
-# Storage Repositories
-
-# Default is to use PostgreSQL
-PIO_STORAGE_REPOSITORIES_METADATA_NAME=pio_meta
-PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$PIO_STORAGE_REPOSITORIES_METADATA_SOURCE
-
-PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=pio_event
-PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE
-
-PIO_STORAGE_REPOSITORIES_MODELDATA_NAME=pio_model
-PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE
-
-# Storage Data Sources
-
-# PostgreSQL Default Settings
-# Please change "pio" to your database name in PIO_STORAGE_SOURCES_PGSQL_URL
-# Please change PIO_STORAGE_SOURCES_PGSQL_USERNAME and
-# PIO_STORAGE_SOURCES_PGSQL_PASSWORD accordingly
-PIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc
-PIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql://postgres/pio
-PIO_STORAGE_SOURCES_PGSQL_USERNAME=pio
-PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
-
-# MySQL Example
-# PIO_STORAGE_SOURCES_MYSQL_TYPE=jdbc
-# PIO_STORAGE_SOURCES_MYSQL_URL=jdbc:mysql://localhost/pio
-# PIO_STORAGE_SOURCES_MYSQL_USERNAME=pio
-# PIO_STORAGE_SOURCES_MYSQL_PASSWORD=pio
-
-# Elasticsearch Example
-PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
-#PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio
-PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch
-PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300
-#PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME
-
-# Local File System Example
-PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs
-PIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/local_models
-
-# HBase Example
-PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
-#PIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME
-
-# HDFS config
-PIO_STORAGE_SOURCES_HDFS_TYPE=hdfs
-PIO_STORAGE_SOURCES_HDFS_PATH=/hdfs_models