Posted to commits@predictionio.apache.org by do...@apache.org on 2017/03/08 07:45:56 UTC
[1/7] incubator-predictionio git commit: [PIO-49] Add support for Elasticsearch 5
Repository: incubator-predictionio
Updated Branches:
refs/heads/develop 8fd59fdf1 -> 31c4bd192
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
new file mode 100644
index 0000000..b9ec957
--- /dev/null
+++ b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.localfs
+
+import java.io.File
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+
+class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
+ with Logging {
+ override val prefix = "LocalFS"
+ val f = new File(
+ config.properties.getOrElse("PATH", config.properties("HOSTS")))
+ if (f.exists) {
+ if (!f.isDirectory) throw new StorageClientException(
+ s"${f} already exists but it is not a directory!",
+ null)
+ if (!f.canWrite) throw new StorageClientException(
+ s"${f} already exists but it is not writable!",
+ null)
+ } else {
+ if (!f.mkdirs) throw new StorageClientException(
+ s"${f} does not exist and automatic creation failed!",
+ null)
+ }
+ val client = f
+}
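The client above only validates or creates the target directory and then exposes it as a plain java.io.File. A minimal usage sketch, assuming StorageClientConfig is the usual case class carrying a properties map (the code reads PATH first and falls back to the legacy HOSTS key); the path value is illustrative:

  import org.apache.predictionio.data.storage.StorageClientConfig
  import org.apache.predictionio.data.storage.localfs.StorageClient

  // PATH is preferred; HOSTS is only consulted when PATH is absent.
  val config = StorageClientConfig(properties = Map("PATH" -> "/tmp/pio_models"))
  val client = new StorageClient(config) // creates /tmp/pio_models if missing
  println(client.client.getAbsolutePath)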
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
----------------------------------------------------------------------
diff --git a/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
new file mode 100644
index 0000000..554ab26
--- /dev/null
+++ b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** Local file system implementation of storage traits, supporting model data only
+ *
+ * @group Implementation
+ */
+package object localfs {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/localfs/src/test/resources/application.conf b/storage/localfs/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/localfs/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+ sources {
+ mongodb {
+ type = mongodb
+ hosts = [localhost]
+ ports = [27017]
+ }
+ elasticsearch {
+ type = elasticsearch
+ hosts = [localhost]
+ ports = [9300]
+ }
+ }
+ repositories {
+ # This section is a dummy whose only purpose is to make the storage layer happy.
+ # The actual tests do not bypass these repository settings completely.
+ # Please refer to StorageTestUtils.scala.
+ settings {
+ name = "test_predictionio"
+ source = mongodb
+ }
+
+ appdata {
+ name = "test_predictionio_appdata"
+ source = mongodb
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/Dockerfile
----------------------------------------------------------------------
diff --git a/tests/Dockerfile b/tests/Dockerfile
index d1d048a..94f5688 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -17,11 +17,13 @@
FROM predictionio/pio
-ENV SPARK_VERSION 1.4.0
-ENV ELASTICSEARCH_VERSION 1.4.4
+ENV SPARK_VERSION 1.6.3
+ENV ELASTICSEARCH_VERSION 5.2.1
ENV HBASE_VERSION 1.0.0
ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors
+# WORKAROUND: es-hadoop stalls on RDD#take(1) unless node locality wait is disabled
+RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf
ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6
COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar
@@ -31,6 +33,10 @@ COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh
COPY docker-files/pgpass /root/.pgpass
RUN chmod 600 /root/.pgpass
+# Python dependencies for the integration tests
+RUN pip install python-dateutil
+RUN pip install pytz
+
# Default repositories setup
ENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL
ENV PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE PGSQL
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/build-docker.sh
----------------------------------------------------------------------
diff --git a/tests/build-docker.sh b/tests/build-docker.sh
index dd95168..459b929 100755
--- a/tests/build-docker.sh
+++ b/tests/build-docker.sh
@@ -17,9 +17,9 @@
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-if [ ! -f $DIR/docker-files/spark-1.4.0-bin-hadoop2.6.tgz ]; then
- wget http://d3kbcqa49mib13.cloudfront.net/spark-1.4.0-bin-hadoop2.6.tgz
- mv spark-1.4.0-bin-hadoop2.6.tgz $DIR/docker-files/
+if [ ! -f $DIR/docker-files/spark-1.6.3-bin-hadoop2.6.tgz ]; then
+ wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz
+ mv spark-1.6.3-bin-hadoop2.6.tgz $DIR/docker-files/
fi
if [ ! -f $DIR/docker-files/postgresql-9.4-1204.jdbc41.jar ]; then
@@ -33,6 +33,8 @@ pushd $DIR/..
sbt/sbt clean
mkdir assembly
cp dist/lib/*.jar assembly/
+mkdir -p lib/spark
+cp dist/lib/spark/*.jar lib/spark
docker build -t predictionio/pio .
popd
docker build -t predictionio/pio-testing $DIR
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/docker-compose.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index 3939a0b..b556f7b 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -16,7 +16,7 @@
version: "2"
services:
elasticsearch:
- image: elasticsearch:1-alpine
+ image: elasticsearch:5-alpine
hbase:
image: harisekhon/hbase:1.0
postgres:
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/docker-files/env-conf/pio-env.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/env-conf/pio-env.sh b/tests/docker-files/env-conf/pio-env.sh
index e1076ba..0acf3a7 100644
--- a/tests/docker-files/env-conf/pio-env.sh
+++ b/tests/docker-files/env-conf/pio-env.sh
@@ -87,7 +87,8 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
#PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio
PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch
-PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300
+PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http
+PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
#PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME
# Local File System Example
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/pio_tests/scenarios/eventserver_test.py
----------------------------------------------------------------------
diff --git a/tests/pio_tests/scenarios/eventserver_test.py b/tests/pio_tests/scenarios/eventserver_test.py
index c09e815..6f29876 100644
--- a/tests/pio_tests/scenarios/eventserver_test.py
+++ b/tests/pio_tests/scenarios/eventserver_test.py
@@ -19,6 +19,8 @@ import unittest
import requests
import json
import argparse
+import dateutil.parser
+import pytz
from subprocess import Popen
from utils import AppEngine, pjoin
from pio_tests.integration import BaseTestCase, AppContext
@@ -155,7 +157,8 @@ class EventserverTest(BaseTestCase):
'reversed': 'true' }
r = self.app.get_events(params=params)
self.assertEqual(5, len(r.json()))
- self.assertEqual('2014-11-05T09:39:45.618-08:00', r.json()[0]['eventTime'])
+ event_time = dateutil.parser.parse(r.json()[0]['eventTime']).astimezone(pytz.utc)
+ self.assertEqual('2014-11-05 17:39:45.618000+00:00', str(event_time))
def tearDown(self):
self.log.info("Deleting all app data")
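The assertion change above first normalizes the returned event time to UTC, because Elasticsearch 5 may serialize the stored time in a different zone representation than the one it was submitted in. The same normalization in Joda-Time terms (the library PIO's storage layer uses throughout); purely illustrative:

  import org.joda.time.{DateTime, DateTimeZone}

  val stored = DateTime.parse("2014-11-05T09:39:45.618-08:00")
  val utc = stored.withZone(DateTimeZone.UTC)
  assert(utc.toString == "2014-11-05T17:39:45.618Z")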
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/run_docker.sh
----------------------------------------------------------------------
diff --git a/tests/run_docker.sh b/tests/run_docker.sh
index 9f28d1c..fe07957 100755
--- a/tests/run_docker.sh
+++ b/tests/run_docker.sh
@@ -19,7 +19,7 @@
USAGE=$"Usage: run_docker <meta> <event> <model> <command>
Where:
meta = [PGSQL,ELASTICSEARCH]
- event = [PGSQL,HBASE]
+ event = [PGSQL,HBASE,ELASTICSEARCH]
model = [PGSQL,LOCALFS,HDFS]
command = command to run in the container"
@@ -30,7 +30,7 @@ fi
META="$1"
shift
-if ! [[ "$1" =~ ^(PGSQL|HBASE)$ ]]; then
+if ! [[ "$1" =~ ^(PGSQL|HBASE|ELASTICSEARCH)$ ]]; then
echo "$USAGE"
exit 1
fi
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/Common.scala b/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
index 6c56615..7d04c07 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
@@ -102,6 +102,13 @@ object Common extends EitherLogging {
if (targetFiles.size > 0) targetFiles else libFiles
}
+ def jarFilesForSpark(pioHome: String): Array[File] = {
+ def jarFilesAt(path: File): Array[File] = path.listFiles filter {
+ _.getName.toLowerCase.endsWith(".jar")
+ }
+ jarFilesAt(new File(pioHome, "lib/spark"))
+ }
+
def coreAssembly(pioHome: String): Expected[File] = {
val core = s"pio-assembly-${BuildInfo.version}.jar"
val coreDir =
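jarFilesForSpark is what lets the now-separate storage backends reach Spark jobs: each storage module's assembly jar ends up under <pioHome>/lib/spark at install time (the new build.sbt files below write to assembly/spark), and this helper collects them. A usage sketch in which the PIO_HOME fallback path is an example; only the directory layout is assumed beyond the code above:

  import java.io.File

  val pioHome = sys.env.getOrElse("PIO_HOME", "/opt/pio") // example path
  val pluginJars: Array[File] = Common.jarFilesForSpark(pioHome)
  pluginJars.foreach(j => println(s"--jars candidate: ${j.getName}"))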
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala b/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
index 4f5a176..662dbbf 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
@@ -160,7 +160,8 @@ object Runner extends EitherLogging {
val sparkSubmitCommand =
Seq(Seq(sparkHome, "bin", "spark-submit").mkString(File.separator))
- val sparkSubmitJarsList = WorkflowUtils.thirdPartyJars ++ deployedJars
+ val sparkSubmitJarsList = WorkflowUtils.thirdPartyJars ++ deployedJars ++
+ Common.jarFilesForSpark(pioHome).map(_.toURI)
val sparkSubmitJars = if (sparkSubmitJarsList.nonEmpty) {
Seq("--jars", sparkSubmitJarsList.map(_.toString).mkString(","))
} else {
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala b/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
index 4656457..69a3924 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
@@ -297,7 +297,9 @@ object Engine extends EitherLogging {
val extraFiles = WorkflowUtils.thirdPartyConfFiles
val jarFiles = jarFilesForScala(engineDirPath)
jarFiles foreach { f => info(s"Found JAR: ${f.getName}") }
- val allJarFiles = jarFiles.map(_.getCanonicalPath)
+ val jarPluginFiles = jarFilesForSpark(pioHome)
+ jarPluginFiles foreach { f => info(s"Found JAR: ${f.getName}") }
+ val allJarFiles = jarFiles.map(_.getCanonicalPath) ++ jarPluginFiles.map(_.getCanonicalPath)
val cmd = s"${getSparkHome(sparkArgs.sparkHome)}/bin/spark-submit --jars " +
s"${allJarFiles.mkString(",")} " +
[2/7] incubator-predictionio git commit: [PIO-49] Add support for Elasticsearch 5
Posted by do...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala
new file mode 100644
index 0000000..de74d46
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hbase.upgrade
+
+import org.apache.predictionio.annotation.Experimental
+
+import grizzled.slf4j.Logger
+import org.apache.predictionio.data.storage.Storage
+import org.apache.predictionio.data.storage.DataMap
+import org.apache.predictionio.data.storage.hbase.HBLEvents
+import org.apache.predictionio.data.storage.hbase.HBEventsUtil
+
+import scala.collection.JavaConversions._
+
+import scala.concurrent._
+import ExecutionContext.Implicits.global
+import org.apache.predictionio.data.storage.LEvents
+import scala.concurrent.Await
+import scala.concurrent.duration.Duration
+import java.lang.Thread
+
+object CheckDistribution {
+ def entityType(eventClient: LEvents, appId: Int)
+ : Map[(String, Option[String]), Int] = {
+ eventClient
+ .find(appId = appId)
+ .foldLeft(Map[(String, Option[String]), Int]().withDefaultValue(0)) {
+ case (m, e) => {
+ val k = (e.entityType, e.targetEntityType)
+ m.updated(k, m(k) + 1)
+ }
+ }
+ }
+
+ def runMain(appId: Int) {
+ val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents]
+
+ entityType(eventClient, appId)
+ .toSeq
+ .sortBy(-_._2)
+ .foreach { println }
+
+ }
+
+ def main(args: Array[String]) {
+ runMain(args(0).toInt)
+ }
+
+}
+
+/** :: Experimental :: */
+@Experimental
+object Upgrade_0_8_3 {
+ val NameMap = Map(
+ "pio_user" -> "user",
+ "pio_item" -> "item")
+ val RevNameMap = NameMap.toSeq.map(_.swap).toMap
+
+ val logger = Logger[this.type]
+
+ def main(args: Array[String]) {
+ val fromAppId = args(0).toInt
+ val toAppId = args(1).toInt
+
+ runMain(fromAppId, toAppId)
+ }
+
+ def runMain(fromAppId: Int, toAppId: Int): Unit = {
+ upgrade(fromAppId, toAppId)
+ }
+
+
+ val obsEntityTypes = Set("pio_user", "pio_item")
+ val obsProperties = Set(
+ "pio_itypes", "pio_starttime", "pio_endtime",
+ "pio_inactive", "pio_price", "pio_rating")
+
+ def hasPIOPrefix(eventClient: LEvents, appId: Int): Boolean = {
+ eventClient.find(appId = appId).filter( e =>
+ (obsEntityTypes.contains(e.entityType) ||
+ e.targetEntityType.map(obsEntityTypes.contains(_)).getOrElse(false) ||
+ (!e.properties.keySet.forall(!obsProperties.contains(_)))
+ )
+ ).hasNext
+ }
+
+ def isEmpty(eventClient: LEvents, appId: Int): Boolean =
+ !eventClient.find(appId = appId).hasNext
+
+
+ def upgradeCopy(eventClient: LEvents, fromAppId: Int, toAppId: Int) {
+ val fromDist = CheckDistribution.entityType(eventClient, fromAppId)
+
+ logger.info("FromAppId Distribution")
+ fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) }
+
+ val events = eventClient
+ .find(appId = fromAppId)
+ .zipWithIndex
+ .foreach { case (fromEvent, index) => {
+ if (index % 50000 == 0) {
+ // logger.info(s"Progress: $fromEvent $index")
+ logger.info(s"Progress: $index")
+ }
+
+
+ val fromEntityType = fromEvent.entityType
+ val toEntityType = NameMap.getOrElse(fromEntityType, fromEntityType)
+
+ val fromTargetEntityType = fromEvent.targetEntityType
+ val toTargetEntityType = fromTargetEntityType
+ .map { et => NameMap.getOrElse(et, et) }
+
+ val toProperties = DataMap(fromEvent.properties.fields.map {
+ case (k, v) =>
+ val newK = if (obsProperties.contains(k)) {
+ val nK = k.stripPrefix("pio_")
+ logger.info(s"property ${k} will be renamed to ${nK}")
+ nK
+ } else k
+ (newK, v)
+ })
+
+ val toEvent = fromEvent.copy(
+ entityType = toEntityType,
+ targetEntityType = toTargetEntityType,
+ properties = toProperties)
+
+ eventClient.insert(toEvent, toAppId)
+ }}
+
+
+ val toDist = CheckDistribution.entityType(eventClient, toAppId)
+
+ logger.info("Recap fromAppId Distribution")
+ fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) }
+
+ logger.info("ToAppId Distribution")
+ toDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) }
+
+ val fromGood = fromDist
+ .toSeq
+ .forall { case (k, c) => {
+ val (et, tet) = k
+ val net = NameMap.getOrElse(et, et)
+ val ntet = tet.map(tet => NameMap.getOrElse(tet, tet))
+ val nk = (net, ntet)
+ val nc = toDist.getOrElse(nk, -1)
+ val checkMatch = (c == nc)
+ if (!checkMatch) {
+ logger.info(s"${k} doesn't match: old has ${c}. new has ${nc}.")
+ }
+ checkMatch
+ }}
+
+ val toGood = toDist
+ .toSeq
+ .forall { case (k, c) => {
+ val (et, tet) = k
+ val oet = RevNameMap.getOrElse(et, et)
+ val otet = tet.map(tet => RevNameMap.getOrElse(tet, tet))
+ val ok = (oet, otet)
+ val oc = fromDist.getOrElse(ok, -1)
+ val checkMatch = (c == oc)
+ if (!checkMatch) {
+ logger.info(s"${k} doesn't match: new has ${c}. old has ${oc}.")
+ }
+ checkMatch
+ }}
+
+ if (!fromGood || !toGood) {
+ logger.error("Doesn't match!! There is an import error.")
+ } else {
+ logger.info("Count matches. Looks like we are good to go.")
+ }
+ }
+
+ /* For upgrade from 0.8.2 to 0.8.3 only */
+ def upgrade(fromAppId: Int, toAppId: Int) {
+
+ val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents]
+
+ require(fromAppId != toAppId,
+ s"FromAppId: $fromAppId must be different from toAppId: $toAppId")
+
+ if (hasPIOPrefix(eventClient, fromAppId)) {
+ require(
+ isEmpty(eventClient, toAppId),
+ s"Target appId: $toAppId is not empty. Please run " +
+ "`pio app data-delete <app_name>` to clean the data before upgrading")
+
+ logger.info(s"$fromAppId isEmpty: " + isEmpty(eventClient, fromAppId))
+
+ upgradeCopy(eventClient, fromAppId, toAppId)
+
+ } else {
+ logger.info(s"From appId: ${fromAppId} doesn't contain"
+ + s" obsolete entityTypes ${obsEntityTypes} or"
+ + s" obsolete properties ${obsProperties}."
+ + " No need data migration."
+ + s" You can continue to use appId ${fromAppId}.")
+ }
+
+ logger.info("Done.")
+ }
+
+
+}
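For reference, the upgrade tool is driven either through main(args) or directly; a minimal invocation sketch in which the app ids are examples:

  // Copies events from app 1 into app 2, renaming the pio_user/pio_item
  // entity types and stripping the pio_ prefix from obsolete properties,
  // then cross-checks the per-entity-type counts on both sides.
  Upgrade_0_8_3.runMain(fromAppId = 1, toAppId = 2)

  // Or inspect the (entityType, targetEntityType) distribution first:
  CheckDistribution.runMain(1)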
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
new file mode 100644
index 0000000..b453820
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.view
+
+import org.apache.predictionio.data.storage.hbase.HBPEvents
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.EventValidation
+import org.apache.predictionio.data.storage.DataMap
+import org.apache.predictionio.data.storage.Storage
+
+import org.joda.time.DateTime
+
+import org.json4s.JValue
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
+
+
+// each JValue is stored together with the time at which it was set
+private[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable
+
+private[predictionio] case class SetProp (
+ val fields: Map[String, PropTime],
+ // last set time. Note: fields can be empty while the set time is still valid
+ val t: Long) extends Serializable {
+
+ def ++ (that: SetProp): SetProp = {
+ val commonKeys = fields.keySet.intersect(that.fields.keySet)
+
+ val common: Map[String, PropTime] = commonKeys.map { k =>
+ val thisData = this.fields(k)
+ val thatData = that.fields(k)
+ // only keep the value with latest time
+ val v = if (thisData.t > thatData.t) thisData else thatData
+ (k, v)
+ }.toMap
+
+ val combinedFields = common ++
+ (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
+
+ // keep the latest set time
+ val combinedT = if (this.t > that.t) this.t else that.t
+
+ SetProp(
+ fields = combinedFields,
+ t = combinedT
+ )
+ }
+}
+
+private[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {
+ def ++ (that: UnsetProp): UnsetProp = {
+ val commonKeys = fields.keySet.intersect(that.fields.keySet)
+
+ val common: Map[String, Long] = commonKeys.map { k =>
+ val thisData = this.fields(k)
+ val thatData = that.fields(k)
+ // only keep the value with latest time
+ val v = if (thisData > thatData) thisData else thatData
+ (k, v)
+ }.toMap
+
+ val combinedFields = common ++
+ (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
+
+ UnsetProp(
+ fields = combinedFields
+ )
+ }
+}
+
+private[predictionio] case class DeleteEntity (t: Long) extends Serializable {
+ def ++ (that: DeleteEntity): DeleteEntity = {
+ if (this.t > that.t) this else that
+ }
+}
+
+private[predictionio] case class EventOp (
+ val setProp: Option[SetProp] = None,
+ val unsetProp: Option[UnsetProp] = None,
+ val deleteEntity: Option[DeleteEntity] = None
+) extends Serializable {
+
+ def ++ (that: EventOp): EventOp = {
+ EventOp(
+ setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),
+ unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),
+ deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)
+ )
+ }
+
+ def toDataMap(): Option[DataMap] = {
+ setProp.flatMap { set =>
+
+ val unsetKeys: Set[String] = unsetProp.map( unset =>
+ unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet
+ ).getOrElse(Set())
+
+ val combinedFields = deleteEntity.map { delete =>
+ if (delete.t >= set.t) {
+ None
+ } else {
+ val deleteKeys: Set[String] = set.fields
+ .filter { case (k, PropTime(kv, t)) =>
+ (delete.t >= t)
+ }.keySet
+ Some(set.fields -- unsetKeys -- deleteKeys)
+ }
+ }.getOrElse{
+ Some(set.fields -- unsetKeys)
+ }
+
+ // Note: mapValues() doesn't return a concrete Map, which can cause a
+ // NotSerializableException. Use map(identity) to work around this.
+ // see https://issues.scala-lang.org/browse/SI-7005
+ combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))
+ }
+ }
+
+}
+
+private[predictionio] object EventOp {
+ def apply(e: Event): EventOp = {
+ val t = e.eventTime.getMillis
+ e.event match {
+ case "$set" => {
+ val fields = e.properties.fields.mapValues(jv =>
+ PropTime(jv, t)
+ ).map(identity)
+
+ EventOp(
+ setProp = Some(SetProp(fields = fields, t = t))
+ )
+ }
+ case "$unset" => {
+ val fields = e.properties.fields.mapValues(jv => t).map(identity)
+ EventOp(
+ unsetProp = Some(UnsetProp(fields = fields))
+ )
+ }
+ case "$delete" => {
+ EventOp(
+ deleteEntity = Some(DeleteEntity(t))
+ )
+ }
+ case _ => {
+ EventOp()
+ }
+ }
+ }
+}
+
+@deprecated("Use PEvents or PEventStore instead.", "0.9.2")
+class PBatchView(
+ val appId: Int,
+ val startTime: Option[DateTime],
+ val untilTime: Option[DateTime],
+ val sc: SparkContext) {
+
+ // NOTE: parallel Events DB interface
+ @transient lazy val eventsDb = Storage.getPEvents()
+
+ @transient lazy val _events: RDD[Event] =
+ eventsDb.getByAppIdAndTimeAndEntity(
+ appId = appId,
+ startTime = startTime,
+ untilTime = untilTime,
+ entityType = None,
+ entityId = None)(sc)
+
+ // TODO: change to use EventSeq?
+ @transient lazy val events: RDD[Event] = _events
+
+ def aggregateProperties(
+ entityType: String,
+ startTimeOpt: Option[DateTime] = None,
+ untilTimeOpt: Option[DateTime] = None
+ ): RDD[(String, DataMap)] = {
+
+ _events
+ .filter( e => ((e.entityType == entityType) &&
+ (EventValidation.isSpecialEvents(e.event))) )
+ .map( e => (e.entityId, EventOp(e) ))
+ .aggregateByKey[EventOp](EventOp())(
+ // within same partition
+ seqOp = { case (u, v) => u ++ v },
+ // across partition
+ combOp = { case (accu, u) => accu ++ u }
+ )
+ .mapValues(_.toDataMap)
+ .filter{ case (k, v) => v.isDefined }
+ .map{ case (k, v) => (k, v.get) }
+ }
+
+}
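A usage sketch for the (deprecated) view, assuming an active SparkContext sc and an existing app id; aggregateProperties folds the special $set/$unset/$delete events into the latest surviving DataMap per entity:

  import org.apache.predictionio.data.storage.DataMap
  import org.apache.predictionio.data.view.PBatchView
  import org.apache.spark.rdd.RDD

  val view = new PBatchView(appId = 1, startTime = None, untilTime = None, sc = sc)
  val userProps: RDD[(String, DataMap)] = view.aggregateProperties("user")
  userProps.take(3).foreach { case (id, props) => println(s"$id -> $props") }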
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/hbase/src/test/resources/application.conf b/storage/hbase/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/hbase/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+ sources {
+ mongodb {
+ type = mongodb
+ hosts = [localhost]
+ ports = [27017]
+ }
+ elasticsearch {
+ type = elasticsearch
+ hosts = [localhost]
+ ports = [9300]
+ }
+ }
+ repositories {
+ # This section is a dummy whose only purpose is to make the storage layer happy.
+ # The actual tests do not bypass these repository settings completely.
+ # Please refer to StorageTestUtils.scala.
+ settings {
+ name = "test_predictionio"
+ source = mongodb
+ }
+
+ appdata {
+ name = "test_predictionio_appdata"
+ source = mongodb
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hdfs/.gitignore
----------------------------------------------------------------------
diff --git a/storage/hdfs/.gitignore b/storage/hdfs/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/hdfs/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hdfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hdfs/build.sbt b/storage/hdfs/build.sbt
new file mode 100644
index 0000000..9f064c6
--- /dev/null
+++ b/storage/hdfs/build.sbt
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+name := "apache-predictionio-data-hdfs"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+
+assemblyMergeStrategy in assembly := {
+ case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
+ case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
+
+// skip tests when building the assembly
+test in assembly := {}
+
+outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-hdfs-assembly-" + version.value + ".jar")
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala
----------------------------------------------------------------------
diff --git a/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala b/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala
new file mode 100644
index 0000000..08dfb01
--- /dev/null
+++ b/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hdfs
+
+import java.io.IOException
+
+import com.google.common.io.ByteStreams
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.Model
+import org.apache.predictionio.data.storage.Models
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+
+class HDFSModels(fs: FileSystem, config: StorageClientConfig, prefix: String)
+ extends Models with Logging {
+
+ def insert(i: Model): Unit = {
+ try {
+ val fsdos = fs.create(new Path(s"$prefix${i.id}"))
+ fsdos.write(i.models)
+ fsdos.close
+ } catch {
+ case e: IOException => error(e.getMessage)
+ }
+ }
+
+ def get(id: String): Option[Model] = {
+ try {
+ val p = new Path(s"$prefix$id")
+ Some(Model(
+ id = id,
+ models = ByteStreams.toByteArray(fs.open(p))))
+ } catch {
+ case e: Throwable =>
+ error(e.getMessage)
+ None
+ }
+ }
+
+ def delete(id: String): Unit = {
+ val p = new Path(s"$prefix$id")
+ if (!fs.delete(p, false)) {
+ error(s"Unable to delete ${fs.makeQualified(p).toString}!")
+ }
+ }
+}
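A round-trip sketch against HDFSModels, assuming a reachable Hadoop FileSystem (obtained as in the StorageClient below) and default client configuration; the prefix, model id, and payload are examples:

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.fs.FileSystem
  import org.apache.predictionio.data.storage.{Model, StorageClientConfig}
  import org.apache.predictionio.data.storage.hdfs.HDFSModels

  val fs = FileSystem.get(new Configuration)
  val models = new HDFSModels(fs, StorageClientConfig(), "pio_")
  models.insert(Model(id = "m1", models = Array[Byte](1, 2, 3)))
  val loaded = models.get("m1") // Some(Model(...)) on success, None on error
  models.delete("m1")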
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala b/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala
new file mode 100644
index 0000000..bc57f2a
--- /dev/null
+++ b/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hdfs
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+
+class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
+ with Logging {
+ override val prefix = "HDFS"
+ val conf = new Configuration
+ val fs = FileSystem.get(conf)
+ fs.setWorkingDirectory(
+ new Path(config.properties.getOrElse("PATH", config.properties("HOSTS"))))
+ val client = fs
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala
----------------------------------------------------------------------
diff --git a/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala b/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala
new file mode 100644
index 0000000..a927d78
--- /dev/null
+++ b/storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** HDFS implementation of storage traits, supporting model data only
+ *
+ * @group Implementation
+ */
+package object hdfs {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hdfs/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/hdfs/src/test/resources/application.conf b/storage/hdfs/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/hdfs/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+ sources {
+ mongodb {
+ type = mongodb
+ hosts = [localhost]
+ ports = [27017]
+ }
+ elasticsearch {
+ type = elasticsearch
+ hosts = [localhost]
+ ports = [9300]
+ }
+ }
+ repositories {
+ # This section is a dummy whose only purpose is to make the storage layer happy.
+ # The actual tests do not bypass these repository settings completely.
+ # Please refer to StorageTestUtils.scala.
+ settings {
+ name = "test_predictionio"
+ source = mongodb
+ }
+
+ appdata {
+ name = "test_predictionio_appdata"
+ source = mongodb
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/.gitignore
----------------------------------------------------------------------
diff --git a/storage/jdbc/.gitignore b/storage/jdbc/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/jdbc/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/build.sbt
----------------------------------------------------------------------
diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt
new file mode 100644
index 0000000..63d420b
--- /dev/null
+++ b/storage/jdbc/build.sbt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+name := "apache-predictionio-data-jdbc"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
+ "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
+ "org.postgresql" % "postgresql" % "9.4-1204-jdbc41",
+ "org.scalikejdbc" %% "scalikejdbc" % "2.3.5",
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+
+assemblyMergeStrategy in assembly := {
+ case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
+ case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
+
+// skip tests when building the assembly
+test in assembly := {}
+
+outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-jdbc-assembly-" + version.value + ".jar")
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala
new file mode 100644
index 0000000..437f8ae
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.AccessKey
+import org.apache.predictionio.data.storage.AccessKeys
+import org.apache.predictionio.data.storage.StorageClientConfig
+import scalikejdbc._
+
+import scala.util.Random
+
+/** JDBC implementation of [[AccessKeys]] */
+class JDBCAccessKeys(client: String, config: StorageClientConfig, prefix: String)
+ extends AccessKeys with Logging {
+ /** Database table name for this data access object */
+ val tableName = JDBCUtils.prefixTableName(prefix, "accesskeys")
+ DB autoCommit { implicit session =>
+ sql"""
+ create table if not exists $tableName (
+ accesskey varchar(64) not null primary key,
+ appid integer not null,
+ events text)""".execute().apply()
+ }
+
+ def insert(accessKey: AccessKey): Option[String] = DB localTx { implicit s =>
+ val key = if (accessKey.key.isEmpty) generateKey else accessKey.key
+ val events = if (accessKey.events.isEmpty) None else Some(accessKey.events.mkString(","))
+ sql"""
+ insert into $tableName values(
+ $key,
+ ${accessKey.appid},
+ $events)""".update().apply()
+ Some(key)
+ }
+
+ def get(key: String): Option[AccessKey] = DB readOnly { implicit session =>
+ sql"SELECT accesskey, appid, events FROM $tableName WHERE accesskey = $key".
+ map(resultToAccessKey).single().apply()
+ }
+
+ def getAll(): Seq[AccessKey] = DB readOnly { implicit session =>
+ sql"SELECT accesskey, appid, events FROM $tableName".map(resultToAccessKey).list().apply()
+ }
+
+ def getByAppid(appid: Int): Seq[AccessKey] = DB readOnly { implicit session =>
+ sql"SELECT accesskey, appid, events FROM $tableName WHERE appid = $appid".
+ map(resultToAccessKey).list().apply()
+ }
+
+ def update(accessKey: AccessKey): Unit = DB localTx { implicit session =>
+ val events = if (accessKey.events.isEmpty) None else Some(accessKey.events.mkString(","))
+ sql"""
+ UPDATE $tableName SET
+ appid = ${accessKey.appid},
+ events = $events
+ WHERE accesskey = ${accessKey.key}""".update().apply()
+ }
+
+ def delete(key: String): Unit = DB localTx { implicit session =>
+ sql"DELETE FROM $tableName WHERE accesskey = $key".update().apply()
+ }
+
+ /** Convert JDBC results to [[AccessKey]] */
+ def resultToAccessKey(rs: WrappedResultSet): AccessKey = {
+ AccessKey(
+ key = rs.string("accesskey"),
+ appid = rs.int("appid"),
+ events = rs.stringOpt("events").map(_.split(",").toSeq).getOrElse(Nil))
+ }
+}
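A sketch of this DAO in use, assuming scalikejdbc's connection pool has already been initialized by the storage layer; the key is left empty so insert substitutes generateKey (a helper on the AccessKeys trait), and the app id and event list are examples:

  import org.apache.predictionio.data.storage.{AccessKey, StorageClientConfig}
  import org.apache.predictionio.data.storage.jdbc.JDBCAccessKeys

  val keys = new JDBCAccessKeys("jdbc", StorageClientConfig(), "pio")
  val created = keys.insert(AccessKey(key = "", appid = 1, events = Seq("view", "buy")))
  created.foreach(k => println(keys.get(k))) // Some(AccessKey(...))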
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala
new file mode 100644
index 0000000..17e6410
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.App
+import org.apache.predictionio.data.storage.Apps
+import org.apache.predictionio.data.storage.StorageClientConfig
+import scalikejdbc._
+
+/** JDBC implementation of [[Apps]] */
+class JDBCApps(client: String, config: StorageClientConfig, prefix: String)
+ extends Apps with Logging {
+ /** Database table name for this data access object */
+ val tableName = JDBCUtils.prefixTableName(prefix, "apps")
+ DB autoCommit { implicit session =>
+ sql"""
+ create table if not exists $tableName (
+ id serial not null primary key,
+ name text not null,
+ description text)""".execute.apply()
+ }
+
+ def insert(app: App): Option[Int] = DB localTx { implicit session =>
+ val q = if (app.id == 0) {
+ sql"""
+ insert into $tableName (name, description) values(${app.name}, ${app.description})
+ """
+ } else {
+ sql"""
+ insert into $tableName values(${app.id}, ${app.name}, ${app.description})
+ """
+ }
+ Some(q.updateAndReturnGeneratedKey().apply().toInt)
+ }
+
+ def get(id: Int): Option[App] = DB readOnly { implicit session =>
+ sql"SELECT id, name, description FROM $tableName WHERE id = ${id}".map(rs =>
+ App(
+ id = rs.int("id"),
+ name = rs.string("name"),
+ description = rs.stringOpt("description"))
+ ).single().apply()
+ }
+
+ def getByName(name: String): Option[App] = DB readOnly { implicit session =>
+ sql"SELECT id, name, description FROM $tableName WHERE name = ${name}".map(rs =>
+ App(
+ id = rs.int("id"),
+ name = rs.string("name"),
+ description = rs.stringOpt("description"))
+ ).single().apply()
+ }
+
+ def getAll(): Seq[App] = DB readOnly { implicit session =>
+ sql"SELECT id, name, description FROM $tableName".map(rs =>
+ App(
+ id = rs.int("id"),
+ name = rs.string("name"),
+ description = rs.stringOpt("description"))
+ ).list().apply()
+ }
+
+ def update(app: App): Unit = DB localTx { implicit session =>
+ sql"""
+ update $tableName set name = ${app.name}, description = ${app.description}
+ where id = ${app.id}""".update().apply()
+ }
+
+ def delete(id: Int): Unit = DB localTx { implicit session =>
+ sql"DELETE FROM $tableName WHERE id = $id".update().apply()
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala
new file mode 100644
index 0000000..c9aaca5
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.Channel
+import org.apache.predictionio.data.storage.Channels
+import org.apache.predictionio.data.storage.StorageClientConfig
+import scalikejdbc._
+
+/** JDBC implementation of [[Channels]] */
+class JDBCChannels(client: String, config: StorageClientConfig, prefix: String)
+ extends Channels with Logging {
+ /** Database table name for this data access object */
+ val tableName = JDBCUtils.prefixTableName(prefix, "channels")
+ DB autoCommit { implicit session =>
+ sql"""
+ create table if not exists $tableName (
+ id serial not null primary key,
+ name text not null,
+ appid integer not null)""".execute().apply()
+ }
+
+ def insert(channel: Channel): Option[Int] = DB localTx { implicit session =>
+ val q = if (channel.id == 0) {
+ sql"INSERT INTO $tableName (name, appid) VALUES(${channel.name}, ${channel.appid})"
+ } else {
+ sql"INSERT INTO $tableName VALUES(${channel.id}, ${channel.name}, ${channel.appid})"
+ }
+ Some(q.updateAndReturnGeneratedKey().apply().toInt)
+ }
+
+ def get(id: Int): Option[Channel] = DB localTx { implicit session =>
+ sql"SELECT id, name, appid FROM $tableName WHERE id = $id".
+ map(resultToChannel).single().apply()
+ }
+
+ def getByAppid(appid: Int): Seq[Channel] = DB localTx { implicit session =>
+ sql"SELECT id, name, appid FROM $tableName WHERE appid = $appid".
+ map(resultToChannel).list().apply()
+ }
+
+ def delete(id: Int): Unit = DB localTx { implicit session =>
+ sql"DELETE FROM $tableName WHERE id = $id".update().apply()
+ }
+
+ def resultToChannel(rs: WrappedResultSet): Channel = {
+ Channel(
+ id = rs.int("id"),
+ name = rs.string("name"),
+ appid = rs.int("appid"))
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala
new file mode 100644
index 0000000..13c374d
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.EngineInstance
+import org.apache.predictionio.data.storage.EngineInstances
+import org.apache.predictionio.data.storage.StorageClientConfig
+import scalikejdbc._
+
+/** JDBC implementation of [[EngineInstances]] */
+class JDBCEngineInstances(client: String, config: StorageClientConfig, prefix: String)
+ extends EngineInstances with Logging {
+ /** Database table name for this data access object */
+ val tableName = JDBCUtils.prefixTableName(prefix, "engineinstances")
+ DB autoCommit { implicit session =>
+ sql"""
+ create table if not exists $tableName (
+ id varchar(100) not null primary key,
+ status text not null,
+ startTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ endTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ engineId text not null,
+ engineVersion text not null,
+ engineVariant text not null,
+ engineFactory text not null,
+ batch text not null,
+ env text not null,
+ sparkConf text not null,
+ datasourceParams text not null,
+ preparatorParams text not null,
+ algorithmsParams text not null,
+ servingParams text not null)""".execute().apply()
+ }
+
+ def insert(i: EngineInstance): String = DB localTx { implicit session =>
+ val id = java.util.UUID.randomUUID().toString
+ sql"""
+ INSERT INTO $tableName VALUES(
+ $id,
+ ${i.status},
+ ${i.startTime},
+ ${i.endTime},
+ ${i.engineId},
+ ${i.engineVersion},
+ ${i.engineVariant},
+ ${i.engineFactory},
+ ${i.batch},
+ ${JDBCUtils.mapToString(i.env)},
+ ${JDBCUtils.mapToString(i.sparkConf)},
+ ${i.dataSourceParams},
+ ${i.preparatorParams},
+ ${i.algorithmsParams},
+ ${i.servingParams})""".update().apply()
+ id
+ }
+
+ def get(id: String): Option[EngineInstance] = DB localTx { implicit session =>
+ sql"""
+ SELECT
+ id,
+ status,
+ startTime,
+ endTime,
+ engineId,
+ engineVersion,
+ engineVariant,
+ engineFactory,
+ batch,
+ env,
+ sparkConf,
+ datasourceParams,
+ preparatorParams,
+ algorithmsParams,
+ servingParams
+ FROM $tableName WHERE id = $id""".map(resultToEngineInstance).
+ single().apply()
+ }
+
+ def getAll(): Seq[EngineInstance] = DB localTx { implicit session =>
+ sql"""
+ SELECT
+ id,
+ status,
+ startTime,
+ endTime,
+ engineId,
+ engineVersion,
+ engineVariant,
+ engineFactory,
+ batch,
+ env,
+ sparkConf,
+ datasourceParams,
+ preparatorParams,
+ algorithmsParams,
+ servingParams
+ FROM $tableName""".map(resultToEngineInstance).list().apply()
+ }
+
+ def getLatestCompleted(
+ engineId: String,
+ engineVersion: String,
+ engineVariant: String): Option[EngineInstance] =
+ getCompleted(engineId, engineVersion, engineVariant).headOption
+
+ def getCompleted(
+ engineId: String,
+ engineVersion: String,
+ engineVariant: String): Seq[EngineInstance] = DB localTx { implicit s =>
+ sql"""
+ SELECT
+ id,
+ status,
+ startTime,
+ endTime,
+ engineId,
+ engineVersion,
+ engineVariant,
+ engineFactory,
+ batch,
+ env,
+ sparkConf,
+ datasourceParams,
+ preparatorParams,
+ algorithmsParams,
+ servingParams
+ FROM $tableName
+ WHERE
+ status = 'COMPLETED' AND
+ engineId = $engineId AND
+ engineVersion = $engineVersion AND
+ engineVariant = $engineVariant
+ ORDER BY startTime DESC""".
+ map(resultToEngineInstance).list().apply()
+ }
+
+ def update(i: EngineInstance): Unit = DB localTx { implicit session =>
+ sql"""
+ update $tableName set
+ status = ${i.status},
+ startTime = ${i.startTime},
+ endTime = ${i.endTime},
+ engineId = ${i.engineId},
+ engineVersion = ${i.engineVersion},
+ engineVariant = ${i.engineVariant},
+ engineFactory = ${i.engineFactory},
+ batch = ${i.batch},
+ env = ${JDBCUtils.mapToString(i.env)},
+ sparkConf = ${JDBCUtils.mapToString(i.sparkConf)},
+ datasourceParams = ${i.dataSourceParams},
+ preparatorParams = ${i.preparatorParams},
+ algorithmsParams = ${i.algorithmsParams},
+ servingParams = ${i.servingParams}
+ where id = ${i.id}""".update().apply()
+ }
+
+ def delete(id: String): Unit = DB localTx { implicit session =>
+ sql"DELETE FROM $tableName WHERE id = $id".update().apply()
+ }
+
+ /** Convert JDBC results to [[EngineInstance]] */
+ def resultToEngineInstance(rs: WrappedResultSet): EngineInstance = {
+ EngineInstance(
+ id = rs.string("id"),
+ status = rs.string("status"),
+ startTime = rs.jodaDateTime("startTime"),
+ endTime = rs.jodaDateTime("endTime"),
+ engineId = rs.string("engineId"),
+ engineVersion = rs.string("engineVersion"),
+ engineVariant = rs.string("engineVariant"),
+ engineFactory = rs.string("engineFactory"),
+ batch = rs.string("batch"),
+ env = JDBCUtils.stringToMap(rs.string("env")),
+ sparkConf = JDBCUtils.stringToMap(rs.string("sparkConf")),
+ dataSourceParams = rs.string("datasourceParams"),
+ preparatorParams = rs.string("preparatorParams"),
+ algorithmsParams = rs.string("algorithmsParams"),
+ servingParams = rs.string("servingParams"))
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala
new file mode 100644
index 0000000..90eb5f3
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.EvaluationInstance
+import org.apache.predictionio.data.storage.EvaluationInstances
+import org.apache.predictionio.data.storage.StorageClientConfig
+import scalikejdbc._
+
+/** JDBC implementation of [[EvaluationInstances]] */
+class JDBCEvaluationInstances(client: String, config: StorageClientConfig, prefix: String)
+ extends EvaluationInstances with Logging {
+ /** Database table name for this data access object */
+ val tableName = JDBCUtils.prefixTableName(prefix, "evaluationinstances")
+ DB autoCommit { implicit session =>
+ sql"""
+ create table if not exists $tableName (
+ id varchar(100) not null primary key,
+ status text not null,
+ startTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ endTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ evaluationClass text not null,
+ engineParamsGeneratorClass text not null,
+ batch text not null,
+ env text not null,
+ sparkConf text not null,
+ evaluatorResults text not null,
+ evaluatorResultsHTML text not null,
+ evaluatorResultsJSON text)""".execute().apply()
+ }
+
+ def insert(i: EvaluationInstance): String = DB localTx { implicit session =>
+ val id = java.util.UUID.randomUUID().toString
+ sql"""
+ INSERT INTO $tableName VALUES(
+ $id,
+ ${i.status},
+ ${i.startTime},
+ ${i.endTime},
+ ${i.evaluationClass},
+ ${i.engineParamsGeneratorClass},
+ ${i.batch},
+ ${JDBCUtils.mapToString(i.env)},
+ ${JDBCUtils.mapToString(i.sparkConf)},
+ ${i.evaluatorResults},
+ ${i.evaluatorResultsHTML},
+ ${i.evaluatorResultsJSON})""".update().apply()
+ id
+ }
+
+ def get(id: String): Option[EvaluationInstance] = DB localTx { implicit session =>
+ sql"""
+ SELECT
+ id,
+ status,
+ startTime,
+ endTime,
+ evaluationClass,
+ engineParamsGeneratorClass,
+ batch,
+ env,
+ sparkConf,
+ evaluatorResults,
+ evaluatorResultsHTML,
+ evaluatorResultsJSON
+ FROM $tableName WHERE id = $id
+ """.map(resultToEvaluationInstance).single().apply()
+ }
+
+ def getAll(): Seq[EvaluationInstance] = DB localTx { implicit session =>
+ sql"""
+ SELECT
+ id,
+ status,
+ startTime,
+ endTime,
+ evaluationClass,
+ engineParamsGeneratorClass,
+ batch,
+ env,
+ sparkConf,
+ evaluatorResults,
+ evaluatorResultsHTML,
+ evaluatorResultsJSON
+ FROM $tableName
+ """.map(resultToEvaluationInstance).list().apply()
+ }
+
+ def getCompleted(): Seq[EvaluationInstance] = DB localTx { implicit s =>
+ sql"""
+ SELECT
+ id,
+ status,
+ startTime,
+ endTime,
+ evaluationClass,
+ engineParamsGeneratorClass,
+ batch,
+ env,
+ sparkConf,
+ evaluatorResults,
+ evaluatorResultsHTML,
+ evaluatorResultsJSON
+ FROM $tableName
+ WHERE
+ status = 'EVALCOMPLETED'
+ ORDER BY startTime DESC
+ """.map(resultToEvaluationInstance).list().apply()
+ }
+
+ def update(i: EvaluationInstance): Unit = DB localTx { implicit session =>
+ sql"""
+ update $tableName set
+ status = ${i.status},
+ startTime = ${i.startTime},
+ endTime = ${i.endTime},
+ evaluationClass = ${i.evaluationClass},
+ engineParamsGeneratorClass = ${i.engineParamsGeneratorClass},
+ batch = ${i.batch},
+ env = ${JDBCUtils.mapToString(i.env)},
+ sparkConf = ${JDBCUtils.mapToString(i.sparkConf)},
+ evaluatorResults = ${i.evaluatorResults},
+ evaluatorResultsHTML = ${i.evaluatorResultsHTML},
+ evaluatorResultsJSON = ${i.evaluatorResultsJSON}
+ where id = ${i.id}""".update().apply()
+ }
+
+ def delete(id: String): Unit = DB localTx { implicit session =>
+ sql"DELETE FROM $tableName WHERE id = $id".update().apply()
+ }
+
+ /** Convert JDBC results to [[EvaluationInstance]] */
+ def resultToEvaluationInstance(rs: WrappedResultSet): EvaluationInstance = {
+ EvaluationInstance(
+ id = rs.string("id"),
+ status = rs.string("status"),
+ startTime = rs.jodaDateTime("startTime"),
+ endTime = rs.jodaDateTime("endTime"),
+ evaluationClass = rs.string("evaluationClass"),
+ engineParamsGeneratorClass = rs.string("engineParamsGeneratorClass"),
+ batch = rs.string("batch"),
+ env = JDBCUtils.stringToMap(rs.string("env")),
+ sparkConf = JDBCUtils.stringToMap(rs.string("sparkConf")),
+ evaluatorResults = rs.string("evaluatorResults"),
+ evaluatorResultsHTML = rs.string("evaluatorResultsHTML"),
+ evaluatorResultsJSON = rs.string("evaluatorResultsJSON"))
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala
new file mode 100644
index 0000000..dddef67
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.DataMap
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.LEvents
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.joda.time.DateTime
+import org.joda.time.DateTimeZone
+import org.json4s.JObject
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+import scalikejdbc._
+
+import scala.concurrent.ExecutionContext
+import scala.concurrent.Future
+
+/** JDBC implementation of [[LEvents]] */
+class JDBCLEvents(
+ client: String,
+ config: StorageClientConfig,
+ namespace: String) extends LEvents with Logging {
+ implicit private val formats = org.json4s.DefaultFormats
+
+ def init(appId: Int, channelId: Option[Int] = None): Boolean = {
+
+ // To be indexable, these columns must be VARCHAR of at most 255 characters rather than TEXT
+ val useIndex = config.properties.contains("INDEX") &&
+ config.properties("INDEX").equalsIgnoreCase("enabled")
+
+ val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
+ val entityIdIndexName = s"idx_${tableName}_ei"
+ val entityTypeIndexName = s"idx_${tableName}_et"
+ DB autoCommit { implicit session =>
+ if (useIndex) {
+ SQL(s"""
+ create table if not exists $tableName (
+ id varchar(32) not null primary key,
+ event varchar(255) not null,
+ entityType varchar(255) not null,
+ entityId varchar(255) not null,
+ targetEntityType text,
+ targetEntityId text,
+ properties text,
+ eventTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ eventTimeZone varchar(50) not null,
+ tags text,
+ prId text,
+ creationTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ creationTimeZone varchar(50) not null)""").execute().apply()
+
+ // create index
+ SQL(s"create index $entityIdIndexName on $tableName (entityId)").execute().apply()
+ SQL(s"create index $entityTypeIndexName on $tableName (entityType)").execute().apply()
+ } else {
+ SQL(s"""
+ create table if not exists $tableName (
+ id varchar(32) not null primary key,
+ event text not null,
+ entityType text not null,
+ entityId text not null,
+ targetEntityType text,
+ targetEntityId text,
+ properties text,
+ eventTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ eventTimeZone varchar(50) not null,
+ tags text,
+ prId text,
+ creationTime timestamp DEFAULT CURRENT_TIMESTAMP,
+ creationTimeZone varchar(50) not null)""").execute().apply()
+ }
+ true
+ }
+ }
+
+ def remove(appId: Int, channelId: Option[Int] = None): Boolean =
+ DB autoCommit { implicit session =>
+ SQL(s"""
+ drop table ${JDBCUtils.eventTableName(namespace, appId, channelId)}
+ """).execute().apply()
+ true
+ }
+
+ def close(): Unit = ConnectionPool.closeAll()
+
+ def futureInsert(event: Event, appId: Int, channelId: Option[Int])(
+ implicit ec: ExecutionContext): Future[String] = Future {
+ DB localTx { implicit session =>
+ val id = event.eventId.getOrElse(JDBCUtils.generateId)
+ val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
+ sql"""
+ insert into $tableName values(
+ $id,
+ ${event.event},
+ ${event.entityType},
+ ${event.entityId},
+ ${event.targetEntityType},
+ ${event.targetEntityId},
+ ${write(event.properties.toJObject)},
+ ${event.eventTime},
+ ${event.eventTime.getZone.getID},
+ ${if (event.tags.nonEmpty) Some(event.tags.mkString(",")) else None},
+ ${event.prId},
+ ${event.creationTime},
+ ${event.creationTime.getZone.getID}
+ )
+ """.update().apply()
+ id
+ }
+ }
+
+ def futureGet(eventId: String, appId: Int, channelId: Option[Int])(
+ implicit ec: ExecutionContext): Future[Option[Event]] = Future {
+ DB readOnly { implicit session =>
+ val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
+ sql"""
+ select
+ id,
+ event,
+ entityType,
+ entityId,
+ targetEntityType,
+ targetEntityId,
+ properties,
+ eventTime,
+ eventTimeZone,
+ tags,
+ prId,
+ creationTime,
+ creationTimeZone
+ from $tableName
+ where id = $eventId
+ """.map(resultToEvent).single().apply()
+ }
+ }
+
+ def futureDelete(eventId: String, appId: Int, channelId: Option[Int])(
+ implicit ec: ExecutionContext): Future[Boolean] = Future {
+ DB localTx { implicit session =>
+ val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
+ sql"""
+ delete from $tableName where id = $eventId
+ """.update().apply()
+ true
+ }
+ }
+
+ def futureFind(
+ appId: Int,
+ channelId: Option[Int] = None,
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None,
+ limit: Option[Int] = None,
+ reversed: Option[Boolean] = None
+ )(implicit ec: ExecutionContext): Future[Iterator[Event]] = Future {
+ DB readOnly { implicit session =>
+ val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
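+ // Keep only the filters that are defined and join them with AND; when none are given, the WHERE clause is omitted entirely.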
+ val whereClause = sqls.toAndConditionOpt(
+ startTime.map(x => sqls"eventTime >= $x"),
+ untilTime.map(x => sqls"eventTime < $x"),
+ entityType.map(x => sqls"entityType = $x"),
+ entityId.map(x => sqls"entityId = $x"),
+ eventNames.map(x =>
+ sqls.toOrConditionOpt(x.map(y =>
+ Some(sqls"event = $y")
+ ): _*)
+ ).getOrElse(None),
+ targetEntityType.map(x => x.map(y => sqls"targetEntityType = $y")
+ .getOrElse(sqls"targetEntityType IS NULL")),
+ targetEntityId.map(x => x.map(y => sqls"targetEntityId = $y")
+ .getOrElse(sqls"targetEntityId IS NULL"))
+ ).map(sqls.where(_)).getOrElse(sqls"")
+ val orderByClause = reversed.map(x =>
+ if (x) sqls"eventTime desc" else sqls"eventTime asc"
+ ).getOrElse(sqls"eventTime asc")
+ val limitClause = limit.map(x =>
+ if (x < 0) sqls"" else sqls.limit(x)
+ ).getOrElse(sqls"")
+ val q = sql"""
+ select
+ id,
+ event,
+ entityType,
+ entityId,
+ targetEntityType,
+ targetEntityId,
+ properties,
+ eventTime,
+ eventTimeZone,
+ tags,
+ prId,
+ creationTime,
+ creationTimeZone
+ from $tableName
+ $whereClause
+ order by $orderByClause
+ $limitClause
+ """
+ q.map(resultToEvent).list().apply().toIterator
+ }
+ }
+
+ private[predictionio] def resultToEvent(rs: WrappedResultSet): Event = {
+ Event(
+ eventId = rs.stringOpt("id"),
+ event = rs.string("event"),
+ entityType = rs.string("entityType"),
+ entityId = rs.string("entityId"),
+ targetEntityType = rs.stringOpt("targetEntityType"),
+ targetEntityId = rs.stringOpt("targetEntityId"),
+ properties = rs.stringOpt("properties").map(p =>
+ DataMap(read[JObject](p))).getOrElse(DataMap()),
+ eventTime = new DateTime(rs.jodaDateTime("eventTime"),
+ DateTimeZone.forID(rs.string("eventTimeZone"))),
+ tags = rs.stringOpt("tags").map(t => t.split(",").toList).getOrElse(Nil),
+ prId = rs.stringOpt("prId"),
+ creationTime = new DateTime(rs.jodaDateTime("creationTime"),
+ DateTimeZone.forID(rs.string("creationTimeZone")))
+ )
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala
new file mode 100644
index 0000000..b48502a
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.Model
+import org.apache.predictionio.data.storage.Models
+import org.apache.predictionio.data.storage.StorageClientConfig
+import scalikejdbc._
+
+/** JDBC implementation of [[Models]] */
+class JDBCModels(client: String, config: StorageClientConfig, prefix: String)
+ extends Models with Logging {
+ /** Database table name for this data access object */
+ val tableName = JDBCUtils.prefixTableName(prefix, "models")
+
+ /** Determines binary column type based on JDBC driver type */
+ val binaryColumnType = JDBCUtils.binaryColumnType(client)
+ DB autoCommit { implicit session =>
+ sql"""
+ create table if not exists $tableName (
+ id varchar(100) not null primary key,
+ models $binaryColumnType not null)""".execute().apply()
+ }
+
+ def insert(i: Model): Unit = DB localTx { implicit session =>
+ sql"insert into $tableName values(${i.id}, ${i.models})".update().apply()
+ }
+
+ def get(id: String): Option[Model] = DB readOnly { implicit session =>
+ sql"select id, models from $tableName where id = $id".map { r =>
+ Model(id = r.string("id"), models = r.bytes("models"))
+ }.single().apply()
+ }
+
+ def delete(id: String): Unit = DB localTx { implicit session =>
+ sql"delete from $tableName where id = $id".execute().apply()
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
new file mode 100644
index 0000000..2e6ee83
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import java.sql.{DriverManager, ResultSet}
+
+import com.github.nscala_time.time.Imports._
+import org.apache.predictionio.data.storage.{DataMap, Event, PEvents, StorageClientConfig}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.{JdbcRDD, RDD}
+import org.apache.spark.sql.{SQLContext, SaveMode}
+import org.json4s.JObject
+import org.json4s.native.Serialization
+import scalikejdbc._
+
+/** JDBC implementation of [[PEvents]] */
+class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents {
+ @transient private implicit lazy val formats = org.json4s.DefaultFormats
+ def find(
+ appId: Int,
+ channelId: Option[Int] = None,
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {
+ val lower = startTime.map(_.getMillis).getOrElse(0.toLong)
+ /** Change the default upper bound from +100 years to +1 year, because
+ * MySQL's FROM_UNIXTIME(t) returns NULL for a timestamp that far in the future.
+ */
+ val upper = untilTime.map(_.getMillis).getOrElse((DateTime.now + 1.years).getMillis)
+ val par = scala.math.min(
+ new Duration(upper - lower).getStandardDays,
+ config.properties.getOrElse("PARTITIONS", "4").toLong).toInt
+ val entityTypeClause = entityType.map(x => s"and entityType = '$x'").getOrElse("")
+ val entityIdClause = entityId.map(x => s"and entityId = '$x'").getOrElse("")
+ val eventNamesClause =
+ eventNames.map("and (" + _.map(y => s"event = '$y'").mkString(" or ") + ")").getOrElse("")
+ val targetEntityTypeClause = targetEntityType.map(
+ _.map(x => s"and targetEntityType = '$x'"
+ ).getOrElse("and targetEntityType is null")).getOrElse("")
+ val targetEntityIdClause = targetEntityId.map(
+ _.map(x => s"and targetEntityId = '$x'"
+ ).getOrElse("and targetEntityId is null")).getOrElse("")
+ val q = s"""
+ select
+ id,
+ event,
+ entityType,
+ entityId,
+ targetEntityType,
+ targetEntityId,
+ properties,
+ eventTime,
+ eventTimeZone,
+ tags,
+ prId,
+ creationTime,
+ creationTimeZone
+ from ${JDBCUtils.eventTableName(namespace, appId, channelId)}
+ where
+ eventTime >= ${JDBCUtils.timestampFunction(client)}(?) and
+ eventTime < ${JDBCUtils.timestampFunction(client)}(?)
+ $entityTypeClause
+ $entityIdClause
+ $eventNamesClause
+ $targetEntityTypeClause
+ $targetEntityIdClause
+ """.replace("\n", " ")
+ new JdbcRDD(
+ sc,
+ () => {
+ DriverManager.getConnection(
+ client,
+ config.properties("USERNAME"),
+ config.properties("PASSWORD"))
+ },
+ q,
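+ // JdbcRDD substitutes these bounds into the two '?' placeholders; divide by 1000 because the timestamp functions take seconds, not milliseconds.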
+ lower / 1000,
+ upper / 1000,
+ par,
+ (r: ResultSet) => {
+ Event(
+ eventId = Option(r.getString("id")),
+ event = r.getString("event"),
+ entityType = r.getString("entityType"),
+ entityId = r.getString("entityId"),
+ targetEntityType = Option(r.getString("targetEntityType")),
+ targetEntityId = Option(r.getString("targetEntityId")),
+ properties = Option(r.getString("properties")).map(x =>
+ DataMap(Serialization.read[JObject](x))).getOrElse(DataMap()),
+ eventTime = new DateTime(r.getTimestamp("eventTime").getTime,
+ DateTimeZone.forID(r.getString("eventTimeZone"))),
+ tags = Option(r.getString("tags")).map(x =>
+ x.split(",").toList).getOrElse(Nil),
+ prId = Option(r.getString("prId")),
+ creationTime = new DateTime(r.getTimestamp("creationTime").getTime,
+ DateTimeZone.forID(r.getString("creationTimeZone"))))
+ }).cache()
+ }
+
+ def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
+ val sqlContext = new SQLContext(sc)
+
+ import sqlContext.implicits._
+
+ val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
+
+ val eventTableColumns = Seq[String](
+ "id"
+ , "event"
+ , "entityType"
+ , "entityId"
+ , "targetEntityType"
+ , "targetEntityId"
+ , "properties"
+ , "eventTime"
+ , "eventTimeZone"
+ , "tags"
+ , "prId"
+ , "creationTime"
+ , "creationTimeZone")
+
+ val eventDF = events.map(x =>
+ Event(eventId = None, event = x.event, entityType = x.entityType,
+ entityId = x.entityId, targetEntityType = x.targetEntityType,
+ targetEntityId = x.targetEntityId, properties = x.properties,
+ eventTime = x.eventTime, tags = x.tags, prId = x.prId,
+ creationTime = x.eventTime)
+ )
+ .map { event =>
+ (event.eventId.getOrElse(JDBCUtils.generateId)
+ , event.event
+ , event.entityType
+ , event.entityId
+ , event.targetEntityType.orNull
+ , event.targetEntityId.orNull
+ , if (!event.properties.isEmpty) Serialization.write(event.properties.toJObject) else null
+ , new java.sql.Timestamp(event.eventTime.getMillis)
+ , event.eventTime.getZone.getID
+ , if (event.tags.nonEmpty) event.tags.mkString(",") else null // plain String or null, matching the other nullable columns
+ , event.prId.orNull
+ , new java.sql.Timestamp(event.creationTime.getMillis)
+ , event.creationTime.getZone.getID)
+ }.toDF(eventTableColumns:_*)
+
+ // DataFrame's JDBC writer requires Spark 1.4.0 or higher
+ val prop = new java.util.Properties
+ prop.setProperty("user", config.properties("USERNAME"))
+ prop.setProperty("password", config.properties("PASSWORD"))
+ eventDF.write.mode(SaveMode.Append).jdbc(client, tableName, prop)
+ }
+
+ def delete(eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
+
+ eventIds.foreachPartition{ iter =>
+
+ iter.foreach { eventId =>
+ DB localTx { implicit session =>
+ val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
+ val table = SQLSyntax.createUnsafely(tableName)
+ sql"""
+ delete from $table where id = $eventId
+ """.update().apply()
+ true
+ }
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala
new file mode 100644
index 0000000..3eb55ba
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import scalikejdbc._
+
+/** JDBC related utilities */
+object JDBCUtils {
+ /** Extract JDBC driver type from URL
+ *
+ * @param url JDBC URL
+ * @return The driver type, e.g. postgresql
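+ * @example driverType("jdbc:mysql://localhost/pio") == "mysql"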
+ */
+ def driverType(url: String): String = {
+ val capture = """jdbc:([^:]+):""".r
+ capture findFirstIn url match {
+ case Some(capture(driverType)) => driverType
+ case None => ""
+ }
+ }
+
+ /** Determines binary column type from JDBC URL
+ *
+ * @param url JDBC URL
+ * @return Binary column type as SQLSyntax, e.g. LONGBLOB
+ */
+ def binaryColumnType(url: String): SQLSyntax = {
+ driverType(url) match {
+ case "postgresql" => sqls"bytea"
+ case "mysql" => sqls"longblob"
+ case _ => sqls"longblob"
+ }
+ }
+
+ /** Determines UNIX timestamp conversion function from JDBC URL
+ *
+ * @param url JDBC URL
+ * @return Timestamp conversion function, e.g. TO_TIMESTAMP
+ */
+ def timestampFunction(url: String): String = {
+ driverType(url) match {
+ case "postgresql" => "to_timestamp"
+ case "mysql" => "from_unixtime"
+ case _ => "from_unixtime"
+ }
+ }
+
+ /** Converts a Map[String, String] to a comma-separated list of key=value pairs
+ *
+ * @param m Map of String to String
+ * @return Comma-separated list, e.g. FOO=BAR,X=Y,...
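+ * @note Keys and values must not contain ',' or '=', or stringToMap will not be able to invert the result.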
+ */
+ def mapToString(m: Map[String, String]): String = {
+ m.map(t => s"${t._1}=${t._2}").mkString(",")
+ }
+
+ /** Inverse of mapToString
+ *
+ * @param str Comma-separated list, e.g. FOO=BAR,X=Y,...
+ * @return Map of String to String, e.g. Map("FOO" -> "BAR", "X" -> "Y", ...)
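+ * @example stringToMap("FOO=BAR,X=Y") == Map("FOO" -> "BAR", "X" -> "Y")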
+ */
+ def stringToMap(str: String): Map[String, String] = {
+ str.split(",").map { x =>
+ val y = x.split("=")
+ y(0) -> y(1)
+ }.toMap[String, String]
+ }
+
+ /** Generates a 32-character random ID by stripping the hyphens from a UUID */
+ def generateId: String = java.util.UUID.randomUUID().toString.replace("-", "")
+
+ /** Prefix a table name
+ *
+ * @param prefix Table prefix
+ * @param table Table name
+ * @return Prefixed table name
+ */
+ def prefixTableName(prefix: String, table: String): SQLSyntax =
+ sqls.createUnsafely(s"${prefix}_$table")
+
+ /** Derive event table name
+ *
+ * @param namespace Namespace of event tables
+ * @param appId App ID
+ * @param channelId Optional channel ID
+ * @return Full event table name
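+ * @example eventTableName("pio_event", 1, Some(2)) == "pio_event_1_2"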
+ */
+ def eventTableName(namespace: String, appId: Int, channelId: Option[Int]): String =
+ s"${namespace}_${appId}${channelId.map("_" + _).getOrElse("")}"
+}
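
For quick reference, the helpers above compose as follows (a REPL-style sketch
with illustrative values, not part of the patch):

    import org.apache.predictionio.data.storage.jdbc.JDBCUtils

    JDBCUtils.driverType("jdbc:postgresql://localhost/pio")  // "postgresql"
    JDBCUtils.mapToString(Map("FOO" -> "BAR", "X" -> "Y"))   // "FOO=BAR,X=Y"
    JDBCUtils.stringToMap("FOO=BAR,X=Y")                     // Map("FOO" -> "BAR", "X" -> "Y")
    JDBCUtils.eventTableName("pio_event", 1, None)           // "pio_event_1"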
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala
new file mode 100644
index 0000000..661e05e
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.jdbc
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+import scalikejdbc._
+
+/** JDBC implementation of [[BaseStorageClient]] */
+class StorageClient(val config: StorageClientConfig)
+ extends BaseStorageClient with Logging {
+ override val prefix = "JDBC"
+
+ if (!config.properties.contains("URL")) {
+ throw new StorageClientException("The URL variable is not set!", null)
+ }
+ if (!config.properties.contains("USERNAME")) {
+ throw new StorageClientException("The USERNAME variable is not set!", null)
+ }
+ if (!config.properties.contains("PASSWORD")) {
+ throw new StorageClientException("The PASSWORD variable is not set!", null)
+ }
+
+ // set max size of connection pool
+ val maxSize: Int = config.properties.getOrElse("CONNECTIONS", "8").toInt
+ val settings = ConnectionPoolSettings(maxSize = maxSize)
+
+ ConnectionPool.singleton(
+ config.properties("URL"),
+ config.properties("USERNAME"),
+ config.properties("PASSWORD"),
+ settings)
+ /** JDBC connection URL. Connections are managed by ScalikeJDBC. */
+ val client = config.properties("URL")
+}
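
As a minimal sketch of how this client is wired up (assuming StorageClientConfig
carries its settings in a properties map, as the code above reads them; the URL
and credentials are illustrative):

    import org.apache.predictionio.data.storage.StorageClientConfig
    import org.apache.predictionio.data.storage.jdbc.StorageClient

    val config = StorageClientConfig(properties = Map(
      "URL" -> "jdbc:postgresql://localhost/pio",
      "USERNAME" -> "pio",
      "PASSWORD" -> "pio",
      "CONNECTIONS" -> "8"))  // optional; defaults to 8
    val client = new StorageClient(config)  // registers the singleton connection pool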
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala
new file mode 100644
index 0000000..e552e54
--- /dev/null
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** JDBC implementation of storage traits, supporting metadata, event data, and
+ * model data
+ *
+ * @group Implementation
+ */
+package object jdbc {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/jdbc/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/test/resources/application.conf b/storage/jdbc/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/jdbc/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+ sources {
+ mongodb {
+ type = mongodb
+ hosts = [localhost]
+ ports = [27017]
+ }
+ elasticsearch {
+ type = elasticsearch
+ hosts = [localhost]
+ ports = [9300]
+ }
+ }
+ repositories {
+ # This section is a dummy that exists only to make the storage subsystem happy.
+ # The actual tests do not rely on these repository settings.
+ # Please refer to StorageTestUtils.scala.
+ settings {
+ name = "test_predictionio"
+ source = mongodb
+ }
+
+ appdata {
+ name = "test_predictionio_appdata"
+ source = mongodb
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/.gitignore
----------------------------------------------------------------------
diff --git a/storage/localfs/.gitignore b/storage/localfs/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/localfs/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/localfs/build.sbt b/storage/localfs/build.sbt
new file mode 100644
index 0000000..2cf9977
--- /dev/null
+++ b/storage/localfs/build.sbt
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+name := "apache-predictionio-data-localfs"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+
+assemblyMergeStrategy in assembly := {
+ case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
+ case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
+
+// skip test in assembly
+test in assembly := {}
+
+outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-localfs-assembly-" + version.value + ".jar")
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala
----------------------------------------------------------------------
diff --git a/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala
new file mode 100644
index 0000000..f528af9
--- /dev/null
+++ b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.localfs
+
+import java.io.File
+import java.io.FileNotFoundException
+import java.io.FileOutputStream
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.Model
+import org.apache.predictionio.data.storage.Models
+import org.apache.predictionio.data.storage.StorageClientConfig
+
+import scala.io.Source
+
+class LocalFSModels(f: File, config: StorageClientConfig, prefix: String)
+ extends Models with Logging {
+
+ def insert(i: Model): Unit = {
+ try {
+ val fos = new FileOutputStream(new File(f, s"${prefix}${i.id}"))
+ fos.write(i.models)
+ fos.close()
+ } catch {
+ case e: FileNotFoundException => error(e.getMessage)
+ }
+ }
+
+ def get(id: String): Option[Model] = {
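+ // Read the model file byte-for-byte; the ISO-8859-1 codec maps each byte to exactly one char.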
+ try {
+ Some(Model(
+ id = id,
+ models = Source.fromFile(new File(f, s"${prefix}${id}"))(
+ scala.io.Codec.ISO8859).map(_.toByte).toArray))
+ } catch {
+ case e: Throwable =>
+ error(e.getMessage)
+ None
+ }
+ }
+
+ def delete(id: String): Unit = {
+ val m = new File(f, s"${prefix}${id}")
+ if (!m.delete) error(s"Unable to delete ${m.getCanonicalPath}!")
+ }
+}
[6/7] incubator-predictionio git commit: [PIO-49] Add support for Elasticsearch 5
Posted by do...@apache.org.
[PIO-49] Add support for Elasticsearch 5
* Add support for Elasticsearch 5 over REST API
* Refactor storage implementations to submodules
* Build storage implementation as separate assemblies
Closes #352
Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/d78b3cbe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/d78b3cbe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/d78b3cbe
Branch: refs/heads/develop
Commit: d78b3cbe912cf57e2f0278e407a0d6432bd12849
Parents: 8fd59fd
Author: Shinsuke Sugaya <sh...@yahoo.co.jp>
Authored: Tue Mar 7 20:33:30 2017 -0800
Committer: Donald Szeto <do...@apache.org>
Committed: Tue Mar 7 20:33:30 2017 -0800
----------------------------------------------------------------------
bin/compute-classpath.sh | 2 +-
build.sbt | 28 +-
conf/pio-env.sh.template | 10 +-
core/build.sbt | 1 -
data/build.sbt | 14 -
.../storage/elasticsearch/ESAccessKeys.scala | 119 ------
.../data/storage/elasticsearch/ESApps.scala | 130 ------
.../data/storage/elasticsearch/ESChannels.scala | 117 ------
.../elasticsearch/ESEngineInstances.scala | 158 -------
.../elasticsearch/ESEvaluationInstances.scala | 136 ------
.../storage/elasticsearch/ESSequences.scala | 64 ---
.../data/storage/elasticsearch/ESUtils.scala | 48 ---
.../storage/elasticsearch/StorageClient.scala | 50 ---
.../data/storage/elasticsearch/package.scala | 25 --
.../data/storage/hbase/HBEventsUtil.scala | 415 -------------------
.../data/storage/hbase/HBLEvents.scala | 195 ---------
.../data/storage/hbase/HBPEvents.scala | 131 ------
.../data/storage/hbase/PIOHBaseUtil.scala | 32 --
.../data/storage/hbase/StorageClient.scala | 86 ----
.../data/storage/hbase/package.scala | 25 --
.../data/storage/hbase/upgrade/HB_0_8_0.scala | 193 ---------
.../data/storage/hbase/upgrade/Upgrade.scala | 75 ----
.../storage/hbase/upgrade/Upgrade_0_8_3.scala | 224 ----------
.../data/storage/hdfs/HDFSModels.scala | 63 ---
.../data/storage/hdfs/StorageClient.scala | 36 --
.../data/storage/hdfs/package.scala | 25 --
.../data/storage/jdbc/JDBCAccessKeys.scala | 87 ----
.../data/storage/jdbc/JDBCApps.scala | 89 ----
.../data/storage/jdbc/JDBCChannels.scala | 69 ---
.../data/storage/jdbc/JDBCEngineInstances.scala | 197 ---------
.../storage/jdbc/JDBCEvaluationInstances.scala | 165 --------
.../data/storage/jdbc/JDBCLEvents.scala | 244 -----------
.../data/storage/jdbc/JDBCModels.scala | 55 ---
.../data/storage/jdbc/JDBCPEvents.scala | 188 ---------
.../data/storage/jdbc/JDBCUtils.scala | 106 -----
.../data/storage/jdbc/StorageClient.scala | 53 ---
.../data/storage/jdbc/package.scala | 26 --
.../data/storage/localfs/LocalFSModels.scala | 62 ---
.../data/storage/localfs/StorageClient.scala | 46 --
.../data/storage/localfs/package.scala | 25 --
.../predictionio/data/view/PBatchView.scala | 212 ----------
make-distribution.sh | 6 +-
storage/elasticsearch/.gitignore | 1 +
storage/elasticsearch/build.sbt | 57 +++
.../storage/elasticsearch/ESAccessKeys.scala | 178 ++++++++
.../data/storage/elasticsearch/ESApps.scala | 194 +++++++++
.../data/storage/elasticsearch/ESChannels.scala | 165 ++++++++
.../elasticsearch/ESEngineInstances.scala | 248 +++++++++++
.../elasticsearch/ESEvaluationInstances.scala | 194 +++++++++
.../storage/elasticsearch/ESEventsUtil.scala | 123 ++++++
.../data/storage/elasticsearch/ESLEvents.scala | 291 +++++++++++++
.../data/storage/elasticsearch/ESPEvents.scala | 144 +++++++
.../storage/elasticsearch/ESSequences.scala | 79 ++++
.../data/storage/elasticsearch/ESUtils.scala | 184 ++++++++
.../storage/elasticsearch/StorageClient.scala | 44 ++
.../data/storage/elasticsearch/package.scala | 25 ++
.../src/test/resources/application.conf | 28 ++
storage/elasticsearch1/.gitignore | 1 +
storage/elasticsearch1/build.sbt | 47 +++
.../storage/elasticsearch/ESAccessKeys.scala | 119 ++++++
.../data/storage/elasticsearch/ESApps.scala | 130 ++++++
.../data/storage/elasticsearch/ESChannels.scala | 117 ++++++
.../elasticsearch/ESEngineInstances.scala | 158 +++++++
.../elasticsearch/ESEvaluationInstances.scala | 136 ++++++
.../storage/elasticsearch/ESSequences.scala | 64 +++
.../data/storage/elasticsearch/ESUtils.scala | 48 +++
.../storage/elasticsearch/StorageClient.scala | 50 +++
.../data/storage/elasticsearch/package.scala | 25 ++
.../src/test/resources/application.conf | 28 ++
storage/hbase/.gitignore | 1 +
storage/hbase/build.sbt | 56 +++
.../data/storage/hbase/HBEventsUtil.scala | 415 +++++++++++++++++++
.../data/storage/hbase/HBLEvents.scala | 195 +++++++++
.../data/storage/hbase/HBPEvents.scala | 131 ++++++
.../data/storage/hbase/PIOHBaseUtil.scala | 32 ++
.../data/storage/hbase/StorageClient.scala | 86 ++++
.../data/storage/hbase/package.scala | 25 ++
.../data/storage/hbase/upgrade/HB_0_8_0.scala | 193 +++++++++
.../data/storage/hbase/upgrade/Upgrade.scala | 75 ++++
.../storage/hbase/upgrade/Upgrade_0_8_3.scala | 224 ++++++++++
.../predictionio/data/view/PBatchView.scala | 212 ++++++++++
.../hbase/src/test/resources/application.conf | 28 ++
storage/hdfs/.gitignore | 1 +
storage/hdfs/build.sbt | 44 ++
.../data/storage/hdfs/HDFSModels.scala | 63 +++
.../data/storage/hdfs/StorageClient.scala | 36 ++
.../data/storage/hdfs/package.scala | 25 ++
.../hdfs/src/test/resources/application.conf | 28 ++
storage/jdbc/.gitignore | 1 +
storage/jdbc/build.sbt | 47 +++
.../data/storage/jdbc/JDBCAccessKeys.scala | 87 ++++
.../data/storage/jdbc/JDBCApps.scala | 89 ++++
.../data/storage/jdbc/JDBCChannels.scala | 69 +++
.../data/storage/jdbc/JDBCEngineInstances.scala | 197 +++++++++
.../storage/jdbc/JDBCEvaluationInstances.scala | 165 ++++++++
.../data/storage/jdbc/JDBCLEvents.scala | 244 +++++++++++
.../data/storage/jdbc/JDBCModels.scala | 55 +++
.../data/storage/jdbc/JDBCPEvents.scala | 188 +++++++++
.../data/storage/jdbc/JDBCUtils.scala | 106 +++++
.../data/storage/jdbc/StorageClient.scala | 53 +++
.../data/storage/jdbc/package.scala | 26 ++
.../jdbc/src/test/resources/application.conf | 28 ++
storage/localfs/.gitignore | 1 +
storage/localfs/build.sbt | 44 ++
.../data/storage/localfs/LocalFSModels.scala | 62 +++
.../data/storage/localfs/StorageClient.scala | 46 ++
.../data/storage/localfs/package.scala | 25 ++
.../localfs/src/test/resources/application.conf | 28 ++
tests/Dockerfile | 10 +-
tests/build-docker.sh | 8 +-
tests/docker-compose.yml | 2 +-
tests/docker-files/env-conf/pio-env.sh | 3 +-
tests/pio_tests/scenarios/eventserver_test.py | 5 +-
tests/run_docker.sh | 4 +-
.../org/apache/predictionio/tools/Common.scala | 7 +
.../org/apache/predictionio/tools/Runner.scala | 3 +-
.../predictionio/tools/commands/Engine.scala | 4 +-
117 files changed, 6382 insertions(+), 4005 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/bin/compute-classpath.sh
----------------------------------------------------------------------
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 3bf6814..69cbb25 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -27,7 +27,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
# Build up classpath
CLASSPATH="${FWDIR}/conf"
-CLASSPATH="$CLASSPATH:${FWDIR}/plugins/*"
+CLASSPATH="$CLASSPATH:${FWDIR}/plugins/*:${FWDIR}/lib/spark/*"
ASSEMBLY_DIR="${FWDIR}/assembly"
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/build.sbt
----------------------------------------------------------------------
diff --git a/build.sbt b/build.sbt
index eeb3724..98444b9 100644
--- a/build.sbt
+++ b/build.sbt
@@ -34,11 +34,9 @@ fork in (ThisBuild, run) := true
javacOptions in (ThisBuild, compile) ++= Seq("-source", "1.7", "-target", "1.7",
"-Xlint:deprecation", "-Xlint:unchecked")
-elasticsearchVersion in ThisBuild := "1.4.4"
-
json4sVersion in ThisBuild := "3.2.10"
-sparkVersion in ThisBuild := "1.4.0"
+sparkVersion in ThisBuild := "1.6.3"
val pioBuildInfoSettings = buildInfoSettings ++ Seq(
sourceGenerators in Compile <+= buildInfo,
@@ -65,6 +63,30 @@ val data = (project in file("data")).
settings(commonSettings: _*).
settings(genjavadocSettings: _*)
+val dataElasticsearch1 = (project in file("storage/elasticsearch1")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*)
+
+val dataElasticsearch = (project in file("storage/elasticsearch")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*)
+
+val dataHbase = (project in file("storage/hbase")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*)
+
+val dataHdfs = (project in file("storage/hdfs")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*)
+
+val dataJdbc = (project in file("storage/jdbc")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*)
+
+val dataLocalfs = (project in file("storage/localfs")).
+ settings(commonSettings: _*).
+ settings(genjavadocSettings: _*)
+
val core = (project in file("core")).
dependsOn(data).
settings(commonSettings: _*).
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/conf/pio-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template
index a06cd8e..0d76102 100644
--- a/conf/pio-env.sh.template
+++ b/conf/pio-env.sh.template
@@ -24,7 +24,7 @@
# you need to change these to fit your site.
# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
-SPARK_HOME=$PIO_HOME/vendors/spark-1.5.1-bin-hadoop2.6
+SPARK_HOME=$PIO_HOME/vendors/spark-1.6.3-bin-hadoop2.6
POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-9.4-1204.jdbc41.jar
MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.37.jar
@@ -85,10 +85,16 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
# Elasticsearch Example
# PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
+# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost
+# PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
+# PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http
+# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-5.2.1
+# Elasticsearch 1.x Example
+# PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
# PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=<elasticsearch_cluster_name>
# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost
# PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300
-# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-1.4.4
+# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-1.7.6
# Local File System Example
# PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/core/build.sbt
----------------------------------------------------------------------
diff --git a/core/build.sbt b/core/build.sbt
index 637d4ea..bfb8bf3 100644
--- a/core/build.sbt
+++ b/core/build.sbt
@@ -32,7 +32,6 @@ libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
"org.clapper" %% "grizzled-slf4j" % "1.0.2",
- "org.elasticsearch" % "elasticsearch" % elasticsearchVersion.value,
"org.json4s" %% "json4s-native" % json4sVersion.value,
"org.json4s" %% "json4s-ext" % json4sVersion.value,
"org.scalaj" %% "scalaj-http" % "1.1.6",
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/build.sbt
----------------------------------------------------------------------
diff --git a/data/build.sbt b/data/build.sbt
index 4526c39..f5e95b5 100644
--- a/data/build.sbt
+++ b/data/build.sbt
@@ -26,29 +26,15 @@ libraryDependencies ++= Seq(
"mysql" % "mysql-connector-java" % "5.1.37" % "optional",
"org.apache.hadoop" % "hadoop-common" % "2.6.2"
exclude("javax.servlet", "servlet-api"),
- "org.apache.hbase" % "hbase-common" % "0.98.5-hadoop2",
- "org.apache.hbase" % "hbase-client" % "0.98.5-hadoop2"
- exclude("org.apache.zookeeper", "zookeeper"),
- // added for Parallel storage interface
- "org.apache.hbase" % "hbase-server" % "0.98.5-hadoop2"
- exclude("org.apache.hbase", "hbase-client")
- exclude("org.apache.zookeeper", "zookeeper")
- exclude("javax.servlet", "servlet-api")
- exclude("org.mortbay.jetty", "servlet-api-2.5")
- exclude("org.mortbay.jetty", "jsp-api-2.1")
- exclude("org.mortbay.jetty", "jsp-2.1"),
"org.apache.zookeeper" % "zookeeper" % "3.4.7"
exclude("org.slf4j", "slf4j-api")
exclude("org.slf4j", "slf4j-log4j12"),
"org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
"org.clapper" %% "grizzled-slf4j" % "1.0.2",
- "org.elasticsearch" % "elasticsearch" % elasticsearchVersion.value,
"org.json4s" %% "json4s-native" % json4sVersion.value,
"org.json4s" %% "json4s-ext" % json4sVersion.value,
- "org.postgresql" % "postgresql" % "9.4-1204-jdbc41",
"org.scalatest" %% "scalatest" % "2.1.7" % "test",
- "org.scalikejdbc" %% "scalikejdbc" % "2.3.5",
"org.slf4j" % "slf4j-log4j12" % "1.7.18",
"org.spark-project.akka" %% "akka-actor" % "2.3.4-spark",
"org.specs2" %% "specs2" % "2.3.13" % "test")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
deleted file mode 100644
index 077168a..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.predictionio.data.storage.AccessKey
-import org.apache.predictionio.data.storage.AccessKeys
-import org.elasticsearch.ElasticsearchException
-import org.elasticsearch.client.Client
-import org.elasticsearch.index.query.FilterBuilders._
-import org.json4s.JsonDSL._
-import org.json4s._
-import org.json4s.native.JsonMethods._
-import org.json4s.native.Serialization.read
-import org.json4s.native.Serialization.write
-
-import scala.util.Random
-
-/** Elasticsearch implementation of AccessKeys. */
-class ESAccessKeys(client: Client, config: StorageClientConfig, index: String)
- extends AccessKeys with Logging {
- implicit val formats = DefaultFormats.lossless
- private val estype = "accesskeys"
-
- val indices = client.admin.indices
- val indexExistResponse = indices.prepareExists(index).get
- if (!indexExistResponse.isExists) {
- indices.prepareCreate(index).get
- }
- val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
- if (!typeExistResponse.isExists) {
- val json =
- (estype ->
- ("properties" ->
- ("key" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("events" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
- indices.preparePutMapping(index).setType(estype).
- setSource(compact(render(json))).get
- }
-
- def insert(accessKey: AccessKey): Option[String] = {
- val key = if (accessKey.key.isEmpty) generateKey else accessKey.key
- update(accessKey.copy(key = key))
- Some(key)
- }
-
- def get(key: String): Option[AccessKey] = {
- try {
- val response = client.prepareGet(
- index,
- estype,
- key).get()
- Some(read[AccessKey](response.getSourceAsString))
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- None
- case e: NullPointerException => None
- }
- }
-
- def getAll(): Seq[AccessKey] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype)
- ESUtils.getAll[AccessKey](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq[AccessKey]()
- }
- }
-
- def getByAppid(appid: Int): Seq[AccessKey] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype).
- setPostFilter(termFilter("appid", appid))
- ESUtils.getAll[AccessKey](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq[AccessKey]()
- }
- }
-
- def update(accessKey: AccessKey): Unit = {
- try {
- client.prepareIndex(index, estype, accessKey.key).setSource(write(accessKey)).get()
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- }
- }
-
- def delete(key: String): Unit = {
- try {
- client.prepareDelete(index, estype, key).get
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- }
- }
-}
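
Note: the mapping above is built with the json4s DSL rather than a raw JSON
string, a pattern every meta-data store in this change repeats with a
different property list. A minimal sketch of what compact(render(...)) emits
for that DSL shape, assuming only json4s-native on the classpath (object name
is illustrative):

    import org.json4s.JsonDSL._
    import org.json4s.native.JsonMethods._

    object MappingDslDemo extends App {
      // Same shape as the "accesskeys" mapping in ESAccessKeys above.
      val json =
        ("accesskeys" ->
          ("properties" ->
            ("key" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
            ("events" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
      // Prints one line of JSON, e.g.:
      // {"accesskeys":{"properties":{"key":{"type":"string","index":"not_analyzed"},...}}}
      println(compact(render(json)))
    }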
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
deleted file mode 100644
index 3781a4b..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.predictionio.data.storage.App
-import org.apache.predictionio.data.storage.Apps
-import org.elasticsearch.ElasticsearchException
-import org.elasticsearch.client.Client
-import org.elasticsearch.index.query.FilterBuilders._
-import org.json4s.JsonDSL._
-import org.json4s._
-import org.json4s.native.JsonMethods._
-import org.json4s.native.Serialization.read
-import org.json4s.native.Serialization.write
-
-/** Elasticsearch implementation of Apps. */
-class ESApps(client: Client, config: StorageClientConfig, index: String)
- extends Apps with Logging {
- implicit val formats = DefaultFormats.lossless
- private val estype = "apps"
- private val seq = new ESSequences(client, config, index)
-
- val indices = client.admin.indices
- val indexExistResponse = indices.prepareExists(index).get
- if (!indexExistResponse.isExists) {
- indices.prepareCreate(index).get
- }
- val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
- if (!typeExistResponse.isExists) {
- val json =
- (estype ->
- ("properties" ->
- ("name" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
- indices.preparePutMapping(index).setType(estype).
- setSource(compact(render(json))).get
- }
-
- def insert(app: App): Option[Int] = {
- val id =
- if (app.id == 0) {
- var roll = seq.genNext("apps")
- while (!get(roll).isEmpty) roll = seq.genNext("apps")
- roll
-      } else app.id
- val realapp = app.copy(id = id)
- update(realapp)
- Some(id)
- }
-
- def get(id: Int): Option[App] = {
- try {
- val response = client.prepareGet(
- index,
- estype,
- id.toString).get()
- Some(read[App](response.getSourceAsString))
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- None
- case e: NullPointerException => None
- }
- }
-
- def getByName(name: String): Option[App] = {
- try {
- val response = client.prepareSearch(index).setTypes(estype).
- setPostFilter(termFilter("name", name)).get
- val hits = response.getHits().hits()
- if (hits.size > 0) {
- Some(read[App](hits.head.getSourceAsString))
- } else {
- None
- }
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- None
- }
- }
-
- def getAll(): Seq[App] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype)
- ESUtils.getAll[App](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq[App]()
- }
- }
-
- def update(app: App): Unit = {
- try {
- val response = client.prepareIndex(index, estype, app.id.toString).
- setSource(write(app)).get()
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- }
- }
-
- def delete(id: Int): Unit = {
- try {
- client.prepareDelete(index, estype, id.toString).get
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- }
- }
-}
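
Note: insert only consults the shared ESSequences counter when app.id is 0,
and re-rolls on collision because an id from the sequence may already be in
use (for example, one that was assigned explicitly). A stripped-down sketch
of that retry loop, with hypothetical stand-ins for seq.genNext and get:

    object IdRollDemo extends App {
      val taken = Set(1, 2, 3)          // ids already stored
      val counter = Iterator.from(1)    // stands in for seq.genNext("apps")

      def genNext(): Int = counter.next()
      def get(id: Int): Option[Int] = if (taken(id)) Some(id) else None

      // Same shape as ESApps.insert: roll until an unused id comes back.
      var roll = genNext()
      while (get(roll).isDefined) roll = genNext()
      println(s"first free id: $roll")  // prints: first free id: 4
    }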
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
deleted file mode 100644
index 52697fd..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.Channel
-import org.apache.predictionio.data.storage.Channels
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.elasticsearch.ElasticsearchException
-import org.elasticsearch.client.Client
-import org.elasticsearch.index.query.FilterBuilders.termFilter
-import org.json4s.DefaultFormats
-import org.json4s.JsonDSL._
-import org.json4s.native.JsonMethods._
-import org.json4s.native.Serialization.read
-import org.json4s.native.Serialization.write
-
-class ESChannels(client: Client, config: StorageClientConfig, index: String)
- extends Channels with Logging {
-
- implicit val formats = DefaultFormats.lossless
- private val estype = "channels"
- private val seq = new ESSequences(client, config, index)
- private val seqName = "channels"
-
- val indices = client.admin.indices
- val indexExistResponse = indices.prepareExists(index).get
- if (!indexExistResponse.isExists) {
- indices.prepareCreate(index).get
- }
- val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
- if (!typeExistResponse.isExists) {
- val json =
- (estype ->
- ("properties" ->
- ("name" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
- indices.preparePutMapping(index).setType(estype).
- setSource(compact(render(json))).get
- }
-
- def insert(channel: Channel): Option[Int] = {
- val id =
- if (channel.id == 0) {
- var roll = seq.genNext(seqName)
- while (!get(roll).isEmpty) roll = seq.genNext(seqName)
- roll
- } else channel.id
-
- val realChannel = channel.copy(id = id)
- if (update(realChannel)) Some(id) else None
- }
-
- def get(id: Int): Option[Channel] = {
- try {
- val response = client.prepareGet(
- index,
- estype,
- id.toString).get()
- Some(read[Channel](response.getSourceAsString))
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- None
- case e: NullPointerException => None
- }
- }
-
- def getByAppid(appid: Int): Seq[Channel] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype).
- setPostFilter(termFilter("appid", appid))
- ESUtils.getAll[Channel](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq[Channel]()
- }
- }
-
- def update(channel: Channel): Boolean = {
- try {
- val response = client.prepareIndex(index, estype, channel.id.toString).
- setSource(write(channel)).get()
- true
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- false
- }
- }
-
- def delete(id: Int): Unit = {
- try {
- client.prepareDelete(index, estype, id.toString).get
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
deleted file mode 100644
index 21690bf..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.EngineInstance
-import org.apache.predictionio.data.storage.EngineInstanceSerializer
-import org.apache.predictionio.data.storage.EngineInstances
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.elasticsearch.ElasticsearchException
-import org.elasticsearch.client.Client
-import org.elasticsearch.index.query.FilterBuilders._
-import org.elasticsearch.search.sort.SortOrder
-import org.json4s.JsonDSL._
-import org.json4s._
-import org.json4s.native.JsonMethods._
-import org.json4s.native.Serialization.read
-import org.json4s.native.Serialization.write
-
-class ESEngineInstances(client: Client, config: StorageClientConfig, index: String)
- extends EngineInstances with Logging {
- implicit val formats = DefaultFormats + new EngineInstanceSerializer
- private val estype = "engine_instances"
-
- val indices = client.admin.indices
- val indexExistResponse = indices.prepareExists(index).get
- if (!indexExistResponse.isExists) {
- indices.prepareCreate(index).get
- }
- val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
- if (!typeExistResponse.isExists) {
- val json =
- (estype ->
- ("properties" ->
- ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("startTime" -> ("type" -> "date")) ~
- ("endTime" -> ("type" -> "date")) ~
- ("engineId" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("engineVersion" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("engineVariant" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("engineFactory" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("batch" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("dataSourceParams" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("preparatorParams" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("algorithmsParams" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
-      ("servingParams" ->
-        ("type" -> "string") ~ ("index" -> "not_analyzed"))))
- indices.preparePutMapping(index).setType(estype).
- setSource(compact(render(json))).get
- }
-
- def insert(i: EngineInstance): String = {
- try {
- val response = client.prepareIndex(index, estype).
- setSource(write(i)).get
- response.getId
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- ""
- }
- }
-
- def get(id: String): Option[EngineInstance] = {
- try {
- val response = client.prepareGet(index, estype, id).get
- if (response.isExists) {
- Some(read[EngineInstance](response.getSourceAsString))
- } else {
- None
- }
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- None
- }
- }
-
- def getAll(): Seq[EngineInstance] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype)
- ESUtils.getAll[EngineInstance](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq()
- }
- }
-
- def getCompleted(
- engineId: String,
- engineVersion: String,
- engineVariant: String): Seq[EngineInstance] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype).setPostFilter(
- andFilter(
- termFilter("status", "COMPLETED"),
- termFilter("engineId", engineId),
- termFilter("engineVersion", engineVersion),
- termFilter("engineVariant", engineVariant))).
- addSort("startTime", SortOrder.DESC)
- ESUtils.getAll[EngineInstance](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq()
- }
- }
-
- def getLatestCompleted(
- engineId: String,
- engineVersion: String,
- engineVariant: String): Option[EngineInstance] =
- getCompleted(
- engineId,
- engineVersion,
- engineVariant).headOption
-
- def update(i: EngineInstance): Unit = {
- try {
- client.prepareUpdate(index, estype, i.id).setDoc(write(i)).get
- } catch {
- case e: ElasticsearchException => error(e.getMessage)
- }
- }
-
- def delete(id: String): Unit = {
- try {
- val response = client.prepareDelete(index, estype, id).get
- } catch {
- case e: ElasticsearchException => error(e.getMessage)
- }
- }
-}
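
Note: unlike ESApps and ESChannels, update here goes through
prepareUpdate().setDoc(), which merges the given fields into the stored
document, whereas prepareIndex() replaces the document wholesale. A sketch
contrasting the two write styles against the same transport Client used in
these files (object and method names are illustrative):

    import org.elasticsearch.client.Client

    object WriteStyles {
      // Full overwrite: any field missing from json disappears from the doc.
      def overwrite(client: Client, index: String, estype: String,
          id: String, json: String): Unit =
        client.prepareIndex(index, estype, id).setSource(json).get

      // Partial update: fields present in json are merged into the doc.
      def merge(client: Client, index: String, estype: String,
          id: String, json: String): Unit =
        client.prepareUpdate(index, estype, id).setDoc(json).get
    }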
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
deleted file mode 100644
index 85bf820..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.EvaluationInstance
-import org.apache.predictionio.data.storage.EvaluationInstanceSerializer
-import org.apache.predictionio.data.storage.EvaluationInstances
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.elasticsearch.ElasticsearchException
-import org.elasticsearch.client.Client
-import org.elasticsearch.index.query.FilterBuilders._
-import org.elasticsearch.search.sort.SortOrder
-import org.json4s.JsonDSL._
-import org.json4s._
-import org.json4s.native.JsonMethods._
-import org.json4s.native.Serialization.read
-import org.json4s.native.Serialization.write
-
-class ESEvaluationInstances(client: Client, config: StorageClientConfig, index: String)
- extends EvaluationInstances with Logging {
- implicit val formats = DefaultFormats + new EvaluationInstanceSerializer
- private val estype = "evaluation_instances"
-
- val indices = client.admin.indices
- val indexExistResponse = indices.prepareExists(index).get
- if (!indexExistResponse.isExists) {
- indices.prepareCreate(index).get
- }
- val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
- if (!typeExistResponse.isExists) {
- val json =
- (estype ->
- ("properties" ->
- ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("startTime" -> ("type" -> "date")) ~
- ("endTime" -> ("type" -> "date")) ~
- ("evaluationClass" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("engineParamsGeneratorClass" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("batch" ->
- ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
- ("evaluatorResults" ->
- ("type" -> "string") ~ ("index" -> "no")) ~
- ("evaluatorResultsHTML" ->
- ("type" -> "string") ~ ("index" -> "no")) ~
- ("evaluatorResultsJSON" ->
- ("type" -> "string") ~ ("index" -> "no"))))
- indices.preparePutMapping(index).setType(estype).
- setSource(compact(render(json))).get
- }
-
- def insert(i: EvaluationInstance): String = {
- try {
- val response = client.prepareIndex(index, estype).
- setSource(write(i)).get
- response.getId
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- ""
- }
- }
-
- def get(id: String): Option[EvaluationInstance] = {
- try {
- val response = client.prepareGet(index, estype, id).get
- if (response.isExists) {
- Some(read[EvaluationInstance](response.getSourceAsString))
- } else {
- None
- }
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- None
- }
- }
-
- def getAll(): Seq[EvaluationInstance] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype)
- ESUtils.getAll[EvaluationInstance](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq()
- }
- }
-
- def getCompleted(): Seq[EvaluationInstance] = {
- try {
- val builder = client.prepareSearch(index).setTypes(estype).setPostFilter(
- termFilter("status", "EVALCOMPLETED")).
- addSort("startTime", SortOrder.DESC)
- ESUtils.getAll[EvaluationInstance](client, builder)
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- Seq()
- }
- }
-
- def update(i: EvaluationInstance): Unit = {
- try {
- client.prepareUpdate(index, estype, i.id).setDoc(write(i)).get
- } catch {
- case e: ElasticsearchException => error(e.getMessage)
- }
- }
-
- def delete(id: String): Unit = {
- try {
- client.prepareDelete(index, estype, id).get
- } catch {
- case e: ElasticsearchException => error(e.getMessage)
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
deleted file mode 100644
index 5c9e170..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.elasticsearch.ElasticsearchException
-import org.elasticsearch.client.Client
-import org.json4s.JsonDSL._
-import org.json4s._
-import org.json4s.native.JsonMethods._
-
-class ESSequences(client: Client, config: StorageClientConfig, index: String) extends Logging {
- implicit val formats = DefaultFormats
- private val estype = "sequences"
-
- val indices = client.admin.indices
- val indexExistResponse = indices.prepareExists(index).get
- if (!indexExistResponse.isExists) {
- // val settingsJson =
- // ("number_of_shards" -> 1) ~
- // ("auto_expand_replicas" -> "0-all")
- indices.prepareCreate(index).get
- }
- val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
- if (!typeExistResponse.isExists) {
- val mappingJson =
- (estype ->
- ("_source" -> ("enabled" -> 0)) ~
- ("_all" -> ("enabled" -> 0)) ~
- ("_type" -> ("index" -> "no")) ~
- ("enabled" -> 0))
- indices.preparePutMapping(index).setType(estype).
- setSource(compact(render(mappingJson))).get
- }
-
- def genNext(name: String): Int = {
- try {
- val response = client.prepareIndex(index, estype, name).
- setSource(compact(render("n" -> name))).get
- response.getVersion().toInt
- } catch {
- case e: ElasticsearchException =>
- error(e.getMessage)
- 0
- }
- }
-}
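
Note: genNext implements a counter without ever storing a number. Re-indexing
a document under the same id makes Elasticsearch increment that document's
_version, and the returned version doubles as the next sequence value (the
original narrows the Long version to Int, which would only wrap after
Int.MaxValue increments). A sketch of the trick in isolation:

    import org.elasticsearch.client.Client
    import org.json4s.JsonDSL._
    import org.json4s.native.JsonMethods._

    object VersionCounterDemo {
      // Each call re-indexes the same doc id, so _version goes 1, 2, 3, ...
      def next(client: Client, index: String, name: String): Long = {
        val response = client.prepareIndex(index, "sequences", name).
          setSource(compact(render("n" -> name))).get
        response.getVersion
      }
    }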
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
deleted file mode 100644
index f5c99bf..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import org.elasticsearch.action.search.SearchRequestBuilder
-import org.elasticsearch.client.Client
-import org.elasticsearch.common.unit.TimeValue
-import org.json4s.Formats
-import org.json4s.native.Serialization.read
-
-import scala.collection.mutable.ArrayBuffer
-
-object ESUtils {
- val scrollLife = new TimeValue(60000)
-
- def getAll[T : Manifest](
- client: Client,
- builder: SearchRequestBuilder)(
- implicit formats: Formats): Seq[T] = {
- val results = ArrayBuffer[T]()
- var response = builder.setScroll(scrollLife).get
- var hits = response.getHits().hits()
- results ++= hits.map(h => read[T](h.getSourceAsString))
- while (hits.size > 0) {
- response = client.prepareSearchScroll(response.getScrollId).
- setScroll(scrollLife).get
- hits = response.getHits().hits()
- results ++= hits.map(h => read[T](h.getSourceAsString))
- }
- results
- }
-}
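
Note: getAll hides the two-phase scroll protocol. The first search both
returns a page of hits and opens a scroll context kept alive for 60 seconds
(scrollLife); each prepareSearchScroll call then passes the previous
response's scroll id until an empty page signals the end. A usage sketch
(index name and type are illustrative):

    import org.apache.predictionio.data.storage.App
    import org.apache.predictionio.data.storage.elasticsearch.ESUtils
    import org.elasticsearch.client.Client
    import org.json4s.DefaultFormats

    object ScrollDemo {
      def allApps(client: Client, index: String): Seq[App] = {
        implicit val formats = DefaultFormats.lossless
        // First page plus every scrolled page, deserialized via json4s.
        ESUtils.getAll[App](client, client.prepareSearch(index).setTypes("apps"))
      }
    }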
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
deleted file mode 100644
index 75ac2b0..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.elasticsearch
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.BaseStorageClient
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.predictionio.data.storage.StorageClientException
-import org.elasticsearch.client.transport.TransportClient
-import org.elasticsearch.common.settings.ImmutableSettings
-import org.elasticsearch.common.transport.InetSocketTransportAddress
-import org.elasticsearch.transport.ConnectTransportException
-
-class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
- with Logging {
- override val prefix = "ES"
- val client = try {
- val hosts = config.properties.get("HOSTS").
- map(_.split(",").toSeq).getOrElse(Seq("localhost"))
- val ports = config.properties.get("PORTS").
- map(_.split(",").toSeq.map(_.toInt)).getOrElse(Seq(9300))
- val settings = ImmutableSettings.settingsBuilder()
- .put("cluster.name", config.properties.getOrElse("CLUSTERNAME", "elasticsearch"))
- val transportClient = new TransportClient(settings)
- (hosts zip ports) foreach { hp =>
- transportClient.addTransportAddress(
- new InetSocketTransportAddress(hp._1, hp._2))
- }
- transportClient
- } catch {
- case e: ConnectTransportException =>
- throw new StorageClientException(e.getMessage, e)
- }
-}
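
Note: HOSTS and PORTS are parsed as parallel comma-separated lists and
zipped, so a length mismatch silently drops the unpaired entries instead of
failing. A runnable illustration of just the parsing (hostnames are made up):

    object HostPortDemo extends App {
      val props = Map("HOSTS" -> "es1.internal,es2.internal", "PORTS" -> "9300")
      val hosts = props.get("HOSTS").map(_.split(",").toSeq).getOrElse(Seq("localhost"))
      val ports = props.get("PORTS").map(_.split(",").toSeq.map(_.toInt)).getOrElse(Seq(9300))
      // zip truncates to the shorter list: only es1.internal:9300 survives.
      println(hosts zip ports)  // prints: List((es1.internal,9300))
    }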
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala b/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
deleted file mode 100644
index 0c549b8..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage
-
-/** Elasticsearch implementation of storage traits, supporting meta data only
- *
- * @group Implementation
- */
-package object elasticsearch {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala
deleted file mode 100644
index 2cdb734..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hbase
-
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.EventValidation
-import org.apache.predictionio.data.storage.DataMap
-
-import org.apache.hadoop.hbase.client.Result
-import org.apache.hadoop.hbase.client.Put
-import org.apache.hadoop.hbase.client.Scan
-import org.apache.hadoop.hbase.util.Bytes
-import org.apache.hadoop.hbase.filter.FilterList
-import org.apache.hadoop.hbase.filter.RegexStringComparator
-import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
-import org.apache.hadoop.hbase.filter.BinaryComparator
-import org.apache.hadoop.hbase.filter.QualifierFilter
-import org.apache.hadoop.hbase.filter.SkipFilter
-
-import org.json4s.DefaultFormats
-import org.json4s.JObject
-import org.json4s.native.Serialization.{ read, write }
-
-import org.joda.time.DateTime
-import org.joda.time.DateTimeZone
-
-import org.apache.commons.codec.binary.Base64
-import java.security.MessageDigest
-
-import java.util.UUID
-
-/* Common utility functions for accessing the events store in HBase. */
-object HBEventsUtil {
-
- implicit val formats = DefaultFormats
-
- def tableName(namespace: String, appId: Int, channelId: Option[Int] = None): String = {
- channelId.map { ch =>
- s"${namespace}:events_${appId}_${ch}"
- }.getOrElse {
- s"${namespace}:events_${appId}"
- }
- }
-
- // column names for "e" column family
- val colNames: Map[String, Array[Byte]] = Map(
- "event" -> "e",
- "entityType" -> "ety",
- "entityId" -> "eid",
- "targetEntityType" -> "tety",
- "targetEntityId" -> "teid",
- "properties" -> "p",
- "prId" -> "prid",
- "eventTime" -> "et",
- "eventTimeZone" -> "etz",
- "creationTime" -> "ct",
- "creationTimeZone" -> "ctz"
- ).mapValues(Bytes.toBytes(_))
-
- def hash(entityType: String, entityId: String): Array[Byte] = {
- val s = entityType + "-" + entityId
-    // get a new MessageDigest object each time for thread safety
- val md5 = MessageDigest.getInstance("MD5")
- md5.digest(Bytes.toBytes(s))
- }
-
- class RowKey(
- val b: Array[Byte]
- ) {
-    require(b.size == 32, s"Incorrect row key size: ${b.size} (expected 32)")
- lazy val entityHash: Array[Byte] = b.slice(0, 16)
- lazy val millis: Long = Bytes.toLong(b.slice(16, 24))
- lazy val uuidLow: Long = Bytes.toLong(b.slice(24, 32))
-
- lazy val toBytes: Array[Byte] = b
-
- override def toString: String = {
- Base64.encodeBase64URLSafeString(toBytes)
- }
- }
-
- object RowKey {
- def apply(
- entityType: String,
- entityId: String,
- millis: Long,
- uuidLow: Long): RowKey = {
- // add UUID least significant bits for multiple actions at the same time
- // (UUID's most significant bits are actually timestamp,
- // use eventTime instead).
- val b = hash(entityType, entityId) ++
- Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow)
- new RowKey(b)
- }
-
- // get RowKey from string representation
- def apply(s: String): RowKey = {
- try {
- apply(Base64.decodeBase64(s))
- } catch {
- case e: Exception => throw new RowKeyException(
- s"Failed to convert String ${s} to RowKey because ${e}", e)
- }
- }
-
- def apply(b: Array[Byte]): RowKey = {
- if (b.size != 32) {
- val bString = b.mkString(",")
- throw new RowKeyException(
- s"Incorrect byte array size. Bytes: ${bString}.")
- }
- new RowKey(b)
- }
-
- }
-
- class RowKeyException(val msg: String, val cause: Exception)
- extends Exception(msg, cause) {
- def this(msg: String) = this(msg, null)
- }
-
- case class PartialRowKey(entityType: String, entityId: String,
- millis: Option[Long] = None) {
- val toBytes: Array[Byte] = {
- hash(entityType, entityId) ++
- (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]()))
- }
- }
-
- def eventToPut(event: Event, appId: Int): (Put, RowKey) = {
- // generate new rowKey if eventId is None
- val rowKey = event.eventId.map { id =>
- RowKey(id) // create rowKey from eventId
- }.getOrElse {
-      // TODO: use a real UUID, not pseudo-random
- val uuidLow: Long = UUID.randomUUID().getLeastSignificantBits
- RowKey(
- entityType = event.entityType,
- entityId = event.entityId,
- millis = event.eventTime.getMillis,
- uuidLow = uuidLow
- )
- }
-
- val eBytes = Bytes.toBytes("e")
- // use eventTime as HBase's cell timestamp
- val put = new Put(rowKey.toBytes, event.eventTime.getMillis)
-
- def addStringToE(col: Array[Byte], v: String): Put = {
- put.add(eBytes, col, Bytes.toBytes(v))
- }
-
- def addLongToE(col: Array[Byte], v: Long): Put = {
- put.add(eBytes, col, Bytes.toBytes(v))
- }
-
- addStringToE(colNames("event"), event.event)
- addStringToE(colNames("entityType"), event.entityType)
- addStringToE(colNames("entityId"), event.entityId)
-
- event.targetEntityType.foreach { targetEntityType =>
- addStringToE(colNames("targetEntityType"), targetEntityType)
- }
-
- event.targetEntityId.foreach { targetEntityId =>
- addStringToE(colNames("targetEntityId"), targetEntityId)
- }
-
- // TODO: make properties Option[]
- if (!event.properties.isEmpty) {
- addStringToE(colNames("properties"), write(event.properties.toJObject))
- }
-
- event.prId.foreach { prId =>
- addStringToE(colNames("prId"), prId)
- }
-
- addLongToE(colNames("eventTime"), event.eventTime.getMillis)
- val eventTimeZone = event.eventTime.getZone
- if (!eventTimeZone.equals(EventValidation.defaultTimeZone)) {
- addStringToE(colNames("eventTimeZone"), eventTimeZone.getID)
- }
-
- addLongToE(colNames("creationTime"), event.creationTime.getMillis)
- val creationTimeZone = event.creationTime.getZone
- if (!creationTimeZone.equals(EventValidation.defaultTimeZone)) {
- addStringToE(colNames("creationTimeZone"), creationTimeZone.getID)
- }
-
- // can use zero-length byte array for tag cell value
- (put, rowKey)
- }
-
- def resultToEvent(result: Result, appId: Int): Event = {
- val rowKey = RowKey(result.getRow())
-
- val eBytes = Bytes.toBytes("e")
- // val e = result.getFamilyMap(eBytes)
-
- def getStringCol(col: String): String = {
- val r = result.getValue(eBytes, colNames(col))
- require(r != null,
- s"Failed to get value for column ${col}. " +
- s"Rowkey: ${rowKey.toString} " +
- s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.")
-
- Bytes.toString(r)
- }
-
- def getLongCol(col: String): Long = {
- val r = result.getValue(eBytes, colNames(col))
- require(r != null,
- s"Failed to get value for column ${col}. " +
- s"Rowkey: ${rowKey.toString} " +
- s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.")
-
- Bytes.toLong(r)
- }
-
- def getOptStringCol(col: String): Option[String] = {
- val r = result.getValue(eBytes, colNames(col))
- if (r == null) {
- None
- } else {
- Some(Bytes.toString(r))
- }
- }
-
- def getTimestamp(col: String): Long = {
- result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp()
- }
-
- val event = getStringCol("event")
- val entityType = getStringCol("entityType")
- val entityId = getStringCol("entityId")
- val targetEntityType = getOptStringCol("targetEntityType")
- val targetEntityId = getOptStringCol("targetEntityId")
- val properties: DataMap = getOptStringCol("properties")
- .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())
- val prId = getOptStringCol("prId")
- val eventTimeZone = getOptStringCol("eventTimeZone")
- .map(DateTimeZone.forID(_))
- .getOrElse(EventValidation.defaultTimeZone)
- val eventTime = new DateTime(
- getLongCol("eventTime"), eventTimeZone)
- val creationTimeZone = getOptStringCol("creationTimeZone")
- .map(DateTimeZone.forID(_))
- .getOrElse(EventValidation.defaultTimeZone)
- val creationTime: DateTime = new DateTime(
- getLongCol("creationTime"), creationTimeZone)
-
- Event(
- eventId = Some(RowKey(result.getRow()).toString),
- event = event,
- entityType = entityType,
- entityId = entityId,
- targetEntityType = targetEntityType,
- targetEntityId = targetEntityId,
- properties = properties,
- eventTime = eventTime,
- tags = Seq(),
- prId = prId,
- creationTime = creationTime
- )
- }
-
-
-  // For a mandatory field, None means don't care.
-  // For an optional field, None means don't care,
-  //   Some(None) means the field must not exist, and
-  //   Some(Some(x)) means it must match x.
- def createScan(
- startTime: Option[DateTime] = None,
- untilTime: Option[DateTime] = None,
- entityType: Option[String] = None,
- entityId: Option[String] = None,
- eventNames: Option[Seq[String]] = None,
- targetEntityType: Option[Option[String]] = None,
- targetEntityId: Option[Option[String]] = None,
- reversed: Option[Boolean] = None): Scan = {
-
- val scan: Scan = new Scan()
-
- (entityType, entityId) match {
- case (Some(et), Some(eid)) => {
- val start = PartialRowKey(et, eid,
- startTime.map(_.getMillis)).toBytes
- // if no untilTime, stop when reach next bytes of entityTypeAndId
- val stop = PartialRowKey(et, eid,
- untilTime.map(_.getMillis).orElse(Some(-1))).toBytes
-
- if (reversed.getOrElse(false)) {
- // Reversed order.
- // If you specify a startRow and stopRow,
- // to scan in reverse, the startRow needs to be lexicographically
- // after the stopRow.
- scan.setStartRow(stop)
- scan.setStopRow(start)
- scan.setReversed(true)
- } else {
- scan.setStartRow(start)
- scan.setStopRow(stop)
- }
- }
- case (_, _) => {
- val minTime: Long = startTime.map(_.getMillis).getOrElse(0)
- val maxTime: Long = untilTime.map(_.getMillis).getOrElse(Long.MaxValue)
- scan.setTimeRange(minTime, maxTime)
- if (reversed.getOrElse(false)) {
- scan.setReversed(true)
- }
- }
- }
-
- val filters = new FilterList(FilterList.Operator.MUST_PASS_ALL)
-
- val eBytes = Bytes.toBytes("e")
-
- def createBinaryFilter(col: String, value: Array[Byte]): SingleColumnValueFilter = {
- val comp = new BinaryComparator(value)
- new SingleColumnValueFilter(
- eBytes, colNames(col), CompareOp.EQUAL, comp)
- }
-
- // skip the row if the column exists
- def createSkipRowIfColumnExistFilter(col: String): SkipFilter = {
- val comp = new BinaryComparator(colNames(col))
- val q = new QualifierFilter(CompareOp.NOT_EQUAL, comp)
- // filters an entire row if any of the Cell checks do not pass
- new SkipFilter(q)
- }
-
- entityType.foreach { et =>
- val compType = new BinaryComparator(Bytes.toBytes(et))
- val filterType = new SingleColumnValueFilter(
- eBytes, colNames("entityType"), CompareOp.EQUAL, compType)
- filters.addFilter(filterType)
- }
-
- entityId.foreach { eid =>
- val compId = new BinaryComparator(Bytes.toBytes(eid))
- val filterId = new SingleColumnValueFilter(
- eBytes, colNames("entityId"), CompareOp.EQUAL, compId)
- filters.addFilter(filterId)
- }
-
- eventNames.foreach { eventsList =>
- // match any of event in the eventsList
- val eventFilters = new FilterList(FilterList.Operator.MUST_PASS_ONE)
- eventsList.foreach { e =>
- val compEvent = new BinaryComparator(Bytes.toBytes(e))
- val filterEvent = new SingleColumnValueFilter(
- eBytes, colNames("event"), CompareOp.EQUAL, compEvent)
- eventFilters.addFilter(filterEvent)
- }
- if (!eventFilters.getFilters().isEmpty) {
- filters.addFilter(eventFilters)
- }
- }
-
- targetEntityType.foreach { tetOpt =>
- if (tetOpt.isEmpty) {
- val filter = createSkipRowIfColumnExistFilter("targetEntityType")
- filters.addFilter(filter)
- } else {
- tetOpt.foreach { tet =>
- val filter = createBinaryFilter(
- "targetEntityType", Bytes.toBytes(tet))
- // the entire row will be skipped if the column is not found.
- filter.setFilterIfMissing(true)
- filters.addFilter(filter)
- }
- }
- }
-
- targetEntityId.foreach { teidOpt =>
- if (teidOpt.isEmpty) {
- val filter = createSkipRowIfColumnExistFilter("targetEntityId")
- filters.addFilter(filter)
- } else {
- teidOpt.foreach { teid =>
- val filter = createBinaryFilter(
- "targetEntityId", Bytes.toBytes(teid))
- // the entire row will be skipped if the column is not found.
- filter.setFilterIfMissing(true)
- filters.addFilter(filter)
- }
- }
- }
-
- if (!filters.getFilters().isEmpty) {
- scan.setFilter(filters)
- }
-
- scan
- }
-
-}
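
Note: the row key packs three fixed-width parts so that all events for one
entity are contiguous and time-ordered within the table: a 16-byte MD5 of
"entityType-entityId", 8 big-endian bytes of event-time millis, and 8 bytes
of UUID low bits as a tiebreaker for same-millisecond events. A
dependency-free sketch of the same 32-byte layout (ByteBuffer.putLong is
big-endian, matching HBase's Bytes.toBytes):

    import java.nio.ByteBuffer
    import java.security.MessageDigest

    object RowKeyDemo extends App {
      def rowKey(entityType: String, entityId: String,
          millis: Long, uuidLow: Long): Array[Byte] = {
        val hash = MessageDigest.getInstance("MD5")
          .digest(s"$entityType-$entityId".getBytes("UTF-8"))  // 16 bytes
        ByteBuffer.allocate(32).put(hash).putLong(millis).putLong(uuidLow).array()
      }
      println(rowKey("user", "u1", System.currentTimeMillis(), 42L).length)  // 32
    }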
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala
deleted file mode 100644
index 360b007..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hbase
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.LEvents
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.predictionio.data.storage.hbase.HBEventsUtil.RowKey
-import org.apache.hadoop.hbase.HColumnDescriptor
-import org.apache.hadoop.hbase.HTableDescriptor
-import org.apache.hadoop.hbase.NamespaceDescriptor
-import org.apache.hadoop.hbase.TableName
-import org.apache.hadoop.hbase.client._
-import org.joda.time.DateTime
-
-import scala.collection.JavaConversions._
-import scala.concurrent.ExecutionContext
-import scala.concurrent.Future
-
-class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace: String)
- extends LEvents with Logging {
-
- // implicit val formats = DefaultFormats + new EventJson4sSupport.DBSerializer
-
- def resultToEvent(result: Result, appId: Int): Event =
- HBEventsUtil.resultToEvent(result, appId)
-
- def getTable(appId: Int, channelId: Option[Int] = None): HTableInterface =
- client.connection.getTable(HBEventsUtil.tableName(namespace, appId, channelId))
-
- override
- def init(appId: Int, channelId: Option[Int] = None): Boolean = {
- // check namespace exist
- val existingNamespace = client.admin.listNamespaceDescriptors()
- .map(_.getName)
- if (!existingNamespace.contains(namespace)) {
- val nameDesc = NamespaceDescriptor.create(namespace).build()
- info(s"The namespace ${namespace} doesn't exist yet. Creating now...")
- client.admin.createNamespace(nameDesc)
- }
-
- val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId))
- if (!client.admin.tableExists(tableName)) {
- info(s"The table ${tableName.getNameAsString()} doesn't exist yet." +
- " Creating now...")
- val tableDesc = new HTableDescriptor(tableName)
- tableDesc.addFamily(new HColumnDescriptor("e"))
- tableDesc.addFamily(new HColumnDescriptor("r")) // reserved
- client.admin.createTable(tableDesc)
- }
- true
- }
-
- override
- def remove(appId: Int, channelId: Option[Int] = None): Boolean = {
- val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId))
- try {
- if (client.admin.tableExists(tableName)) {
- info(s"Removing table ${tableName.getNameAsString()}...")
- client.admin.disableTable(tableName)
- client.admin.deleteTable(tableName)
- } else {
- info(s"Table ${tableName.getNameAsString()} doesn't exist." +
- s" Nothing is deleted.")
- }
- true
- } catch {
- case e: Exception => {
-        error(s"Failed to remove table for appId ${appId}. Exception: ${e}")
- false
- }
- }
- }
-
- override
- def close(): Unit = {
- client.admin.close()
- client.connection.close()
- }
-
- override
- def futureInsert(
- event: Event, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):
- Future[String] = {
- Future {
- val table = getTable(appId, channelId)
- val (put, rowKey) = HBEventsUtil.eventToPut(event, appId)
- table.put(put)
- table.flushCommits()
- table.close()
- rowKey.toString
- }
- }
-
- override
- def futureGet(
- eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):
- Future[Option[Event]] = {
- Future {
- val table = getTable(appId, channelId)
- val rowKey = RowKey(eventId)
- val get = new Get(rowKey.toBytes)
-
- val result = table.get(get)
- table.close()
-
- if (!result.isEmpty()) {
- val event = resultToEvent(result, appId)
- Some(event)
- } else {
- None
- }
- }
- }
-
- override
- def futureDelete(
- eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):
- Future[Boolean] = {
- Future {
- val table = getTable(appId, channelId)
- val rowKey = RowKey(eventId)
- val exists = table.exists(new Get(rowKey.toBytes))
- table.delete(new Delete(rowKey.toBytes))
- table.close()
- exists
- }
- }
-
- override
- def futureFind(
- appId: Int,
- channelId: Option[Int] = None,
- startTime: Option[DateTime] = None,
- untilTime: Option[DateTime] = None,
- entityType: Option[String] = None,
- entityId: Option[String] = None,
- eventNames: Option[Seq[String]] = None,
- targetEntityType: Option[Option[String]] = None,
- targetEntityId: Option[Option[String]] = None,
- limit: Option[Int] = None,
- reversed: Option[Boolean] = None)(implicit ec: ExecutionContext):
- Future[Iterator[Event]] = {
- Future {
-
- require(!((reversed == Some(true)) && (entityType.isEmpty || entityId.isEmpty)),
- "the parameter reversed can only be used with both entityType and entityId specified.")
-
- val table = getTable(appId, channelId)
-
- val scan = HBEventsUtil.createScan(
- startTime = startTime,
- untilTime = untilTime,
- entityType = entityType,
- entityId = entityId,
- eventNames = eventNames,
- targetEntityType = targetEntityType,
- targetEntityId = targetEntityId,
- reversed = reversed)
- val scanner = table.getScanner(scan)
- table.close()
-
- val eventsIter = scanner.iterator()
-
- // Get all events if None or Some(-1)
- val results: Iterator[Result] = limit match {
- case Some(-1) => eventsIter
- case None => eventsIter
- case Some(x) => eventsIter.take(x)
- }
-
- val eventsIt = results.map { resultToEvent(_, appId) }
-
- eventsIt
- }
- }
-
-}
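
Note: every LEvents operation here returns a Future, so callers that need a
synchronous answer block with Await. Also note the require() in futureFind:
reversed only works when both entityType and entityId are given, because a
reverse scan needs the start and stop rows swapped. A usage sketch, assuming
an already-constructed HBLEvents (entity values are illustrative):

    import org.apache.predictionio.data.storage.Event
    import org.apache.predictionio.data.storage.hbase.HBLEvents
    import scala.concurrent.Await
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration._

    object LatestEventsDemo {
      // The ten most recent events for one entity, newest first.
      def latest(events: HBLEvents, appId: Int): List[Event] =
        Await.result(
          events.futureFind(
            appId = appId,
            entityType = Some("user"),
            entityId = Some("u1"),
            limit = Some(10),
            reversed = Some(true)),
          10.seconds).toList
    }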
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala
deleted file mode 100644
index 7324fa6..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.predictionio.data.storage.hbase
-
-import org.apache.hadoop.hbase.HBaseConfiguration
-import org.apache.hadoop.hbase.client.{Delete, HTable, Result}
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable
-import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
-import org.apache.hadoop.io.Writable
-import org.apache.hadoop.mapreduce.OutputFormat
-import org.apache.predictionio.data.storage.{Event, PEvents, StorageClientConfig}
-import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
-import org.joda.time.DateTime
-
-class HBPEvents(client: HBClient, config: StorageClientConfig, namespace: String) extends PEvents {
-
- def checkTableExists(appId: Int, channelId: Option[Int]): Unit = {
- if (!client.admin.tableExists(HBEventsUtil.tableName(namespace, appId, channelId))) {
- if (channelId.nonEmpty) {
- logger.error(s"The appId $appId with channelId $channelId does not exist." +
- s" Please use valid appId and channelId.")
- throw new Exception(s"HBase table not found for appId $appId" +
- s" with channelId $channelId.")
- } else {
- logger.error(s"The appId $appId does not exist. Please use valid appId.")
- throw new Exception(s"HBase table not found for appId $appId.")
- }
- }
- }
-
- override
- def find(
- appId: Int,
- channelId: Option[Int] = None,
- startTime: Option[DateTime] = None,
- untilTime: Option[DateTime] = None,
- entityType: Option[String] = None,
- entityId: Option[String] = None,
- eventNames: Option[Seq[String]] = None,
- targetEntityType: Option[Option[String]] = None,
- targetEntityId: Option[Option[String]] = None
- )(sc: SparkContext): RDD[Event] = {
-
- checkTableExists(appId, channelId)
-
- val conf = HBaseConfiguration.create()
- conf.set(TableInputFormat.INPUT_TABLE,
- HBEventsUtil.tableName(namespace, appId, channelId))
-
- val scan = HBEventsUtil.createScan(
- startTime = startTime,
- untilTime = untilTime,
- entityType = entityType,
- entityId = entityId,
- eventNames = eventNames,
- targetEntityType = targetEntityType,
- targetEntityId = targetEntityId,
- reversed = None)
- scan.setCaching(500) // TODO
- scan.setCacheBlocks(false) // TODO
-
- conf.set(TableInputFormat.SCAN, PIOHBaseUtil.convertScanToString(scan))
-
- // HBase is not accessed until this rdd is actually used.
- val rdd = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
- classOf[ImmutableBytesWritable],
- classOf[Result]).map {
- case (key, row) => HBEventsUtil.resultToEvent(row, appId)
- }
-
- rdd
- }
-
- override
- def write(
- events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
-
- checkTableExists(appId, channelId)
-
- val conf = HBaseConfiguration.create()
- conf.set(TableOutputFormat.OUTPUT_TABLE,
- HBEventsUtil.tableName(namespace, appId, channelId))
- conf.setClass("mapreduce.outputformat.class",
- classOf[TableOutputFormat[Object]],
- classOf[OutputFormat[Object, Writable]])
-
- events.map { event =>
- val (put, rowKey) = HBEventsUtil.eventToPut(event, appId)
- (new ImmutableBytesWritable(rowKey.toBytes), put)
- }.saveAsNewAPIHadoopDataset(conf)
-
- }
-
- def delete(
- eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
-
- checkTableExists(appId, channelId)
-
- val tableName = HBEventsUtil.tableName(namespace, appId, channelId)
-
- eventIds.foreachPartition{ iter =>
- val conf = HBaseConfiguration.create()
- conf.set(TableOutputFormat.OUTPUT_TABLE,
- tableName)
-
- val table = new HTable(conf, tableName)
- iter.foreach { id =>
- val rowKey = HBEventsUtil.RowKey(id)
- val delete = new Delete(rowKey.b)
- table.delete(delete)
- }
-      table.close()
- }
- }
-}
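
Note: find builds the same scan as the local-events path, serializes it into
the Hadoop configuration via PIOHBaseUtil.convertScanToString, and hands it
to TableInputFormat, so the HBase scan runs inside each Spark partition
rather than on the driver. A usage sketch, assuming an already-constructed
HBPEvents (event name is illustrative):

    import org.apache.predictionio.data.storage.hbase.HBPEvents
    import org.apache.spark.{SparkConf, SparkContext}

    object PEventsDemo {
      // Count all "rate" events for an app; nothing is scanned until count().
      def countRatings(pevents: HBPEvents, appId: Int): Long = {
        val sc = new SparkContext(new SparkConf().setAppName("pevents-demo"))
        try pevents.find(appId = appId, eventNames = Some(Seq("rate")))(sc).count()
        finally sc.stop()
      }
    }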
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala
deleted file mode 100644
index 745fcb9..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.predictionio.data.storage.hbase
-
-import org.apache.hadoop.hbase.client.Scan
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil
-import org.apache.hadoop.hbase.util.Base64
-
-object PIOHBaseUtil {
- /*
- * Copying this from Apache HBase because of its restrictive scope in 0.98.x
- */
- def convertScanToString(scan: Scan): String = {
- val proto = ProtobufUtil.toScan(scan)
- Base64.encodeBytes(proto.toByteArray)
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala
deleted file mode 100644
index 1720410..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hbase
-
-import org.apache.predictionio.data.storage.BaseStorageClient
-import org.apache.predictionio.data.storage.StorageClientConfig
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.HBaseConfiguration
-import org.apache.hadoop.hbase.MasterNotRunningException
-import org.apache.hadoop.hbase.ZooKeeperConnectionException
-import org.apache.hadoop.hbase.client.HConnectionManager
-import org.apache.hadoop.hbase.client.HConnection
-import org.apache.hadoop.hbase.client.HBaseAdmin
-
-import grizzled.slf4j.Logging
-
-case class HBClient(
- val conf: Configuration,
- val connection: HConnection,
- val admin: HBaseAdmin
-)
-
-class StorageClient(val config: StorageClientConfig)
- extends BaseStorageClient with Logging {
-
- val conf = HBaseConfiguration.create()
-
- if (config.test) {
- // use fewer retries and shorter timeout for test mode
- conf.set("hbase.client.retries.number", "1")
- conf.set("zookeeper.session.timeout", "30000");
- conf.set("zookeeper.recovery.retry", "1")
- }
-
- try {
- HBaseAdmin.checkHBaseAvailable(conf)
- } catch {
- case e: MasterNotRunningException =>
- error("HBase master is not running (ZooKeeper ensemble: " +
- conf.get("hbase.zookeeper.quorum") + "). Please make sure that HBase " +
- "is running properly, and that the configuration is pointing at the " +
- "correct ZooKeeper ensemble.")
- throw e
- case e: ZooKeeperConnectionException =>
- error("Cannot connect to ZooKeeper (ZooKeeper ensemble: " +
- conf.get("hbase.zookeeper.quorum") + "). Please make sure that the " +
- "configuration is pointing at the correct ZooKeeper ensemble. By " +
- "default, HBase manages its own ZooKeeper, so if you have not " +
- "configured HBase to use an external ZooKeeper, that means your " +
- "HBase is not started or configured properly.")
- throw e
- case e: Exception => {
- error("Failed to connect to HBase." +
- " Please check if HBase is running properly.")
- throw e
- }
- }
-
- val connection = HConnectionManager.createConnection(conf)
-
- val client = HBClient(
- conf = conf,
- connection = connection,
- admin = new HBaseAdmin(connection)
- )
-
- override
- val prefix = "HB"
-}
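HConnectionManager, HConnection and the HBaseAdmin(HConnection) constructor used above were deprecated in HBase 1.0 in favor of ConnectionFactory and the Connection/Admin interfaces. A hedged sketch of the equivalent setup against the newer API:

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory}

    val conf = HBaseConfiguration.create()
    val connection: Connection = ConnectionFactory.createConnection(conf)
    val admin: Admin = connection.getAdmin
    try {
      // Rough equivalent of HBaseAdmin.checkHBaseAvailable: any metadata
      // call fails fast if the master is unreachable.
      admin.listTableNames()
    } finally {
      admin.close()
      connection.close()
    }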
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala
deleted file mode 100644
index 49bf031..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage
-
-/** HBase implementation of storage traits, supporting event data only
- *
- * @group Implementation
- */
-package object hbase {}
[5/7] incubator-predictionio git commit: [PIO-49] Add support for Elasticsearch 5
Posted by do...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala
deleted file mode 100644
index cc07fa4..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hbase.upgrade
-
-import org.apache.predictionio.annotation.Experimental
-
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.EventValidation
-import org.apache.predictionio.data.storage.DataMap
-
-import org.apache.hadoop.hbase.client.Scan
-import org.apache.hadoop.hbase.client.HConnection
-import org.apache.hadoop.hbase.client.Result
-import org.apache.hadoop.hbase.TableName
-import org.apache.hadoop.hbase.util.Bytes
-
-import org.joda.time.DateTime
-import org.joda.time.DateTimeZone
-
-import org.json4s.DefaultFormats
-import org.json4s.JObject
-import org.json4s.native.Serialization.{ read, write }
-
-import org.apache.commons.codec.binary.Base64
-
-import scala.collection.JavaConversions._
-
-/** :: Experimental :: */
-@Experimental
-object HB_0_8_0 {
-
- implicit val formats = DefaultFormats
-
- def getByAppId(
- connection: HConnection,
- namespace: String,
- appId: Int): Iterator[Event] = {
- val tableName = TableName.valueOf(namespace, "events")
- val table = connection.getTable(tableName)
- val start = PartialRowKey(appId)
- val stop = PartialRowKey(appId + 1)
- val scan = new Scan(start.toBytes, stop.toBytes)
- val scanner = table.getScanner(scan)
- table.close()
- scanner.iterator().map { resultToEvent(_) }
- }
-
- val colNames: Map[String, Array[Byte]] = Map(
- "event" -> "e",
- "entityType" -> "ety",
- "entityId" -> "eid",
- "targetEntityType" -> "tety",
- "targetEntityId" -> "teid",
- "properties" -> "p",
- "prId" -> "pk", // columna name is 'pk' in 0.8.0/0.8.1
- "eventTimeZone" -> "etz",
- "creationTimeZone" -> "ctz"
- ).mapValues(Bytes.toBytes(_))
-
-
- class RowKey(
- val appId: Int,
- val millis: Long,
- val uuidLow: Long
- ) {
- lazy val toBytes: Array[Byte] = {
- // Append the UUID's least significant bits to disambiguate multiple
- // actions occurring in the same millisecond. (The UUID's most
- // significant bits encode a timestamp; eventTime is used for ordering instead.)
- Bytes.toBytes(appId) ++ Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow)
- }
- override def toString: String = {
- Base64.encodeBase64URLSafeString(toBytes)
- }
- }
-
- object RowKey {
- // get RowKey from string representation
- def apply(s: String): RowKey = {
- try {
- apply(Base64.decodeBase64(s))
- } catch {
- case e: Exception => throw new RowKeyException(
- s"Failed to convert String ${s} to RowKey because ${e}", e)
- }
- }
-
- def apply(b: Array[Byte]): RowKey = {
- if (b.size != 20) {
- val bString = b.mkString(",")
- throw new RowKeyException(
- s"Incorrect byte array size. Bytes: ${bString}.")
- }
-
- new RowKey(
- appId = Bytes.toInt(b.slice(0, 4)),
- millis = Bytes.toLong(b.slice(4, 12)),
- uuidLow = Bytes.toLong(b.slice(12, 20))
- )
- }
- }
-
- class RowKeyException(msg: String, cause: Exception)
- extends Exception(msg, cause) {
- def this(msg: String) = this(msg, null)
- }
-
- case class PartialRowKey(val appId: Int, val millis: Option[Long] = None) {
- val toBytes: Array[Byte] = {
- Bytes.toBytes(appId) ++
- (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]()))
- }
- }
-
- def resultToEvent(result: Result): Event = {
- val rowKey = RowKey(result.getRow())
-
- val eBytes = Bytes.toBytes("e")
- // val e = result.getFamilyMap(eBytes)
-
- def getStringCol(col: String): String = {
- val r = result.getValue(eBytes, colNames(col))
- require(r != null,
- s"Failed to get value for column ${col}. " +
- s"Rowkey: ${rowKey.toString} " +
- s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.")
-
- Bytes.toString(r)
- }
-
- def getOptStringCol(col: String): Option[String] = {
- val r = result.getValue(eBytes, colNames(col))
- if (r == null) {
- None
- } else {
- Some(Bytes.toString(r))
- }
- }
-
- def getTimestamp(col: String): Long = {
- result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp()
- }
-
- val event = getStringCol("event")
- val entityType = getStringCol("entityType")
- val entityId = getStringCol("entityId")
- val targetEntityType = getOptStringCol("targetEntityType")
- val targetEntityId = getOptStringCol("targetEntityId")
- val properties: DataMap = getOptStringCol("properties")
- .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())
- val prId = getOptStringCol("prId")
- val eventTimeZone = getOptStringCol("eventTimeZone")
- .map(DateTimeZone.forID(_))
- .getOrElse(EventValidation.defaultTimeZone)
- val creationTimeZone = getOptStringCol("creationTimeZone")
- .map(DateTimeZone.forID(_))
- .getOrElse(EventValidation.defaultTimeZone)
-
- val creationTime: DateTime = new DateTime(
- getTimestamp("event"), creationTimeZone
- )
-
- Event(
- eventId = Some(RowKey(result.getRow()).toString),
- event = event,
- entityType = entityType,
- entityId = entityId,
- targetEntityType = targetEntityType,
- targetEntityId = targetEntityId,
- properties = properties,
- eventTime = new DateTime(rowKey.millis, eventTimeZone),
- tags = Seq(),
- prId = prId,
- creationTime = creationTime
- )
- }
-}
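Two things are worth noting in the deleted HB_0_8_0. First, getByAppId closes the table before the lazily consumed scanner iterator is drained, a latent bug in this legacy migration code. Second, the row key is a fixed 20-byte layout, appId (4 bytes) ++ millis (8 bytes) ++ uuidLow (8 bytes), which is what makes the slice-based decoding in RowKey.apply work. A round-trip sketch of that layout:

    import org.apache.hadoop.hbase.util.Bytes

    val appId = 7
    val millis = System.currentTimeMillis()
    val uuidLow = java.util.UUID.randomUUID().getLeastSignificantBits

    val key: Array[Byte] =
      Bytes.toBytes(appId) ++ Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow)
    assert(key.length == 20)

    // Decoding slices the fixed-width fields back out, as RowKey.apply does.
    assert(Bytes.toInt(key.slice(0, 4)) == appId)
    assert(Bytes.toLong(key.slice(4, 12)) == millis)
    assert(Bytes.toLong(key.slice(12, 20)) == uuidLow)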
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala
deleted file mode 100644
index 1759561..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hbase.upgrade
-
-import org.apache.predictionio.annotation.Experimental
-
-import org.apache.predictionio.data.storage.Storage
-import org.apache.predictionio.data.storage.hbase.HBLEvents
-import org.apache.predictionio.data.storage.hbase.HBEventsUtil
-
-import scala.collection.JavaConversions._
-
-/** :: Experimental :: */
-@Experimental
-object Upgrade {
-
- def main(args: Array[String]) {
- val fromAppId = args(0).toInt
- val toAppId = args(1).toInt
- val batchSize = args.lift(2).map(_.toInt).getOrElse(100)
- val fromNamespace = args.lift(3).getOrElse("predictionio_eventdata")
-
- upgrade(fromAppId, toAppId, batchSize, fromNamespace)
- }
-
- /* For upgrade from 0.8.0 or 0.8.1 to 0.8.2 only */
- def upgrade(
- fromAppId: Int,
- toAppId: Int,
- batchSize: Int,
- fromNamespace: String) {
-
- val events = Storage.getLEvents().asInstanceOf[HBLEvents]
-
- // Assumes "pio app new <newapp>" has already been run (the new app exists)
- // TODO: check if new table empty and warn user if not
- val newTable = events.getTable(toAppId)
-
- val newTableName = newTable.getName().getNameAsString()
- println(s"Copying data from ${fromNamespace}:events for app ID ${fromAppId}"
- + s" to new HBase table ${newTableName}...")
-
- HB_0_8_0.getByAppId(
- events.client.connection,
- fromNamespace,
- fromAppId).grouped(batchSize).foreach { eventGroup =>
- val puts = eventGroup.map{ e =>
- val (put, rowkey) = HBEventsUtil.eventToPut(e, toAppId)
- put
- }
- newTable.put(puts.toList)
- }
-
- newTable.flushCommits()
- newTable.close()
- println("Done.")
- }
-
-}
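The copy loop above relies on Iterator.grouped to turn the lazy event stream into fixed-size batches, so HBase receives one put call per batchSize events rather than one per event. In isolation:

    val events: Iterator[Int] = (1 to 10).iterator
    events.grouped(4).foreach { batch =>
      println(s"writing batch of ${batch.size}: ${batch.mkString(",")}")
    }
    // => batches of sizes 4, 4 and 2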
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala
deleted file mode 100644
index de74d46..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hbase.upgrade
-
-import org.apache.predictionio.annotation.Experimental
-
-import grizzled.slf4j.Logger
-import org.apache.predictionio.data.storage.Storage
-import org.apache.predictionio.data.storage.DataMap
-import org.apache.predictionio.data.storage.hbase.HBLEvents
-import org.apache.predictionio.data.storage.hbase.HBEventsUtil
-
-import scala.collection.JavaConversions._
-
-import scala.concurrent._
-import ExecutionContext.Implicits.global
-import org.apache.predictionio.data.storage.LEvents
-import scala.concurrent.Await
-import scala.concurrent.duration.Duration
-import java.lang.Thread
-
-object CheckDistribution {
- def entityType(eventClient: LEvents, appId: Int)
- : Map[(String, Option[String]), Int] = {
- eventClient
- .find(appId = appId)
- .foldLeft(Map[(String, Option[String]), Int]().withDefaultValue(0)) {
- case (m, e) => {
- val k = (e.entityType, e.targetEntityType)
- m.updated(k, m(k) + 1)
- }
- }
- }
-
- def runMain(appId: Int) {
- val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents]
-
- entityType(eventClient, appId)
- .toSeq
- .sortBy(-_._2)
- .foreach { println }
-
- }
-
- def main(args: Array[String]) {
- runMain(args(0).toInt)
- }
-
-}
-
-/** :: Experimental :: */
-@Experimental
-object Upgrade_0_8_3 {
- val NameMap = Map(
- "pio_user" -> "user",
- "pio_item" -> "item")
- val RevNameMap = NameMap.toSeq.map(_.swap).toMap
-
- val logger = Logger[this.type]
-
- def main(args: Array[String]) {
- val fromAppId = args(0).toInt
- val toAppId = args(1).toInt
-
- runMain(fromAppId, toAppId)
- }
-
- def runMain(fromAppId: Int, toAppId: Int): Unit = {
- upgrade(fromAppId, toAppId)
- }
-
-
- val obsEntityTypes = Set("pio_user", "pio_item")
- val obsProperties = Set(
- "pio_itypes", "pio_starttime", "pio_endtime",
- "pio_inactive", "pio_price", "pio_rating")
-
- def hasPIOPrefix(eventClient: LEvents, appId: Int): Boolean = {
- eventClient.find(appId = appId).filter( e =>
- (obsEntityTypes.contains(e.entityType) ||
- e.targetEntityType.map(obsEntityTypes.contains(_)).getOrElse(false) ||
- (!e.properties.keySet.forall(!obsProperties.contains(_)))
- )
- ).hasNext
- }
-
- def isEmpty(eventClient: LEvents, appId: Int): Boolean =
- !eventClient.find(appId = appId).hasNext
-
-
- def upgradeCopy(eventClient: LEvents, fromAppId: Int, toAppId: Int) {
- val fromDist = CheckDistribution.entityType(eventClient, fromAppId)
-
- logger.info("FromAppId Distribution")
- fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) }
-
- val events = eventClient
- .find(appId = fromAppId)
- .zipWithIndex
- .foreach { case (fromEvent, index) => {
- if (index % 50000 == 0) {
- // logger.info(s"Progress: $fromEvent $index")
- logger.info(s"Progress: $index")
- }
-
-
- val fromEntityType = fromEvent.entityType
- val toEntityType = NameMap.getOrElse(fromEntityType, fromEntityType)
-
- val fromTargetEntityType = fromEvent.targetEntityType
- val toTargetEntityType = fromTargetEntityType
- .map { et => NameMap.getOrElse(et, et) }
-
- val toProperties = DataMap(fromEvent.properties.fields.map {
- case (k, v) =>
- val newK = if (obsProperties.contains(k)) {
- val nK = k.stripPrefix("pio_")
- logger.info(s"property ${k} will be renamed to ${nK}")
- nK
- } else k
- (newK, v)
- })
-
- val toEvent = fromEvent.copy(
- entityType = toEntityType,
- targetEntityType = toTargetEntityType,
- properties = toProperties)
-
- eventClient.insert(toEvent, toAppId)
- }}
-
-
- val toDist = CheckDistribution.entityType(eventClient, toAppId)
-
- logger.info("Recap fromAppId Distribution")
- fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) }
-
- logger.info("ToAppId Distribution")
- toDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) }
-
- val fromGood = fromDist
- .toSeq
- .forall { case (k, c) => {
- val (et, tet) = k
- val net = NameMap.getOrElse(et, et)
- val ntet = tet.map(tet => NameMap.getOrElse(tet, tet))
- val nk = (net, ntet)
- val nc = toDist.getOrElse(nk, -1)
- val checkMatch = (c == nc)
- if (!checkMatch) {
- logger.info(s"${k} doesn't match: old has ${c}. new has ${nc}.")
- }
- checkMatch
- }}
-
- val toGood = toDist
- .toSeq
- .forall { case (k, c) => {
- val (et, tet) = k
- val oet = RevNameMap.getOrElse(et, et)
- val otet = tet.map(tet => RevNameMap.getOrElse(tet, tet))
- val ok = (oet, otet)
- val oc = fromDist.getOrElse(ok, -1)
- val checkMatch = (c == oc)
- if (!checkMatch) {
- logger.info(s"${k} doesn't match: new has ${c}. old has ${oc}.")
- }
- checkMatch
- }}
-
- if (!fromGood || !toGood) {
- logger.error("Doesn't match!! There is an import error.")
- } else {
- logger.info("Count matches. Looks like we are good to go.")
- }
- }
-
- /* For upgrade from 0.8.2 to 0.8.3 only */
- def upgrade(fromAppId: Int, toAppId: Int) {
-
- val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents]
-
- require(fromAppId != toAppId,
- s"FromAppId: $fromAppId must be different from toAppId: $toAppId")
-
- if (hasPIOPrefix(eventClient, fromAppId)) {
- require(
- isEmpty(eventClient, toAppId),
- s"Target appId: $toAppId is not empty. Please run " +
- "`pio app data-delete <app_name>` to clean the data before upgrading")
-
- logger.info(s"$fromAppId isEmpty: " + isEmpty(eventClient, fromAppId))
-
- upgradeCopy(eventClient, fromAppId, toAppId)
-
- } else {
- logger.info(s"From appId: ${fromAppId} doesn't contain"
- + s" obsolete entityTypes ${obsEntityTypes} or"
- + s" obsolete properties ${obsProperties}."
- + " No need data migration."
- + s" You can continue to use appId ${fromAppId}.")
- }
-
- logger.info("Done.")
- }
-
-
-}
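The heart of upgradeCopy is the rename step: entity types go through NameMap, and obsolete pio_-prefixed property keys lose their prefix. The same transformation on a plain Map, with illustrative values:

    val obsProperties = Set("pio_itypes", "pio_price", "pio_rating")
    val props = Map("pio_price" -> "9.99", "color" -> "red")

    val renamed = props.map { case (k, v) =>
      val newK = if (obsProperties.contains(k)) k.stripPrefix("pio_") else k
      (newK, v)
    }
    assert(renamed == Map("price" -> "9.99", "color" -> "red"))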
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala
deleted file mode 100644
index 08dfb01..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hdfs
-
-import java.io.IOException
-
-import com.google.common.io.ByteStreams
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.Model
-import org.apache.predictionio.data.storage.Models
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.hadoop.fs.FileSystem
-import org.apache.hadoop.fs.Path
-
-class HDFSModels(fs: FileSystem, config: StorageClientConfig, prefix: String)
- extends Models with Logging {
-
- def insert(i: Model): Unit = {
- try {
- val fsdos = fs.create(new Path(s"$prefix${i.id}"))
- fsdos.write(i.models)
- fsdos.close
- } catch {
- case e: IOException => error(e.getMessage)
- }
- }
-
- def get(id: String): Option[Model] = {
- try {
- val p = new Path(s"$prefix$id")
- Some(Model(
- id = id,
- models = ByteStreams.toByteArray(fs.open(p))))
- } catch {
- case e: Throwable =>
- error(e.getMessage)
- None
- }
- }
-
- def delete(id: String): Unit = {
- val p = new Path(s"$prefix$id")
- if (!fs.delete(p, false)) {
- error(s"Unable to delete ${fs.makeQualified(p).toString}!")
- }
- }
-}
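HDFSModels stores each model as a single file whose name is the prefixed model id. A self-contained round trip through the same FileSystem calls (the path is illustrative):

    import com.google.common.io.ByteStreams
    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, Path}

    val fs = FileSystem.get(new Configuration())
    val p = new Path("/tmp/pio_model_demo")

    val out = fs.create(p)
    try out.write("model-bytes".getBytes("UTF-8")) finally out.close()

    val in = fs.open(p)
    val bytes = try ByteStreams.toByteArray(in) finally in.close()
    assert(new String(bytes, "UTF-8") == "model-bytes")

    // Non-recursive delete, mirroring HDFSModels.delete.
    fs.delete(p, false)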
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala
deleted file mode 100644
index bc57f2a..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.hdfs
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.BaseStorageClient
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.FileSystem
-import org.apache.hadoop.fs.Path
-
-class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
- with Logging {
- override val prefix = "HDFS"
- val conf = new Configuration
- val fs = FileSystem.get(conf)
- fs.setWorkingDirectory(
- new Path(config.properties.getOrElse("PATH", config.properties("HOSTS"))))
- val client = fs
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala b/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala
deleted file mode 100644
index a927d78..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage
-
-/** HDFS implementation of storage traits, supporting model data only
- *
- * @group Implementation
- */
-package object hdfs {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala
deleted file mode 100644
index 437f8ae..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.AccessKey
-import org.apache.predictionio.data.storage.AccessKeys
-import org.apache.predictionio.data.storage.StorageClientConfig
-import scalikejdbc._
-
-import scala.util.Random
-
-/** JDBC implementation of [[AccessKeys]] */
-class JDBCAccessKeys(client: String, config: StorageClientConfig, prefix: String)
- extends AccessKeys with Logging {
- /** Database table name for this data access object */
- val tableName = JDBCUtils.prefixTableName(prefix, "accesskeys")
- DB autoCommit { implicit session =>
- sql"""
- create table if not exists $tableName (
- accesskey varchar(64) not null primary key,
- appid integer not null,
- events text)""".execute().apply()
- }
-
- def insert(accessKey: AccessKey): Option[String] = DB localTx { implicit s =>
- val key = if (accessKey.key.isEmpty) generateKey else accessKey.key
- val events = if (accessKey.events.isEmpty) None else Some(accessKey.events.mkString(","))
- sql"""
- insert into $tableName values(
- $key,
- ${accessKey.appid},
- $events)""".update().apply()
- Some(key)
- }
-
- def get(key: String): Option[AccessKey] = DB readOnly { implicit session =>
- sql"SELECT accesskey, appid, events FROM $tableName WHERE accesskey = $key".
- map(resultToAccessKey).single().apply()
- }
-
- def getAll(): Seq[AccessKey] = DB readOnly { implicit session =>
- sql"SELECT accesskey, appid, events FROM $tableName".map(resultToAccessKey).list().apply()
- }
-
- def getByAppid(appid: Int): Seq[AccessKey] = DB readOnly { implicit session =>
- sql"SELECT accesskey, appid, events FROM $tableName WHERE appid = $appid".
- map(resultToAccessKey).list().apply()
- }
-
- def update(accessKey: AccessKey): Unit = DB localTx { implicit session =>
- val events = if (accessKey.events.isEmpty) None else Some(accessKey.events.mkString(","))
- sql"""
- UPDATE $tableName SET
- appid = ${accessKey.appid},
- events = $events
- WHERE accesskey = ${accessKey.key}""".update().apply()
- }
-
- def delete(key: String): Unit = DB localTx { implicit session =>
- sql"DELETE FROM $tableName WHERE accesskey = $key".update().apply()
- }
-
- /** Convert JDBC results to [[AccessKey]] */
- def resultToAccessKey(rs: WrappedResultSet): AccessKey = {
- AccessKey(
- key = rs.string("accesskey"),
- appid = rs.int("appid"),
- events = rs.stringOpt("events").map(_.split(",").toSeq).getOrElse(Nil))
- }
-}
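All of the JDBC DAOs that follow share one scalikejdbc idiom: DB autoCommit for DDL, DB localTx for writes, DB readOnly for reads, and a resultTo* mapper from WrappedResultSet to the domain case class. A self-contained sketch of that idiom against an in-memory H2 database (the driver and URL are assumptions for the sketch; PredictionIO reads them from its storage configuration):

    import scalikejdbc._

    Class.forName("org.h2.Driver")
    ConnectionPool.singleton("jdbc:h2:mem:pio;DB_CLOSE_DELAY=-1", "sa", "")

    DB autoCommit { implicit session =>
      sql"create table if not exists demo (k varchar(64) primary key, v text)"
        .execute().apply()
    }
    DB localTx { implicit session =>
      sql"insert into demo values ('a', 'b')".update().apply()
    }
    val v: Option[String] = DB readOnly { implicit session =>
      sql"select v from demo where k = 'a'".map(_.string("v")).single().apply()
    }
    assert(v == Some("b"))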
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala
deleted file mode 100644
index 17e6410..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.App
-import org.apache.predictionio.data.storage.Apps
-import org.apache.predictionio.data.storage.StorageClientConfig
-import scalikejdbc._
-
-/** JDBC implementation of [[Apps]] */
-class JDBCApps(client: String, config: StorageClientConfig, prefix: String)
- extends Apps with Logging {
- /** Database table name for this data access object */
- val tableName = JDBCUtils.prefixTableName(prefix, "apps")
- DB autoCommit { implicit session =>
- sql"""
- create table if not exists $tableName (
- id serial not null primary key,
- name text not null,
- description text)""".execute.apply()
- }
-
- def insert(app: App): Option[Int] = DB localTx { implicit session =>
- val q = if (app.id == 0) {
- sql"""
- insert into $tableName (name, description) values(${app.name}, ${app.description})
- """
- } else {
- sql"""
- insert into $tableName values(${app.id}, ${app.name}, ${app.description})
- """
- }
- Some(q.updateAndReturnGeneratedKey().apply().toInt)
- }
-
- def get(id: Int): Option[App] = DB readOnly { implicit session =>
- sql"SELECT id, name, description FROM $tableName WHERE id = ${id}".map(rs =>
- App(
- id = rs.int("id"),
- name = rs.string("name"),
- description = rs.stringOpt("description"))
- ).single().apply()
- }
-
- def getByName(name: String): Option[App] = DB readOnly { implicit session =>
- sql"SELECT id, name, description FROM $tableName WHERE name = ${name}".map(rs =>
- App(
- id = rs.int("id"),
- name = rs.string("name"),
- description = rs.stringOpt("description"))
- ).single().apply()
- }
-
- def getAll(): Seq[App] = DB readOnly { implicit session =>
- sql"SELECT id, name, description FROM $tableName".map(rs =>
- App(
- id = rs.int("id"),
- name = rs.string("name"),
- description = rs.stringOpt("description"))
- ).list().apply()
- }
-
- def update(app: App): Unit = DB localTx { implicit session =>
- sql"""
- update $tableName set name = ${app.name}, description = ${app.description}
- where id = ${app.id}""".update().apply()
- }
-
- def delete(id: Int): Unit = DB localTx { implicit session =>
- sql"DELETE FROM $tableName WHERE id = $id".update().apply()
- }
-}
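JDBCApps.insert distinguishes new apps (id == 0, let the serial column assign one) from restored apps (explicit id), and in both cases reads the id back with updateAndReturnGeneratedKey, so no second query is needed. Continuing the H2 sketch above:

    import scalikejdbc._

    // Assumes the H2 connection pool initialized in the earlier sketch.
    DB autoCommit { implicit session =>
      sql"create table if not exists demo_apps (id serial primary key, name text)"
        .execute().apply()
    }
    val id: Long = DB localTx { implicit session =>
      // Returns the id the database just assigned.
      sql"insert into demo_apps (name) values ('myapp')"
        .updateAndReturnGeneratedKey().apply()
    }
    println(s"new app id: $id")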
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala
deleted file mode 100644
index c9aaca5..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.Channel
-import org.apache.predictionio.data.storage.Channels
-import org.apache.predictionio.data.storage.StorageClientConfig
-import scalikejdbc._
-
-/** JDBC implementation of [[Channels]] */
-class JDBCChannels(client: String, config: StorageClientConfig, prefix: String)
- extends Channels with Logging {
- /** Database table name for this data access object */
- val tableName = JDBCUtils.prefixTableName(prefix, "channels")
- DB autoCommit { implicit session =>
- sql"""
- create table if not exists $tableName (
- id serial not null primary key,
- name text not null,
- appid integer not null)""".execute().apply()
- }
-
- def insert(channel: Channel): Option[Int] = DB localTx { implicit session =>
- val q = if (channel.id == 0) {
- sql"INSERT INTO $tableName (name, appid) VALUES(${channel.name}, ${channel.appid})"
- } else {
- sql"INSERT INTO $tableName VALUES(${channel.id}, ${channel.name}, ${channel.appid})"
- }
- Some(q.updateAndReturnGeneratedKey().apply().toInt)
- }
-
- def get(id: Int): Option[Channel] = DB localTx { implicit session =>
- sql"SELECT id, name, appid FROM $tableName WHERE id = $id".
- map(resultToChannel).single().apply()
- }
-
- def getByAppid(appid: Int): Seq[Channel] = DB localTx { implicit session =>
- sql"SELECT id, name, appid FROM $tableName WHERE appid = $appid".
- map(resultToChannel).list().apply()
- }
-
- def delete(id: Int): Unit = DB localTx { implicit session =>
- sql"DELETE FROM $tableName WHERE id = $id".update().apply()
- }
-
- def resultToChannel(rs: WrappedResultSet): Channel = {
- Channel(
- id = rs.int("id"),
- name = rs.string("name"),
- appid = rs.int("appid"))
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala
deleted file mode 100644
index 13c374d..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.EngineInstance
-import org.apache.predictionio.data.storage.EngineInstances
-import org.apache.predictionio.data.storage.StorageClientConfig
-import scalikejdbc._
-
-/** JDBC implementation of [[EngineInstances]] */
-class JDBCEngineInstances(client: String, config: StorageClientConfig, prefix: String)
- extends EngineInstances with Logging {
- /** Database table name for this data access object */
- val tableName = JDBCUtils.prefixTableName(prefix, "engineinstances")
- DB autoCommit { implicit session =>
- sql"""
- create table if not exists $tableName (
- id varchar(100) not null primary key,
- status text not null,
- startTime timestamp DEFAULT CURRENT_TIMESTAMP,
- endTime timestamp DEFAULT CURRENT_TIMESTAMP,
- engineId text not null,
- engineVersion text not null,
- engineVariant text not null,
- engineFactory text not null,
- batch text not null,
- env text not null,
- sparkConf text not null,
- datasourceParams text not null,
- preparatorParams text not null,
- algorithmsParams text not null,
- servingParams text not null)""".execute().apply()
- }
-
- def insert(i: EngineInstance): String = DB localTx { implicit session =>
- val id = java.util.UUID.randomUUID().toString
- sql"""
- INSERT INTO $tableName VALUES(
- $id,
- ${i.status},
- ${i.startTime},
- ${i.endTime},
- ${i.engineId},
- ${i.engineVersion},
- ${i.engineVariant},
- ${i.engineFactory},
- ${i.batch},
- ${JDBCUtils.mapToString(i.env)},
- ${JDBCUtils.mapToString(i.sparkConf)},
- ${i.dataSourceParams},
- ${i.preparatorParams},
- ${i.algorithmsParams},
- ${i.servingParams})""".update().apply()
- id
- }
-
- def get(id: String): Option[EngineInstance] = DB localTx { implicit session =>
- sql"""
- SELECT
- id,
- status,
- startTime,
- endTime,
- engineId,
- engineVersion,
- engineVariant,
- engineFactory,
- batch,
- env,
- sparkConf,
- datasourceParams,
- preparatorParams,
- algorithmsParams,
- servingParams
- FROM $tableName WHERE id = $id""".map(resultToEngineInstance).
- single().apply()
- }
-
- def getAll(): Seq[EngineInstance] = DB localTx { implicit session =>
- sql"""
- SELECT
- id,
- status,
- startTime,
- endTime,
- engineId,
- engineVersion,
- engineVariant,
- engineFactory,
- batch,
- env,
- sparkConf,
- datasourceParams,
- preparatorParams,
- algorithmsParams,
- servingParams
- FROM $tableName""".map(resultToEngineInstance).list().apply()
- }
-
- def getLatestCompleted(
- engineId: String,
- engineVersion: String,
- engineVariant: String): Option[EngineInstance] =
- getCompleted(engineId, engineVersion, engineVariant).headOption
-
- def getCompleted(
- engineId: String,
- engineVersion: String,
- engineVariant: String): Seq[EngineInstance] = DB localTx { implicit s =>
- sql"""
- SELECT
- id,
- status,
- startTime,
- endTime,
- engineId,
- engineVersion,
- engineVariant,
- engineFactory,
- batch,
- env,
- sparkConf,
- datasourceParams,
- preparatorParams,
- algorithmsParams,
- servingParams
- FROM $tableName
- WHERE
- status = 'COMPLETED' AND
- engineId = $engineId AND
- engineVersion = $engineVersion AND
- engineVariant = $engineVariant
- ORDER BY startTime DESC""".
- map(resultToEngineInstance).list().apply()
- }
-
- def update(i: EngineInstance): Unit = DB localTx { implicit session =>
- sql"""
- update $tableName set
- status = ${i.status},
- startTime = ${i.startTime},
- endTime = ${i.endTime},
- engineId = ${i.engineId},
- engineVersion = ${i.engineVersion},
- engineVariant = ${i.engineVariant},
- engineFactory = ${i.engineFactory},
- batch = ${i.batch},
- env = ${JDBCUtils.mapToString(i.env)},
- sparkConf = ${JDBCUtils.mapToString(i.sparkConf)},
- datasourceParams = ${i.dataSourceParams},
- preparatorParams = ${i.preparatorParams},
- algorithmsParams = ${i.algorithmsParams},
- servingParams = ${i.servingParams}
- where id = ${i.id}""".update().apply()
- }
-
- def delete(id: String): Unit = DB localTx { implicit session =>
- sql"DELETE FROM $tableName WHERE id = $id".update().apply()
- }
-
- /** Convert JDBC results to [[EngineInstance]] */
- def resultToEngineInstance(rs: WrappedResultSet): EngineInstance = {
- EngineInstance(
- id = rs.string("id"),
- status = rs.string("status"),
- startTime = rs.jodaDateTime("startTime"),
- endTime = rs.jodaDateTime("endTime"),
- engineId = rs.string("engineId"),
- engineVersion = rs.string("engineVersion"),
- engineVariant = rs.string("engineVariant"),
- engineFactory = rs.string("engineFactory"),
- batch = rs.string("batch"),
- env = JDBCUtils.stringToMap(rs.string("env")),
- sparkConf = JDBCUtils.stringToMap(rs.string("sparkConf")),
- dataSourceParams = rs.string("datasourceParams"),
- preparatorParams = rs.string("preparatorParams"),
- algorithmsParams = rs.string("algorithmsParams"),
- servingParams = rs.string("servingParams"))
- }
-}
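The env and sparkConf maps are flattened into a single text column through JDBCUtils.mapToString and parsed back with JDBCUtils.stringToMap. A plausible reconstruction of that pair, shown only to make the round trip concrete (the real JDBCUtils may differ in detail):

    // Assumes keys and values contain neither ',' nor '='.
    def mapToString(m: Map[String, String]): String =
      m.map { case (k, v) => s"$k=$v" }.mkString(",")

    def stringToMap(s: String): Map[String, String] =
      if (s.isEmpty) Map.empty
      else s.split(",").map { kv =>
        val Array(k, v) = kv.split("=", 2)
        k -> v
      }.toMap

    val env = Map("PIO_FS_BASEDIR" -> "/tmp", "PIO_HOME" -> "/opt/pio")
    assert(stringToMap(mapToString(env)) == env)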
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala
deleted file mode 100644
index 90eb5f3..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.EvaluationInstance
-import org.apache.predictionio.data.storage.EvaluationInstances
-import org.apache.predictionio.data.storage.StorageClientConfig
-import scalikejdbc._
-
-/** JDBC implementations of [[EvaluationInstances]] */
-class JDBCEvaluationInstances(client: String, config: StorageClientConfig, prefix: String)
- extends EvaluationInstances with Logging {
- /** Database table name for this data access object */
- val tableName = JDBCUtils.prefixTableName(prefix, "evaluationinstances")
- DB autoCommit { implicit session =>
- sql"""
- create table if not exists $tableName (
- id varchar(100) not null primary key,
- status text not null,
- startTime timestamp DEFAULT CURRENT_TIMESTAMP,
- endTime timestamp DEFAULT CURRENT_TIMESTAMP,
- evaluationClass text not null,
- engineParamsGeneratorClass text not null,
- batch text not null,
- env text not null,
- sparkConf text not null,
- evaluatorResults text not null,
- evaluatorResultsHTML text not null,
- evaluatorResultsJSON text)""".execute().apply()
- }
-
- def insert(i: EvaluationInstance): String = DB localTx { implicit session =>
- val id = java.util.UUID.randomUUID().toString
- sql"""
- INSERT INTO $tableName VALUES(
- $id,
- ${i.status},
- ${i.startTime},
- ${i.endTime},
- ${i.evaluationClass},
- ${i.engineParamsGeneratorClass},
- ${i.batch},
- ${JDBCUtils.mapToString(i.env)},
- ${JDBCUtils.mapToString(i.sparkConf)},
- ${i.evaluatorResults},
- ${i.evaluatorResultsHTML},
- ${i.evaluatorResultsJSON})""".update().apply()
- id
- }
-
- def get(id: String): Option[EvaluationInstance] = DB localTx { implicit session =>
- sql"""
- SELECT
- id,
- status,
- startTime,
- endTime,
- evaluationClass,
- engineParamsGeneratorClass,
- batch,
- env,
- sparkConf,
- evaluatorResults,
- evaluatorResultsHTML,
- evaluatorResultsJSON
- FROM $tableName WHERE id = $id
- """.map(resultToEvaluationInstance).single().apply()
- }
-
- def getAll(): Seq[EvaluationInstance] = DB localTx { implicit session =>
- sql"""
- SELECT
- id,
- status,
- startTime,
- endTime,
- evaluationClass,
- engineParamsGeneratorClass,
- batch,
- env,
- sparkConf,
- evaluatorResults,
- evaluatorResultsHTML,
- evaluatorResultsJSON
- FROM $tableName
- """.map(resultToEvaluationInstance).list().apply()
- }
-
- def getCompleted(): Seq[EvaluationInstance] = DB localTx { implicit s =>
- sql"""
- SELECT
- id,
- status,
- startTime,
- endTime,
- evaluationClass,
- engineParamsGeneratorClass,
- batch,
- env,
- sparkConf,
- evaluatorResults,
- evaluatorResultsHTML,
- evaluatorResultsJSON
- FROM $tableName
- WHERE
- status = 'EVALCOMPLETED'
- ORDER BY starttime DESC
- """.map(resultToEvaluationInstance).list().apply()
- }
-
- def update(i: EvaluationInstance): Unit = DB localTx { implicit session =>
- sql"""
- update $tableName set
- status = ${i.status},
- startTime = ${i.startTime},
- endTime = ${i.endTime},
- evaluationClass = ${i.evaluationClass},
- engineParamsGeneratorClass = ${i.engineParamsGeneratorClass},
- batch = ${i.batch},
- env = ${JDBCUtils.mapToString(i.env)},
- sparkConf = ${JDBCUtils.mapToString(i.sparkConf)},
- evaluatorResults = ${i.evaluatorResults},
- evaluatorResultsHTML = ${i.evaluatorResultsHTML},
- evaluatorResultsJSON = ${i.evaluatorResultsJSON}
- where id = ${i.id}""".update().apply()
- }
-
- def delete(id: String): Unit = DB localTx { implicit session =>
- sql"DELETE FROM $tableName WHERE id = $id".update().apply()
- }
-
- /** Convert JDBC results to [[EvaluationInstance]] */
- def resultToEvaluationInstance(rs: WrappedResultSet): EvaluationInstance = {
- EvaluationInstance(
- id = rs.string("id"),
- status = rs.string("status"),
- startTime = rs.jodaDateTime("startTime"),
- endTime = rs.jodaDateTime("endTime"),
- evaluationClass = rs.string("evaluationClass"),
- engineParamsGeneratorClass = rs.string("engineParamsGeneratorClass"),
- batch = rs.string("batch"),
- env = JDBCUtils.stringToMap(rs.string("env")),
- sparkConf = JDBCUtils.stringToMap(rs.string("sparkConf")),
- evaluatorResults = rs.string("evaluatorResults"),
- evaluatorResultsHTML = rs.string("evaluatorResultsHTML"),
- evaluatorResultsJSON = rs.string("evaluatorResultsJSON"))
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala
deleted file mode 100644
index dddef67..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.DataMap
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.LEvents
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.joda.time.DateTime
-import org.joda.time.DateTimeZone
-import org.json4s.JObject
-import org.json4s.native.Serialization.read
-import org.json4s.native.Serialization.write
-import scalikejdbc._
-
-import scala.concurrent.ExecutionContext
-import scala.concurrent.Future
-
-/** JDBC implementation of [[LEvents]] */
-class JDBCLEvents(
- client: String,
- config: StorageClientConfig,
- namespace: String) extends LEvents with Logging {
- implicit private val formats = org.json4s.DefaultFormats
-
- def init(appId: Int, channelId: Option[Int] = None): Boolean = {
-
- // Indexed columns must be VARCHAR of at most 255 characters, not TEXT
- val useIndex = config.properties.contains("INDEX") &&
- config.properties("INDEX").equalsIgnoreCase("enabled")
-
- val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
- val entityIdIndexName = s"idx_${tableName}_ei"
- val entityTypeIndexName = s"idx_${tableName}_et"
- DB autoCommit { implicit session =>
- if (useIndex) {
- SQL(s"""
- create table if not exists $tableName (
- id varchar(32) not null primary key,
- event varchar(255) not null,
- entityType varchar(255) not null,
- entityId varchar(255) not null,
- targetEntityType text,
- targetEntityId text,
- properties text,
- eventTime timestamp DEFAULT CURRENT_TIMESTAMP,
- eventTimeZone varchar(50) not null,
- tags text,
- prId text,
- creationTime timestamp DEFAULT CURRENT_TIMESTAMP,
- creationTimeZone varchar(50) not null)""").execute().apply()
-
- // create index
- SQL(s"create index $entityIdIndexName on $tableName (entityId)").execute().apply()
- SQL(s"create index $entityTypeIndexName on $tableName (entityType)").execute().apply()
- } else {
- SQL(s"""
- create table if not exists $tableName (
- id varchar(32) not null primary key,
- event text not null,
- entityType text not null,
- entityId text not null,
- targetEntityType text,
- targetEntityId text,
- properties text,
- eventTime timestamp DEFAULT CURRENT_TIMESTAMP,
- eventTimeZone varchar(50) not null,
- tags text,
- prId text,
- creationTime timestamp DEFAULT CURRENT_TIMESTAMP,
- creationTimeZone varchar(50) not null)""").execute().apply()
- }
- true
- }
- }
-
- def remove(appId: Int, channelId: Option[Int] = None): Boolean =
- DB autoCommit { implicit session =>
- SQL(s"""
- drop table ${JDBCUtils.eventTableName(namespace, appId, channelId)}
- """).execute().apply()
- true
- }
-
- def close(): Unit = ConnectionPool.closeAll()
-
- def futureInsert(event: Event, appId: Int, channelId: Option[Int])(
- implicit ec: ExecutionContext): Future[String] = Future {
- DB localTx { implicit session =>
- val id = event.eventId.getOrElse(JDBCUtils.generateId)
- val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
- sql"""
- insert into $tableName values(
- $id,
- ${event.event},
- ${event.entityType},
- ${event.entityId},
- ${event.targetEntityType},
- ${event.targetEntityId},
- ${write(event.properties.toJObject)},
- ${event.eventTime},
- ${event.eventTime.getZone.getID},
- ${if (event.tags.nonEmpty) Some(event.tags.mkString(",")) else None},
- ${event.prId},
- ${event.creationTime},
- ${event.creationTime.getZone.getID}
- )
- """.update().apply()
- id
- }
- }
-
- def futureGet(eventId: String, appId: Int, channelId: Option[Int])(
- implicit ec: ExecutionContext): Future[Option[Event]] = Future {
- DB readOnly { implicit session =>
- val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
- sql"""
- select
- id,
- event,
- entityType,
- entityId,
- targetEntityType,
- targetEntityId,
- properties,
- eventTime,
- eventTimeZone,
- tags,
- prId,
- creationTime,
- creationTimeZone
- from $tableName
- where id = $eventId
- """.map(resultToEvent).single().apply()
- }
- }
-
- def futureDelete(eventId: String, appId: Int, channelId: Option[Int])(
- implicit ec: ExecutionContext): Future[Boolean] = Future {
- DB localTx { implicit session =>
- val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
- sql"""
- delete from $tableName where id = $eventId
- """.update().apply()
- true
- }
- }
-
- def futureFind(
- appId: Int,
- channelId: Option[Int] = None,
- startTime: Option[DateTime] = None,
- untilTime: Option[DateTime] = None,
- entityType: Option[String] = None,
- entityId: Option[String] = None,
- eventNames: Option[Seq[String]] = None,
- targetEntityType: Option[Option[String]] = None,
- targetEntityId: Option[Option[String]] = None,
- limit: Option[Int] = None,
- reversed: Option[Boolean] = None
- )(implicit ec: ExecutionContext): Future[Iterator[Event]] = Future {
- DB readOnly { implicit session =>
- val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))
- val whereClause = sqls.toAndConditionOpt(
- startTime.map(x => sqls"eventTime >= $x"),
- untilTime.map(x => sqls"eventTime < $x"),
- entityType.map(x => sqls"entityType = $x"),
- entityId.map(x => sqls"entityId = $x"),
- eventNames.map(x =>
- sqls.toOrConditionOpt(x.map(y =>
- Some(sqls"event = $y")
- ): _*)
- ).getOrElse(None),
- targetEntityType.map(x => x.map(y => sqls"targetEntityType = $y")
- .getOrElse(sqls"targetEntityType IS NULL")),
- targetEntityId.map(x => x.map(y => sqls"targetEntityId = $y")
- .getOrElse(sqls"targetEntityId IS NULL"))
- ).map(sqls.where(_)).getOrElse(sqls"")
- val orderByClause = reversed.map(x =>
- if (x) sqls"eventTime desc" else sqls"eventTime asc"
- ).getOrElse(sqls"eventTime asc")
- val limitClause = limit.map(x =>
- if (x < 0) sqls"" else sqls.limit(x)
- ).getOrElse(sqls"")
- val q = sql"""
- select
- id,
- event,
- entityType,
- entityId,
- targetEntityType,
- targetEntityId,
- properties,
- eventTime,
- eventTimeZone,
- tags,
- prId,
- creationTime,
- creationTimeZone
- from $tableName
- $whereClause
- order by $orderByClause
- $limitClause
- """
- q.map(resultToEvent).list().apply().toIterator
- }
- }
-
- private[predictionio] def resultToEvent(rs: WrappedResultSet): Event = {
- Event(
- eventId = rs.stringOpt("id"),
- event = rs.string("event"),
- entityType = rs.string("entityType"),
- entityId = rs.string("entityId"),
- targetEntityType = rs.stringOpt("targetEntityType"),
- targetEntityId = rs.stringOpt("targetEntityId"),
- properties = rs.stringOpt("properties").map(p =>
- DataMap(read[JObject](p))).getOrElse(DataMap()),
- eventTime = new DateTime(rs.jodaDateTime("eventTime"),
- DateTimeZone.forID(rs.string("eventTimeZone"))),
- tags = rs.stringOpt("tags").map(t => t.split(",").toList).getOrElse(Nil),
- prId = rs.stringOpt("prId"),
- creationTime = new DateTime(rs.jodaDateTime("creationTime"),
- DateTimeZone.forID(rs.string("creationTimeZone")))
- )
- }
-}
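
Note: resultToEvent pairs each timestamp column with a zone-ID column because a
SQL timestamp alone does not preserve the original time zone. A minimal sketch
of that round trip, with assumed values (not part of the original code):

import org.joda.time.{DateTime, DateTimeZone}

object TimeZoneRoundTrip extends App {
  // What futureInsert persists: the instant and its zone ID, in two columns.
  val eventTime = new DateTime(2017, 3, 8, 7, 45, DateTimeZone.forID("Asia/Tokyo"))
  val storedMillis = eventTime.getMillis      // goes into the eventTime column
  val storedZoneId = eventTime.getZone.getID  // goes into eventTimeZone

  // What resultToEvent rebuilds from the two columns.
  val restored = new DateTime(storedMillis, DateTimeZone.forID(storedZoneId))
  println(restored == eventTime)              // true: same instant, same zone
}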
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala
deleted file mode 100644
index b48502a..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.Model
-import org.apache.predictionio.data.storage.Models
-import org.apache.predictionio.data.storage.StorageClientConfig
-import scalikejdbc._
-
-/** JDBC implementation of [[Models]] */
-class JDBCModels(client: String, config: StorageClientConfig, prefix: String)
- extends Models with Logging {
- /** Database table name for this data access object */
- val tableName = JDBCUtils.prefixTableName(prefix, "models")
-
- /** Determines binary column type based on JDBC driver type */
- val binaryColumnType = JDBCUtils.binaryColumnType(client)
- DB autoCommit { implicit session =>
- sql"""
- create table if not exists $tableName (
- id varchar(100) not null primary key,
- models $binaryColumnType not null)""".execute().apply()
- }
-
- def insert(i: Model): Unit = DB localTx { implicit session =>
- sql"insert into $tableName values(${i.id}, ${i.models})".update().apply()
- }
-
- def get(id: String): Option[Model] = DB readOnly { implicit session =>
- sql"select id, models from $tableName where id = $id".map { r =>
- Model(id = r.string("id"), models = r.bytes("models"))
- }.single().apply()
- }
-
- def delete(id: String): Unit = DB localTx { implicit session =>
- sql"delete from $tableName where id = $id".execute().apply()
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
deleted file mode 100644
index 2e6ee83..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import java.sql.{DriverManager, ResultSet}
-
-import com.github.nscala_time.time.Imports._
-import org.apache.predictionio.data.storage.{DataMap, Event, PEvents, StorageClientConfig}
-import org.apache.spark.SparkContext
-import org.apache.spark.rdd.{JdbcRDD, RDD}
-import org.apache.spark.sql.{SQLContext, SaveMode}
-import org.json4s.JObject
-import org.json4s.native.Serialization
-import scalikejdbc._
-
-/** JDBC implementation of [[PEvents]] */
-class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents {
- @transient private implicit lazy val formats = org.json4s.DefaultFormats
- def find(
- appId: Int,
- channelId: Option[Int] = None,
- startTime: Option[DateTime] = None,
- untilTime: Option[DateTime] = None,
- entityType: Option[String] = None,
- entityId: Option[String] = None,
- eventNames: Option[Seq[String]] = None,
- targetEntityType: Option[Option[String]] = None,
- targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {
- val lower = startTime.map(_.getMillis).getOrElse(0.toLong)
- /** Change the default upper bound from +100 years to +1 year because
- * MySQL's FROM_UNIXTIME(t) returns NULL when given a +100-year timestamp.
- */
- val upper = untilTime.map(_.getMillis).getOrElse((DateTime.now + 1.years).getMillis)
- val par = scala.math.min(
- new Duration(upper - lower).getStandardDays,
- config.properties.getOrElse("PARTITIONS", "4").toLong).toInt
- val entityTypeClause = entityType.map(x => s"and entityType = '$x'").getOrElse("")
- val entityIdClause = entityId.map(x => s"and entityId = '$x'").getOrElse("")
- val eventNamesClause =
- eventNames.map("and (" + _.map(y => s"event = '$y'").mkString(" or ") + ")").getOrElse("")
- val targetEntityTypeClause = targetEntityType.map(
- _.map(x => s"and targetEntityType = '$x'"
- ).getOrElse("and targetEntityType is null")).getOrElse("")
- val targetEntityIdClause = targetEntityId.map(
- _.map(x => s"and targetEntityId = '$x'"
- ).getOrElse("and targetEntityId is null")).getOrElse("")
- val q = s"""
- select
- id,
- event,
- entityType,
- entityId,
- targetEntityType,
- targetEntityId,
- properties,
- eventTime,
- eventTimeZone,
- tags,
- prId,
- creationTime,
- creationTimeZone
- from ${JDBCUtils.eventTableName(namespace, appId, channelId)}
- where
- eventTime >= ${JDBCUtils.timestampFunction(client)}(?) and
- eventTime < ${JDBCUtils.timestampFunction(client)}(?)
- $entityTypeClause
- $entityIdClause
- $eventNamesClause
- $targetEntityTypeClause
- $targetEntityIdClause
- """.replace("\n", " ")
- new JdbcRDD(
- sc,
- () => {
- DriverManager.getConnection(
- client,
- config.properties("USERNAME"),
- config.properties("PASSWORD"))
- },
- q,
- lower / 1000,
- upper / 1000,
- par,
- (r: ResultSet) => {
- Event(
- eventId = Option(r.getString("id")),
- event = r.getString("event"),
- entityType = r.getString("entityType"),
- entityId = r.getString("entityId"),
- targetEntityType = Option(r.getString("targetEntityType")),
- targetEntityId = Option(r.getString("targetEntityId")),
- properties = Option(r.getString("properties")).map(x =>
- DataMap(Serialization.read[JObject](x))).getOrElse(DataMap()),
- eventTime = new DateTime(r.getTimestamp("eventTime").getTime,
- DateTimeZone.forID(r.getString("eventTimeZone"))),
- tags = Option(r.getString("tags")).map(x =>
- x.split(",").toList).getOrElse(Nil),
- prId = Option(r.getString("prId")),
- creationTime = new DateTime(r.getTimestamp("creationTime").getTime,
- DateTimeZone.forID(r.getString("creationTimeZone"))))
- }).cache()
- }
-
- def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
- val sqlContext = new SQLContext(sc)
-
- import sqlContext.implicits._
-
- val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
-
- val eventTableColumns = Seq[String](
- "id"
- , "event"
- , "entityType"
- , "entityId"
- , "targetEntityType"
- , "targetEntityId"
- , "properties"
- , "eventTime"
- , "eventTimeZone"
- , "tags"
- , "prId"
- , "creationTime"
- , "creationTimeZone")
-
- val eventDF = events.map(x =>
- Event(eventId = None, event = x.event, entityType = x.entityType,
- entityId = x.entityId, targetEntityType = x.targetEntityType,
- targetEntityId = x.targetEntityId, properties = x.properties,
- eventTime = x.eventTime, tags = x.tags, prId= x.prId,
- creationTime = x.eventTime)
- )
- .map { event =>
- (event.eventId.getOrElse(JDBCUtils.generateId)
- , event.event
- , event.entityType
- , event.entityId
- , event.targetEntityType.orNull
- , event.targetEntityId.orNull
- , if (!event.properties.isEmpty) Serialization.write(event.properties.toJObject) else null
- , new java.sql.Timestamp(event.eventTime.getMillis)
- , event.eventTime.getZone.getID
- , if (event.tags.nonEmpty) event.tags.mkString(",") else null
- , event.prId
- , new java.sql.Timestamp(event.creationTime.getMillis)
- , event.creationTime.getZone.getID)
- }.toDF(eventTableColumns:_*)
-
- // spark version 1.4.0 or higher
- val prop = new java.util.Properties
- prop.setProperty("user", config.properties("USERNAME"))
- prop.setProperty("password", config.properties("PASSWORD"))
- eventDF.write.mode(SaveMode.Append).jdbc(client, tableName, prop)
- }
-
- def delete(eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
-
- eventIds.foreachPartition{ iter =>
-
- iter.foreach { eventId =>
- DB localTx { implicit session =>
- val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
- val table = SQLSyntax.createUnsafely(tableName)
- sql"""
- delete from $table where id = $eventId
- """.update().apply()
- true
- }
- }
- }
- }
-}
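
Note: find() hands JdbcRDD its range bounds in UNIX seconds and splits the scan
into at most PARTITIONS partitions (default 4), never more than one per whole
day in the range. A sketch of that arithmetic under assumed bounds:

import org.joda.time.{DateTime, Duration}

object PartitionCount extends App {
  // Assumed query window: 10 days.
  val lower = new DateTime(2017, 3, 1, 0, 0).getMillis
  val upper = new DateTime(2017, 3, 11, 0, 0).getMillis
  val configured = 4L  // default of the PARTITIONS property

  val par = scala.math.min(
    new Duration(upper - lower).getStandardDays,  // 10 whole days
    configured).toInt
  println(par)  // 4: capped by PARTITIONS

  // JdbcRDD then binds lower / 1000 and upper / 1000 (UNIX seconds), which the
  // driver-specific timestamp function converts back to SQL timestamps.
}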
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala
deleted file mode 100644
index 3eb55ba..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import scalikejdbc._
-
-/** JDBC related utilities */
-object JDBCUtils {
- /** Extract JDBC driver type from URL
- *
- * @param url JDBC URL
- * @return The driver type, e.g. postgresql
- */
- def driverType(url: String): String = {
- val capture = """jdbc:([^:]+):""".r
- capture findFirstIn url match {
- case Some(capture(driverType)) => driverType
- case None => ""
- }
- }
-
- /** Determines binary column type from JDBC URL
- *
- * @param url JDBC URL
- * @return Binary column type as SQLSyntax, e.g. LONGBLOB
- */
- def binaryColumnType(url: String): SQLSyntax = {
- driverType(url) match {
- case "postgresql" => sqls"bytea"
- case "mysql" => sqls"longblob"
- case _ => sqls"longblob"
- }
- }
-
- /** Determines UNIX timestamp conversion function from JDBC URL
- *
- * @param url JDBC URL
- * @return Timestamp conversion function, e.g. TO_TIMESTAMP
- */
- def timestampFunction(url: String): String = {
- driverType(url) match {
- case "postgresql" => "to_timestamp"
- case "mysql" => "from_unixtime"
- case _ => "from_unixtime"
- }
- }
-
- /** Converts Map of String to String to comma-separated list of key=value
- *
- * @param m Map of String to String
- * @return Comma-separated list, e.g. FOO=BAR,X=Y,...
- */
- def mapToString(m: Map[String, String]): String = {
- m.map(t => s"${t._1}=${t._2}").mkString(",")
- }
-
- /** Inverse of mapToString
- *
- * @param str Comma-separated list, e.g. FOO=BAR,X=Y,...
- * @return Map of String to String, e.g. Map("FOO" -> "BAR", "X" -> "Y", ...)
- */
- def stringToMap(str: String): Map[String, String] = {
- str.split(",").map { x =>
- val y = x.split("=")
- y(0) -> y(1)
- }.toMap[String, String]
- }
-
- /** Generate 32-character random ID using UUID with - stripped */
- def generateId: String = java.util.UUID.randomUUID().toString.replace("-", "")
-
- /** Prefix a table name
- *
- * @param prefix Table prefix
- * @param table Table name
- * @return Prefixed table name
- */
- def prefixTableName(prefix: String, table: String): SQLSyntax =
- sqls.createUnsafely(s"${prefix}_$table")
-
- /** Derive event table name
- *
- * @param namespace Namespace of event tables
- * @param appId App ID
- * @param channelId Optional channel ID
- * @return Full event table name
- */
- def eventTableName(namespace: String, appId: Int, channelId: Option[Int]): String =
- s"${namespace}_${appId}${channelId.map("_" + _).getOrElse("")}"
-}
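
Note: the helpers above are pure string manipulation, so a few illustrative
calls pin down their behavior (values assumed for the example):

import org.apache.predictionio.data.storage.jdbc.JDBCUtils

object JDBCUtilsExamples extends App {
  // Driver type is the token between "jdbc:" and the next colon.
  println(JDBCUtils.driverType("jdbc:postgresql://localhost/pio"))  // postgresql

  // The binary column type follows the driver.
  println(JDBCUtils.binaryColumnType("jdbc:mysql://localhost/pio").value)  // longblob

  // Event tables are named <namespace>_<appId>[_<channelId>].
  println(JDBCUtils.eventTableName("pio_event", 3, None))     // pio_event_3
  println(JDBCUtils.eventTableName("pio_event", 3, Some(7)))  // pio_event_3_7

  // mapToString and stringToMap are inverses for simple values.
  val m = Map("FOO" -> "BAR", "X" -> "Y")
  println(JDBCUtils.stringToMap(JDBCUtils.mapToString(m)) == m)  // true
}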
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala
deleted file mode 100644
index 661e05e..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.jdbc
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.BaseStorageClient
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.predictionio.data.storage.StorageClientException
-import scalikejdbc._
-
-/** JDBC implementation of [[BaseStorageClient]] */
-class StorageClient(val config: StorageClientConfig)
- extends BaseStorageClient with Logging {
- override val prefix = "JDBC"
-
- if (!config.properties.contains("URL")) {
- throw new StorageClientException("The URL variable is not set!", null)
- }
- if (!config.properties.contains("USERNAME")) {
- throw new StorageClientException("The USERNAME variable is not set!", null)
- }
- if (!config.properties.contains("PASSWORD")) {
- throw new StorageClientException("The PASSWORD variable is not set!", null)
- }
-
- // set max size of connection pool
- val maxSize: Int = config.properties.getOrElse("CONNECTIONS", "8").toInt
- val settings = ConnectionPoolSettings(maxSize = maxSize)
-
- ConnectionPool.singleton(
- config.properties("URL"),
- config.properties("USERNAME"),
- config.properties("PASSWORD"),
- settings)
- /** JDBC connection URL. Connections are managed by ScalikeJDBC. */
- val client = config.properties("URL")
-}
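
Note: the client refuses to start without the three required properties;
CONNECTIONS is optional. The keys it reads, shown with placeholder values:

object JdbcClientConfig {
  // Placeholder values; real settings come from PIO's storage configuration.
  val properties = Map(
    "URL"         -> "jdbc:postgresql://localhost/pio", // required
    "USERNAME"    -> "pio",                             // required
    "PASSWORD"    -> "pio",                             // required
    "CONNECTIONS" -> "8")                               // optional pool size (default 8)
}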
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala b/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala
deleted file mode 100644
index e552e54..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage
-
-/** JDBC implementation of storage traits, supporting meta data, event data, and
- * model data
- *
- * @group Implementation
- */
-package object jdbc {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala b/data/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala
deleted file mode 100644
index f528af9..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.localfs
-
-import java.io.File
-import java.io.FileNotFoundException
-import java.io.FileOutputStream
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.Model
-import org.apache.predictionio.data.storage.Models
-import org.apache.predictionio.data.storage.StorageClientConfig
-
-import scala.io.Source
-
-class LocalFSModels(f: File, config: StorageClientConfig, prefix: String)
- extends Models with Logging {
-
- def insert(i: Model): Unit = {
- try {
- val fos = new FileOutputStream(new File(f, s"${prefix}${i.id}"))
- fos.write(i.models)
- fos.close
- } catch {
- case e: FileNotFoundException => error(e.getMessage)
- }
- }
-
- def get(id: String): Option[Model] = {
- try {
- Some(Model(
- id = id,
- models = Source.fromFile(new File(f, s"${prefix}${id}"))(
- scala.io.Codec.ISO8859).map(_.toByte).toArray))
- } catch {
- case e: Throwable =>
- error(e.getMessage)
- None
- }
- }
-
- def delete(id: String): Unit = {
- val m = new File(f, s"${prefix}${id}")
- if (!m.delete) error(s"Unable to delete ${m.getCanonicalPath}!")
- }
-}
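
Note: get() reads the model file through an ISO-8859-1 Source because that
codec maps each byte to exactly one char, so map(_.toByte) recovers the bytes
unchanged. A self-contained sketch of the round trip on a throwaway temp file:

import java.io.{File, FileOutputStream}
import scala.io.Source

object ByteRoundTrip extends App {
  val bytes = Array[Byte](0, 127, -1, -128)  // arbitrary binary content
  val f = File.createTempFile("pio-model-", ".bin")
  val fos = new FileOutputStream(f)
  try fos.write(bytes) finally fos.close()

  // ISO-8859-1 maps byte <-> char one-to-one, so this read is byte-faithful.
  val readBack = Source.fromFile(f)(scala.io.Codec.ISO8859).map(_.toByte).toArray
  println(readBack.sameElements(bytes))  // true
  f.delete()
}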
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala b/data/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
deleted file mode 100644
index b9ec957..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage.localfs
-
-import java.io.File
-
-import grizzled.slf4j.Logging
-import org.apache.predictionio.data.storage.BaseStorageClient
-import org.apache.predictionio.data.storage.StorageClientConfig
-import org.apache.predictionio.data.storage.StorageClientException
-
-class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
- with Logging {
- override val prefix = "LocalFS"
- val f = new File(
- config.properties.getOrElse("PATH", config.properties("HOSTS")))
- if (f.exists) {
- if (!f.isDirectory) throw new StorageClientException(
- s"${f} already exists but it is not a directory!",
- null)
- if (!f.canWrite) throw new StorageClientException(
- s"${f} already exists but it is not writable!",
- null)
- } else {
- if (!f.mkdirs) throw new StorageClientException(
- s"${f} does not exist and automatic creation failed!",
- null)
- }
- val client = f
-}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala b/data/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
deleted file mode 100644
index 554ab26..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.storage
-
-/** Local file system implementation of storage traits, supporting model data only
- *
- * @group Implementation
- */
-package object localfs {}
[7/7] incubator-predictionio git commit: [PIO-49] Additional
improvement to ES5 support
Posted by do...@apache.org.
[PIO-49] Additional improvement to ES5 support
* Add option to build ES1 or ES5-default distribution
* Aggregate all storage subprojects
* Update deprecated SBT code
* Update Docker Compose to use official Elastic images
* Update Travis to use new build option
Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/31c4bd19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/31c4bd19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/31c4bd19
Branch: refs/heads/develop
Commit: 31c4bd1927eb9ae753f82b39b71ff3df7ca779de
Parents: d78b3cb
Author: Donald Szeto <do...@apache.org>
Authored: Tue Mar 7 23:16:33 2017 -0800
Committer: Donald Szeto <do...@apache.org>
Committed: Tue Mar 7 23:16:33 2017 -0800
----------------------------------------------------------------------
.travis.yml | 4 +--
build.sbt | 6 +++++
make-distribution.sh | 47 ++++++++++++++++++++++++++++++++++-
storage/elasticsearch/build.sbt | 3 +--
storage/elasticsearch1/build.sbt | 2 +-
storage/hbase/build.sbt | 2 +-
storage/hdfs/build.sbt | 2 +-
storage/jdbc/build.sbt | 2 +-
storage/localfs/build.sbt | 2 +-
tests/build-docker.sh | 8 ++++--
tests/docker-compose.yml | 4 ++-
11 files changed, 69 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index b763e2d..799ec3d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -46,8 +46,8 @@ env:
matrix:
- BUILD_TYPE=Unit METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
- BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
- - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
- - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+ - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=5
+ - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=5
before_install:
- unset SBT_OPTS JVM_OPTS
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/build.sbt
----------------------------------------------------------------------
diff --git a/build.sbt b/build.sbt
index 98444b9..fa5ba29 100644
--- a/build.sbt
+++ b/build.sbt
@@ -166,6 +166,12 @@ val root = (project in file(".")).
common,
core,
data,
+ dataElasticsearch1,
+ dataElasticsearch,
+ dataHbase,
+ dataHdfs,
+ dataJdbc,
+ dataLocalfs,
tools,
e2)
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index 31954c0..c360c0e 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -19,6 +19,45 @@
set -e
+usage ()
+{
+ echo "Usage: $0 [-h|--help] [--with-es=x]"
+ echo ""
+ echo " -h|--help Show usage"
+ echo ""
+ echo " --with-es=1 Build distribution with Elasticsearch 1 support as default"
+ echo " --with-es=5 Build distribution with Elasticsearch 5 support as default"
+}
+
+ES_VERSION=1
+
+for i in "$@"
+do
+case $i in
+ -h|--help)
+ usage
+ shift
+ exit
+ ;;
+ --with-es=*)
+ ES_VERSION="${i#*=}"
+ shift
+ ;;
+ *)
+ usage
+ exit 1
+ ;;
+esac
+done
+
+if [ "$ES_VERSION" = "1" ] || [ "$ES_VERSION" = "5" ]
+then
+ echo -e "\033[0;32mBuilding with Elasticsearch $ES_VERSION support as the default choice\033[0m"
+else
+ usage
+ exit 1
+fi
+
FWDIR="$(cd `dirname $0`; pwd)"
DISTDIR="${FWDIR}/dist"
@@ -45,7 +84,13 @@ cp ${FWDIR}/project/build.properties ${DISTDIR}/project
cp ${FWDIR}/sbt/sbt ${DISTDIR}/sbt
cp ${FWDIR}/assembly/*assembly*jar ${DISTDIR}/lib
cp ${FWDIR}/assembly/spark/*jar ${DISTDIR}/lib/spark
-cp ${FWDIR}/assembly/extra/*jar ${DISTDIR}/lib/extra
+
+if [ "$ES_VERSION" = "5" ]
+then
+ mv ${DISTDIR}/lib/spark/pio-data-elasticsearch1-assembly-*.jar ${DISTDIR}/lib/extra
+else
+ mv ${DISTDIR}/lib/spark/pio-data-elasticsearch-assembly-*.jar ${DISTDIR}/lib/extra
+fi
rm -f ${DISTDIR}/lib/*javadoc.jar
rm -f ${DISTDIR}/lib/*sources.jar
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/storage/elasticsearch/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt
index b22cbd8..925b66f 100644
--- a/storage/elasticsearch/build.sbt
+++ b/storage/elasticsearch/build.sbt
@@ -53,5 +53,4 @@ assemblyShadeRules in assembly := Seq(
// skip test in assembly
test in assembly := {}
-outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-elasticsearch-assembly-" + version.value + ".jar")
-
+assemblyOutputPath := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-elasticsearch-assembly-" + version.value + ".jar")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/storage/elasticsearch1/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/build.sbt b/storage/elasticsearch1/build.sbt
index 8c29b84..457aba2 100644
--- a/storage/elasticsearch1/build.sbt
+++ b/storage/elasticsearch1/build.sbt
@@ -43,5 +43,5 @@ assemblyMergeStrategy in assembly := {
// skip test in assembly
test in assembly := {}
-outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "extra" / ("pio-data-elasticsearch1-assembly-" + version.value + ".jar")
+assemblyOutputPath := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-elasticsearch1-assembly-" + version.value + ".jar")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/storage/hbase/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hbase/build.sbt b/storage/hbase/build.sbt
index 5856a5e..673697b 100644
--- a/storage/hbase/build.sbt
+++ b/storage/hbase/build.sbt
@@ -52,5 +52,5 @@ assemblyMergeStrategy in assembly := {
// skip test in assembly
test in assembly := {}
-outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-hbase-assembly-" + version.value + ".jar")
+assemblyOutputPath := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-hbase-assembly-" + version.value + ".jar")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/storage/hdfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hdfs/build.sbt b/storage/hdfs/build.sbt
index 9f064c6..35c9153 100644
--- a/storage/hdfs/build.sbt
+++ b/storage/hdfs/build.sbt
@@ -40,5 +40,5 @@ assemblyMergeStrategy in assembly := {
// skip test in assembly
test in assembly := {}
-outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-hdfs-assembly-" + version.value + ".jar")
+assemblyOutputPath := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-hdfs-assembly-" + version.value + ".jar")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/storage/jdbc/build.sbt
----------------------------------------------------------------------
diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt
index 63d420b..0a07ee0 100644
--- a/storage/jdbc/build.sbt
+++ b/storage/jdbc/build.sbt
@@ -43,5 +43,5 @@ assemblyMergeStrategy in assembly := {
// skip test in assembly
test in assembly := {}
-outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-jdbc-assembly-" + version.value + ".jar")
+assemblyOutputPath := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-jdbc-assembly-" + version.value + ".jar")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/storage/localfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/localfs/build.sbt b/storage/localfs/build.sbt
index 2cf9977..f94b355 100644
--- a/storage/localfs/build.sbt
+++ b/storage/localfs/build.sbt
@@ -40,5 +40,5 @@ assemblyMergeStrategy in assembly := {
// skip test in assembly
test in assembly := {}
-outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-localfs-assembly-" + version.value + ".jar")
+assemblyOutputPath := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-localfs-assembly-" + version.value + ".jar")
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/tests/build-docker.sh
----------------------------------------------------------------------
diff --git a/tests/build-docker.sh b/tests/build-docker.sh
index 459b929..a899a18 100755
--- a/tests/build-docker.sh
+++ b/tests/build-docker.sh
@@ -1,4 +1,4 @@
-#!/bin/bash -x
+#!/usr/bin/env bash -ex
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
@@ -29,7 +29,11 @@ fi
docker pull predictionio/pio-testing-base
pushd $DIR/..
-./make-distribution.sh
+if [ -z "$ES_VERSION" ]; then
+ ./make-distribution.sh
+else
+ ./make-distribution.sh --with-es=$ES_VERSION
+fi
sbt/sbt clean
mkdir assembly
cp dist/lib/*.jar assembly/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/31c4bd19/tests/docker-compose.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index b556f7b..b6774d3 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -16,7 +16,9 @@
version: "2"
services:
elasticsearch:
- image: elasticsearch:5-alpine
+ image: docker.elastic.co/elasticsearch/elasticsearch:5.2.2
+ environment:
+ - xpack.security.enabled=false
hbase:
image: harisekhon/hbase:1.0
postgres:
[4/7] incubator-predictionio git commit: [PIO-49] Add support for
Elasticsearch 5
Posted by do...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala b/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
deleted file mode 100644
index b453820..0000000
--- a/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.view
-
-import org.apache.predictionio.data.storage.hbase.HBPEvents
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.EventValidation
-import org.apache.predictionio.data.storage.DataMap
-import org.apache.predictionio.data.storage.Storage
-
-import org.joda.time.DateTime
-
-import org.json4s.JValue
-
-import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
-import org.apache.spark.rdd.RDD
-
-
-// each JValue data associated with the time it is set
-private[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable
-
-private[predictionio] case class SetProp (
- val fields: Map[String, PropTime],
- // last set time. Note: fields could be empty with valid set time
- val t: Long) extends Serializable {
-
- def ++ (that: SetProp): SetProp = {
- val commonKeys = fields.keySet.intersect(that.fields.keySet)
-
- val common: Map[String, PropTime] = commonKeys.map { k =>
- val thisData = this.fields(k)
- val thatData = that.fields(k)
- // only keep the value with latest time
- val v = if (thisData.t > thatData.t) thisData else thatData
- (k, v)
- }.toMap
-
- val combinedFields = common ++
- (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
-
- // keep the latest set time
- val combinedT = if (this.t > that.t) this.t else that.t
-
- SetProp(
- fields = combinedFields,
- t = combinedT
- )
- }
-}
-
-private[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {
- def ++ (that: UnsetProp): UnsetProp = {
- val commonKeys = fields.keySet.intersect(that.fields.keySet)
-
- val common: Map[String, Long] = commonKeys.map { k =>
- val thisData = this.fields(k)
- val thatData = that.fields(k)
- // only keep the value with latest time
- val v = if (thisData > thatData) thisData else thatData
- (k, v)
- }.toMap
-
- val combinedFields = common ++
- (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
-
- UnsetProp(
- fields = combinedFields
- )
- }
-}
-
-private[predictionio] case class DeleteEntity (t: Long) extends Serializable {
- def ++ (that: DeleteEntity): DeleteEntity = {
- if (this.t > that.t) this else that
- }
-}
-
-private[predictionio] case class EventOp (
- val setProp: Option[SetProp] = None,
- val unsetProp: Option[UnsetProp] = None,
- val deleteEntity: Option[DeleteEntity] = None
-) extends Serializable {
-
- def ++ (that: EventOp): EventOp = {
- EventOp(
- setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),
- unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),
- deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)
- )
- }
-
- def toDataMap(): Option[DataMap] = {
- setProp.flatMap { set =>
-
- val unsetKeys: Set[String] = unsetProp.map( unset =>
- unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet
- ).getOrElse(Set())
-
- val combinedFields = deleteEntity.map { delete =>
- if (delete.t >= set.t) {
- None
- } else {
- val deleteKeys: Set[String] = set.fields
- .filter { case (k, PropTime(kv, t)) =>
- (delete.t >= t)
- }.keySet
- Some(set.fields -- unsetKeys -- deleteKeys)
- }
- }.getOrElse{
- Some(set.fields -- unsetKeys)
- }
-
- // Note: mapValues() doesn't return concrete Map and causes
- // NotSerializableException issue. Use map(identity) to work around this.
- // see https://issues.scala-lang.org/browse/SI-7005
- combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))
- }
- }
-
-}
-
-private[predictionio] object EventOp {
- def apply(e: Event): EventOp = {
- val t = e.eventTime.getMillis
- e.event match {
- case "$set" => {
- val fields = e.properties.fields.mapValues(jv =>
- PropTime(jv, t)
- ).map(identity)
-
- EventOp(
- setProp = Some(SetProp(fields = fields, t = t))
- )
- }
- case "$unset" => {
- val fields = e.properties.fields.mapValues(jv => t).map(identity)
- EventOp(
- unsetProp = Some(UnsetProp(fields = fields))
- )
- }
- case "$delete" => {
- EventOp(
- deleteEntity = Some(DeleteEntity(t))
- )
- }
- case _ => {
- EventOp()
- }
- }
- }
-}
-
-@deprecated("Use PEvents or PEventStore instead.", "0.9.2")
-class PBatchView(
- val appId: Int,
- val startTime: Option[DateTime],
- val untilTime: Option[DateTime],
- val sc: SparkContext) {
-
- // NOTE: parallel Events DB interface
- @transient lazy val eventsDb = Storage.getPEvents()
-
- @transient lazy val _events: RDD[Event] =
- eventsDb.getByAppIdAndTimeAndEntity(
- appId = appId,
- startTime = startTime,
- untilTime = untilTime,
- entityType = None,
- entityId = None)(sc)
-
- // TODO: change to use EventSeq?
- @transient lazy val events: RDD[Event] = _events
-
- def aggregateProperties(
- entityType: String,
- startTimeOpt: Option[DateTime] = None,
- untilTimeOpt: Option[DateTime] = None
- ): RDD[(String, DataMap)] = {
-
- _events
- .filter( e => ((e.entityType == entityType) &&
- (EventValidation.isSpecialEvents(e.event))) )
- .map( e => (e.entityId, EventOp(e) ))
- .aggregateByKey[EventOp](EventOp())(
- // within same partition
- seqOp = { case (u, v) => u ++ v },
- // across partition
- combOp = { case (accu, u) => accu ++ u }
- )
- .mapValues(_.toDataMap)
- .filter{ case (k, v) => v.isDefined }
- .map{ case (k, v) => (k, v.get) }
- }
-
-}
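
Note: aggregateProperties is correct because every ++ above is last-write-wins
on event time, so folding EventOps in any order yields the same final
properties. A self-contained sketch of that rule, using simplified stand-ins
(a String payload instead of JValue):

case class PropTime(d: String, t: Long)
case class SetProp(fields: Map[String, PropTime], t: Long) {
  def ++(that: SetProp): SetProp = {
    val keys = fields.keySet ++ that.fields.keySet
    val merged = keys.map { k =>
      k -> ((fields.get(k), that.fields.get(k)) match {
        case (Some(a), Some(b)) => if (a.t > b.t) a else b  // latest set wins
        case (a, b)             => a.orElse(b).get          // only one side has it
      })
    }.toMap
    SetProp(merged, math.max(t, that.t))
  }
}

object MergeDemo extends App {
  val earlier = SetProp(Map("color" -> PropTime("red", 10L)), 10L)
  val later   = SetProp(Map("color" -> PropTime("blue", 20L)), 20L)
  println((earlier ++ later).fields("color").d)  // blue
  println((later ++ earlier).fields("color").d)  // blue: order-independent
}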
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index b6c8ed3..31954c0 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -27,13 +27,15 @@ VERSION=$(grep version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | se
echo "Building binary distribution for PredictionIO $VERSION..."
cd ${FWDIR}
-sbt/sbt common/publishLocal data/publishLocal core/publishLocal e2/publishLocal tools/assembly
+sbt/sbt common/publishLocal data/publishLocal core/publishLocal e2/publishLocal dataElasticsearch1/assembly dataElasticsearch/assembly dataHbase/assembly dataHdfs/assembly dataJdbc/assembly dataLocalfs/assembly tools/assembly
cd ${FWDIR}
rm -rf ${DISTDIR}
mkdir -p ${DISTDIR}/bin
mkdir -p ${DISTDIR}/conf
mkdir -p ${DISTDIR}/lib
+mkdir -p ${DISTDIR}/lib/spark
+mkdir -p ${DISTDIR}/lib/extra
mkdir -p ${DISTDIR}/project
mkdir -p ${DISTDIR}/sbt
@@ -42,6 +44,8 @@ cp ${FWDIR}/conf/* ${DISTDIR}/conf
cp ${FWDIR}/project/build.properties ${DISTDIR}/project
cp ${FWDIR}/sbt/sbt ${DISTDIR}/sbt
cp ${FWDIR}/assembly/*assembly*jar ${DISTDIR}/lib
+cp ${FWDIR}/assembly/spark/*jar ${DISTDIR}/lib/spark
+cp ${FWDIR}/assembly/extra/*jar ${DISTDIR}/lib/extra
rm -f ${DISTDIR}/lib/*javadoc.jar
rm -f ${DISTDIR}/lib/*sources.jar
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/.gitignore
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/.gitignore b/storage/elasticsearch/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/elasticsearch/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt
new file mode 100644
index 0000000..b22cbd8
--- /dev/null
+++ b/storage/elasticsearch/build.sbt
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+name := "apache-predictionio-data-elasticsearch"
+
+elasticsearchVersion := "5.2.1"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
+ "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
+ "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
+ "org.elasticsearch.client" % "rest" % elasticsearchVersion.value,
+ "org.elasticsearch" %% "elasticsearch-spark-13" % elasticsearchVersion.value
+ exclude("org.apache.spark", "spark-sql_2.10")
+ exclude("org.apache.spark", "spark-streaming_2.10"),
+ "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value,
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+
+assemblyMergeStrategy in assembly := {
+ case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
+ case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
+
+assemblyShadeRules in assembly := Seq(
+ ShadeRule.rename("org.apache.http.**" -> "shadeio.data.http.@1").inAll
+)
+
+// skip test in assembly
+test in assembly := {}
+
+outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-elasticsearch-assembly-" + version.value + ".jar")
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
new file mode 100644
index 0000000..cb6d330
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters.mapAsJavaMapConverter
+
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.AccessKey
+import org.apache.predictionio.data.storage.AccessKeys
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.write
+
+import grizzled.slf4j.Logging
+import org.elasticsearch.client.ResponseException
+
+/** Elasticsearch implementation of AccessKeys. */
+class ESAccessKeys(client: ESClient, config: StorageClientConfig, index: String)
+ extends AccessKeys with Logging {
+ implicit val formats = DefaultFormats.lossless
+ private val estype = "accesskeys"
+
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val mappingJson =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)) ~
+ ("properties" ->
+ ("key" -> ("type" -> "keyword")) ~
+ ("events" -> ("type" -> "keyword"))))
+ ESUtils.createMapping(restClient, index, estype, compact(render(mappingJson)))
+ } finally {
+ restClient.close()
+ }
+
+ def insert(accessKey: AccessKey): Option[String] = {
+ val key = if (accessKey.key.isEmpty) generateKey else accessKey.key
+ update(accessKey.copy(key = key))
+ Some(key)
+ }
+
+ def get(id: String): Option[AccessKey] = {
+ if (id.isEmpty) {
+ return None
+ }
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "GET",
+ s"/$index/$estype/$id",
+ Map.empty[String, String].asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "found").extract[Boolean] match {
+ case true =>
+ Some((jsonResponse \ "_source").extract[AccessKey])
+ case _ =>
+ None
+ }
+ } catch {
+ case e: ResponseException =>
+ e.getResponse.getStatusLine.getStatusCode match {
+ case 404 => None
+ case _ =>
+ error(s"Failed to access /$index/$estype/$id", e)
+ None
+ }
+ case e: IOException =>
+ error(s"Failed to access /$index/$estype/$id", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getAll(): Seq[AccessKey] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("match_all" -> List.empty))
+ ESUtils.getAll[AccessKey](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error(s"Failed to access /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getByAppid(appid: Int): Seq[AccessKey] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("appid" -> appid)))
+ ESUtils.getAll[AccessKey](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error(s"Failed to access /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def update(accessKey: AccessKey): Unit = {
+ val id = accessKey.key
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "created" =>
+ case "updated" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def delete(id: String): Unit = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "DELETE",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava)
+ val json = parse(EntityUtils.toString(response.getEntity))
+ val result = (json \ "result").extract[String]
+ result match {
+ case "deleted" =>
+ case _ =>
+ error(s"[$result] Failed to delete $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to delete $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+}
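
Note: every DAO in this file follows the same pattern: performRequest against
the REST endpoint, then json4s extraction on the response body. A sketch of
just the extraction step, on an assumed response body:

import org.json4s._
import org.json4s.native.JsonMethods._

object ExtractDemo extends App {
  implicit val formats = DefaultFormats.lossless

  // Assumed body of a GET /<index>/accesskeys/<id> response.
  val body = """{"found": true, "_source": {"key": "abc", "appid": 1, "events": []}}"""
  val json = parse(body)

  // The same checks ESAccessKeys.get performs on the parsed body.
  println((json \ "found").extract[Boolean])           // true
  println((json \ "_source" \ "key").extract[String])  // abc
}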
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
new file mode 100644
index 0000000..abea2b8
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters.mapAsJavaMapConverter
+
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.App
+import org.apache.predictionio.data.storage.Apps
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.write
+
+import grizzled.slf4j.Logging
+import org.elasticsearch.client.ResponseException
+
+/** Elasticsearch implementation of Apps. */
+class ESApps(client: ESClient, config: StorageClientConfig, index: String)
+ extends Apps with Logging {
+ implicit val formats = DefaultFormats.lossless
+ private val estype = "apps"
+ private val seq = new ESSequences(client, config, index)
+
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val mappingJson =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)) ~
+ ("properties" ->
+ ("id" -> ("type" -> "keyword")) ~
+ ("name" -> ("type" -> "keyword"))))
+ ESUtils.createMapping(restClient, index, estype, compact(render(mappingJson)))
+ } finally {
+ restClient.close()
+ }
+
+ def insert(app: App): Option[Int] = {
+ val id =
+ if (app.id == 0) {
+ var roll = seq.genNext(estype)
+ while (get(roll).isDefined) roll = seq.genNext(estype)
+ roll
+ } else app.id
+ update(app.copy(id = id))
+ Some(id)
+ }
+
+ def get(id: Int): Option[App] = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "GET",
+ s"/$index/$estype/$id",
+ Map.empty[String, String].asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "found").extract[Boolean] match {
+ case true =>
+ Some((jsonResponse \ "_source").extract[App])
+ case _ =>
+ None
+ }
+ } catch {
+ case e: ResponseException =>
+ e.getResponse.getStatusLine.getStatusCode match {
+ case 404 => None
+ case _ =>
+ error(s"Failed to access /$index/$estype/$id", e)
+ None
+ }
+ case e: IOException =>
+ error(s"Failed to access /$index/$estype/$id", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getByName(name: String): Option[App] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("name" -> name)))
+ val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/_search",
+ Map.empty[String, String].asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "hits" \ "total").extract[Long] match {
+ case 0 => None
+ case _ =>
+ val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]]
+ val result = (results.head \ "_source").extract[App]
+ Some(result)
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to access /$index/$estype/_search", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getAll(): Seq[App] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("match_all" -> List.empty))
+ ESUtils.getAll[App](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error("Failed to access to /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def update(app: App): Unit = {
+ val id = app.id.toString
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "created" =>
+ case "updated" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def delete(id: Int): Unit = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "DELETE",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava)
+ val json = parse(EntityUtils.toString(response.getEntity))
+ val result = (json \ "result").extract[String]
+ result match {
+ case "deleted" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+}
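
For a concrete sense of how the accessor above is driven, here is a minimal caller-side sketch. It assumes a reachable Elasticsearch 5.x node on localhost:9200 and a StorageClientConfig already assembled by the storage layer; the index name "pio_meta" is illustrative, not something this diff mandates.

    import org.apache.http.HttpHost
    import org.apache.predictionio.data.storage.{App, StorageClientConfig}

    // Hypothetical caller; ESClient and ESApps are the classes in this commit.
    def demoApps(config: StorageClientConfig): Unit = {
      val client = ESClient(Seq(new HttpHost("localhost", 9200, "http")))
      val apps = new ESApps(client, config, "pio_meta") // illustrative index name
      val createdId = apps.insert(App(id = 0, name = "my-app", description = None))
      createdId.flatMap(apps.get).foreach(a => println(s"stored app: ${a.name}"))
    }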
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
new file mode 100644
index 0000000..f092cc7
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters.mapAsJavaMapConverter
+
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.Channel
+import org.apache.predictionio.data.storage.Channels
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.write
+
+import grizzled.slf4j.Logging
+import org.elasticsearch.client.ResponseException
+
+class ESChannels(client: ESClient, config: StorageClientConfig, index: String)
+ extends Channels with Logging {
+ implicit val formats = DefaultFormats.lossless
+ private val estype = "channels"
+ private val seq = new ESSequences(client, config, index)
+
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val mappingJson =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)) ~
+ ("properties" ->
+ ("name" -> ("type" -> "keyword"))))
+ ESUtils.createMapping(restClient, index, estype, compact(render(mappingJson)))
+ } finally {
+ restClient.close()
+ }
+
+ def insert(channel: Channel): Option[Int] = {
+ val id =
+ if (channel.id == 0) {
+ var roll = seq.genNext(estype)
+ while (get(roll).isDefined) roll = seq.genNext(estype)
+ roll
+ } else channel.id
+
+ if (update(channel.copy(id = id))) Some(id) else None
+ }
+
+ def get(id: Int): Option[Channel] = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "GET",
+ s"/$index/$estype/$id",
+ Map.empty[String, String].asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "found").extract[Boolean] match {
+ case true =>
+ Some((jsonResponse \ "_source").extract[Channel])
+ case _ =>
+ None
+ }
+ } catch {
+ case e: ResponseException =>
+ e.getResponse.getStatusLine.getStatusCode match {
+ case 404 => None
+ case _ =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ None
+ }
+ case e: IOException =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getByAppid(appid: Int): Seq[Channel] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("appid" -> appid)))
+ ESUtils.getAll[Channel](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error(s"Failed to access to /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def update(channel: Channel): Boolean = {
+ val id = channel.id.toString
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val json = parse(EntityUtils.toString(response.getEntity))
+ val result = (json \ "result").extract[String]
+ result match {
+ case "created" => true
+ case "updated" => true
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ false
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ false
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def delete(id: Int): Unit = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "DELETE",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "deleted" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+}
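
Note that, unlike ESApps.insert (which always returns Some(id)), ESChannels.insert surfaces a failed index request as None, so callers can react to storage errors. A small sketch, with the channel name and app id purely illustrative:

    import org.apache.predictionio.data.storage.Channel

    // Returns the generated channel id, or None if the index request failed.
    def createChannel(channels: ESChannels, appId: Int): Option[Int] =
      channels.insert(Channel(id = 0, name = "imported", appid = appId))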
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
new file mode 100644
index 0000000..4dbacb7
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters.mapAsJavaMapConverter
+
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.EngineInstance
+import org.apache.predictionio.data.storage.EngineInstanceSerializer
+import org.apache.predictionio.data.storage.EngineInstances
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.write
+
+import grizzled.slf4j.Logging
+import org.elasticsearch.client.ResponseException
+
+class ESEngineInstances(client: ESClient, config: StorageClientConfig, index: String)
+ extends EngineInstances with Logging {
+ implicit val formats = DefaultFormats + new EngineInstanceSerializer
+ private val estype = "engine_instances"
+
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val mappingJson =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)) ~
+ ("properties" ->
+ ("status" -> ("type" -> "keyword")) ~
+ ("startTime" -> ("type" -> "date")) ~
+ ("endTime" -> ("type" -> "date")) ~
+ ("engineId" -> ("type" -> "keyword")) ~
+ ("engineVersion" -> ("type" -> "keyword")) ~
+ ("engineVariant" -> ("type" -> "keyword")) ~
+ ("engineFactory" -> ("type" -> "keyword")) ~
+ ("batch" -> ("type" -> "keyword")) ~
+ ("dataSourceParams" -> ("type" -> "keyword")) ~
+ ("preparatorParams" -> ("type" -> "keyword")) ~
+ ("algorithmsParams" -> ("type" -> "keyword")) ~
+ ("servingParams" -> ("type" -> "keyword")) ~
+ ("status" -> ("type" -> "keyword"))))
+ ESUtils.createMapping(restClient, index, estype, compact(render(mappingJson)))
+ } finally {
+ restClient.close()
+ }
+
+ def insert(i: EngineInstance): String = {
+ val id = i.id match {
+ case x if x.isEmpty =>
+ @scala.annotation.tailrec
+ def generateId(newId: Option[String]): String = {
+ newId match {
+ case Some(x) => x
+ case _ => generateId(preInsert())
+ }
+ }
+ generateId(preInsert())
+ case x => x
+ }
+
+ update(i.copy(id = id))
+ id
+ }
+
+ def preInsert(): Option[String] = {
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity("{}", ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "created" =>
+ Some((jsonResponse \ "_id").extract[String])
+ case _ =>
+ error(s"[$result] Failed to create $index/$estype")
+ None
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to create $index/$estype", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def get(id: String): Option[EngineInstance] = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "GET",
+ s"/$index/$estype/$id",
+ Map.empty[String, String].asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "found").extract[Boolean] match {
+ case true =>
+ Some((jsonResponse \ "_source").extract[EngineInstance])
+ case _ =>
+ None
+ }
+ } catch {
+ case e: ResponseException =>
+ e.getResponse.getStatusLine.getStatusCode match {
+ case 404 => None
+ case _ =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ None
+ }
+ case e: IOException =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getAll(): Seq[EngineInstance] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("match_all" -> List.empty))
+ ESUtils.getAll[EngineInstance](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error("Failed to access to /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getCompleted(
+ engineId: String,
+ engineVersion: String,
+ engineVariant: String): Seq[EngineInstance] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("bool" ->
+ ("must" -> List(
+ ("term" ->
+ ("status" -> "COMPLETED")),
+ ("term" ->
+ ("engineId" -> engineId)),
+ ("term" ->
+ ("engineVersion" -> engineVersion)),
+ ("term" ->
+ ("engineVariant" -> engineVariant)))))) ~
+ ("sort" -> List(
+ ("startTime" ->
+ ("order" -> "desc"))))
+ ESUtils.getAll[EngineInstance](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error(s"Failed to access to /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getLatestCompleted(
+ engineId: String,
+ engineVersion: String,
+ engineVariant: String): Option[EngineInstance] =
+ getCompleted(
+ engineId,
+ engineVersion,
+ engineVariant).headOption
+
+ def update(i: EngineInstance): Unit = {
+ val id = i.id
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "created" =>
+ case "updated" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def delete(id: String): Unit = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "DELETE",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava)
+ val json = parse(EntityUtils.toString(response.getEntity))
+ val result = (json \ "result").extract[String]
+ result match {
+ case "deleted" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+}
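
The getCompleted query above filters on status COMPLETED and sorts by startTime descending, so getLatestCompleted is simply its head. A hypothetical lookup, with the engine coordinates purely illustrative:

    // Id of the most recent successful training run, if any.
    def latestInstanceId(instances: ESEngineInstances): Option[String] =
      instances.getLatestCompleted(
        engineId = "com.example.RecommendationEngine",
        engineVersion = "1.0.0",
        engineVariant = "default").map(_.id)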
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
new file mode 100644
index 0000000..5bdc0fb
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters._
+
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.EvaluationInstance
+import org.apache.predictionio.data.storage.EvaluationInstanceSerializer
+import org.apache.predictionio.data.storage.EvaluationInstances
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.write
+
+import grizzled.slf4j.Logging
+import org.elasticsearch.client.ResponseException
+
+class ESEvaluationInstances(client: ESClient, config: StorageClientConfig, index: String)
+ extends EvaluationInstances with Logging {
+ implicit val formats = DefaultFormats + new EvaluationInstanceSerializer
+ private val estype = "evaluation_instances"
+ private val seq = new ESSequences(client, config, index)
+
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val mappingJson =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)) ~
+ ("properties" ->
+ ("status" -> ("type" -> "keyword")) ~
+ ("startTime" -> ("type" -> "date")) ~
+ ("endTime" -> ("type" -> "date")) ~
+ ("evaluationClass" -> ("type" -> "keyword")) ~
+ ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~
+ ("batch" -> ("type" -> "keyword")) ~
+ ("evaluatorResults" -> ("type" -> "text") ~ ("index" -> "no")) ~
+ ("evaluatorResultsHTML" -> ("type" -> "text") ~ ("index" -> "no")) ~
+ ("evaluatorResultsJSON" -> ("type" -> "text") ~ ("index" -> "no"))))
+ ESUtils.createMapping(restClient, index, estype, compact(render(mappingJson)))
+ } finally {
+ restClient.close()
+ }
+
+ def insert(i: EvaluationInstance): String = {
+ val id = i.id match {
+ case x if x.isEmpty =>
+ var roll = seq.genNext(estype).toString
+ while (get(roll).isDefined) roll = seq.genNext(estype).toString
+ roll
+ case x => x
+ }
+
+ update(i.copy(id = id))
+ id
+ }
+
+ def get(id: String): Option[EvaluationInstance] = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "GET",
+ s"/$index/$estype/$id",
+ Map.empty[String, String].asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "found").extract[Boolean] match {
+ case true =>
+ Some((jsonResponse \ "_source").extract[EvaluationInstance])
+ case _ =>
+ None
+ }
+ } catch {
+ case e: ResponseException =>
+ e.getResponse.getStatusLine.getStatusCode match {
+ case 404 => None
+ case _ =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ None
+ }
+ case e: IOException =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getAll(): Seq[EvaluationInstance] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("match_all" -> List.empty))
+ ESUtils.getAll[EvaluationInstance](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error("Failed to access to /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def getCompleted(): Seq[EvaluationInstance] = {
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("status" -> "EVALCOMPLETED"))) ~
+ ("sort" ->
+ ("startTime" ->
+ ("order" -> "desc")))
+ ESUtils.getAll[EvaluationInstance](restClient, index, estype, compact(render(json)))
+ } catch {
+ case e: IOException =>
+ error("Failed to access to /$index/$estype/_search", e)
+ Nil
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def update(i: EvaluationInstance): Unit = {
+ val id = i.id
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val json = parse(EntityUtils.toString(response.getEntity))
+ val result = (json \ "result").extract[String]
+ result match {
+ case "created" =>
+ case "updated" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+
+ def delete(id: String): Unit = {
+ val restClient = client.open()
+ try {
+ val response = restClient.performRequest(
+ "DELETE",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava)
+ val json = parse(EntityUtils.toString(response.getEntity))
+ val result = (json \ "result").extract[String]
+ result match {
+ case "deleted" =>
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/$id", e)
+ } finally {
+ restClient.close()
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEventsUtil.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEventsUtil.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEventsUtil.scala
new file mode 100644
index 0000000..56f47ab
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEventsUtil.scala
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import org.apache.hadoop.io.DoubleWritable
+import org.apache.hadoop.io.LongWritable
+import org.apache.hadoop.io.MapWritable
+import org.apache.hadoop.io.Text
+import org.apache.predictionio.data.storage.DataMap
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.EventValidation
+import org.joda.time.DateTime
+import org.joda.time.DateTimeZone
+import org.json4s._
+
+object ESEventsUtil {
+
+ implicit val formats = DefaultFormats
+
+ def resultToEvent(id: Text, result: MapWritable, appId: Int): Event = {
+
+ def getStringCol(col: String): String = {
+ val r = result.get(new Text(col)).asInstanceOf[Text]
+ require(r != null,
+ s"Failed to get value for column ${col}. " +
+ s"StringBinary: ${r.getBytes()}.")
+
+ r.toString()
+ }
+
+ def getOptStringCol(col: String): Option[String] = {
+ val r = result.get(new Text(col))
+ if (r == null) {
+ None
+ } else {
+ Some(r.asInstanceOf[Text].toString())
+ }
+ }
+
+ val tmp = result
+ .get(new Text("properties")).asInstanceOf[MapWritable]
+ .get(new Text("fields")).asInstanceOf[MapWritable]
+ .get(new Text("rating"))
+
+ val rating = tmp match {
+ case d: DoubleWritable => d
+ case l: LongWritable =>
+ new DoubleWritable(l.get().toDouble)
+ case _ => null
+ }
+
+ val properties: DataMap =
+ if (rating != null) DataMap(s"""{"rating":${rating.get().toString}}""")
+ else DataMap()
+
+
+ val eventId = Some(getStringCol("eventId"))
+ val event = getStringCol("event")
+ val entityType = getStringCol("entityType")
+ val entityId = getStringCol("entityId")
+ val targetEntityType = getOptStringCol("targetEntityType")
+ val targetEntityId = getOptStringCol("targetEntityId")
+ val prId = getOptStringCol("prId")
+ val eventTimeZone = getOptStringCol("eventTimeZone")
+ .map(DateTimeZone.forID(_))
+ .getOrElse(EventValidation.defaultTimeZone)
+ val eventTime = new DateTime(
+ getStringCol("eventTime"), eventTimeZone)
+ val creationTimeZone = getOptStringCol("creationTimeZone")
+ .map(DateTimeZone.forID(_))
+ .getOrElse(EventValidation.defaultTimeZone)
+ val creationTime: DateTime = new DateTime(
+ getStringCol("creationTime"), creationTimeZone)
+
+
+ Event(
+ eventId = eventId,
+ event = event,
+ entityType = entityType,
+ entityId = entityId,
+ targetEntityType = targetEntityType,
+ targetEntityId = targetEntityId,
+ properties = properties,
+ eventTime = eventTime,
+ tags = Seq(),
+ prId = prId,
+ creationTime = creationTime
+ )
+ }
+
+ def eventToPut(event: Event, appId: Int): Map[String, Any] = {
+ Map(
+ "eventId" -> event.eventId,
+ "event" -> event.event,
+ "entityType" -> event.entityType,
+ "entityId" -> event.entityId,
+ "targetEntityType" -> event.targetEntityType,
+ "targetEntityId" -> event.targetEntityId,
+ "properties" -> event.properties.toJObject,
+ "eventTime" -> event.eventTime.toString,
+ "tags" -> event.tags,
+ "prId" -> event.prId,
+ "creationTime" -> event.creationTime.toString
+ )
+ }
+
+}
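
eventToPut flattens an Event into the Map that the Spark writer (ESPEvents.write, later in this commit) hands to saveToEs. A self-contained sketch of the shape it produces; all field values are illustrative:

    import org.apache.predictionio.data.storage.{DataMap, Event}
    import org.joda.time.DateTime

    def sampleDoc(): Map[String, Any] = {
      val ev = Event(
        event = "rate",
        entityType = "user",
        entityId = "u1",
        targetEntityType = Some("item"),
        targetEntityId = Some("i1"),
        properties = DataMap("""{"rating":4.0}"""),
        eventTime = new DateTime("2017-03-08T00:00:00.000Z"))
      ESEventsUtil.eventToPut(ev, appId = 1)
    }
    // sampleDoc()("event") == "rate"; "properties" carries the rating as a JObject.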
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala
new file mode 100644
index 0000000..fdd370a
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters._
+import scala.concurrent.ExecutionContext
+import scala.concurrent.Future
+
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.LEvents
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.client.RestClient
+import org.joda.time.DateTime
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+import org.json4s.ext.JodaTimeSerializers
+
+import grizzled.slf4j.Logging
+import org.elasticsearch.client.ResponseException
+import org.apache.http.entity.StringEntity
+
+class ESLEvents(val client: ESClient, config: StorageClientConfig, val index: String)
+ extends LEvents with Logging {
+ implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all
+ private val seq = new ESSequences(client, config, index)
+ private val seqName = "events"
+
+ def getEsType(appId: Int, channelId: Option[Int] = None): String = {
+ channelId.map { ch =>
+ s"${appId}_${ch}"
+ }.getOrElse {
+ s"${appId}"
+ }
+ }
+
+ override def init(appId: Int, channelId: Option[Int] = None): Boolean = {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val json =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)) ~
+ ("properties" ->
+ ("name" -> ("type" -> "keyword")) ~
+ ("eventId" -> ("type" -> "keyword")) ~
+ ("event" -> ("type" -> "keyword")) ~
+ ("entityType" -> ("type" -> "keyword")) ~
+ ("entityId" -> ("type" -> "keyword")) ~
+ ("targetEntityType" -> ("type" -> "keyword")) ~
+ ("targetEntityId" -> ("type" -> "keyword")) ~
+ ("properties" ->
+ ("type" -> "nested") ~
+ ("properties" ->
+ ("fields" -> ("type" -> "nested") ~
+ ("properties" ->
+ ("user" -> ("type" -> "long")) ~
+ ("num" -> ("type" -> "long")))))) ~
+ ("eventTime" -> ("type" -> "date")) ~
+ ("tags" -> ("type" -> "keyword")) ~
+ ("prId" -> ("type" -> "keyword")) ~
+ ("creationTime" -> ("type" -> "date"))))
+ ESUtils.createMapping(restClient, index, estype, compact(render(json)))
+ } finally {
+ restClient.close()
+ }
+ true
+ }
+
+ override def remove(appId: Int, channelId: Option[Int] = None): Boolean = {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("match_all" -> List.empty))
+ val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)
+ restClient.performRequest(
+ "POST",
+ s"/$index/$estype/_delete_by_query",
+ Map("refresh" -> "true").asJava,
+ entity).getStatusLine.getStatusCode match {
+ case 200 => true
+ case _ =>
+ error(s"Failed to remove $index/$estype")
+ false
+ }
+ } catch {
+ case e: Exception =>
+ error(s"Failed to remove $index/$estype", e)
+ false
+ } finally {
+ restClient.close()
+ }
+ }
+
+ override def close(): Unit = {
+ // nothing
+ }
+
+ override def futureInsert(
+ event: Event,
+ appId: Int,
+ channelId: Option[Int])(implicit ec: ExecutionContext): Future[String] = {
+ Future {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ val id = event.eventId.getOrElse {
+ var roll = seq.genNext(seqName)
+ while (exists(restClient, estype, roll)) roll = seq.genNext(seqName)
+ roll.toString
+ }
+ val json = write(event.copy(eventId = Some(id)))
+ val entity = new NStringEntity(json, ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$id",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "created" => id
+ case "updated" => id
+ case _ =>
+ error(s"[$result] Failed to update $index/$estype/$id")
+ ""
+ }
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype/<id>", e)
+ ""
+ } finally {
+ restClient.close()
+ }
+ }
+ }
+
+ private def exists(restClient: RestClient, estype: String, id: Int): Boolean = {
+ try {
+ restClient.performRequest(
+ "GET",
+ s"/$index/$estype/$id",
+ Map.empty[String, String].asJava).getStatusLine.getStatusCode match {
+ case 200 => true
+ case _ => false
+ }
+ } catch {
+ case e: ResponseException =>
+ e.getResponse.getStatusLine.getStatusCode match {
+ case 404 => false
+ case _ =>
+ error(s"Failed to access to /$index/$estype/$id", e)
+ false
+ }
+ case e: IOException =>
+ error(s"Failed to access to $index/$estype/$id", e)
+ false
+ }
+ }
+
+ override def futureGet(
+ eventId: String,
+ appId: Int,
+ channelId: Option[Int])(implicit ec: ExecutionContext): Future[Option[Event]] = {
+ Future {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("eventId" -> eventId)))
+ val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/_search",
+ Map.empty[String, String].asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ (jsonResponse \ "hits" \ "total").extract[Long] match {
+ case 0 => None
+ case _ =>
+ val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]]
+ val result = (results.head \ "_source").extract[Event]
+ Some(result)
+ }
+ } catch {
+ case e: IOException =>
+ error("Failed to access to /$index/$estype/_search", e)
+ None
+ } finally {
+ restClient.close()
+ }
+ }
+ }
+
+ override def futureDelete(
+ eventId: String,
+ appId: Int,
+ channelId: Option[Int])(implicit ec: ExecutionContext): Future[Boolean] = {
+ Future {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("eventId" -> eventId)))
+ val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/_delete_by_query",
+ Map("refresh" -> "true").asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ // ES 5.x _delete_by_query responds with a "deleted" count, not a "result" field.
+ val deleted = (jsonResponse \ "deleted").extract[Int]
+ if (deleted == 0) {
+ error(s"Failed to delete $index/$estype:$eventId")
+ }
+ deleted > 0
+ } catch {
+ case e: IOException =>
+ error(s"Failed to update $index/$estype:$eventId", e)
+ false
+ } finally {
+ restClient.close()
+ }
+ }
+ }
+
+ override def futureFind(
+ appId: Int,
+ channelId: Option[Int] = None,
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None,
+ limit: Option[Int] = None,
+ reversed: Option[Boolean] = None)
+ (implicit ec: ExecutionContext): Future[Iterator[Event]] = {
+ Future {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ val query = ESUtils.createEventQuery(
+ startTime, untilTime, entityType, entityId,
+ eventNames, targetEntityType, targetEntityId, reversed)
+ limit.getOrElse(20) match {
+ case -1 => ESUtils.getAll[Event](restClient, index, estype, query).toIterator
+ case size => ESUtils.get[Event](restClient, index, estype, query, size).toIterator
+ }
+ } catch {
+ case e: IOException =>
+ error(e.getMessage)
+ Iterator[Event]()
+ } finally {
+ restClient.close()
+ }
+ }
+ }
+
+}
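
Since every LEvents operation here returns a Future, callers supply an ExecutionContext and decide how to wait. A minimal blocking sketch; the app id and timeout are illustrative:

    import scala.concurrent.{Await, ExecutionContext}
    import scala.concurrent.duration._
    import org.apache.predictionio.data.storage.Event

    def insertAndFetch(events: ESLEvents, ev: Event)(
      implicit ec: ExecutionContext): Option[Event] = {
      val id = Await.result(events.futureInsert(ev, appId = 1, channelId = None), 10.seconds)
      Await.result(events.futureGet(id, appId = 1, channelId = None), 10.seconds)
    }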
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala
new file mode 100644
index 0000000..390e78c
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import scala.collection.JavaConverters._
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.io.MapWritable
+import org.apache.hadoop.io.Text
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.PEvents
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.elasticsearch.client.RestClient
+import org.elasticsearch.hadoop.mr.EsInputFormat
+import org.elasticsearch.spark._
+import org.joda.time.DateTime
+import java.io.IOException
+import org.apache.http.util.EntityUtils
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.entity.ContentType
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.ext.JodaTimeSerializers
+
+
+class ESPEvents(client: ESClient, config: StorageClientConfig, index: String)
+ extends PEvents {
+ implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all
+
+ def getEsType(appId: Int, channelId: Option[Int] = None): String = {
+ channelId.map { ch =>
+ s"${appId}_${ch}"
+ }.getOrElse {
+ s"${appId}"
+ }
+ }
+
+ def getESNodes(): String = {
+ val hosts = config.properties.get("HOSTS").
+ map(_.split(",").toSeq).getOrElse(Seq("localhost"))
+ val ports = config.properties.get("PORTS").
+ map(_.split(",").toSeq.map(_.toInt)).getOrElse(Seq(9200))
+ (hosts, ports).zipped.map(
+ (h, p) => s"$h:$p").mkString(",")
+ }
+
+ override def find(
+ appId: Int,
+ channelId: Option[Int] = None,
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {
+
+ val query = ESUtils.createEventQuery(
+ startTime, untilTime, entityType, entityId,
+ eventNames, targetEntityType, targetEntityId, None)
+
+ val estype = getEsType(appId, channelId)
+ val conf = new Configuration()
+ conf.set("es.resource", s"$index/$estype")
+ conf.set("es.query", query)
+ conf.set("es.nodes", getESNodes())
+
+ val rdd = sc.newAPIHadoopRDD(conf, classOf[EsInputFormat[Text, MapWritable]],
+ classOf[Text], classOf[MapWritable]).map {
+ case (key, doc) => {
+ ESEventsUtil.resultToEvent(key, doc, appId)
+ }
+ }
+
+ rdd
+ }
+
+ override def write(
+ events: RDD[Event],
+ appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
+ val estype = getEsType(appId, channelId)
+ val conf = Map("es.resource" -> s"$index/$estype", "es.nodes" -> getESNodes())
+ events.map { event =>
+ ESEventsUtil.eventToPut(event, appId)
+ }.saveToEs(conf)
+ }
+
+ override def delete(
+ eventIds: RDD[String],
+ appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
+ val estype = getEsType(appId, channelId)
+ val restClient = client.open()
+ try {
+ eventIds.foreachPartition { iter =>
+ iter.foreach { eventId =>
+ try {
+ val json =
+ ("query" ->
+ ("term" ->
+ ("eventId" -> eventId)))
+ val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/_delete_by_query",
+ Map("refresh" -> "true").asJava)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ // ES 5.x _delete_by_query responds with a "deleted" count, not a "result" field.
+ val deleted = (jsonResponse \ "deleted").extract[Int]
+ if (deleted == 0) {
+ logger.error(s"Failed to delete $index/$estype:$eventId")
+ }
+ } catch {
+ case e: IOException =>
+ logger.error(s"Failed to update $index/$estype:$eventId", e)
+ false
+ }
+ }
+ }
+ } finally {
+ restClient.close()
+ }
+ }
+
+}
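
find builds the same JSON event query as the local-storage path and reads results back through EsInputFormat, so a driver only needs a SparkContext. A sketch under those assumptions; the app id and event name are illustrative:

    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD
    import org.apache.predictionio.data.storage.Event

    def countRatings(pevents: ESPEvents, sc: SparkContext): Long = {
      val rdd: RDD[Event] = pevents.find(appId = 1, eventNames = Some(Seq("rate")))(sc)
      rdd.count()
    }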
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
new file mode 100644
index 0000000..e5264ae
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import java.io.IOException
+
+import scala.collection.JavaConverters._
+
+import org.apache.http.Header
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.write
+
+import grizzled.slf4j.Logging
+
+class ESSequences(client: ESClient, config: StorageClientConfig, index: String) extends Logging {
+ implicit val formats = DefaultFormats
+ private val estype = "sequences"
+
+ val restClient = client.open()
+ try {
+ ESUtils.createIndex(restClient, index)
+ val mappingJson =
+ (estype ->
+ ("_all" -> ("enabled" -> 0)))
+ ESUtils.createMapping(restClient, index, estype, compact(render(mappingJson)))
+ } finally {
+ restClient.close()
+ }
+
+ def genNext(name: String): Int = {
+ val restClient = client.open()
+ try {
+ val entity = new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON)
+ val response = restClient.performRequest(
+ "POST",
+ s"/$index/$estype/$name",
+ Map("refresh" -> "true").asJava,
+ entity)
+ val jsonResponse = parse(EntityUtils.toString(response.getEntity))
+ val result = (jsonResponse \ "result").extract[String]
+ result match {
+ case "created" =>
+ (jsonResponse \ "_version").extract[Int]
+ case "updated" =>
+ (jsonResponse \ "_version").extract[Int]
+ case _ =>
+ throw new IllegalStateException(s"[$result] Failed to update $index/$estype/$name")
+ }
+ } catch {
+ case e: IOException =>
+ throw new StorageClientException(s"Failed to update $index/$estype/$name", e)
+ } finally {
+ restClient.close()
+ }
+ }
+}
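
genNext leans on Elasticsearch's own optimistic concurrency: re-indexing the document at /<index>/sequences/<name> bumps its _version, and that version number is the counter, so consecutive calls yield 1, 2, 3, ... per sequence name. An illustrative use:

    // Two consecutive ids from the same named sequence.
    def nextTwo(seq: ESSequences): (Int, Int) =
      (seq.genNext("events"), seq.genNext("events"))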
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
new file mode 100644
index 0000000..72f4dd6
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
+
+import org.apache.http.entity.ContentType
+import org.apache.http.entity.StringEntity
+import org.apache.http.nio.entity.NStringEntity
+import org.elasticsearch.client.RestClient
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.apache.http.util.EntityUtils
+import org.joda.time.DateTime
+import org.joda.time.format.DateTimeFormat
+import org.joda.time.DateTimeZone
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.http.HttpHost
+
+object ESUtils {
+ val scrollLife = "1m"
+
+ def get[T: Manifest](
+ client: RestClient,
+ index: String,
+ estype: String,
+ query: String,
+ size: Int)(
+ implicit formats: Formats): Seq[T] = {
+ val response = client.performRequest(
+ "POST",
+ s"/$index/$estype/_search",
+ Map("size" -> s"${size}"),
+ new StringEntity(query))
+ val responseJValue = parse(EntityUtils.toString(response.getEntity))
+ val hits = (responseJValue \ "hits" \ "hits").extract[Seq[JValue]]
+ hits.map(h => (h \ "_source").extract[T])
+ }
+
+ def getAll[T: Manifest](
+ client: RestClient,
+ index: String,
+ estype: String,
+ query: String)(
+ implicit formats: Formats): Seq[T] = {
+
+ @scala.annotation.tailrec
+ def scroll(scrollId: String, hits: Seq[JValue], results: Seq[T]): Seq[T] = {
+ if (hits.isEmpty) results
+ else {
+ val json = ("scroll" -> scrollLife) ~ ("scroll_id" -> scrollId)
+ val scrollBody = new StringEntity(compact(render(json)))
+ val response = client.performRequest(
+ "POST",
+ "/_search/scroll",
+ Map[String, String](),
+ scrollBody)
+ val responseJValue = parse(EntityUtils.toString(response.getEntity))
+ scroll((responseJValue \ "_scroll_id").extract[String],
+ (responseJValue \ "hits" \ "hits").extract[Seq[JValue]],
+ hits.map(h => (h \ "_source").extract[T]) ++ results)
+ }
+ }
+
+ val response = client.performRequest(
+ "POST",
+ s"/$index/$estype/_search",
+ Map("scroll" -> scrollLife),
+ new StringEntity(query))
+ val responseJValue = parse(EntityUtils.toString(response.getEntity))
+ scroll((responseJValue \ "_scroll_id").extract[String],
+ (responseJValue \ "hits" \ "hits").extract[Seq[JValue]],
+ Nil)
+ }
+
+ def createIndex(
+ client: RestClient,
+ index: String): Unit = {
+ client.performRequest(
+ "HEAD",
+ s"/$index",
+ Map.empty[String, String].asJava).getStatusLine.getStatusCode match {
+ case 404 =>
+ client.performRequest(
+ "PUT",
+ s"/$index",
+ Map.empty[String, String].asJava)
+ case 200 =>
+ case _ =>
+ throw new IllegalStateException(s"/$index is invalid.")
+ }
+ }
+
+ def createMapping(
+ client: RestClient,
+ index: String,
+ estype: String,
+ json: String): Unit = {
+ client.performRequest(
+ "HEAD",
+ s"/$index/_mapping/$estype",
+ Map.empty[String, String].asJava).getStatusLine.getStatusCode match {
+ case 404 =>
+ val entity = new NStringEntity(json, ContentType.APPLICATION_JSON)
+ client.performRequest(
+ "PUT",
+ s"/$index/_mapping/$estype",
+ Map.empty[String, String].asJava,
+ entity)
+ case 200 =>
+ case _ =>
+ throw new IllegalStateException(s"/$index/$estype is invalid: $json")
+ }
+ }
+
+ def createEventQuery(
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None,
+ reversed: Option[Boolean] = None): String = {
+ val mustQueries = Seq(
+ startTime.map(x => {
+ val v = DateTimeFormat
+ .forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ").print(x.withZone(DateTimeZone.UTC))
+ s"""{"range":{"eventTime":{"gte":"${v}"}}}"""
+ }),
+ untilTime.map(x => {
+ val v = DateTimeFormat
+ .forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ").print(x.withZone(DateTimeZone.UTC))
+ s"""{"range":{"eventTime":{"lt":"${v}"}}}"""
+ }),
+ entityType.map(x => s"""{"term":{"entityType":"${x}"}}"""),
+ entityId.map(x => s"""{"term":{"entityId":"${x}"}}"""),
+ targetEntityType.flatMap(xx => xx.map(x => s"""{"term":{"targetEntityType":"${x}"}}""")),
+ targetEntityId.flatMap(xx => xx.map(x => s"""{"term":{"targetEntityId":"${x}"}}""")),
+ eventNames
+ .map { xx => xx.map(x => "\"%s\"".format(x)) }
+ .map(x => s"""{"terms":{"event":[${x.mkString(",")}]}}""")).flatten.mkString(",")
+ val query = mustQueries.isEmpty match {
+ case true => """query":{"match_all":{}}"""
+ case _ => s"""query":{"bool":{"must":[${mustQueries}]}}"""
+ }
+ val sortOrder = reversed.map(x => x match {
+ case true => "desc"
+ case _ => "asc"
+ }).getOrElse("asc")
+ s"""{
+ |"${query},
+ |"sort":[{"eventTime":{"order":"${sortOrder}"}}]
+ |}""".stripMargin
+ }
+
+ def getHttpHosts(config: StorageClientConfig): Seq[HttpHost] = {
+ val hosts = config.properties.get("HOSTS").
+ map(_.split(",").toSeq).getOrElse(Seq("localhost"))
+ val ports = config.properties.get("PORTS").
+ map(_.split(",").toSeq.map(_.toInt)).getOrElse(Seq(9200))
+ val schemes = config.properties.get("SCHEMES").
+ map(_.split(",").toSeq).getOrElse(Seq("http"))
+ (hosts, ports, schemes).zipped.map((h, p, s) => new HttpHost(h, p, s))
+ }
+}
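
To see what createEventQuery actually emits, here is one concrete call together with its output; the argument values are illustrative:

    def sampleQuery: String = ESUtils.createEventQuery(
      entityType = Some("user"),
      entityId = Some("u1"),
      reversed = Some(true))
    // sampleQuery is:
    // {
    // "query":{"bool":{"must":[{"term":{"entityType":"user"}},{"term":{"entityId":"u1"}}]}},
    // "sort":[{"eventTime":{"order":"desc"}}]
    // }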
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
new file mode 100644
index 0000000..647d180
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import org.apache.http.HttpHost
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+import org.elasticsearch.client.RestClient
+
+import grizzled.slf4j.Logging
+
+case class ESClient(hosts: Seq[HttpHost]) {
+ def open(): RestClient = {
+ try {
+ RestClient.builder(hosts: _*).build()
+ } catch {
+ case e: Throwable =>
+ throw new StorageClientException(e.getMessage, e)
+ }
+ }
+}
+
+class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
+ with Logging {
+ override val prefix = "ES"
+
+ val client = ESClient(ESUtils.getHttpHosts(config))
+}
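
Every accessor in this commit follows the same open/use/close lifecycle around the low-level RestClient. A minimal sketch, assuming a node on localhost:9200:

    import org.apache.http.HttpHost

    def pingCluster(): Unit = {
      val restClient = ESClient(Seq(new HttpHost("localhost", 9200, "http"))).open()
      try {
        val status = restClient.performRequest("GET", "/").getStatusLine.getStatusCode
        println(s"cluster root responded with HTTP $status")
      } finally {
        restClient.close()
      }
    }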
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
new file mode 100644
index 0000000..fdc3b48
--- /dev/null
+++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** Elasticsearch implementation of storage traits, supporting meta data only
+ *
+ * @group Implementation
+ */
+package elasticsearch {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/src/test/resources/application.conf b/storage/elasticsearch/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/elasticsearch/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+ sources {
+ mongodb {
+ type = mongodb
+ hosts = [localhost]
+ ports = [27017]
+ }
+ elasticsearch {
+ type = elasticsearch
+ hosts = [localhost]
+ ports = [9300]
+ }
+ }
+ repositories {
+ # This section is a dummy just to make storage happy.
+ # The actual testing will not bypass these repository settings completely.
+ # Please refer to StorageTestUtils.scala.
+ settings {
+ name = "test_predictionio"
+ source = mongodb
+ }
+
+ appdata {
+ name = "test_predictionio_appdata"
+ source = mongodb
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/.gitignore
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/.gitignore b/storage/elasticsearch1/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/elasticsearch1/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/build.sbt b/storage/elasticsearch1/build.sbt
new file mode 100644
index 0000000..8c29b84
--- /dev/null
+++ b/storage/elasticsearch1/build.sbt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+name := "apache-predictionio-data-elasticsearch1"
+
+elasticsearchVersion := "1.7.3"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
+ "org.elasticsearch" % "elasticsearch" % elasticsearchVersion.value,
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+
+assemblyMergeStrategy in assembly := {
+ case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
+ case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
+
+// skip test in assembly
+test in assembly := {}
+
+outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "extra" / ("pio-data-elasticsearch1-assembly-" + version.value + ".jar")
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
new file mode 100644
index 0000000..077168a
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.AccessKey
+import org.apache.predictionio.data.storage.AccessKeys
+import org.elasticsearch.ElasticsearchException
+import org.elasticsearch.client.Client
+import org.elasticsearch.index.query.FilterBuilders._
+import org.json4s.JsonDSL._
+import org.json4s._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+
+import scala.util.Random
+
+/** Elasticsearch implementation of AccessKeys. */
+class ESAccessKeys(client: Client, config: StorageClientConfig, index: String)
+ extends AccessKeys with Logging {
+ implicit val formats = DefaultFormats.lossless
+ private val estype = "accesskeys"
+
+ val indices = client.admin.indices
+ val indexExistResponse = indices.prepareExists(index).get
+ if (!indexExistResponse.isExists) {
+ indices.prepareCreate(index).get
+ }
+ val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
+ if (!typeExistResponse.isExists) {
+ val json =
+ (estype ->
+ ("properties" ->
+ ("key" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("events" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
+ indices.preparePutMapping(index).setType(estype).
+ setSource(compact(render(json))).get
+ }
+
+ def insert(accessKey: AccessKey): Option[String] = {
+ val key = if (accessKey.key.isEmpty) generateKey else accessKey.key
+ update(accessKey.copy(key = key))
+ Some(key)
+ }
+
+ def get(key: String): Option[AccessKey] = {
+ try {
+ val response = client.prepareGet(
+ index,
+ estype,
+ key).get()
+ Some(read[AccessKey](response.getSourceAsString))
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ None
+ case e: NullPointerException => None
+ }
+ }
+
+ def getAll(): Seq[AccessKey] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype)
+ ESUtils.getAll[AccessKey](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq[AccessKey]()
+ }
+ }
+
+ def getByAppid(appid: Int): Seq[AccessKey] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype).
+ setPostFilter(termFilter("appid", appid))
+ ESUtils.getAll[AccessKey](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq[AccessKey]()
+ }
+ }
+
+ def update(accessKey: AccessKey): Unit = {
+ try {
+ client.prepareIndex(index, estype, accessKey.key).setSource(write(accessKey)).get()
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ }
+ }
+
+ def delete(key: String): Unit = {
+ try {
+ client.prepareDelete(index, estype, key).get
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ }
+ }
+}
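
As a usage sketch (illustrative only, not part of this commit), the AccessKeys interface implemented above can be exercised as follows, assuming the AccessKey case class fields (key, appid, events) from the data package; accessKeyDemo is a hypothetical helper name:

    import org.apache.predictionio.data.storage.{AccessKey, AccessKeys}

    def accessKeyDemo(accessKeys: AccessKeys): Unit = {
      // An empty key asks insert() to generate a random one.
      val created = accessKeys.insert(
        AccessKey(key = "", appid = 1, events = Seq("view", "buy")))
      created.foreach { k =>
        println(accessKeys.get(k)) // get() is a real-time lookup by id
        accessKeys.delete(k)
      }
    }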
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
new file mode 100644
index 0000000..3781a4b
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.App
+import org.apache.predictionio.data.storage.Apps
+import org.elasticsearch.ElasticsearchException
+import org.elasticsearch.client.Client
+import org.elasticsearch.index.query.FilterBuilders._
+import org.json4s.JsonDSL._
+import org.json4s._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+
+/** Elasticsearch implementation of Apps. */
+class ESApps(client: Client, config: StorageClientConfig, index: String)
+ extends Apps with Logging {
+ implicit val formats = DefaultFormats.lossless
+ private val estype = "apps"
+ private val seq = new ESSequences(client, config, index)
+
+ val indices = client.admin.indices
+ val indexExistResponse = indices.prepareExists(index).get
+ if (!indexExistResponse.isExists) {
+ indices.prepareCreate(index).get
+ }
+ val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
+ if (!typeExistResponse.isExists) {
+ val json =
+ (estype ->
+ ("properties" ->
+ ("name" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
+ indices.preparePutMapping(index).setType(estype).
+ setSource(compact(render(json))).get
+ }
+
+ def insert(app: App): Option[Int] = {
+ val id =
+ if (app.id == 0) {
+ var roll = seq.genNext("apps")
+ while (!get(roll).isEmpty) roll = seq.genNext("apps")
+ roll
+ }
+ else app.id
+ val realapp = app.copy(id = id)
+ update(realapp)
+ Some(id)
+ }
+
+ def get(id: Int): Option[App] = {
+ try {
+ val response = client.prepareGet(
+ index,
+ estype,
+ id.toString).get()
+ Some(read[App](response.getSourceAsString))
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ None
+ case e: NullPointerException => None
+ }
+ }
+
+ def getByName(name: String): Option[App] = {
+ try {
+ val response = client.prepareSearch(index).setTypes(estype).
+ setPostFilter(termFilter("name", name)).get
+ val hits = response.getHits().hits()
+ if (hits.size > 0) {
+ Some(read[App](hits.head.getSourceAsString))
+ } else {
+ None
+ }
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ None
+ }
+ }
+
+ def getAll(): Seq[App] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype)
+ ESUtils.getAll[App](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq[App]()
+ }
+ }
+
+ def update(app: App): Unit = {
+ try {
+      client.prepareIndex(index, estype, app.id.toString).
+        setSource(write(app)).get()
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ }
+ }
+
+ def delete(id: Int): Unit = {
+ try {
+ client.prepareDelete(index, estype, id.toString).get
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ }
+ }
+}
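
A corresponding sketch for ESApps (again illustrative, assuming the App case class carries id, name, and description): an App inserted with id == 0 receives the next value from the "apps" sequence, while a nonzero id is used as-is.

    import org.apache.predictionio.data.storage.{App, Apps}

    def appDemo(apps: Apps): Unit = {
      // id = 0 triggers sequence-based id generation in insert()
      apps.insert(App(id = 0, name = "my-app", description = None)).foreach { id =>
        apps.get(id).foreach(a => println(a.name))
      }
    }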
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
new file mode 100644
index 0000000..52697fd
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.Channel
+import org.apache.predictionio.data.storage.Channels
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.ElasticsearchException
+import org.elasticsearch.client.Client
+import org.elasticsearch.index.query.FilterBuilders.termFilter
+import org.json4s.DefaultFormats
+import org.json4s.JsonDSL._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+
+class ESChannels(client: Client, config: StorageClientConfig, index: String)
+ extends Channels with Logging {
+
+ implicit val formats = DefaultFormats.lossless
+ private val estype = "channels"
+ private val seq = new ESSequences(client, config, index)
+ private val seqName = "channels"
+
+ val indices = client.admin.indices
+ val indexExistResponse = indices.prepareExists(index).get
+ if (!indexExistResponse.isExists) {
+ indices.prepareCreate(index).get
+ }
+ val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
+ if (!typeExistResponse.isExists) {
+ val json =
+ (estype ->
+ ("properties" ->
+ ("name" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
+ indices.preparePutMapping(index).setType(estype).
+ setSource(compact(render(json))).get
+ }
+
+ def insert(channel: Channel): Option[Int] = {
+ val id =
+ if (channel.id == 0) {
+ var roll = seq.genNext(seqName)
+        while (get(roll).isDefined) roll = seq.genNext(seqName)
+ roll
+ } else channel.id
+
+ val realChannel = channel.copy(id = id)
+ if (update(realChannel)) Some(id) else None
+ }
+
+ def get(id: Int): Option[Channel] = {
+ try {
+ val response = client.prepareGet(
+ index,
+ estype,
+ id.toString).get()
+ Some(read[Channel](response.getSourceAsString))
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ None
+ case e: NullPointerException => None
+ }
+ }
+
+ def getByAppid(appid: Int): Seq[Channel] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype).
+ setPostFilter(termFilter("appid", appid))
+ ESUtils.getAll[Channel](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq[Channel]()
+ }
+ }
+
+ def update(channel: Channel): Boolean = {
+ try {
+      client.prepareIndex(index, estype, channel.id.toString).
+        setSource(write(channel)).get()
+ true
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ false
+ }
+ }
+
+ def delete(id: Int): Unit = {
+ try {
+ client.prepareDelete(index, estype, id.toString).get
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ }
+ }
+
+}
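
ESChannels follows the same pattern, except that update() reports success as a Boolean and insert() surfaces a failed index request as None. A hypothetical sketch (names are placeholders):

    import org.apache.predictionio.data.storage.{Channel, Channels}

    def channelDemo(channels: Channels): Unit = {
      channels.insert(Channel(id = 0, name = "web", appid = 1)).foreach { id =>
        println(channels.getByAppid(1).map(_.name)) // e.g. List(web)
        channels.delete(id)
      }
    }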
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
new file mode 100644
index 0000000..21690bf
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.EngineInstance
+import org.apache.predictionio.data.storage.EngineInstanceSerializer
+import org.apache.predictionio.data.storage.EngineInstances
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.ElasticsearchException
+import org.elasticsearch.client.Client
+import org.elasticsearch.index.query.FilterBuilders._
+import org.elasticsearch.search.sort.SortOrder
+import org.json4s.JsonDSL._
+import org.json4s._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+
+class ESEngineInstances(client: Client, config: StorageClientConfig, index: String)
+ extends EngineInstances with Logging {
+ implicit val formats = DefaultFormats + new EngineInstanceSerializer
+ private val estype = "engine_instances"
+
+ val indices = client.admin.indices
+ val indexExistResponse = indices.prepareExists(index).get
+ if (!indexExistResponse.isExists) {
+ indices.prepareCreate(index).get
+ }
+ val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
+ if (!typeExistResponse.isExists) {
+ val json =
+ (estype ->
+ ("properties" ->
+ ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("startTime" -> ("type" -> "date")) ~
+ ("endTime" -> ("type" -> "date")) ~
+ ("engineId" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("engineVersion" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("engineVariant" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("engineFactory" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("batch" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("dataSourceParams" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("preparatorParams" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("algorithmsParams" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("servingParams" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed"))))
+ indices.preparePutMapping(index).setType(estype).
+ setSource(compact(render(json))).get
+ }
+
+ def insert(i: EngineInstance): String = {
+ try {
+ val response = client.prepareIndex(index, estype).
+ setSource(write(i)).get
+ response.getId
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ ""
+ }
+ }
+
+ def get(id: String): Option[EngineInstance] = {
+ try {
+ val response = client.prepareGet(index, estype, id).get
+ if (response.isExists) {
+ Some(read[EngineInstance](response.getSourceAsString))
+ } else {
+ None
+ }
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ None
+ }
+ }
+
+ def getAll(): Seq[EngineInstance] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype)
+ ESUtils.getAll[EngineInstance](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq()
+ }
+ }
+
+ def getCompleted(
+ engineId: String,
+ engineVersion: String,
+ engineVariant: String): Seq[EngineInstance] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype).setPostFilter(
+ andFilter(
+ termFilter("status", "COMPLETED"),
+ termFilter("engineId", engineId),
+ termFilter("engineVersion", engineVersion),
+ termFilter("engineVariant", engineVariant))).
+ addSort("startTime", SortOrder.DESC)
+ ESUtils.getAll[EngineInstance](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq()
+ }
+ }
+
+ def getLatestCompleted(
+ engineId: String,
+ engineVersion: String,
+ engineVariant: String): Option[EngineInstance] =
+ getCompleted(
+ engineId,
+ engineVersion,
+ engineVariant).headOption
+
+ def update(i: EngineInstance): Unit = {
+ try {
+ client.prepareUpdate(index, estype, i.id).setDoc(write(i)).get
+ } catch {
+ case e: ElasticsearchException => error(e.getMessage)
+ }
+ }
+
+ def delete(id: String): Unit = {
+ try {
+      client.prepareDelete(index, estype, id).get
+ } catch {
+ case e: ElasticsearchException => error(e.getMessage)
+ }
+ }
+}
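
Because getCompleted() filters on status COMPLETED and sorts by startTime descending, getLatestCompleted() is simply its headOption. Illustrative usage (the engine coordinates are placeholders):

    import org.apache.predictionio.data.storage.EngineInstances

    def latestDemo(instances: EngineInstances): Unit = {
      instances.getLatestCompleted("com.example.Engine", "1.0", "default")
        .foreach(i => println(s"newest completed instance: ${i.id}"))
    }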
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
new file mode 100644
index 0000000..85bf820
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.EvaluationInstance
+import org.apache.predictionio.data.storage.EvaluationInstanceSerializer
+import org.apache.predictionio.data.storage.EvaluationInstances
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.ElasticsearchException
+import org.elasticsearch.client.Client
+import org.elasticsearch.index.query.FilterBuilders._
+import org.elasticsearch.search.sort.SortOrder
+import org.json4s.JsonDSL._
+import org.json4s._
+import org.json4s.native.JsonMethods._
+import org.json4s.native.Serialization.read
+import org.json4s.native.Serialization.write
+
+class ESEvaluationInstances(client: Client, config: StorageClientConfig, index: String)
+ extends EvaluationInstances with Logging {
+ implicit val formats = DefaultFormats + new EvaluationInstanceSerializer
+ private val estype = "evaluation_instances"
+
+ val indices = client.admin.indices
+ val indexExistResponse = indices.prepareExists(index).get
+ if (!indexExistResponse.isExists) {
+ indices.prepareCreate(index).get
+ }
+ val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
+ if (!typeExistResponse.isExists) {
+ val json =
+ (estype ->
+ ("properties" ->
+ ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("startTime" -> ("type" -> "date")) ~
+ ("endTime" -> ("type" -> "date")) ~
+ ("evaluationClass" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("engineParamsGeneratorClass" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("batch" ->
+ ("type" -> "string") ~ ("index" -> "not_analyzed")) ~
+ ("evaluatorResults" ->
+ ("type" -> "string") ~ ("index" -> "no")) ~
+ ("evaluatorResultsHTML" ->
+ ("type" -> "string") ~ ("index" -> "no")) ~
+ ("evaluatorResultsJSON" ->
+ ("type" -> "string") ~ ("index" -> "no"))))
+ indices.preparePutMapping(index).setType(estype).
+ setSource(compact(render(json))).get
+ }
+
+ def insert(i: EvaluationInstance): String = {
+ try {
+ val response = client.prepareIndex(index, estype).
+ setSource(write(i)).get
+ response.getId
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ ""
+ }
+ }
+
+ def get(id: String): Option[EvaluationInstance] = {
+ try {
+ val response = client.prepareGet(index, estype, id).get
+ if (response.isExists) {
+ Some(read[EvaluationInstance](response.getSourceAsString))
+ } else {
+ None
+ }
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ None
+ }
+ }
+
+ def getAll(): Seq[EvaluationInstance] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype)
+ ESUtils.getAll[EvaluationInstance](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq()
+ }
+ }
+
+ def getCompleted(): Seq[EvaluationInstance] = {
+ try {
+ val builder = client.prepareSearch(index).setTypes(estype).setPostFilter(
+ termFilter("status", "EVALCOMPLETED")).
+ addSort("startTime", SortOrder.DESC)
+ ESUtils.getAll[EvaluationInstance](client, builder)
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ Seq()
+ }
+ }
+
+ def update(i: EvaluationInstance): Unit = {
+ try {
+ client.prepareUpdate(index, estype, i.id).setDoc(write(i)).get
+ } catch {
+ case e: ElasticsearchException => error(e.getMessage)
+ }
+ }
+
+ def delete(id: String): Unit = {
+ try {
+ client.prepareDelete(index, estype, id).get
+ } catch {
+ case e: ElasticsearchException => error(e.getMessage)
+ }
+ }
+}
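
The evaluation counterpart differs only in the status value it filters on. A minimal sketch:

    import org.apache.predictionio.data.storage.EvaluationInstances

    def completedEvals(instances: EvaluationInstances): Unit = {
      // Returns instances with status "EVALCOMPLETED", newest startTime first.
      instances.getCompleted().foreach(i => println(i.id))
    }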
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
new file mode 100644
index 0000000..5c9e170
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.elasticsearch.ElasticsearchException
+import org.elasticsearch.client.Client
+import org.json4s.JsonDSL._
+import org.json4s._
+import org.json4s.native.JsonMethods._
+
+class ESSequences(client: Client, config: StorageClientConfig, index: String) extends Logging {
+ implicit val formats = DefaultFormats
+ private val estype = "sequences"
+
+ val indices = client.admin.indices
+ val indexExistResponse = indices.prepareExists(index).get
+ if (!indexExistResponse.isExists) {
+ // val settingsJson =
+ // ("number_of_shards" -> 1) ~
+ // ("auto_expand_replicas" -> "0-all")
+ indices.prepareCreate(index).get
+ }
+ val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get
+ if (!typeExistResponse.isExists) {
+ val mappingJson =
+ (estype ->
+ ("_source" -> ("enabled" -> 0)) ~
+ ("_all" -> ("enabled" -> 0)) ~
+ ("_type" -> ("index" -> "no")) ~
+ ("enabled" -> 0))
+ indices.preparePutMapping(index).setType(estype).
+ setSource(compact(render(mappingJson))).get
+ }
+
+ def genNext(name: String): Int = {
+ try {
+ val response = client.prepareIndex(index, estype, name).
+ setSource(compact(render("n" -> name))).get
+ response.getVersion().toInt
+ } catch {
+ case e: ElasticsearchException =>
+ error(e.getMessage)
+ 0
+ }
+ }
+}
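
genNext() leans on Elasticsearch document versioning: re-indexing the same (index, type, id) triple bumps the document version by one, so the returned version doubles as a monotonically increasing counter. The same trick against a bare 1.x client, as a standalone sketch (not part of the commit):

    import org.elasticsearch.client.Client

    def nextSequence(client: Client, index: String, name: String): Long = {
      // Each index request on the same document id increments its version.
      client.prepareIndex(index, "sequences", name)
        .setSource(s"""{"n": "$name"}""")
        .get()
        .getVersion
    }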
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
new file mode 100644
index 0000000..f5c99bf
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import org.elasticsearch.action.search.SearchRequestBuilder
+import org.elasticsearch.client.Client
+import org.elasticsearch.common.unit.TimeValue
+import org.json4s.Formats
+import org.json4s.native.Serialization.read
+
+import scala.collection.mutable.ArrayBuffer
+
+object ESUtils {
+ val scrollLife = new TimeValue(60000)
+
+ def getAll[T : Manifest](
+ client: Client,
+ builder: SearchRequestBuilder)(
+ implicit formats: Formats): Seq[T] = {
+ val results = ArrayBuffer[T]()
+ var response = builder.setScroll(scrollLife).get
+ var hits = response.getHits().hits()
+ results ++= hits.map(h => read[T](h.getSourceAsString))
+ while (hits.size > 0) {
+ response = client.prepareSearchScroll(response.getScrollId).
+ setScroll(scrollLife).get
+ hits = response.getHits().hits()
+ results ++= hits.map(h => read[T](h.getSourceAsString))
+ }
+ results
+ }
+}
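
getAll() pages through results with scan-and-scroll: the first request opens a 60-second scroll context, and each prepareSearchScroll call fetches the next page until an empty page signals exhaustion. A hypothetical caller (Doc and "docs" are illustrative names):

    import org.json4s.DefaultFormats

    case class Doc(name: String)

    def allDocs(client: org.elasticsearch.client.Client, index: String): Seq[Doc] = {
      implicit val formats = DefaultFormats
      ESUtils.getAll[Doc](client, client.prepareSearch(index).setTypes("docs"))
    }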
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
new file mode 100644
index 0000000..75ac2b0
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.elasticsearch
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+import org.elasticsearch.client.transport.TransportClient
+import org.elasticsearch.common.settings.ImmutableSettings
+import org.elasticsearch.common.transport.InetSocketTransportAddress
+import org.elasticsearch.transport.ConnectTransportException
+
+class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
+ with Logging {
+ override val prefix = "ES"
+ val client = try {
+ val hosts = config.properties.get("HOSTS").
+ map(_.split(",").toSeq).getOrElse(Seq("localhost"))
+ val ports = config.properties.get("PORTS").
+ map(_.split(",").toSeq.map(_.toInt)).getOrElse(Seq(9300))
+ val settings = ImmutableSettings.settingsBuilder()
+ .put("cluster.name", config.properties.getOrElse("CLUSTERNAME", "elasticsearch"))
+ val transportClient = new TransportClient(settings)
+ (hosts zip ports) foreach { hp =>
+ transportClient.addTransportAddress(
+ new InetSocketTransportAddress(hp._1, hp._2))
+ }
+ transportClient
+ } catch {
+ case e: ConnectTransportException =>
+ throw new StorageClientException(e.getMessage, e)
+ }
+}
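
HOSTS and PORTS are parallel comma-separated lists that get zipped together, and CLUSTERNAME must match the server's cluster.name or the transport client will fail to join. A configuration sketch, assuming StorageClientConfig accepts the properties map read above (host names are placeholders):

    import org.apache.predictionio.data.storage.StorageClientConfig

    val config = StorageClientConfig(properties = Map(
      "HOSTS" -> "es1.example.com,es2.example.com",
      "PORTS" -> "9300,9300",
      "CLUSTERNAME" -> "my-cluster"))
    val esClient = new StorageClient(config).client // a TransportClient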
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
new file mode 100644
index 0000000..0c549b8
--- /dev/null
+++ b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** Elasticsearch implementation of storage traits, supporting meta data only
+ *
+ * @group Implementation
+ */
+package object elasticsearch {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/elasticsearch1/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/src/test/resources/application.conf b/storage/elasticsearch1/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/elasticsearch1/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+ sources {
+ mongodb {
+ type = mongodb
+ hosts = [localhost]
+ ports = [27017]
+ }
+ elasticsearch {
+ type = elasticsearch
+ hosts = [localhost]
+ ports = [9300]
+ }
+ }
+ repositories {
+  # This section is a placeholder just to keep the storage layer happy.
+  # The actual tests bypass these repository settings entirely.
+  # Please refer to StorageTestUtils.scala.
+ settings {
+ name = "test_predictionio"
+ source = mongodb
+ }
+
+ appdata {
+ name = "test_predictionio_appdata"
+ source = mongodb
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/.gitignore
----------------------------------------------------------------------
diff --git a/storage/hbase/.gitignore b/storage/hbase/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/hbase/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hbase/build.sbt b/storage/hbase/build.sbt
new file mode 100644
index 0000000..5856a5e
--- /dev/null
+++ b/storage/hbase/build.sbt
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+name := "apache-predictionio-data-hbase"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
+ "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
+ "org.apache.hbase" % "hbase-common" % "0.98.5-hadoop2",
+ "org.apache.hbase" % "hbase-client" % "0.98.5-hadoop2"
+ exclude("org.apache.zookeeper", "zookeeper"),
+ // added for Parallel storage interface
+ "org.apache.hbase" % "hbase-server" % "0.98.5-hadoop2"
+ exclude("org.apache.hbase", "hbase-client")
+ exclude("org.apache.zookeeper", "zookeeper")
+ exclude("javax.servlet", "servlet-api")
+ exclude("org.mortbay.jetty", "servlet-api-2.5")
+ exclude("org.mortbay.jetty", "jsp-api-2.1")
+ exclude("org.mortbay.jetty", "jsp-2.1"),
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test",
+ "org.specs2" %% "specs2" % "2.3.13" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+
+assemblyMergeStrategy in assembly := {
+ case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
+ case PathList("META-INF", "NOTICE.txt") => MergeStrategy.concat
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
+
+// skip test in assembly
+test in assembly := {}
+
+outputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / "assembly" / "spark" / ("pio-data-hbase-assembly-" + version.value + ".jar")
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala
new file mode 100644
index 0000000..2cdb734
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hbase
+
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.EventValidation
+import org.apache.predictionio.data.storage.DataMap
+
+import org.apache.hadoop.hbase.client.Result
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.client.Scan
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.filter.FilterList
+import org.apache.hadoop.hbase.filter.RegexStringComparator
+import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
+import org.apache.hadoop.hbase.filter.BinaryComparator
+import org.apache.hadoop.hbase.filter.QualifierFilter
+import org.apache.hadoop.hbase.filter.SkipFilter
+
+import org.json4s.DefaultFormats
+import org.json4s.JObject
+import org.json4s.native.Serialization.{ read, write }
+
+import org.joda.time.DateTime
+import org.joda.time.DateTimeZone
+
+import org.apache.commons.codec.binary.Base64
+import java.security.MessageDigest
+
+import java.util.UUID
+
+/* Common utility functions for accessing the events store in HBase. */
+object HBEventsUtil {
+
+ implicit val formats = DefaultFormats
+
+ def tableName(namespace: String, appId: Int, channelId: Option[Int] = None): String = {
+ channelId.map { ch =>
+ s"${namespace}:events_${appId}_${ch}"
+ }.getOrElse {
+ s"${namespace}:events_${appId}"
+ }
+ }
+
+ // column names for "e" column family
+ val colNames: Map[String, Array[Byte]] = Map(
+ "event" -> "e",
+ "entityType" -> "ety",
+ "entityId" -> "eid",
+ "targetEntityType" -> "tety",
+ "targetEntityId" -> "teid",
+ "properties" -> "p",
+ "prId" -> "prid",
+ "eventTime" -> "et",
+ "eventTimeZone" -> "etz",
+ "creationTime" -> "ct",
+ "creationTimeZone" -> "ctz"
+ ).mapValues(Bytes.toBytes(_))
+
+ def hash(entityType: String, entityId: String): Array[Byte] = {
+ val s = entityType + "-" + entityId
+    // get a new MessageDigest object each time for thread safety
+ val md5 = MessageDigest.getInstance("MD5")
+ md5.digest(Bytes.toBytes(s))
+ }
+
+ class RowKey(
+ val b: Array[Byte]
+ ) {
+ require((b.size == 32), s"Incorrect b size: ${b.size}")
+ lazy val entityHash: Array[Byte] = b.slice(0, 16)
+ lazy val millis: Long = Bytes.toLong(b.slice(16, 24))
+ lazy val uuidLow: Long = Bytes.toLong(b.slice(24, 32))
+
+ lazy val toBytes: Array[Byte] = b
+
+ override def toString: String = {
+ Base64.encodeBase64URLSafeString(toBytes)
+ }
+ }
+
+ object RowKey {
+ def apply(
+ entityType: String,
+ entityId: String,
+ millis: Long,
+ uuidLow: Long): RowKey = {
+      // append the UUID's least-significant bits to disambiguate events
+      // occurring in the same millisecond (the UUID's most-significant bits
+      // encode a timestamp, so eventTime is used for ordering instead).
+ val b = hash(entityType, entityId) ++
+ Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow)
+ new RowKey(b)
+ }
+
+ // get RowKey from string representation
+ def apply(s: String): RowKey = {
+ try {
+ apply(Base64.decodeBase64(s))
+ } catch {
+ case e: Exception => throw new RowKeyException(
+ s"Failed to convert String ${s} to RowKey because ${e}", e)
+ }
+ }
+
+ def apply(b: Array[Byte]): RowKey = {
+ if (b.size != 32) {
+ val bString = b.mkString(",")
+ throw new RowKeyException(
+ s"Incorrect byte array size. Bytes: ${bString}.")
+ }
+ new RowKey(b)
+ }
+
+ }
+
+ class RowKeyException(val msg: String, val cause: Exception)
+ extends Exception(msg, cause) {
+ def this(msg: String) = this(msg, null)
+ }
+
+ case class PartialRowKey(entityType: String, entityId: String,
+ millis: Option[Long] = None) {
+ val toBytes: Array[Byte] = {
+ hash(entityType, entityId) ++
+ (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]()))
+ }
+ }
+
+ def eventToPut(event: Event, appId: Int): (Put, RowKey) = {
+ // generate new rowKey if eventId is None
+ val rowKey = event.eventId.map { id =>
+ RowKey(id) // create rowKey from eventId
+ }.getOrElse {
+      // TODO: use a real UUID, not pseudo-random bits
+ val uuidLow: Long = UUID.randomUUID().getLeastSignificantBits
+ RowKey(
+ entityType = event.entityType,
+ entityId = event.entityId,
+ millis = event.eventTime.getMillis,
+ uuidLow = uuidLow
+ )
+ }
+
+ val eBytes = Bytes.toBytes("e")
+ // use eventTime as HBase's cell timestamp
+ val put = new Put(rowKey.toBytes, event.eventTime.getMillis)
+
+ def addStringToE(col: Array[Byte], v: String): Put = {
+ put.add(eBytes, col, Bytes.toBytes(v))
+ }
+
+ def addLongToE(col: Array[Byte], v: Long): Put = {
+ put.add(eBytes, col, Bytes.toBytes(v))
+ }
+
+ addStringToE(colNames("event"), event.event)
+ addStringToE(colNames("entityType"), event.entityType)
+ addStringToE(colNames("entityId"), event.entityId)
+
+ event.targetEntityType.foreach { targetEntityType =>
+ addStringToE(colNames("targetEntityType"), targetEntityType)
+ }
+
+ event.targetEntityId.foreach { targetEntityId =>
+ addStringToE(colNames("targetEntityId"), targetEntityId)
+ }
+
+ // TODO: make properties Option[]
+ if (!event.properties.isEmpty) {
+ addStringToE(colNames("properties"), write(event.properties.toJObject))
+ }
+
+ event.prId.foreach { prId =>
+ addStringToE(colNames("prId"), prId)
+ }
+
+ addLongToE(colNames("eventTime"), event.eventTime.getMillis)
+ val eventTimeZone = event.eventTime.getZone
+ if (!eventTimeZone.equals(EventValidation.defaultTimeZone)) {
+ addStringToE(colNames("eventTimeZone"), eventTimeZone.getID)
+ }
+
+ addLongToE(colNames("creationTime"), event.creationTime.getMillis)
+ val creationTimeZone = event.creationTime.getZone
+ if (!creationTimeZone.equals(EventValidation.defaultTimeZone)) {
+ addStringToE(colNames("creationTimeZone"), creationTimeZone.getID)
+ }
+
+ // can use zero-length byte array for tag cell value
+ (put, rowKey)
+ }
+
+ def resultToEvent(result: Result, appId: Int): Event = {
+ val rowKey = RowKey(result.getRow())
+
+ val eBytes = Bytes.toBytes("e")
+ // val e = result.getFamilyMap(eBytes)
+
+ def getStringCol(col: String): String = {
+ val r = result.getValue(eBytes, colNames(col))
+ require(r != null,
+ s"Failed to get value for column ${col}. " +
+ s"Rowkey: ${rowKey.toString} " +
+ s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.")
+
+ Bytes.toString(r)
+ }
+
+ def getLongCol(col: String): Long = {
+ val r = result.getValue(eBytes, colNames(col))
+ require(r != null,
+ s"Failed to get value for column ${col}. " +
+ s"Rowkey: ${rowKey.toString} " +
+ s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.")
+
+ Bytes.toLong(r)
+ }
+
+ def getOptStringCol(col: String): Option[String] = {
+ val r = result.getValue(eBytes, colNames(col))
+ if (r == null) {
+ None
+ } else {
+ Some(Bytes.toString(r))
+ }
+ }
+
+ def getTimestamp(col: String): Long = {
+ result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp()
+ }
+
+ val event = getStringCol("event")
+ val entityType = getStringCol("entityType")
+ val entityId = getStringCol("entityId")
+ val targetEntityType = getOptStringCol("targetEntityType")
+ val targetEntityId = getOptStringCol("targetEntityId")
+ val properties: DataMap = getOptStringCol("properties")
+ .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())
+ val prId = getOptStringCol("prId")
+ val eventTimeZone = getOptStringCol("eventTimeZone")
+ .map(DateTimeZone.forID(_))
+ .getOrElse(EventValidation.defaultTimeZone)
+ val eventTime = new DateTime(
+ getLongCol("eventTime"), eventTimeZone)
+ val creationTimeZone = getOptStringCol("creationTimeZone")
+ .map(DateTimeZone.forID(_))
+ .getOrElse(EventValidation.defaultTimeZone)
+ val creationTime: DateTime = new DateTime(
+ getLongCol("creationTime"), creationTimeZone)
+
+ Event(
+ eventId = Some(RowKey(result.getRow()).toString),
+ event = event,
+ entityType = entityType,
+ entityId = entityId,
+ targetEntityType = targetEntityType,
+ targetEntityId = targetEntityId,
+ properties = properties,
+ eventTime = eventTime,
+ tags = Seq(),
+ prId = prId,
+ creationTime = creationTime
+ )
+ }
+
+
+  // For a mandatory field, None means "don't care".
+  // For an optional field, None means "don't care",
+  // Some(None) means the field must not exist, and
+  // Some(Some(x)) means the field must equal x.
+ def createScan(
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None,
+ reversed: Option[Boolean] = None): Scan = {
+
+ val scan: Scan = new Scan()
+
+ (entityType, entityId) match {
+ case (Some(et), Some(eid)) => {
+ val start = PartialRowKey(et, eid,
+ startTime.map(_.getMillis)).toBytes
+        // if no untilTime, use -1 (all 0xFF bytes) so the stop key is the
+        // upper bound of this entityType/entityId prefix
+ val stop = PartialRowKey(et, eid,
+ untilTime.map(_.getMillis).orElse(Some(-1))).toBytes
+
+ if (reversed.getOrElse(false)) {
+ // Reversed order.
+ // If you specify a startRow and stopRow,
+ // to scan in reverse, the startRow needs to be lexicographically
+ // after the stopRow.
+ scan.setStartRow(stop)
+ scan.setStopRow(start)
+ scan.setReversed(true)
+ } else {
+ scan.setStartRow(start)
+ scan.setStopRow(stop)
+ }
+ }
+ case (_, _) => {
+ val minTime: Long = startTime.map(_.getMillis).getOrElse(0)
+ val maxTime: Long = untilTime.map(_.getMillis).getOrElse(Long.MaxValue)
+ scan.setTimeRange(minTime, maxTime)
+ if (reversed.getOrElse(false)) {
+ scan.setReversed(true)
+ }
+ }
+ }
+
+ val filters = new FilterList(FilterList.Operator.MUST_PASS_ALL)
+
+ val eBytes = Bytes.toBytes("e")
+
+ def createBinaryFilter(col: String, value: Array[Byte]): SingleColumnValueFilter = {
+ val comp = new BinaryComparator(value)
+ new SingleColumnValueFilter(
+ eBytes, colNames(col), CompareOp.EQUAL, comp)
+ }
+
+ // skip the row if the column exists
+ def createSkipRowIfColumnExistFilter(col: String): SkipFilter = {
+ val comp = new BinaryComparator(colNames(col))
+ val q = new QualifierFilter(CompareOp.NOT_EQUAL, comp)
+ // filters an entire row if any of the Cell checks do not pass
+ new SkipFilter(q)
+ }
+
+ entityType.foreach { et =>
+ val compType = new BinaryComparator(Bytes.toBytes(et))
+ val filterType = new SingleColumnValueFilter(
+ eBytes, colNames("entityType"), CompareOp.EQUAL, compType)
+ filters.addFilter(filterType)
+ }
+
+ entityId.foreach { eid =>
+ val compId = new BinaryComparator(Bytes.toBytes(eid))
+ val filterId = new SingleColumnValueFilter(
+ eBytes, colNames("entityId"), CompareOp.EQUAL, compId)
+ filters.addFilter(filterId)
+ }
+
+ eventNames.foreach { eventsList =>
+ // match any of event in the eventsList
+ val eventFilters = new FilterList(FilterList.Operator.MUST_PASS_ONE)
+ eventsList.foreach { e =>
+ val compEvent = new BinaryComparator(Bytes.toBytes(e))
+ val filterEvent = new SingleColumnValueFilter(
+ eBytes, colNames("event"), CompareOp.EQUAL, compEvent)
+ eventFilters.addFilter(filterEvent)
+ }
+ if (!eventFilters.getFilters().isEmpty) {
+ filters.addFilter(eventFilters)
+ }
+ }
+
+ targetEntityType.foreach { tetOpt =>
+ if (tetOpt.isEmpty) {
+ val filter = createSkipRowIfColumnExistFilter("targetEntityType")
+ filters.addFilter(filter)
+ } else {
+ tetOpt.foreach { tet =>
+ val filter = createBinaryFilter(
+ "targetEntityType", Bytes.toBytes(tet))
+ // the entire row will be skipped if the column is not found.
+ filter.setFilterIfMissing(true)
+ filters.addFilter(filter)
+ }
+ }
+ }
+
+ targetEntityId.foreach { teidOpt =>
+ if (teidOpt.isEmpty) {
+ val filter = createSkipRowIfColumnExistFilter("targetEntityId")
+ filters.addFilter(filter)
+ } else {
+ teidOpt.foreach { teid =>
+ val filter = createBinaryFilter(
+ "targetEntityId", Bytes.toBytes(teid))
+ // the entire row will be skipped if the column is not found.
+ filter.setFilterIfMissing(true)
+ filters.addFilter(filter)
+ }
+ }
+ }
+
+ if (!filters.getFilters().isEmpty) {
+ scan.setFilter(filters)
+ }
+
+ scan
+ }
+
+}
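
For orientation, the 32-byte row key assembled above concatenates a 16-byte MD5 of "entityType-entityId" (grouping an entity's events), an 8-byte event time in milliseconds (ordering them), and the UUID's 8 low bytes (disambiguating same-millisecond events). A small construction sketch:

    // Illustrative construction of a row key as defined above.
    val rowKey = HBEventsUtil.RowKey(
      entityType = "user",
      entityId = "u1",
      millis = System.currentTimeMillis(),
      uuidLow = java.util.UUID.randomUUID().getLeastSignificantBits)
    println(rowKey.toString) // URL-safe Base64 of the 32 key bytes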
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala
new file mode 100644
index 0000000..360b007
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hbase
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.LEvents
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.hbase.HBEventsUtil.RowKey
+import org.apache.hadoop.hbase.HColumnDescriptor
+import org.apache.hadoop.hbase.HTableDescriptor
+import org.apache.hadoop.hbase.NamespaceDescriptor
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.hbase.client._
+import org.joda.time.DateTime
+
+import scala.collection.JavaConversions._
+import scala.concurrent.ExecutionContext
+import scala.concurrent.Future
+
+class HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace: String)
+ extends LEvents with Logging {
+
+ // implicit val formats = DefaultFormats + new EventJson4sSupport.DBSerializer
+
+ def resultToEvent(result: Result, appId: Int): Event =
+ HBEventsUtil.resultToEvent(result, appId)
+
+ def getTable(appId: Int, channelId: Option[Int] = None): HTableInterface =
+ client.connection.getTable(HBEventsUtil.tableName(namespace, appId, channelId))
+
+ override
+ def init(appId: Int, channelId: Option[Int] = None): Boolean = {
+    // check whether the namespace exists
+ val existingNamespace = client.admin.listNamespaceDescriptors()
+ .map(_.getName)
+ if (!existingNamespace.contains(namespace)) {
+ val nameDesc = NamespaceDescriptor.create(namespace).build()
+ info(s"The namespace ${namespace} doesn't exist yet. Creating now...")
+ client.admin.createNamespace(nameDesc)
+ }
+
+ val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId))
+ if (!client.admin.tableExists(tableName)) {
+ info(s"The table ${tableName.getNameAsString()} doesn't exist yet." +
+ " Creating now...")
+ val tableDesc = new HTableDescriptor(tableName)
+ tableDesc.addFamily(new HColumnDescriptor("e"))
+ tableDesc.addFamily(new HColumnDescriptor("r")) // reserved
+ client.admin.createTable(tableDesc)
+ }
+ true
+ }
+
+ override
+ def remove(appId: Int, channelId: Option[Int] = None): Boolean = {
+ val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId))
+ try {
+ if (client.admin.tableExists(tableName)) {
+ info(s"Removing table ${tableName.getNameAsString()}...")
+ client.admin.disableTable(tableName)
+ client.admin.deleteTable(tableName)
+ } else {
+ info(s"Table ${tableName.getNameAsString()} doesn't exist." +
+ s" Nothing is deleted.")
+ }
+ true
+ } catch {
+ case e: Exception => {
+ error(s"Fail to remove table for appId ${appId}. Exception: ${e}")
+ false
+ }
+ }
+ }
+
+ override
+ def close(): Unit = {
+ client.admin.close()
+ client.connection.close()
+ }
+
+ override
+ def futureInsert(
+ event: Event, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):
+ Future[String] = {
+ Future {
+ val table = getTable(appId, channelId)
+ val (put, rowKey) = HBEventsUtil.eventToPut(event, appId)
+ table.put(put)
+ table.flushCommits()
+ table.close()
+ rowKey.toString
+ }
+ }
+
+ override
+ def futureGet(
+ eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):
+ Future[Option[Event]] = {
+ Future {
+ val table = getTable(appId, channelId)
+ val rowKey = RowKey(eventId)
+ val get = new Get(rowKey.toBytes)
+
+ val result = table.get(get)
+ table.close()
+
+ if (!result.isEmpty()) {
+ val event = resultToEvent(result, appId)
+ Some(event)
+ } else {
+ None
+ }
+ }
+ }
+
+ override
+ def futureDelete(
+ eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):
+ Future[Boolean] = {
+ Future {
+ val table = getTable(appId, channelId)
+ val rowKey = RowKey(eventId)
+ val exists = table.exists(new Get(rowKey.toBytes))
+ table.delete(new Delete(rowKey.toBytes))
+ table.close()
+ exists
+ }
+ }
+
+ override
+ def futureFind(
+ appId: Int,
+ channelId: Option[Int] = None,
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None,
+ limit: Option[Int] = None,
+ reversed: Option[Boolean] = None)(implicit ec: ExecutionContext):
+ Future[Iterator[Event]] = {
+ Future {
+
+ require(!((reversed == Some(true)) && (entityType.isEmpty || entityId.isEmpty)),
+ "The parameter reversed can only be used when both entityType and entityId are specified.")
+
+ val table = getTable(appId, channelId)
+
+ val scan = HBEventsUtil.createScan(
+ startTime = startTime,
+ untilTime = untilTime,
+ entityType = entityType,
+ entityId = entityId,
+ eventNames = eventNames,
+ targetEntityType = targetEntityType,
+ targetEntityId = targetEntityId,
+ reversed = reversed)
+ val scanner = table.getScanner(scan)
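+ // closing the table here does not close the shared HBase connection,
+ // so the scanner can still be consumed lazily below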
+ table.close()
+
+ val eventsIter = scanner.iterator()
+
+ // Get all events if None or Some(-1)
+ val results: Iterator[Result] = limit match {
+ case Some(-1) => eventsIter
+ case None => eventsIter
+ case Some(x) => eventsIter.take(x)
+ }
+
+ val eventsIt = results.map { resultToEvent(_, appId) }
+
+ eventsIt
+ }
+ }
+
+}
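A minimal usage sketch of the local-event API above (illustrative, not part of this patch; appId 1 and the event fields are placeholders, and Event's remaining fields are assumed to have defaults):

    import org.apache.predictionio.data.storage.{Event, Storage}
    import org.joda.time.DateTime
    import scala.concurrent.Await
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration._

    // resolves to HBLEvents when HBase backs the event store
    val events = Storage.getLEvents()
    events.init(appId = 1)
    val eventId = Await.result(
      events.futureInsert(
        Event(event = "rate", entityType = "user", entityId = "u0",
          targetEntityType = Some("item"), targetEntityId = Some("i0"),
          eventTime = DateTime.now),
        appId = 1, channelId = None),
      10.seconds)
    val fetched = Await.result(
      events.futureGet(eventId, appId = 1, channelId = None), 10.seconds)
    events.close()

The returned ID is the Base64-encoded row key, so futureGet and futureDelete can address the same row later.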
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala
new file mode 100644
index 0000000..7324fa6
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.hbase
+
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.client.{Delete, HTable, Result}
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
+import org.apache.hadoop.io.Writable
+import org.apache.hadoop.mapreduce.OutputFormat
+import org.apache.predictionio.data.storage.{Event, PEvents, StorageClientConfig}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.joda.time.DateTime
+
+class HBPEvents(client: HBClient, config: StorageClientConfig, namespace: String) extends PEvents {
+
+ def checkTableExists(appId: Int, channelId: Option[Int]): Unit = {
+ if (!client.admin.tableExists(HBEventsUtil.tableName(namespace, appId, channelId))) {
+ if (channelId.nonEmpty) {
+ logger.error(s"The appId $appId with channelId $channelId does not exist." +
+ s" Please use valid appId and channelId.")
+ throw new Exception(s"HBase table not found for appId $appId" +
+ s" with channelId $channelId.")
+ } else {
+ logger.error(s"The appId $appId does not exist. Please use valid appId.")
+ throw new Exception(s"HBase table not found for appId $appId.")
+ }
+ }
+ }
+
+ override
+ def find(
+ appId: Int,
+ channelId: Option[Int] = None,
+ startTime: Option[DateTime] = None,
+ untilTime: Option[DateTime] = None,
+ entityType: Option[String] = None,
+ entityId: Option[String] = None,
+ eventNames: Option[Seq[String]] = None,
+ targetEntityType: Option[Option[String]] = None,
+ targetEntityId: Option[Option[String]] = None
+ )(sc: SparkContext): RDD[Event] = {
+
+ checkTableExists(appId, channelId)
+
+ val conf = HBaseConfiguration.create()
+ conf.set(TableInputFormat.INPUT_TABLE,
+ HBEventsUtil.tableName(namespace, appId, channelId))
+
+ val scan = HBEventsUtil.createScan(
+ startTime = startTime,
+ untilTime = untilTime,
+ entityType = entityType,
+ entityId = entityId,
+ eventNames = eventNames,
+ targetEntityType = targetEntityType,
+ targetEntityId = targetEntityId,
+ reversed = None)
+ scan.setCaching(500) // TODO: make configurable
+ scan.setCacheBlocks(false) // TODO: make configurable
+
+ conf.set(TableInputFormat.SCAN, PIOHBaseUtil.convertScanToString(scan))
+
+ // HBase is not accessed until this rdd is actually used.
+ val rdd = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
+ classOf[ImmutableBytesWritable],
+ classOf[Result]).map {
+ case (key, row) => HBEventsUtil.resultToEvent(row, appId)
+ }
+
+ rdd
+ }
+
+ override
+ def write(
+ events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
+
+ checkTableExists(appId, channelId)
+
+ val conf = HBaseConfiguration.create()
+ conf.set(TableOutputFormat.OUTPUT_TABLE,
+ HBEventsUtil.tableName(namespace, appId, channelId))
+ conf.setClass("mapreduce.outputformat.class",
+ classOf[TableOutputFormat[Object]],
+ classOf[OutputFormat[Object, Writable]])
+
+ events.map { event =>
+ val (put, rowKey) = HBEventsUtil.eventToPut(event, appId)
+ (new ImmutableBytesWritable(rowKey.toBytes), put)
+ }.saveAsNewAPIHadoopDataset(conf)
+
+ }
+
+ def delete(
+ eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
+
+ checkTableExists(appId, channelId)
+
+ val tableName = HBEventsUtil.tableName(namespace, appId, channelId)
+
+ eventIds.foreachPartition{ iter =>
+ val conf = HBaseConfiguration.create()
+ conf.set(TableOutputFormat.OUTPUT_TABLE,
+ tableName)
+
+ val table = new HTable(conf, tableName)
+ iter.foreach { id =>
+ val rowKey = HBEventsUtil.RowKey(id)
+ val delete = new Delete(rowKey.toBytes)
+ table.delete(delete)
+ }
+ table.close()
+ }
+ }
+}
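For the parallel counterpart, a sketch of reading events into an RDD (illustrative; appId 1 is a placeholder, and the Spark executors need the HBase client jars on their classpath):

    import org.apache.predictionio.data.storage.Storage
    import org.apache.spark.{SparkConf, SparkContext}

    val sc = new SparkContext(new SparkConf().setAppName("hbase-events-read"))
    // resolves to HBPEvents for an HBase event store
    val pEvents = Storage.getPEvents()
    val rdd = pEvents.find(appId = 1, entityType = Some("user"))(sc)
    println(s"events: ${rdd.count()}")
    sc.stop()

Since find only configures a newAPIHadoopRDD, no scan is issued until an action such as count() runs.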
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala
new file mode 100644
index 0000000..745fcb9
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.hbase
+
+import org.apache.hadoop.hbase.client.Scan
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil
+import org.apache.hadoop.hbase.util.Base64
+
+object PIOHBaseUtil {
+ /*
+ * Copied from Apache HBase because of the method's restrictive scope in 0.98.x
+ */
+ def convertScanToString(scan: Scan): String = {
+ val proto = ProtobufUtil.toScan(scan)
+ Base64.encodeBytes(proto.toByteArray)
+ }
+}
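The helper round-trips a Scan through its protobuf form, which is the representation TableInputFormat expects. A sketch of the intended use (the table name is a placeholder following the default predictionio_eventdata namespace):

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Scan
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat

    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, "predictionio_eventdata:events_1")
    val scan = new Scan()
    scan.setCaching(500)
    // serialize the scan so TableInputFormat can deserialize it on executors
    conf.set(TableInputFormat.SCAN, PIOHBaseUtil.convertScanToString(scan))

conf is then ready for sc.newAPIHadoopRDD, exactly as HBPEvents.find does above.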
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala
new file mode 100644
index 0000000..1720410
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hbase
+
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.MasterNotRunningException
+import org.apache.hadoop.hbase.ZooKeeperConnectionException
+import org.apache.hadoop.hbase.client.HConnectionManager
+import org.apache.hadoop.hbase.client.HConnection
+import org.apache.hadoop.hbase.client.HBaseAdmin
+
+import grizzled.slf4j.Logging
+
+case class HBClient(
+ val conf: Configuration,
+ val connection: HConnection,
+ val admin: HBaseAdmin
+)
+
+class StorageClient(val config: StorageClientConfig)
+ extends BaseStorageClient with Logging {
+
+ val conf = HBaseConfiguration.create()
+
+ if (config.test) {
+ // use fewer retries and shorter timeout for test mode
+ conf.set("hbase.client.retries.number", "1")
+ conf.set("zookeeper.session.timeout", "30000");
+ conf.set("zookeeper.recovery.retry", "1")
+ }
+
+ try {
+ HBaseAdmin.checkHBaseAvailable(conf)
+ } catch {
+ case e: MasterNotRunningException =>
+ error("HBase master is not running (ZooKeeper ensemble: " +
+ conf.get("hbase.zookeeper.quorum") + "). Please make sure that HBase " +
+ "is running properly, and that the configuration is pointing at the " +
+ "correct ZooKeeper ensemble.")
+ throw e
+ case e: ZooKeeperConnectionException =>
+ error("Cannot connect to ZooKeeper (ZooKeeper ensemble: " +
+ conf.get("hbase.zookeeper.quorum") + "). Please make sure that the " +
+ "configuration is pointing at the correct ZooKeeper ensemble. By " +
+ "default, HBase manages its own ZooKeeper, so if you have not " +
+ "configured HBase to use an external ZooKeeper, that means your " +
+ "HBase is not started or configured properly.")
+ throw e
+ case e: Exception => {
+ error("Failed to connect to HBase." +
+ " Please check if HBase is running properly.")
+ throw e
+ }
+ }
+
+ val connection = HConnectionManager.createConnection(conf)
+
+ val client = HBClient(
+ conf = conf,
+ connection = connection,
+ admin = new HBaseAdmin(connection)
+ )
+
+ override
+ val prefix = "HB"
+}
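A construction sketch (illustrative; assumes StorageClientConfig's fields have the defaults defined in the data module, and an HBase cluster reachable through hbase-site.xml on the classpath):

    import org.apache.predictionio.data.storage.StorageClientConfig

    // test = true activates the reduced retry/timeout settings above
    val storageClient = new StorageClient(
      StorageClientConfig(test = true, properties = Map()))
    val admin = storageClient.client.admin
    println(admin.listNamespaceDescriptors().map(_.getName).mkString(", "))

Because checkHBaseAvailable runs in the class body, construction fails fast with one of the errors above when the cluster cannot be reached.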
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala
new file mode 100644
index 0000000..49bf031
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** HBase implementation of storage traits, supporting event data only
+ *
+ * @group Implementation
+ */
+package object hbase {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala
new file mode 100644
index 0000000..cc07fa4
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hbase.upgrade
+
+import org.apache.predictionio.annotation.Experimental
+
+import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.storage.EventValidation
+import org.apache.predictionio.data.storage.DataMap
+
+import org.apache.hadoop.hbase.client.Scan
+import org.apache.hadoop.hbase.client.HConnection
+import org.apache.hadoop.hbase.client.Result
+import org.apache.hadoop.hbase.TableName
+import org.apache.hadoop.hbase.util.Bytes
+
+import org.joda.time.DateTime
+import org.joda.time.DateTimeZone
+
+import org.json4s.DefaultFormats
+import org.json4s.JObject
+import org.json4s.native.Serialization.{ read, write }
+
+import org.apache.commons.codec.binary.Base64
+
+import scala.collection.JavaConversions._
+
+/** :: Experimental :: */
+@Experimental
+object HB_0_8_0 {
+
+ implicit val formats = DefaultFormats
+
+ def getByAppId(
+ connection: HConnection,
+ namespace: String,
+ appId: Int): Iterator[Event] = {
+ val tableName = TableName.valueOf(namespace, "events")
+ val table = connection.getTable(tableName)
+ val start = PartialRowKey(appId)
+ val stop = PartialRowKey(appId + 1)
+ val scan = new Scan(start.toBytes, stop.toBytes)
+ val scanner = table.getScanner(scan)
+ table.close()
+ scanner.iterator().map { resultToEvent(_) }
+ }
+
+ val colNames: Map[String, Array[Byte]] = Map(
+ "event" -> "e",
+ "entityType" -> "ety",
+ "entityId" -> "eid",
+ "targetEntityType" -> "tety",
+ "targetEntityId" -> "teid",
+ "properties" -> "p",
+ "prId" -> "pk", // columna name is 'pk' in 0.8.0/0.8.1
+ "eventTimeZone" -> "etz",
+ "creationTimeZone" -> "ctz"
+ ).mapValues(Bytes.toBytes(_))
+
+
+ class RowKey(
+ val appId: Int,
+ val millis: Long,
+ val uuidLow: Long
+ ) {
+ lazy val toBytes: Array[Byte] = {
+ // append the UUID's least significant bits to disambiguate multiple
+ // events at the same millisecond (the UUID's most significant bits
+ // encode a timestamp, so eventTime is used instead).
+ Bytes.toBytes(appId) ++ Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow)
+ }
+ override def toString: String = {
+ Base64.encodeBase64URLSafeString(toBytes)
+ }
+ }
+
+ object RowKey {
+ // get RowKey from string representation
+ def apply(s: String): RowKey = {
+ try {
+ apply(Base64.decodeBase64(s))
+ } catch {
+ case e: Exception => throw new RowKeyException(
+ s"Failed to convert String ${s} to RowKey because ${e}", e)
+ }
+ }
+
+ def apply(b: Array[Byte]): RowKey = {
+ if (b.size != 20) {
+ val bString = b.mkString(",")
+ throw new RowKeyException(
+ s"Incorrect byte array size. Bytes: ${bString}.")
+ }
+
+ new RowKey(
+ appId = Bytes.toInt(b.slice(0, 4)),
+ millis = Bytes.toLong(b.slice(4, 12)),
+ uuidLow = Bytes.toLong(b.slice(12, 20))
+ )
+ }
+ }
+
+ class RowKeyException(msg: String, cause: Exception)
+ extends Exception(msg, cause) {
+ def this(msg: String) = this(msg, null)
+ }
+
+ case class PartialRowKey(val appId: Int, val millis: Option[Long] = None) {
+ val toBytes: Array[Byte] = {
+ Bytes.toBytes(appId) ++
+ (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]()))
+ }
+ }
+
+ def resultToEvent(result: Result): Event = {
+ val rowKey = RowKey(result.getRow())
+
+ val eBytes = Bytes.toBytes("e")
+ // val e = result.getFamilyMap(eBytes)
+
+ def getStringCol(col: String): String = {
+ val r = result.getValue(eBytes, colNames(col))
+ require(r != null,
+ s"Failed to get value for column ${col}. " +
+ s"Rowkey: ${rowKey.toString} " +
+ s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.")
+
+ Bytes.toString(r)
+ }
+
+ def getOptStringCol(col: String): Option[String] = {
+ val r = result.getValue(eBytes, colNames(col))
+ if (r == null) {
+ None
+ } else {
+ Some(Bytes.toString(r))
+ }
+ }
+
+ def getTimestamp(col: String): Long = {
+ result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp()
+ }
+
+ val event = getStringCol("event")
+ val entityType = getStringCol("entityType")
+ val entityId = getStringCol("entityId")
+ val targetEntityType = getOptStringCol("targetEntityType")
+ val targetEntityId = getOptStringCol("targetEntityId")
+ val properties: DataMap = getOptStringCol("properties")
+ .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())
+ val prId = getOptStringCol("prId")
+ val eventTimeZone = getOptStringCol("eventTimeZone")
+ .map(DateTimeZone.forID(_))
+ .getOrElse(EventValidation.defaultTimeZone)
+ val creationTimeZone = getOptStringCol("creationTimeZone")
+ .map(DateTimeZone.forID(_))
+ .getOrElse(EventValidation.defaultTimeZone)
+
+ val creationTime: DateTime = new DateTime(
+ getTimestamp("event"), creationTimeZone
+ )
+
+ Event(
+ eventId = Some(RowKey(result.getRow()).toString),
+ event = event,
+ entityType = entityType,
+ entityId = entityId,
+ targetEntityType = targetEntityType,
+ targetEntityId = targetEntityId,
+ properties = properties,
+ eventTime = new DateTime(rowKey.millis, eventTimeZone),
+ tags = Seq(),
+ prId = prId,
+ creationTime = creationTime
+ )
+ }
+}
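To make the 0.8.0 key layout concrete, a round-trip sketch (values are arbitrary): a key is 4 bytes of appId, 8 of millis, and 8 of uuidLow, 20 bytes in total:

    val key = new HB_0_8_0.RowKey(appId = 3, millis = 1409876543210L, uuidLow = 42L)
    assert(key.toBytes.length == 20)
    // parse back from raw bytes, or from the Base64 string form via toString
    val parsed = HB_0_8_0.RowKey(key.toBytes)
    assert(parsed.appId == 3 && parsed.millis == 1409876543210L && parsed.uuidLow == 42L)

PartialRowKey exploits the same byte ordering: scanning from PartialRowKey(appId) to PartialRowKey(appId + 1) bounds the scan to exactly one app's rows, as getByAppId does above.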
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala
new file mode 100644
index 0000000..1759561
--- /dev/null
+++ b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.hbase.upgrade
+
+import org.apache.predictionio.annotation.Experimental
+
+import org.apache.predictionio.data.storage.Storage
+import org.apache.predictionio.data.storage.hbase.HBLEvents
+import org.apache.predictionio.data.storage.hbase.HBEventsUtil
+
+import scala.collection.JavaConversions._
+
+/** :: Experimental :: */
+@Experimental
+object Upgrade {
+
+ def main(args: Array[String]): Unit = {
+ val fromAppId = args(0).toInt
+ val toAppId = args(1).toInt
+ val batchSize = args.lift(2).map(_.toInt).getOrElse(100)
+ val fromNamespace = args.lift(3).getOrElse("predictionio_eventdata")
+
+ upgrade(fromAppId, toAppId, batchSize, fromNamespace)
+ }
+
+ /* For upgrade from 0.8.0 or 0.8.1 to 0.8.2 only */
+ def upgrade(
+ fromAppId: Int,
+ toAppId: Int,
+ batchSize: Int,
+ fromNamespace: String): Unit = {
+
+ val events = Storage.getLEvents().asInstanceOf[HBLEvents]
+
+ // Assumes "pio app new <newapp>" has already been run (the new app exists)
+ // TODO: check if new table empty and warn user if not
+ val newTable = events.getTable(toAppId)
+
+ val newTableName = newTable.getName().getNameAsString()
+ println(s"Copying data from ${fromNamespace}:events for app ID ${fromAppId}"
+ + s" to new HBase table ${newTableName}...")
+
+ HB_0_8_0.getByAppId(
+ events.client.connection,
+ fromNamespace,
+ fromAppId).grouped(batchSize).foreach { eventGroup =>
+ val puts = eventGroup.map { e =>
+ val (put, _) = HBEventsUtil.eventToPut(e, toAppId)
+ put
+ }
+ newTable.put(puts.toList)
+ }
+
+ newTable.flushCommits()
+ newTable.close()
+ println("Done.")
+ }
+
+}
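An invocation sketch (the app IDs and batch size are placeholders; the target app must already exist via "pio app new", per the comment above):

    // copy app 1's 0.8.0/0.8.1 events into app 2's new-format table,
    // writing 100 puts per batch from the default source namespace
    Upgrade.upgrade(
      fromAppId = 1,
      toAppId = 2,
      batchSize = 100,
      fromNamespace = "predictionio_eventdata")

main takes the same values positionally, with batchSize and fromNamespace optional.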