You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@predictionio.apache.org by do...@apache.org on 2017/06/05 21:29:52 UTC
incubator-predictionio git commit: [PIO-61] Add S3 Model Data
Repository
Repository: incubator-predictionio
Updated Branches:
refs/heads/develop 5b02cd1a5 -> 3330013b9
[PIO-61] Add S3 Model Data Repository
Closes #371
Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/3330013b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/3330013b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/3330013b
Branch: refs/heads/develop
Commit: 3330013b9d19f2c21e18f9b18e1a48fda3912e68
Parents: 5b02cd1
Author: Shinsuke Sugaya <sh...@yahoo.co.jp>
Authored: Mon Jun 5 14:29:23 2017 -0700
Committer: Donald Szeto <do...@apache.org>
Committed: Mon Jun 5 14:29:23 2017 -0700
----------------------------------------------------------------------
.travis.yml | 5 +
build.sbt | 7 +-
conf/pio-env.sh.template | 5 +
storage/s3/.gitignore | 1 +
storage/s3/build.sbt | 44 ++++++++
.../predictionio/data/storage/s3/S3Models.scala | 101 +++++++++++++++++++
.../data/storage/s3/StorageClient.scala | 45 +++++++++
.../predictionio/data/storage/s3/package.scala | 25 +++++
tests/Dockerfile | 2 +
tests/docker-compose.yml | 6 ++
tests/docker-files/awscredentials | 19 ++++
tests/docker-files/env-conf/pio-env.sh | 9 ++
tests/docker-files/init.sh | 4 +
tests/run_docker.sh | 4 +-
14 files changed, 274 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 8dcc2fa..c8b7826 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -98,6 +98,11 @@ env:
PIO_SCALA_VERSION=2.11.8
PIO_SPARK_VERSION=2.1.0
PIO_ELASTICSEARCH_VERSION=1.7.3
+ - BUILD_TYPE=Integration
+ METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3
+ PIO_SCALA_VERSION=2.11.8
+ PIO_SPARK_VERSION=2.1.0
+ PIO_ELASTICSEARCH_VERSION=5.2.2
before_install:
- unset SBT_OPTS JVM_OPTS
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/build.sbt
----------------------------------------------------------------------
diff --git a/build.sbt b/build.sbt
index e49613f..4287ea9 100644
--- a/build.sbt
+++ b/build.sbt
@@ -129,6 +129,10 @@ val dataLocalfs = (project in file("storage/localfs")).
settings(commonSettings: _*).
enablePlugins(GenJavadocPlugin)
+val dataS3 = (project in file("storage/s3")).
+ settings(commonSettings: _*).
+ enablePlugins(GenJavadocPlugin)
+
val common = (project in file("common")).
settings(commonSettings: _*).
enablePlugins(GenJavadocPlugin).
@@ -173,7 +177,8 @@ val storageSubprojects = Seq(
dataHbase,
dataHdfs,
dataJdbc,
- dataLocalfs)
+ dataLocalfs,
+ dataS3)
val storage = (project in file("storage"))
.aggregate(storageSubprojects map Project.projectToRef: _*)
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/conf/pio-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template
index c3a1e07..2e55900 100644
--- a/conf/pio-env.sh.template
+++ b/conf/pio-env.sh.template
@@ -104,3 +104,8 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
# HBase Example
# PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0
+
+# AWS S3 Example
+# PIO_STORAGE_SOURCES_S3_TYPE=s3
+# PIO_STORAGE_SOURCES_S3_BUCKET_NAME=pio_bucket
+# PIO_STORAGE_SOURCES_S3_BASE_PATH=pio_model
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/storage/s3/.gitignore
----------------------------------------------------------------------
diff --git a/storage/s3/.gitignore b/storage/s3/.gitignore
new file mode 100644
index 0000000..ae3c172
--- /dev/null
+++ b/storage/s3/.gitignore
@@ -0,0 +1 @@
+/bin/
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/storage/s3/build.sbt
----------------------------------------------------------------------
diff --git a/storage/s3/build.sbt b/storage/s3/build.sbt
new file mode 100644
index 0000000..4022209
--- /dev/null
+++ b/storage/s3/build.sbt
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import PIOBuild._
+
+name := "apache-predictionio-data-s3"
+
+libraryDependencies ++= Seq(
+ "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+ "com.google.guava" % "guava" % "14.0.1" % "provided",
+ "com.amazonaws" % "aws-java-sdk-s3" % "1.11.132",
+ "org.scalatest" %% "scalatest" % "2.1.7" % "test")
+
+parallelExecution in Test := false
+
+pomExtra := childrenPomExtra.value
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
+
+assemblyShadeRules in assembly := Seq(
+ ShadeRule.rename("org.apache.http.**" -> "shadeio.data.s3.http.@1").inAll,
+ ShadeRule.rename("com.fasterxml.**" -> "shadeio.data.s3.fasterxml.@1").inAll
+)
+
+// skip test in assembly
+test in assembly := {}
+
+assemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /
+ "assembly" / "src" / "universal" / "lib" / "spark" /
+ s"pio-data-s3-assembly-${version.value}.jar"
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/S3Models.scala
----------------------------------------------------------------------
diff --git a/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/S3Models.scala b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/S3Models.scala
new file mode 100644
index 0000000..f907e05
--- /dev/null
+++ b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/S3Models.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.s3
+
+import java.io.ByteArrayInputStream
+
+import org.apache.predictionio.data.storage.Model
+import org.apache.predictionio.data.storage.Models
+import org.apache.predictionio.data.storage.StorageClientConfig
+
+import com.amazonaws.services.s3.AmazonS3
+import com.amazonaws.services.s3.model.DeleteObjectRequest
+import com.amazonaws.services.s3.model.GetObjectRequest
+import com.amazonaws.services.s3.model.ObjectMetadata
+import com.amazonaws.services.s3.model.PutObjectRequest
+import com.amazonaws.services.s3.model.S3Object
+import com.google.common.io.ByteStreams
+
+import grizzled.slf4j.Logging
+
+class S3Models(s3Client: AmazonS3, config: StorageClientConfig, prefix: String)
+ extends Models with Logging {
+
+ def insert(i: Model): Unit = {
+ def getModel(bucketName: String, key: String): Option[Model] = {
+ val data = i.models
+ val metadata: ObjectMetadata = new ObjectMetadata()
+ metadata.setContentLength(data.length)
+ val req = new PutObjectRequest(bucketName, key, new ByteArrayInputStream(data), metadata)
+ try {
+ s3Client.putObject(req)
+ } catch {
+ case e: Throwable => error(s"Failed to insert a model to s3://${bucketName}/${key}", e)
+ }
+ None
+ }
+ doAction(i.id, getModel)
+ }
+
+ def get(id: String): Option[Model] = {
+ def getModel(bucketName: String, key: String): Option[Model] = {
+ val s3object: S3Object = s3Client.getObject(new GetObjectRequest(
+ bucketName, key));
+ val is = s3object.getObjectContent
+ try {
+ Some(Model(
+ id = id,
+ models = ByteStreams.toByteArray(is)))
+ } catch {
+ case e: Throwable =>
+ error(s"Failed to get a model from s3://${bucketName}/${key}", e)
+ None
+ } finally {
+ is.close()
+ }
+ }
+ doAction(id, getModel)
+ }
+
+ def delete(id: String): Unit = {
+ def deleteModel(bucketName: String, key: String): Option[Model] = {
+ try {
+ s3Client.deleteObject(new DeleteObjectRequest(bucketName, key))
+ } catch {
+ case e: Throwable => error(s"Failed to delete s3://${bucketName}/${key}", e)
+ }
+ None
+ }
+ doAction(id, deleteModel)
+ }
+
+ def doAction(id: String, action: (String, String) => Option[Model]): Option[Model] = {
+ config.properties.get("BUCKET_NAME") match {
+ case Some(bucketName) =>
+ val key = config.properties.get("BASE_PATH") match {
+ case Some(basePath) => s"${basePath}/${prefix}${id}"
+ case None => s"${prefix}${id}"
+ }
+ action(bucketName, key)
+ case None =>
+ error("S3 bucket is empty.")
+ None
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/StorageClient.scala b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/StorageClient.scala
new file mode 100644
index 0000000..c37bf3d
--- /dev/null
+++ b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/StorageClient.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.predictionio.data.storage.s3
+
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+
+import com.amazonaws.auth.profile.ProfileCredentialsProvider
+import com.amazonaws.client.builder.AwsClientBuilder
+import com.amazonaws.services.s3.AmazonS3
+import com.amazonaws.services.s3.AmazonS3ClientBuilder
+
+import grizzled.slf4j.Logging
+
+class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
+ with Logging {
+ override val prefix = "S3"
+ val client: AmazonS3 = {
+ val builder = AmazonS3ClientBuilder.standard().withCredentials(new ProfileCredentialsProvider())
+ (config.properties.get("ENDPOINT"), config.properties.get("REGION")) match {
+ case (Some(endpoint), Some(region)) =>
+ builder.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, region))
+ case (None, Some(region)) => builder.withRegion(region)
+ }
+ config.properties.get("DISABLE_CHUNKED_ENCODING") match {
+ case Some(x) if x.equalsIgnoreCase("true") => builder.disableChunkedEncoding()
+ }
+ builder.build()
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/package.scala
----------------------------------------------------------------------
diff --git a/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/package.scala b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/package.scala
new file mode 100644
index 0000000..32845cf
--- /dev/null
+++ b/storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** AWS S3 implementation of storage traits, supporting model data only
+ *
+ * @group Implementation
+ */
+package object s3 {}
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/tests/Dockerfile
----------------------------------------------------------------------
diff --git a/tests/Dockerfile b/tests/Dockerfile
index f7dbd68..45e4bd5 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -42,11 +42,13 @@ COPY docker-files/init.sh init.sh
COPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml
COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh
COPY docker-files/pgpass /root/.pgpass
+COPY docker-files/awscredentials /root/.aws/credentials
RUN chmod 600 /root/.pgpass
# Python
RUN pip install python-dateutil
RUN pip install pytz
+RUN pip install awscli
# Default repositories setup
ENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/tests/docker-compose.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index ac1d91d..e0eda34 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -28,12 +28,18 @@ services:
POSTGRES_USER: pio
POSTGRES_PASSWORD: pio
POSTGRES_INITDB_ARGS: --encoding=UTF8
+ localstack:
+ image: atlassianlabs/localstack
+ environment:
+ - SERVICES=s3
+ - DEBUG=1
pio-testing:
image: predictionio/pio-testing:latest
depends_on:
- elasticsearch
- hbase
- postgres
+ - localstack
volumes:
- ~/.ivy2:/root/.ivy2
- ~/.sbt:/root/.sbt
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/tests/docker-files/awscredentials
----------------------------------------------------------------------
diff --git a/tests/docker-files/awscredentials b/tests/docker-files/awscredentials
new file mode 100644
index 0000000..039c36f
--- /dev/null
+++ b/tests/docker-files/awscredentials
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+[default]
+aws_access_key_id = foo
+aws_secret_access_key = foo
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/tests/docker-files/env-conf/pio-env.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/env-conf/pio-env.sh b/tests/docker-files/env-conf/pio-env.sh
index 72a8f12..276b052 100644
--- a/tests/docker-files/env-conf/pio-env.sh
+++ b/tests/docker-files/env-conf/pio-env.sh
@@ -111,3 +111,12 @@ PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
# HDFS config
PIO_STORAGE_SOURCES_HDFS_TYPE=hdfs
PIO_STORAGE_SOURCES_HDFS_PATH=/hdfs_models
+
+# AWS S3 Example
+PIO_STORAGE_SOURCES_S3_TYPE=s3
+PIO_STORAGE_SOURCES_S3_ENDPOINT=http://localstack:4572
+PIO_STORAGE_SOURCES_S3_REGION=us-east-1
+PIO_STORAGE_SOURCES_S3_BUCKET_NAME=pio_bucket
+PIO_STORAGE_SOURCES_S3_BASE_PATH=pio_model
+PIO_STORAGE_SOURCES_S3_DISABLE_CHUNKED_ENCODING=true
+
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/tests/docker-files/init.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/init.sh b/tests/docker-files/init.sh
index fc12ffe..0cde41d 100755
--- a/tests/docker-files/init.sh
+++ b/tests/docker-files/init.sh
@@ -20,4 +20,8 @@ set -e
export PYTHONPATH=$PIO_HOME/tests:$PYTHONPATH
echo "Sleeping $SLEEP_TIME seconds for all services to be ready..."
sleep $SLEEP_TIME
+
+# create S3 bucket in localstack
+aws --endpoint-url=http://localstack:4572 --region=us-east-1 s3 mb s3://pio_bucket
+
eval $@
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/3330013b/tests/run_docker.sh
----------------------------------------------------------------------
diff --git a/tests/run_docker.sh b/tests/run_docker.sh
index ad7e189..8200785 100755
--- a/tests/run_docker.sh
+++ b/tests/run_docker.sh
@@ -20,7 +20,7 @@ USAGE=$"Usage: run_docker <meta> <event> <model> <command>
Where:
meta = [PGSQL,ELASTICSEARCH]
event = [PGSQL,HBASE,ELASTICSEARCH]
- model = [PGSQL,LOCALFS,HDFS]
+ model = [PGSQL,LOCALFS,HDFS,S3]
command = command to run in the container"
if ! [[ "$1" =~ ^(PGSQL|ELASTICSEARCH)$ ]]; then
@@ -37,7 +37,7 @@ fi
EVENT="$1"
shift
-if ! [[ "$1" =~ ^(PGSQL|LOCALFS|HDFS)$ ]]; then
+if ! [[ "$1" =~ ^(PGSQL|LOCALFS|HDFS|S3)$ ]]; then
echo "$USAGE"
exit 1
fi