You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2018/02/14 04:39:10 UTC
zeppelin git commit: [ZEPPELIN-3182] Support saving notebooks to
Google Cloud Storage
Repository: zeppelin
Updated Branches:
refs/heads/master d4783040c -> 91b5d69be
[ZEPPELIN-3182] Support saving notebooks to Google Cloud Storage
### What is this PR for?
Support saving notebooks to Google Cloud Storage, similar to implementations for S3 and Azure. It uses the same authentication mechanisms as the BigQuery interpreter.
I am new to Maven, so please check my work on the pom.xml files. In particular, I upgraded Guava to 23.0, which was required for `google-cloud-java`. Going through hello-world with my changes seems to work.
Also, I modified the BigQuery interpreter docs to point to the **latest** GCS storage docs. Is it more appropriate to pin to the version you are viewing? How can I do that?
### What type of PR is it?
Improvement
### Todos
* [Low priority] Support encryption keys
I don't think this is particularly important, at least for v1.
### How should this be tested?
* I added unit tests for the core functionality
* I manually tested the authentication instructions (but that could use a second pair of eyes)
### Questions:
* Does the licenses files need update?
* No idea. `google-cloud-java` is Apache 2: https://github.com/GoogleCloudPlatform/google-cloud-java/blob/master/LICENSE
* Is there breaking changes for older versions?
* Nope.
* Does this needs documentation?
* Yes, and I tried to update the docs (but there are likely other things that need to be updated)
Author: Karthik Palaniappan <ka...@google.com>
Closes #2738 from karth295/master and squashes the following commits:
c4a45b7 [Karthik Palaniappan] [ZEPPELIN-3182] Support saving notebooks to Google Cloud Storage
8dc819e [Karthik Palaniappan] Unify logic to clear notebook runtime state on load from storage
Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/91b5d69b
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/91b5d69b
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/91b5d69b
Branch: refs/heads/master
Commit: 91b5d69be2aa8f72dc49d27800a90f8bed9781cc
Parents: d478304
Author: Karthik Palaniappan <ka...@google.com>
Authored: Sun Jan 28 19:21:34 2018 -0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Wed Feb 14 12:39:03 2018 +0800
----------------------------------------------------------------------
LICENSE | 5 +-
conf/zeppelin-env.sh.template | 6 +
conf/zeppelin-site.xml.template | 17 ++
docs/index.md | 1 +
docs/interpreter/bigquery.md | 20 +-
docs/setup/storage/storage.md | 92 ++++++++
.../zeppelin/conf/ZeppelinConfiguration.java | 11 +-
zeppelin-zengine/pom.xml | 103 +++++++-
.../java/org/apache/zeppelin/notebook/Note.java | 13 +
.../notebook/repo/AzureNotebookRepo.java | 28 +--
.../zeppelin/notebook/repo/GCSNotebookRepo.java | 234 ++++++++++++++++++
.../notebook/repo/MongoNotebookRepo.java | 42 +---
.../zeppelin/notebook/repo/S3NotebookRepo.java | 17 +-
.../zeppelin/notebook/repo/VFSNotebookRepo.java | 27 +--
.../notebook/repo/GCSNotebookRepoTest.java | 235 +++++++++++++++++++
15 files changed, 744 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index 2252d65..3b34053 100644
--- a/LICENSE
+++ b/LICENSE
@@ -259,6 +259,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(Apache 2.0) Gson extra (https://github.com/DanySK/gson-extras)
(Apache 2.0) Nimbus JOSE+JWT (https://bitbucket.org/connect2id/nimbus-jose-jwt/wiki/Home)
(Apache 2.0) jarchivelib (https://github.com/thrau/jarchivelib)
+ (Apache 2.0) Google Cloud Client Library for Java (https://github.com/GoogleCloudPlatform/google-cloud-java)
========================================================================
BSD 3-Clause licenses
@@ -274,6 +275,8 @@ The following components are provided under the BSD 3-Clause license. See file
(BSD 3 Clause) diff.js (https://github.com/kpdecker/jsdiff)
+ (BSD 3-Clause) Google Auth Library for Java (https://github.com/google/google-auth-library-java)
+
========================================================================
BSD 2-Clause licenses
========================================================================
@@ -287,4 +290,4 @@ Jython Software License
========================================================================
The following components are provided under the Jython Software License. See file headers and project links for details.
- (Jython Software License) jython-standalone - http://www.jython.org/
\ No newline at end of file
+ (Jython Software License) jython-standalone - http://www.jython.org/
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/conf/zeppelin-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template
index 7bc38d6..c7204bd 100644
--- a/conf/zeppelin-env.sh.template
+++ b/conf/zeppelin-env.sh.template
@@ -30,16 +30,22 @@
# export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved
# export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false"
+
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID
# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region
# export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks
+
+# export ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR # GCS "directory" (prefix) under which notebooks are saved. E.g. gs://example-bucket/path/to/dir
+# export GOOGLE_APPLICATION_CREDENTIALS # Provide a service account key file for GCS and BigQuery API calls (overrides application default credentials)
+
# export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost"
# export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin"
# export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes"
# export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false"
+
# export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default.
# export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/conf/zeppelin-site.xml.template
----------------------------------------------------------------------
diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index 9e9898b..9774f0d 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -67,6 +67,23 @@
<description>hide homescreen notebook from list when this value set to true</description>
</property>
+<!-- Google Cloud Storage notebook storage -->
+<!--
+<property>
+ <name>zeppelin.notebook.gcs.dir</name>
+ <value></value>
+ <description>
+ A GCS path in the form gs://bucketname/path/to/dir.
+ Notes are stored at {zeppelin.notebook.gcs.dir}/{notebook-id}/note.json
+ </description>
+</property>
+
+<property>
+ <name>zeppelin.notebook.storage</name>
+ <value>org.apache.zeppelin.notebook.repo.GCSNotebookRepo</value>
+ <description>notebook persistence layer implementation</description>
+</property>
+-->
<!-- Amazon S3 notebook storage -->
<!-- Creates the following directory structure: s3://{bucket}/{username}/{notebook-id}/note.json -->
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/docs/index.md
----------------------------------------------------------------------
diff --git a/docs/index.md b/docs/index.md
index 587ae93..3d42735 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -104,6 +104,7 @@ limitations under the License.
* [Git Storage](./setup/storage/storage.html#notebook-storage-in-local-git-repository)
* [S3 Storage](./setup/storage/storage.html#notebook-storage-in-s3)
* [Azure Storage](./setup/storage/storage.html#notebook-storage-in-azure)
+ * [Google Cloud Storage](./setup/storage/storage.html#notebook-storage-in-gcs)
* [ZeppelinHub Storage](./setup/storage/storage.html#notebook-storage-in-zeppelinhub)
* [MongoDB Storage](./setup/storage/storage.html#notebook-storage-in-mongodb)
* Operation
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/docs/interpreter/bigquery.md
----------------------------------------------------------------------
diff --git a/docs/interpreter/bigquery.md b/docs/interpreter/bigquery.md
index 7ebe2e2..1b90f99 100644
--- a/docs/interpreter/bigquery.md
+++ b/docs/interpreter/bigquery.md
@@ -58,20 +58,12 @@ Zeppelin is built against BigQuery API version v2-rev265-1.21.0 - [API Javadocs]
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
-### Setup service account credentials
-
-In order to run BigQuery interpreter outside of Google Cloud Engine you need to provide authentication credentials,
-by [following this instructions](https://developers.google.com/identity/protocols/application-default-credentials):
-
- - Go to the [API Console Credentials page](https://console.developers.google.com/project/_/apis/credentials)
- - From the project drop-down, select your project.
- - On the `Credentials` page, select the `Create credentials` drop-down, then select `Service account key`.
- - From the Service account drop-down, select an existing service account or create a new one.
- - For `Key type`, select the `JSON` key option, then select `Create`. The file automatically downloads to your computer.
- - Put the `*.json` file you just downloaded in a directory of your choosing. This directory must be private (you can't let anyone get access to this), but accessible to your Zeppelin instance.
- - Set the environment variable `GOOGLE_APPLICATION_CREDENTIALS` to the path of the JSON file downloaded.
- * either though GUI: in interpreter configuration page property names in CAPITAL_CASE set up env vars
- * or though `zeppelin-env.sh`: just add it to the end of the file.
+### Provide Application Default Credentials
+
+Within Google Cloud Platform (e.g. Google App Engine, Google Compute Engine),
+built-in credentials are used by default.
+
+Outside of GCP, follow the Google API authentication instructions for [Zeppelin Google Cloud Storage](https://zeppelin.apache.org/docs/latest/setup/storage/storage.html#notebook-storage-in-gcs)
## Using the BigQuery Interpreter
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/docs/setup/storage/storage.md
----------------------------------------------------------------------
diff --git a/docs/setup/storage/storage.md b/docs/setup/storage/storage.md
index f34fc2c..6f2ace4 100644
--- a/docs/setup/storage/storage.md
+++ b/docs/setup/storage/storage.md
@@ -33,6 +33,7 @@ There are few notebook storage systems available for a use out of the box:
* all notes are saved in the notebook folder in hadoop compatible file system - `FileSystemNotebookRepo`
* storage using Amazon S3 service - `S3NotebookRepo`
* storage using Azure service - `AzureNotebookRepo`
+ * storage using Google Cloud Storage - `GCSNotebookRepo`
* storage using MongoDB - `MongoNotebookRepo`
* storage using GitHub - `GitHubNotebookRepo`
@@ -264,6 +265,97 @@ Optionally, you can specify Azure folder structure name in the file **zeppelin-s
```
</br>
+## Notebook Storage in Google Cloud Storage<a name="GCS"></a>
+
+Using `GCSNotebookRepo` you can connect Zeppelin with Google Cloud Storage using [Application Default Credentials](https://cloud.google.com/docs/authentication/production).
+
+First, choose a GCS path under which to store notebooks.
+
+```
+<property>
+ <name>zeppelin.notebook.gcs.dir</name>
+ <value></value>
+ <description>
+ A GCS path in the form gs://bucketname/path/to/dir.
+ Notes are stored at {zeppelin.notebook.gcs.dir}/{notebook-id}/note.json
+ </description>
+</property>
+```
+
+Then, enable the `GCSNotebookRepo` class in the file **zeppelin-site.xml** by commenting out the next property:
+
+```
+<property>
+ <name>zeppelin.notebook.storage</name>
+ <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
+ <description>versioned notebook persistence layer implementation</description>
+</property>
+```
+
+and uncommenting:
+
+```
+<property>
+ <name>zeppelin.notebook.storage</name>
+ <value>org.apache.zeppelin.notebook.repo.GCSNotebookRepo</value>
+ <description>notebook persistence layer implementation</description>
+</property>
+```
+
+Or, if you want to simultaneously use your local git storage with GCS, use the following property instead:
+
+ ```
+<property>
+ <name>zeppelin.notebook.storage</name>
+ <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo,org.apache.zeppelin.notebook.repo.GCSNotebookRepo</value>
+ <description>notebook persistence layer implementation</description>
+</property>
+```
+
+### Google Cloud API Authentication
+
+Note: On Google App Engine, Google Cloud Shell, and Google Compute Engine, these
+steps are not necessary, as built-in credentials are used by default.
+
+For more information, see [Application Default Credentials](https://cloud.google.com/docs/authentication/production)
+
+#### Using gcloud auth application-default login
+
+See the [gcloud docs](https://cloud.google.com/sdk/gcloud/reference/auth/application-default/login)
+
+As the user running the zeppelin daemon, run:
+
+```bash
+gcloud auth application-default login
+```
+
+You can also use `--scopes` to restrict access to specific Google APIs, such as
+Cloud Storage and BigQuery.
+
+#### Using service account key files
+
+Alternatively, to use a [service account](https://cloud.google.com/compute/docs/access/service-accounts)
+for authentication with GCS, you will need a JSON service account key file.
+
+1. Navigate to the [service accounts page](https://console.cloud.google.com/iam-admin/serviceaccounts/project)
+2. Click `CREATE SERVICE ACCOUNT`
+3. Select at least `Storage -> Storage Object Admin`. Note that this is
+ **different** than `Storage Admin`.
+4. If you are also using the BigQuery Interpreter, add the appropriate
+ permissions (e.g. `BigQuery -> BigQuery Data Viewer and BigQuery User`)
+5. Name your service account, and select "Furnish a new private key" to download
+ a `.json` file. Click "Create".
+6. Move the downloaded file to a location of your choice (e.g.
+ `/path/to/my/key.json`), and give it appropriate permissions. Ensure at
+ least the user running the zeppelin daemon can read it.
+
+Then, point `GOOGLE_APPLICATION_CREDENTIALS` at your new key file in **zeppelin-env.sh**. For example:
+
+```bash
+export GOOGLE_APPLICATION_CREDENTIALS=/path/to/my/key.json
+```
+
+</br>
## Notebook Storage in ZeppelinHub <a name="ZeppelinHub"></a>
ZeppelinHub storage layer allows out of the box connection of Zeppelin instance with your ZeppelinHub account. First of all, you need to either comment out the following property in **zeppelin-site.xml**:
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
----------------------------------------------------------------------
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index f7b3d7b..5beb2c7 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -360,15 +360,19 @@ public class ZeppelinConfiguration extends XMLConfiguration {
"org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage");
}
- public String getUser() {
+ public String getGCSStorageDir() {
+ return getString(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR);
+ }
+
+ public String getS3User() {
return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_USER);
}
- public String getBucketName() {
+ public String getS3BucketName() {
return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_BUCKET);
}
- public String getEndpoint() {
+ public String getS3Endpoint() {
return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_ENDPOINT);
}
@@ -697,6 +701,7 @@ public class ZeppelinConfiguration extends XMLConfiguration {
ZEPPELIN_NOTEBOOK_HOMESCREEN("zeppelin.notebook.homescreen", null),
// whether homescreen notebook will be hidden from notebook list or not
ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE("zeppelin.notebook.homescreen.hide", false),
+ ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR("zeppelin.notebook.gcs.dir", ""),
ZEPPELIN_NOTEBOOK_S3_BUCKET("zeppelin.notebook.s3.bucket", "zeppelin"),
ZEPPELIN_NOTEBOOK_S3_ENDPOINT("zeppelin.notebook.s3.endpoint", "s3.amazonaws.com"),
ZEPPELIN_NOTEBOOK_S3_USER("zeppelin.notebook.s3.user", "user"),
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/pom.xml
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml
index a864fdf..81ce716 100644
--- a/zeppelin-zengine/pom.xml
+++ b/zeppelin-zengine/pom.xml
@@ -39,6 +39,7 @@
<hadoop.version>2.7.3</hadoop.version>
<commons.lang3.version>3.4</commons.lang3.version>
<commons.vfs2.version>2.0</commons.vfs2.version>
+ <gcs.storage.version>1.14.0</gcs.storage.version>
<aws.sdk.s3.version>1.10.62</aws.sdk.s3.version>
<adl.sdk.version>2.1.4</adl.sdk.version>
<jackrabbit.webdav.version>1.5.2</jackrabbit.webdav.version>
@@ -51,6 +52,7 @@
<!--test library versions-->
<google.truth.version>0.27</google.truth.version>
+ <google.testing.nio.version>0.32.0-alpha</google.testing.nio.version>
</properties>
<dependencies>
@@ -115,9 +117,83 @@
</dependency>
<dependency>
+ <groupId>com.google.cloud</groupId>
+ <artifactId>google-cloud-storage</artifactId>
+ <version>${gcs.storage.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.api</groupId>
+ <artifactId>api-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.http-client</groupId>
+ <artifactId>google-http-client-jackson2</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.http-client</groupId>
+ <artifactId>google-http-client</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.api</groupId>
+ <artifactId>api-common</artifactId>
+ <version>1.2.0</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.http-client</groupId>
+ <artifactId>google-http-client-jackson2</artifactId>
+ <version>1.23.0</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<version>${aws.sdk.s3.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
@@ -146,7 +222,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>15.0</version>
+ <version>20.0</version>
</dependency>
<dependency>
@@ -228,6 +304,23 @@
</dependency>
<dependency>
+ <groupId>com.google.cloud</groupId>
+ <artifactId>google-cloud-nio</artifactId>
+ <version>${google.testing.nio.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
<groupId>com.google.truth</groupId>
<artifactId>truth</artifactId>
<version>${google.truth.version}</version>
@@ -612,6 +705,10 @@
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java-util</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
@@ -623,6 +720,10 @@
<groupId>io.grpc</groupId>
<artifactId>grpc-context</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>com.google.api.grpc</groupId>
+ <artifactId>proto-google-common-protos</artifactId>
+ </exclusion>
</exclusions>
</dependency>
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
index 281c4de..0a8fb12 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
@@ -963,6 +963,19 @@ public class Note implements ParagraphJobListener, JsonSerializable {
for (Paragraph p : paragraphs) {
p.clearRuntimeInfos();
p.parseText();
+
+ if (p.getStatus() == Status.PENDING || p.getStatus() == Status.RUNNING) {
+ p.setStatus(Status.ABORT);
+ }
+
+ List<ApplicationState> appStates = p.getAllApplicationStates();
+ if (appStates != null) {
+ for (ApplicationState app : appStates) {
+ if (app.getStatus() != ApplicationState.Status.ERROR) {
+ app.setStatus(ApplicationState.Status.UNLOADED);
+ }
+ }
+ }
}
}
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java
index de337fa..731a3e8 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/AzureNotebookRepo.java
@@ -17,6 +17,13 @@
package org.apache.zeppelin.notebook.repo;
+import com.microsoft.azure.storage.CloudStorageAccount;
+import com.microsoft.azure.storage.StorageException;
+import com.microsoft.azure.storage.file.CloudFile;
+import com.microsoft.azure.storage.file.CloudFileClient;
+import com.microsoft.azure.storage.file.CloudFileDirectory;
+import com.microsoft.azure.storage.file.CloudFileShare;
+import com.microsoft.azure.storage.file.ListFileItem;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -28,26 +35,15 @@ import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
-import org.apache.zeppelin.notebook.Paragraph;
-import org.apache.zeppelin.scheduler.Job;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.microsoft.azure.storage.CloudStorageAccount;
-import com.microsoft.azure.storage.StorageException;
-import com.microsoft.azure.storage.file.CloudFile;
-import com.microsoft.azure.storage.file.CloudFileClient;
-import com.microsoft.azure.storage.file.CloudFileDirectory;
-import com.microsoft.azure.storage.file.CloudFileShare;
-import com.microsoft.azure.storage.file.ListFileItem;
-
/**
* Azure storage backend for notebooks
*/
@@ -128,15 +124,7 @@ public class AzureNotebookRepo implements NotebookRepo {
String json = IOUtils.toString(ins,
conf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_ENCODING));
ins.close();
- Note note = Note.fromJson(json);
-
- for (Paragraph p : note.getParagraphs()) {
- if (p.getStatus() == Job.Status.PENDING || p.getStatus() == Job.Status.RUNNING) {
- p.setStatus(Job.Status.ABORT);
- }
- }
-
- return note;
+ return Note.fromJson(json);
}
@Override
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java
new file mode 100644
index 0000000..591c532
--- /dev/null
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepo.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.notebook.repo;
+
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.BlobInfo;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.Storage.BlobListOption;
+import com.google.cloud.storage.StorageException;
+import com.google.cloud.storage.StorageOptions;
+import com.google.common.base.Optional;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.gson.JsonParseException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.commons.lang.StringUtils;
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
+import org.apache.zeppelin.notebook.Note;
+import org.apache.zeppelin.notebook.NoteInfo;
+import org.apache.zeppelin.user.AuthenticationInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A NotebookRepo implementation for storing notebooks in Google Cloud Storage.
+ *
+ * Notes are stored in the GCS "directory" specified by zeppelin.notebook.gcs.dir. This path
+ * must be in the form gs://bucketName/path/to/Dir. The bucket must already exist. N.B: GCS is an
+ * object store, so this "directory" should not itself be an object. Instead, it represents the base
+ * path for the note.json files.
+ *
+ * Authentication is provided by google-auth-library-java.
+ * @see <a href="https://github.com/google/google-auth-library-java">
+ * google-auth-library-java</a>.
+ */
+public class GCSNotebookRepo implements NotebookRepo {
+
+  private static final Logger LOG = LoggerFactory.getLogger(GCSNotebookRepo.class);
+  private static final String GCS_SCHEME = "gs://";
+
+  // Charset name used both to encode (save) and to decode (get) note.json contents
+  private final String encoding;
+  private final String bucketName;
+  // Object-name prefix under which notes live; absent when notes sit directly under the bucket
+  private final Optional<String> basePath;
+  // Matches [basePath/]<note-id>/note.json and captures the note id in group 1
+  private final Pattern noteNamePattern;
+  private final Storage storage;
+
+  /**
+   * Creates a repo that talks to GCS through the default {@link StorageOptions} service.
+   *
+   * @param conf Zeppelin configuration supplying zeppelin.notebook.gcs.dir and the encoding
+   * @throws IOException if the configured GCS storage directory is missing or malformed
+   */
+  public GCSNotebookRepo(ZeppelinConfiguration conf) throws IOException {
+    this(conf, StorageOptions.getDefaultInstance().getService());
+  }
+
+  /**
+   * Creates a repo on top of the given {@link Storage}. Package-private so that tests can use
+   * an in-memory storage implementation.
+   *
+   * @param conf Zeppelin configuration supplying zeppelin.notebook.gcs.dir and the encoding
+   * @param storage the GCS client used for every read, write, list and delete
+   * @throws IOException if the storage directory is unset, does not start with gs://, or does
+   *     not name a bucket
+   */
+  GCSNotebookRepo(ZeppelinConfiguration conf, Storage storage) throws IOException {
+    this.encoding = conf.getString(ConfVars.ZEPPELIN_ENCODING);
+
+    String gcsStorageDir = conf.getGCSStorageDir();
+    if (Strings.isNullOrEmpty(gcsStorageDir)) {
+      throw new IOException(
+          "GCS storage directory must be set using 'zeppelin.notebook.gcs.dir'");
+    }
+    if (!gcsStorageDir.startsWith(GCS_SCHEME)) {
+      throw new IOException(String.format(
+          "GCS storage directory '%s' must start with 'gs://'.", gcsStorageDir));
+    }
+    String storageDirWithoutScheme = gcsStorageDir.substring(GCS_SCHEME.length());
+
+    // String#split drops trailing empty strings, so a trailing slash adds no component.
+    // However "gs://" still yields one empty component and "gs:///dir" yields an empty first
+    // component, so the bucket name itself has to be checked, not just the component count.
+    List<String> pathComponents = Arrays.asList(storageDirWithoutScheme.split("/"));
+    if (pathComponents.isEmpty() || pathComponents.get(0).isEmpty()) {
+      throw new IOException(String.format(
+          "GCS storage directory '%s' must be in the form gs://bucketname/path/to/dir",
+          gcsStorageDir));
+    }
+    this.bucketName = pathComponents.get(0);
+    if (pathComponents.size() > 1) {
+      this.basePath = Optional.of(StringUtils.join(
+          pathComponents.subList(1, pathComponents.size()), "/"));
+    } else {
+      this.basePath = Optional.absent();
+    }
+
+    // Notes are stored at gs://bucketName/basePath/<note-id>/note.json
+    if (basePath.isPresent()) {
+      this.noteNamePattern = Pattern.compile(
+          "^" + Pattern.quote(basePath.get() + "/") + "([^/]+)/note\\.json$");
+    } else {
+      this.noteNamePattern = Pattern.compile("^([^/]+)/note\\.json$");
+    }
+
+    this.storage = storage;
+  }
+
+  /** Builds the id of the note.json object that holds the note with the given id. */
+  private BlobId makeBlobId(String noteId) {
+    if (basePath.isPresent()) {
+      return BlobId.of(bucketName, basePath.get() + "/" + noteId + "/note.json");
+    } else {
+      return BlobId.of(bucketName, noteId + "/note.json");
+    }
+  }
+
+  /**
+   * Lists the notes found under the configured GCS directory.
+   *
+   * @return one {@link NoteInfo} per object whose name matches [basePath/]note-id/note.json;
+   *     only the id is populated
+   * @throws IOException if the GCS listing fails
+   */
+  @Override
+  public List<NoteInfo> list(AuthenticationInfo subject) throws IOException {
+    try {
+      List<NoteInfo> infos = new ArrayList<>();
+      Iterable<Blob> blobsUnderDir;
+      if (basePath.isPresent()) {
+        blobsUnderDir = storage
+            .list(bucketName, BlobListOption.prefix(this.basePath.get() + "/"))
+            .iterateAll();
+      } else {
+        blobsUnderDir = storage
+            .list(bucketName)
+            .iterateAll();
+      }
+      for (Blob b : blobsUnderDir) {
+        Matcher matcher = noteNamePattern.matcher(b.getName());
+        if (matcher.matches()) {
+          // Callers only use the id field, so do not fetch each note
+          // This matches the implementation in FileSystemNotebookRepo#list
+          infos.add(new NoteInfo(matcher.group(1), "", null));
+        }
+      }
+      return infos;
+    } catch (StorageException se) {
+      throw new IOException("Could not list GCS directory: " + se.getMessage(), se);
+    }
+  }
+
+  /**
+   * Reads and parses the note with the given id.
+   *
+   * @throws IOException if the object cannot be read or its contents are not valid note JSON
+   */
+  @Override
+  public Note get(String noteId, AuthenticationInfo subject) throws IOException {
+    BlobId blobId = makeBlobId(noteId);
+    byte[] contents;
+    try {
+      contents = storage.readAllBytes(blobId);
+    } catch (StorageException se) {
+      throw new IOException("Could not read " + blobId.toString() + ": " + se.getMessage(), se);
+    }
+
+    try {
+      return Note.fromJson(new String(contents, encoding));
+    } catch (JsonParseException jpe) {
+      throw new IOException(
+          "Could not parse as json " + blobId.toString() + ": " + jpe.getMessage(), jpe);
+    }
+  }
+
+  /**
+   * Writes the note as a JSON object, creating it or replacing any existing copy.
+   *
+   * @throws IOException if the write fails or the configured encoding is unsupported
+   */
+  @Override
+  public void save(Note note, AuthenticationInfo subject) throws IOException {
+    BlobInfo info = BlobInfo.newBuilder(makeBlobId(note.getId()))
+        .setContentType("application/json")
+        .build();
+    try {
+      // Encode with the same charset that get() decodes with, so notes round-trip intact
+      storage.create(info, note.toJson().getBytes(encoding));
+    } catch (StorageException se) {
+      throw new IOException("Could not write " + info.toString() + ": " + se.getMessage(), se);
+    }
+  }
+
+  /**
+   * Deletes the note with the given id.
+   *
+   * @throws IllegalArgumentException if noteId is null or empty
+   * @throws IOException if the object does not exist or the delete fails
+   */
+  @Override
+  public void remove(String noteId, AuthenticationInfo subject) throws IOException {
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(noteId));
+    BlobId blobId = makeBlobId(noteId);
+    try {
+      boolean deleted = storage.delete(blobId);
+      if (!deleted) {
+        throw new IOException("Tried to remove nonexistent blob " + blobId.toString());
+      }
+    } catch (StorageException se) {
+      throw new IOException("Could not remove " + blobId.toString() + ": " + se.getMessage(), se);
+    }
+  }
+
+  @Override
+  public void close() {
+    //no-op
+  }
+
+  /** Not implemented for this repo: logs a warning and returns null. */
+  @Override
+  public Revision checkpoint(String noteId, String checkpointMsg, AuthenticationInfo subject)
+      throws IOException {
+    LOG.warn("checkpoint is not implemented for GCSNotebookRepo");
+    return null;
+  }
+
+  /** Not implemented for this repo: logs a warning and returns null. */
+  @Override
+  public Note get(String noteId, String revId, AuthenticationInfo subject) throws IOException {
+    LOG.warn("get revId is not implemented for GCSNotebookRepo");
+    return null;
+  }
+
+  /** Not implemented for this repo: logs a warning and returns an empty list. */
+  @Override
+  public List<Revision> revisionHistory(String noteId, AuthenticationInfo subject) {
+    LOG.warn("revisionHistory is not implemented for GCSNotebookRepo");
+    return Collections.emptyList();
+  }
+
+  /** Not implemented for this repo: logs a warning and returns null. */
+  @Override
+  public Note setNoteRevision(String noteId, String revId, AuthenticationInfo subject)
+      throws IOException {
+    LOG.warn("setNoteRevision is not implemented for GCSNotebookRepo");
+    return null;
+  }
+
+  /** Not implemented for this repo: logs a warning and returns an empty list. */
+  @Override
+  public List<NotebookRepoSettingsInfo> getSettings(AuthenticationInfo subject) {
+    LOG.warn("getSettings is not implemented for GCSNotebookRepo");
+    return Collections.emptyList();
+  }
+
+  /** Not implemented for this repo: logs a warning and changes nothing. */
+  @Override
+  public void updateSettings(Map<String, String> settings, AuthenticationInfo subject) {
+    LOG.warn("updateSettings is not implemented for GCSNotebookRepo");
+  }
+}
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java
index 273d75d..376a986 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/MongoNotebookRepo.java
@@ -1,5 +1,9 @@
package org.apache.zeppelin.notebook.repo;
+import static com.mongodb.client.model.Filters.eq;
+import static com.mongodb.client.model.Filters.in;
+import static com.mongodb.client.model.Filters.type;
+
import com.mongodb.MongoBulkWriteException;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
@@ -7,18 +11,18 @@ import com.mongodb.bulk.BulkWriteError;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
-import static com.mongodb.client.model.Filters.eq;
-import static com.mongodb.client.model.Filters.type;
-import static com.mongodb.client.model.Filters.in;
-
import com.mongodb.client.model.InsertManyOptions;
import com.mongodb.client.model.UpdateOptions;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
-import org.apache.zeppelin.notebook.Paragraph;
-import org.apache.zeppelin.notebook.ApplicationState;
-import org.apache.zeppelin.scheduler.Job;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.bson.BsonType;
import org.bson.Document;
@@ -26,11 +30,6 @@ import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
/**
* Backend for storing Notebook on MongoDB
*/
@@ -161,24 +160,7 @@ public class MongoNotebookRepo implements NotebookRepo {
// document to JSON
String json = doc.toJson();
// JSON to note
- Note note = Note.fromJson(json);
-
- for (Paragraph p : note.getParagraphs()) {
- if (p.getStatus() == Job.Status.PENDING || p.getStatus() == Job.Status.RUNNING) {
- p.setStatus(Job.Status.ABORT);
- }
-
- List<ApplicationState> appStates = p.getAllApplicationStates();
- if (appStates != null) {
- for (ApplicationState app : appStates) {
- if (app.getStatus() != ApplicationState.Status.ERROR) {
- app.setStatus(ApplicationState.Status.UNLOADED);
- }
- }
- }
- }
-
- return note;
+ return Note.fromJson(json);
}
/**
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java
index 8828985..7d64702 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/S3NotebookRepo.java
@@ -90,8 +90,8 @@ public class S3NotebookRepo implements NotebookRepo {
public S3NotebookRepo(ZeppelinConfiguration conf) throws IOException {
this.conf = conf;
- bucketName = conf.getBucketName();
- user = conf.getUser();
+ bucketName = conf.getS3BucketName();
+ user = conf.getS3User();
useServerSideEncryption = conf.isS3ServerSideEncryption();
// always use the default provider chain
@@ -123,7 +123,7 @@ public class S3NotebookRepo implements NotebookRepo {
}
// set S3 endpoint to use
- s3client.setEndpoint(conf.getEndpoint());
+ s3client.setEndpoint(conf.getS3Endpoint());
}
/**
@@ -205,19 +205,10 @@ public class S3NotebookRepo implements NotebookRepo {
throw new IOException("Unable to retrieve object from S3: " + ace, ace);
}
- Note note;
try (InputStream ins = s3object.getObjectContent()) {
String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING));
- note = Note.fromJson(json);
+ return Note.fromJson(json);
}
-
- for (Paragraph p : note.getParagraphs()) {
- if (p.getStatus() == Status.PENDING || p.getStatus() == Status.RUNNING) {
- p.setStatus(Status.ABORT);
- }
- }
-
- return note;
}
private NoteInfo getNoteInfo(String key) throws IOException {
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java
index 63395f9..481ea3d 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepo.java
@@ -17,6 +17,7 @@
package org.apache.zeppelin.notebook.repo;
+import com.google.common.collect.Lists;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -24,11 +25,9 @@ import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
-import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.vfs2.FileContent;
@@ -40,17 +39,12 @@ import org.apache.commons.vfs2.Selectors;
import org.apache.commons.vfs2.VFS;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
-import org.apache.zeppelin.notebook.ApplicationState;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
-import org.apache.zeppelin.notebook.Paragraph;
-import org.apache.zeppelin.scheduler.Job.Status;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-
/**
*
*/
@@ -167,24 +161,7 @@ public class VFSNotebookRepo implements NotebookRepo {
String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING));
ins.close();
- Note note = Note.fromJson(json);
-
- for (Paragraph p : note.getParagraphs()) {
- if (p.getStatus() == Status.PENDING || p.getStatus() == Status.RUNNING) {
- p.setStatus(Status.ABORT);
- }
-
- List<ApplicationState> appStates = p.getAllApplicationStates();
- if (appStates != null) {
- for (ApplicationState app : appStates) {
- if (app.getStatus() != ApplicationState.Status.ERROR) {
- app.setStatus(ApplicationState.Status.UNLOADED);
- }
- }
- }
- }
-
- return note;
+ return Note.fromJson(json);
}
private NoteInfo getNoteInfo(FileObject noteDir) throws IOException {
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/91b5d69b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java
new file mode 100644
index 0000000..c1fae67
--- /dev/null
+++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/GCSNotebookRepoTest.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.notebook.repo;
+
+import static com.google.common.truth.Truth.assertThat;
+import static junit.framework.TestCase.fail;
+
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.BlobInfo;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.contrib.nio.testing.LocalStorageHelper;
+import com.google.common.base.Optional;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
+import org.apache.zeppelin.notebook.Note;
+import org.apache.zeppelin.notebook.NoteInfo;
+import org.apache.zeppelin.notebook.Paragraph;
+import org.apache.zeppelin.scheduler.Job.Status;
+import org.apache.zeppelin.user.AuthenticationInfo;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+/** Runs GCSNotebookRepo against a LocalStorageHelper Storage for several bucket/path layouts. */
+@RunWith(Parameterized.class)
+public class GCSNotebookRepoTest {
+  private static final AuthenticationInfo AUTH_INFO = AuthenticationInfo.ANONYMOUS;
+
+  private GCSNotebookRepo notebookRepo;
+  private Storage storage;
+
+  @Parameters
+  public static Collection<Object[]> data() {
+    return Arrays.asList(new Object[][] {
+      { "bucketname", Optional.absent(), "gs://bucketname" },
+      { "bucketname-with-slash", Optional.absent(), "gs://bucketname-with-slash/" },
+      { "bucketname", Optional.of("path/to/dir"), "gs://bucketname/path/to/dir" },
+      { "bucketname", Optional.of("trailing/slash"), "gs://bucketname/trailing/slash/" }
+    });
+  }
+
+  @Parameter(0)
+  public String bucketName;
+
+  @Parameter(1)
+  public Optional<String> basePath;
+
+  @Parameter(2)
+  public String uriPath;
+
+  private Note runningNote;
+
+  @Before
+  public void setUp() throws Exception {
+    this.runningNote = makeRunningNote();
+    this.storage = LocalStorageHelper.getOptions().getService();
+
+    // Point the repo under test at the parameterized location via this system property.
+    System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName(), uriPath);
+    this.notebookRepo = new GCSNotebookRepo(new ZeppelinConfiguration(), storage);
+  }
+
+  @org.junit.After
+  public void tearDown() {
+    // Keep the storage dir property from leaking into other tests in this JVM.
+    System.clearProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName());
+  }
+
+  private static Note makeRunningNote() {
+    Note note = new Note();
+    note.setConfig(ImmutableMap.<String, Object>of("key", "value"));
+
+    Paragraph p = new Paragraph(note, null, null);
+    p.setText("text");
+    p.setStatus(Status.RUNNING);
+    note.addParagraph(p);
+    return note;
+  }
+
+  @Test
+  public void testList_nonexistent() throws Exception {
+    assertThat(notebookRepo.list(AUTH_INFO)).isEmpty();
+  }
+
+  @Test
+  public void testList() throws Exception {
+    createAt(runningNote, "note.json");
+    createAt(runningNote, "/note.json");
+    createAt(runningNote, "validid/note.json");
+    createAt(runningNote, "validid-2/note.json");
+    createAt(runningNote, "cannot-be-dir/note.json/foo");
+    createAt(runningNote, "cannot/be/nested/note.json");
+
+    List<NoteInfo> infos = notebookRepo.list(AUTH_INFO);
+    List<String> noteIds = new ArrayList<>();
+    for (NoteInfo info : infos) {
+      noteIds.add(info.getId());
+    }
+    // Only valid paths are gs://bucketname/path/<noteid>/note.json
+    assertThat(noteIds).containsExactlyElementsIn(ImmutableList.of("validid", "validid-2"));
+  }
+
+  @Test
+  public void testGet_nonexistent() throws Exception {
+    try {
+      notebookRepo.get("id", AUTH_INFO);
+      fail("get() of a nonexistent note should throw IOException");
+    } catch (IOException expected) {}
+  }
+
+  @Test
+  public void testGet() throws Exception {
+    create(runningNote);
+
+    // Status of saved running note is removed in get()
+    Note got = notebookRepo.get(runningNote.getId(), AUTH_INFO);
+    assertThat(got.getLastParagraph().getStatus()).isEqualTo(Status.ABORT);
+
+    // But otherwise equal
+    got.getLastParagraph().setStatus(Status.RUNNING);
+    assertThat(got).isEqualTo(runningNote);
+  }
+
+  @Test
+  public void testGet_malformed() throws Exception {
+    createMalformed("id");
+    try {
+      notebookRepo.get("id", AUTH_INFO);
+      fail("get() of malformed note JSON should throw IOException");
+    } catch (IOException expected) {}
+  }
+
+  @Test
+  public void testSave_create() throws Exception {
+    notebookRepo.save(runningNote, AUTH_INFO);
+    // Output is saved
+    assertThat(storage.readAllBytes(makeBlobId(runningNote.getId())))
+        .isEqualTo(runningNote.toJson().getBytes("UTF-8"));
+  }
+
+  @Test
+  public void testSave_update() throws Exception {
+    notebookRepo.save(runningNote, AUTH_INFO);
+    // Change name of runningNote
+    runningNote.setName("new-name");
+    notebookRepo.save(runningNote, AUTH_INFO);
+    assertThat(storage.readAllBytes(makeBlobId(runningNote.getId())))
+        .isEqualTo(runningNote.toJson().getBytes("UTF-8"));
+  }
+
+  @Test
+  public void testRemove_nonexistent() throws Exception {
+    try {
+      notebookRepo.remove("id", AUTH_INFO);
+      fail("remove() of a nonexistent note should throw IOException");
+    } catch (IOException expected) {}
+  }
+
+  @Test
+  public void testRemove() throws Exception {
+    create(runningNote);
+    notebookRepo.remove(runningNote.getId(), AUTH_INFO);
+    assertThat(storage.get(makeBlobId(runningNote.getId()))).isNull();
+  }
+
+  private String makeName(String relativePath) {
+    return basePath.isPresent() ? basePath.get() + "/" + relativePath : relativePath;
+  }
+
+  private BlobId makeBlobId(String noteId) {
+    return BlobId.of(bucketName, makeName(noteId + "/note.json"));
+  }
+
+  private void createAt(Note note, String relativePath) throws IOException {
+    BlobId id = BlobId.of(bucketName, makeName(relativePath));
+    BlobInfo info = BlobInfo.newBuilder(id).setContentType("application/json").build();
+    storage.create(info, note.toJson().getBytes("UTF-8"));
+  }
+
+  private void create(Note note) throws IOException {
+    createAt(note, note.getId() + "/note.json");
+  }
+
+  private void createMalformed(String noteId) throws IOException {
+    BlobInfo info = BlobInfo.newBuilder(makeBlobId(noteId))
+        .setContentType("application/json")
+        .build();
+    storage.create(info, "{ invalid-json }".getBytes("UTF-8"));
+  }
+
+  /* These tests test path parsing for illegal paths, and do not use the parameterized vars */
+
+  @Test
+  public void testInitialization_pathNotSet() throws Exception {
+    try {
+      System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName(), "");
+      new GCSNotebookRepo(new ZeppelinConfiguration(), storage);
+      fail("an empty storage dir should be rejected with IOException");
+    } catch (IOException expected) {}
+  }
+
+  @Test
+  public void testInitialization_malformedPath() throws Exception {
+    try {
+      System.setProperty(ConfVars.ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR.getVarName(), "foo");
+      new GCSNotebookRepo(new ZeppelinConfiguration(), storage);
+      fail("storage dir \"foo\" is malformed and should be rejected with IOException");
+    } catch (IOException expected) {}
+  }
+}