You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/08/16 17:56:38 UTC
[tika] 03/03: TIKA-3524 -- add tika-pipes support for google cloud
storage
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 48d9389125e217a9c6301840ff39a3c4ee13d742
Author: tallison <ta...@apache.org>
AuthorDate: Mon Aug 16 13:56:14 2021 -0400
TIKA-3524 -- add tika-pipes support for google cloud storage
---
tika-parent/pom.xml | 1 +
tika-pipes/tika-emitters/pom.xml | 1 +
tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml | 118 +++++++++++++
.../apache/tika/pipes/emitter/gcs/GCSEmitter.java | 184 +++++++++++++++++++++
.../tika/pipes/emitter/gcs/TestGCSEmitter.java | 51 ++++++
.../src/test/resources/config/tika-config-gcs.xml | 28 ++++
tika-pipes/tika-fetchers/pom.xml | 1 +
tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml | 110 ++++++++++++
.../apache/tika/pipes/fetcher/gcs/GCSFetcher.java | 135 +++++++++++++++
.../tika/pipes/fetcher/s3/TestGCSFetcher.java | 63 +++++++
.../src/test/resources/tika-config-gcs.xml | 28 ++++
tika-pipes/tika-pipes-iterators/pom.xml | 1 +
.../tika-pipes-iterator-gcs/pom.xml | 112 +++++++++++++
.../pipes/pipesiterator/gcs/GCSPipesIterator.java | 123 ++++++++++++++
.../pipesiterator/gcs/TestGCSPipesIterator.java | 102 ++++++++++++
.../src/test/resources/log4j.properties | 22 +++
16 files changed, 1080 insertions(+)
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 0fe2354..753b0de 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -303,6 +303,7 @@
<!-- fakeload versions > 0.4.0 require java > 8 -->
<fakeload.version>0.4.0</fakeload.version>
<geoapi.version>3.0.1</geoapi.version>
+ <google.cloud.version>2.0.1</google.cloud.version>
<gson.version>2.8.7</gson.version>
<guava.version>30.1.1-jre</guava.version>
<h2.version>1.4.200</h2.version>
diff --git a/tika-pipes/tika-emitters/pom.xml b/tika-pipes/tika-emitters/pom.xml
index e6a338e..db1a4b6 100644
--- a/tika-pipes/tika-emitters/pom.xml
+++ b/tika-pipes/tika-emitters/pom.xml
@@ -36,5 +36,6 @@
<module>tika-emitter-s3</module>
<module>tika-emitter-solr</module>
<module>tika-emitter-opensearch</module>
+ <module>tika-emitter-gcs</module>
</modules>
</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml b/tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml
new file mode 100644
index 0000000..9b17e51
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-emitters</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>2.1.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-emitter-gcs</artifactId>
+ <name>Apache Tika GCS emitter</name>
+
+
+ <dependencies>
+ <!-- should serialization be provided or bundled? -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-serialization</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.cloud</groupId>
+ <artifactId>google-cloud-storage</artifactId>
+ <version>${google.cloud.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <archive>
+ <manifestEntries>
+ <Automatic-Module-Name>org.apache.tika.pipes.emitter.gcs</Automatic-Module-Name>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.version}</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>
+ false
+ </createDependencyReducedPom>
+ <!-- <filters> -->
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*</exclude>
+ <exclude>LICENSE.txt</exclude>
+ <exclude>NOTICE.txt</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/LICENSE</resource>
+ <file>target/classes/META-INF/LICENSE</file>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/NOTICE</resource>
+ <file>target/classes/META-INF/NOTICE</file>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/DEPENDENCIES</resource>
+ <file>target/classes/META-INF/DEPENDENCIES</file>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+ </build>
+</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/main/java/org/apache/tika/pipes/emitter/gcs/GCSEmitter.java b/tika-pipes/tika-emitters/tika-emitter-gcs/src/main/java/org/apache/tika/pipes/emitter/gcs/GCSEmitter.java
new file mode 100644
index 0000000..e4c03c4
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/main/java/org/apache/tika/pipes/emitter/gcs/GCSEmitter.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.gcs;
+
+import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.BlobInfo;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.pipes.emitter.AbstractEmitter;
+import org.apache.tika.pipes.emitter.StreamEmitter;
+import org.apache.tika.pipes.emitter.TikaEmitterException;
+import org.apache.tika.utils.StringUtils;
+
+
+public class GCSEmitter extends AbstractEmitter implements Initializable, StreamEmitter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(GCSEmitter.class);
+ private String projectId;
+ private String bucket;
+ private String fileExtension = "json";
+ private String prefix = null;
+ private Storage storage;
+
+ /**
+ * Requires the src-bucket/path/to/my/file.txt in the {@link TikaCoreProperties#SOURCE_PATH}.
+ *
+ * @param metadataList
+ * @throws IOException
+ * @throws TikaException
+ */
+ @Override
+ public void emit(String emitKey, List<Metadata> metadataList)
+ throws IOException, TikaEmitterException {
+ if (metadataList == null || metadataList.size() == 0) {
+ throw new TikaEmitterException("metadata list must not be null or of size 0");
+ }
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ try (Writer writer = new OutputStreamWriter(bos, StandardCharsets.UTF_8)) {
+ JsonMetadataList.toJson(metadataList, writer);
+ } catch (IOException e) {
+ throw new TikaEmitterException("can't jsonify", e);
+ }
+
+ write(emitKey, new Metadata(), bos.toByteArray());
+
+ }
+
+ /**
+ * @param path -- object path, not including the bucket
+ * @param is inputStream to copy
+ * @param userMetadata this will be written to the s3 ObjectMetadata's userMetadata
+ * @throws TikaEmitterException or IOexception if there is a Runtime s3 client exception
+ */
+ @Override
+ public void emit(String path, InputStream is, Metadata userMetadata)
+ throws IOException, TikaEmitterException {
+
+ if (is instanceof TikaInputStream && ((TikaInputStream) is).hasFile()) {
+ write(path, userMetadata, Files.readAllBytes(((TikaInputStream) is).getPath()));
+ } else {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ IOUtils.copy(is, bos);
+ write(path, userMetadata, bos.toByteArray());
+ }
+ }
+
+ private void write(String path, Metadata userMetadata, byte[] bytes) {
+ if (!StringUtils.isBlank(prefix)) {
+ path = prefix + "/" + path;
+ }
+
+ if (!StringUtils.isBlank(fileExtension)) {
+ path += "." + fileExtension;
+ }
+
+ LOGGER.debug("about to emit to target bucket: ({}) path:({})", bucket, path);
+ BlobId blobId = BlobId.of(bucket, path);
+ BlobInfo blobInfo = BlobInfo.newBuilder(blobId).build();
+
+ for (String n : userMetadata.names()) {
+ String[] vals = userMetadata.getValues(n);
+ if (vals.length > 1) {
+ LOGGER.warn("Can only write the first value for key {}. I see {} values.", n,
+ vals.length);
+ }
+ blobInfo.getMetadata().put(n, vals[0]);
+ }
+ storage.create(blobInfo, bytes);
+ }
+
+
+ @Field
+ public void setProjectId(String projectId) {
+ this.projectId = projectId;
+ }
+
+ @Field
+ public void setBucket(String bucket) {
+ this.bucket = bucket;
+ }
+
+ @Field
+ public void setPrefix(String prefix) {
+ //strip final "/" if it exists
+ if (prefix.endsWith("/")) {
+ this.prefix = prefix.substring(0, prefix.length() - 1);
+ } else {
+ this.prefix = prefix;
+ }
+ }
+
+ /**
+ * If you want to customize the output file's file extension.
+ * Do not include the "."
+ *
+ * @param fileExtension
+ */
+ @Field
+ public void setFileExtension(String fileExtension) {
+ this.fileExtension = fileExtension;
+ }
+
+
+ /**
+ * This initializes the gcs client.
+ *
+ * @param params params to use for initialization
+ * @throws TikaConfigException
+ */
+ @Override
+ public void initialize(Map<String, Param> params) throws TikaConfigException {
+ //params have already been set...ignore them
+ //TODO -- add other params to the builder as needed
+ storage = StorageOptions.newBuilder().setProjectId(projectId).build().getService();
+ }
+
+ @Override
+ public void checkInitialization(InitializableProblemHandler problemHandler)
+ throws TikaConfigException {
+ mustNotBeEmpty("bucket", this.bucket);
+ mustNotBeEmpty("projectId", this.projectId);
+ }
+
+}
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/java/org/apache/tika/pipes/emitter/gcs/TestGCSEmitter.java b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/java/org/apache/tika/pipes/emitter/gcs/TestGCSEmitter.java
new file mode 100644
index 0000000..aaee49e
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/java/org/apache/tika/pipes/emitter/gcs/TestGCSEmitter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.gcs;
+
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.emitter.Emitter;
+import org.apache.tika.pipes.emitter.EmitterManager;
+
+/**
+ * Manual smoke test for the GCS emitter; disabled because it needs a real bucket
+ * and credentials.
+ */
+@Disabled("turn into an actual test")
+public class TestGCSEmitter {
+
+    /**
+     * Emits a single metadata object (with one multi-valued key) through the emitter
+     * that is registered as "gcs" in the test config.
+     */
+    @Test
+    public void testBasic() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("k1", "v1");
+        metadata.add("k1", "v2");
+        metadata.set("k2", "v3");
+
+        List<Metadata> toEmit = new ArrayList<>();
+        toEmit.add(metadata);
+
+        Path configPath = getConfig("tika-config-gcs.xml");
+        Emitter gcsEmitter = EmitterManager.load(configPath).getEmitter("gcs");
+        gcsEmitter.emit("something-or-other/test-out", toEmit);
+    }
+
+    /**
+     * Resolves a config file that lives under /config on the test classpath.
+     */
+    private Path getConfig(String configFile) throws URISyntaxException {
+        String resourceName = "/config/" + configFile;
+        return Paths.get(getClass().getResource(resourceName).toURI());
+    }
+}
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml
new file mode 100644
index 0000000..b45ec31
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <emitters>
+ <emitter class="org.apache.tika.pipes.emitter.gcs.GCSEmitter">
+ <params>
+ <name>gcs</name>
+ <projectId>My First Project</projectId>
+ <bucket>tika-tallison-test-bucket</bucket>
+ </params>
+ </emitter>
+ </emitters>
+</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml
index 8fa2681..3086175 100644
--- a/tika-pipes/tika-fetchers/pom.xml
+++ b/tika-pipes/tika-fetchers/pom.xml
@@ -34,5 +34,6 @@
<modules>
<module>tika-fetcher-http</module>
<module>tika-fetcher-s3</module>
+ <module>tika-fetcher-gcs</module>
</modules>
</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml
new file mode 100644
index 0000000..0e47e64
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-fetchers</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>2.1.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-fetcher-gcs</artifactId>
+ <name>Apache Tika Google Cloud Storage fetcher</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.cloud</groupId>
+ <artifactId>google-cloud-storage</artifactId>
+ <version>${google.cloud.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <archive>
+ <manifestEntries>
+ <Automatic-Module-Name>org.apache.tika.pipes.fetcher.gcs</Automatic-Module-Name>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.version}</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>
+ false
+ </createDependencyReducedPom>
+ <!-- <filters> -->
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*</exclude>
+ <exclude>LICENSE.txt</exclude>
+ <exclude>NOTICE.txt</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/LICENSE</resource>
+ <file>target/classes/META-INF/LICENSE</file>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/NOTICE</resource>
+ <file>target/classes/META-INF/NOTICE</file>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/DEPENDENCIES</resource>
+ <file>target/classes/META-INF/DEPENDENCIES</file>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+ </build>
+</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
new file mode 100644
index 0000000..6881c5a
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.gcs;
+
+import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.Map;
+
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetcher.AbstractFetcher;
+
+/**
+ * Fetches files from google cloud storage. Must set projectId and bucket via the config.
+ */
+public class GCSFetcher extends AbstractFetcher implements Initializable {
+
+    //namespace used for user metadata keys copied from the blob
+    private static final String PREFIX = "gcs";
+    private static final Logger LOGGER = LoggerFactory.getLogger(GCSFetcher.class);
+    private String projectId;
+    private String bucket;
+    private boolean extractUserMetadata = true;
+    private Storage storage;
+    private boolean spoolToTemp = true;
+
+    /**
+     * Fetches the blob stored under {@code fetchKey} in the configured bucket.
+     * <p>
+     * If {@code extractUserMetadata} is set, the blob's user metadata is added to
+     * {@code metadata} with keys of the form {@code gcs:<key>}. If {@code spoolToTemp}
+     * is set, the content is downloaded to a temp file that is released when the returned
+     * stream is closed; otherwise the content is loaded into memory.
+     *
+     * @param fetchKey name of the blob within the bucket
+     * @param metadata metadata object to populate
+     * @return stream over the blob's content
+     * @throws TikaException declared in the signature
+     * @throws IOException   if the blob does not exist, or for any failure raised by the
+     *                       storage client or while spooling to disk
+     */
+    @Override
+    public InputStream fetch(String fetchKey, Metadata metadata) throws TikaException, IOException {
+
+        LOGGER.debug("about to fetch fetchkey={} from bucket ({})", fetchKey, bucket);
+
+        try {
+            Blob blob = storage.get(BlobId.of(bucket, fetchKey));
+            if (blob == null) {
+                //Storage#get returns null for a missing blob; fail with a clear message
+                //rather than a NullPointerException further down
+                throw new IOException(
+                        "no blob found for fetchKey=" + fetchKey + " in bucket (" + bucket + ")");
+            }
+
+            if (extractUserMetadata && blob.getMetadata() != null) {
+                for (Map.Entry<String, String> e : blob.getMetadata().entrySet()) {
+                    metadata.add(PREFIX + ":" + e.getKey(), e.getValue());
+                }
+            }
+            if (!spoolToTemp) {
+                return TikaInputStream.get(blob.getContent());
+            }
+            return spoolToTempFile(blob, metadata);
+        } catch (IOException e) {
+            //already the exception type callers expect; do not wrap it a second time
+            throw e;
+        } catch (Exception e) {
+            throw new IOException("gcs storage exception", e);
+        }
+    }
+
+    /**
+     * Downloads the blob to a temp file and wraps it in a TikaInputStream that owns the
+     * temp resources. The temp resources are released here if anything fails before the
+     * stream is handed back.
+     */
+    private TikaInputStream spoolToTempFile(Blob blob, Metadata metadata) throws IOException {
+        long start = System.currentTimeMillis();
+        TemporaryResources tmpResources = new TemporaryResources();
+        try {
+            Path tmp = tmpResources.createTempFile();
+            blob.downloadTo(tmp);
+            TikaInputStream tis = TikaInputStream.get(tmp, metadata, tmpResources);
+            long elapsed = System.currentTimeMillis() - start;
+            LOGGER.debug("took {} ms to copy to local tmp file", elapsed);
+            return tis;
+        } catch (IOException | RuntimeException e) {
+            //nobody else will close tmpResources on this path, so clean up the temp file
+            try {
+                tmpResources.close();
+            } catch (IOException closeException) {
+                e.addSuppressed(closeException);
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Whether to download the blob to a local temp file (default) or load it into memory.
+     *
+     * @param spoolToTemp true to spool to a temp file
+     */
+    @Field
+    public void setSpoolToTemp(boolean spoolToTemp) {
+        this.spoolToTemp = spoolToTemp;
+    }
+
+    @Field
+    public void setProjectId(String projectId) {
+        this.projectId = projectId;
+    }
+
+    @Field
+    public void setBucket(String bucket) {
+        this.bucket = bucket;
+    }
+
+    /**
+     * Whether or not to extract user metadata from the GCS blob
+     *
+     * @param extractUserMetadata true to copy the blob's user metadata into the Metadata object
+     */
+    @Field
+    public void setExtractUserMetadata(boolean extractUserMetadata) {
+        this.extractUserMetadata = extractUserMetadata;
+    }
+
+    //TODO: parameterize extracting other blob metadata, eg. md5, crc, etc.
+
+    /**
+     * This initializes the gcs storage client.
+     *
+     * @param params params to use for initialization
+     * @throws TikaConfigException declared by the interface
+     */
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //params have already been set...ignore them
+        //TODO -- add other params to the builder as needed
+        storage = StorageOptions.newBuilder().setProjectId(projectId).build().getService();
+    }
+
+    /**
+     * Verifies that the mandatory {@code bucket} and {@code projectId} settings are present.
+     *
+     * @param problemHandler not used by this implementation
+     * @throws TikaConfigException if a mandatory setting is missing
+     */
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        mustNotBeEmpty("bucket", this.bucket);
+        mustNotBeEmpty("projectId", this.projectId);
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java
new file mode 100644
index 0000000..35aabbe
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.s3;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetcher.Fetcher;
+import org.apache.tika.pipes.fetcher.FetcherManager;
+
+/**
+ * Manual smoke test for the GCS fetcher; disabled because it needs a real bucket
+ * and credentials.
+ */
+@Disabled("write actual unit tests")
+public class TestGCSFetcher {
+
+    private static final String FETCH_STRING = "testExtraSpaces.pdf";
+    private static Path outputFile;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        outputFile = Files.createTempFile("tika-test", ".pdf");
+    }
+
+    @AfterAll
+    public static void tearDown() throws Exception {
+        //setUp may have failed before the file was created; never mask that failure
+        //with an exception from the cleanup
+        if (outputFile != null) {
+            Files.deleteIfExists(outputFile);
+        }
+    }
+
+    /**
+     * Loads the fetcher registered as "gcs" from the test config, fetches the test
+     * file into a local temp file and checks the number of bytes written.
+     */
+    @Test
+    public void testConfig() throws Exception {
+        FetcherManager fetcherManager = FetcherManager.load(
+                Paths.get(this.getClass().getResource("/tika-config-gcs.xml").toURI()));
+        Fetcher fetcher = fetcherManager.getFetcher("gcs");
+        Metadata metadata = new Metadata();
+        try (InputStream is = fetcher.fetch(FETCH_STRING, metadata)) {
+            Files.copy(is, outputFile, StandardCopyOption.REPLACE_EXISTING);
+        }
+        assertEquals(20743, Files.size(outputFile));
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml
new file mode 100644
index 0000000..eee110d
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <fetchers>
+ <fetcher class="org.apache.tika.pipes.fetcher.gcs.GCSFetcher">
+ <params>
+ <name>gcs</name>
+ <projectId>My First Project</projectId>
+ <bucket>tika-tallison-test-bucket</bucket>
+ </params>
+ </fetcher>
+ </fetchers>
+</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-pipes-iterators/pom.xml b/tika-pipes/tika-pipes-iterators/pom.xml
index 72a5912..337147d 100644
--- a/tika-pipes/tika-pipes-iterators/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/pom.xml
@@ -38,5 +38,6 @@
<module>tika-pipes-iterator-jdbc</module>
<module>tika-pipes-iterator-s3</module>
<module>tika-pipes-iterator-solr</module>
+ <module>tika-pipes-iterator-gcs</module>
</modules>
</project>
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/pom.xml b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/pom.xml
new file mode 100644
index 0000000..5b37973
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/pom.xml
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-pipes-iterators</artifactId>
+ <version>2.1.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-pipes-iterator-gcs</artifactId>
+
+ <name>Apache Tika Fetch Iterator - Google Cloud Storage</name>
+ <url>https://tika.apache.org/</url>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.cloud</groupId>
+ <artifactId>google-cloud-storage</artifactId>
+ <version>${google.cloud.version}</version>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <archive>
+ <manifestEntries> <!-- the module name must be unique per jar; it mirrors this module's package -->
+ <Automatic-Module-Name>org.apache.tika.pipes.pipesiterator.gcs</Automatic-Module-Name>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven.shade.version}</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>
+ false
+ </createDependencyReducedPom>
+ <!-- <filters> -->
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*</exclude>
+ <exclude>LICENSE.txt</exclude>
+ <exclude>NOTICE.txt</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/LICENSE</resource>
+ <file>target/classes/META-INF/LICENSE</file>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/NOTICE</resource>
+ <file>target/classes/META-INF/NOTICE</file>
+ </transformer>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>META-INF/DEPENDENCIES</resource>
+ <file>target/classes/META-INF/DEPENDENCIES</file>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+ </build>
+</project>
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java
new file mode 100644
index 0000000..a9d052b
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.pipesiterator.gcs;
+
+import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+
+import com.google.api.gax.paging.Page;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.FetchEmitTuple;
+import org.apache.tika.pipes.HandlerConfig;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+import org.apache.tika.pipes.pipesiterator.PipesIterator;
+import org.apache.tika.utils.StringUtils;
+
+/** Pipes iterator that enqueues one FetchEmitTuple per blob whose size is not zero. */
+public class GCSPipesIterator extends PipesIterator implements Initializable {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(GCSPipesIterator.class);
+    private String prefix = "";
+    private String projectId = "";
+    private String bucket;
+
+    private Storage storage;
+
+    @Field
+    public void setBucket(String bucket) {
+        this.bucket = bucket;
+    }
+
+    @Field
+    public void setPrefix(String prefix) {
+        this.prefix = prefix;
+    }
+
+    @Field
+    public void setProjectId(String projectId) {
+        this.projectId = projectId;
+    }
+
+    /**
+     * Builds the GCS client for the configured project id.
+     *
+     * @param params params to use for initialization (not read by this implementation)
+     * @throws TikaConfigException declared in the signature; not thrown by this method's own code
+     */
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //TODO -- add other params to the builder as needed
+        storage = StorageOptions.newBuilder().setProjectId(projectId).build().getService();
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        super.checkInitialization(problemHandler);
+        mustNotBeEmpty("bucket", this.bucket);
+        mustNotBeEmpty("projectId", this.projectId);
+    }
+
+    /** Lists the bucket, restricted to the prefix when one is set, and enqueues each blob. */
+    @Override
+    protected void enqueue() throws InterruptedException, IOException, TimeoutException {
+        String fetcherName = getFetcherName();
+        String emitterName = getEmitterName();
+        long start = System.currentTimeMillis();
+        int count = 0;
+        HandlerConfig handlerConfig = getHandlerConfig();
+
+        Page<Blob> blobs = StringUtils.isBlank(prefix) ? storage.list(bucket) :
+                storage.list(bucket, Storage.BlobListOption.prefix(prefix));
+
+        for (Blob blob : blobs.iterateAll()) {
+            //I couldn't find a better way to skip directories
+            //calling blob.isDirectory() does not appear to work. #usererror I'm sure.
+            //getSize() is held as a boxed Long; test for null first so unboxing cannot NPE
+            //a null size is treated as unknown and the blob is still enqueued
+            Long size = blob.getSize();
+            if (size != null && size == 0L) {
+                continue;
+            }
+            long elapsed = System.currentTimeMillis() - start;
+            LOGGER.debug("adding ({}) {} in {} ms", count, blob.getName(), elapsed);
+            //TODO -- allow user specified metadata as the "id"?
+            tryToAdd(new FetchEmitTuple(blob.getName(), new FetchKey(fetcherName,
+                    blob.getName()),
+                    new EmitKey(emitterName, blob.getName()), new Metadata(), handlerConfig,
+                    getOnParseException()));
+            count++;
+        }
+        long elapsed = System.currentTimeMillis() - start;
+        LOGGER.info("finished enqueuing {} files in {} ms", count, elapsed);
+    }
+}
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/java/org/apache/tika/pipes/pipesiterator/gcs/TestGCSPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/java/org/apache/tika/pipes/pipesiterator/gcs/TestGCSPipesIterator.java
new file mode 100644
index 0000000..5fa51ab
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/java/org/apache/tika/pipes/pipesiterator/gcs/TestGCSPipesIterator.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.pipesiterator.gcs;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.pipes.FetchEmitTuple;
+import org.apache.tika.pipes.pipesiterator.PipesIterator;
+import org.apache.tika.pipes.pipesiterator.gcs.GCSPipesIterator;
+
+@Disabled("turn into an actual unit test")
+public class TestGCSPipesIterator {
+
+    /** Iterates the configured bucket and checks how many tuples the consumers received. */
+    @Test
+    public void testSimple() throws Exception {
+        GCSPipesIterator it = new GCSPipesIterator();
+        it.setFetcherName("gcs");
+        it.setBucket("tika-tallison-test-bucket");
+        it.setProjectId("My First Project");
+        it.setPrefix("pdfs");
+        it.initialize(Collections.emptyMap());
+        int numConsumers = 6;
+        ArrayBlockingQueue<FetchEmitTuple> queue = new ArrayBlockingQueue<>(10);
+        ExecutorService es = Executors.newFixedThreadPool(numConsumers + 1);
+        ExecutorCompletionService<Integer> c = new ExecutorCompletionService<>(es);
+        int completed = 0;
+        try {
+            for (int i = 0; i < numConsumers; i++) {
+                c.submit(new MockFetcher(queue));
+            }
+            for (FetchEmitTuple t : it) {
+                System.out.println(t);
+                //put() blocks while the bounded queue is full; offer() would drop the tuple
+                queue.put(t);
+            }
+            //one sentinel per consumer so that every MockFetcher terminates
+            for (int i = 0; i < numConsumers; i++) {
+                queue.put(PipesIterator.COMPLETED_SEMAPHORE);
+            }
+            for (int i = 0; i < numConsumers; i++) {
+                Future<Integer> f = c.take();
+                completed += f.get();
+            }
+        } finally {
+            es.shutdownNow();
+        }
+        assertEquals(2, completed);
+    }
+
+    /** Consumer that counts the tuples it drains before it sees the completion sentinel. */
+    private static class MockFetcher implements Callable<Integer> {
+        private final ArrayBlockingQueue<FetchEmitTuple> queue;
+        private final List<FetchEmitTuple> pairs = new ArrayList<>();
+
+        private MockFetcher(ArrayBlockingQueue<FetchEmitTuple> queue) {
+            this.queue = queue;
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            while (true) {
+                FetchEmitTuple t = queue.poll(1, TimeUnit.HOURS);
+                if (t == null) {
+                    throw new IllegalStateException("timed out waiting for a tuple");
+                }
+                if (t == PipesIterator.COMPLETED_SEMAPHORE) {
+                    return pairs.size();
+                }
+                pairs.add(t);
+            }
+        }
+    }
+}
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/resources/log4j.properties b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/resources/log4j.properties
new file mode 100644
index 0000000..2b2da1a
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/resources/log4j.properties
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stderr
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+log4j.appender.stderr.layout.ConversionPattern=%-5p [%t]: %m%n