You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/08/16 17:56:38 UTC

[tika] 03/03: TIKA-3524 -- add tika-pipes support for google cloud storage

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 48d9389125e217a9c6301840ff39a3c4ee13d742
Author: tallison <ta...@apache.org>
AuthorDate: Mon Aug 16 13:56:14 2021 -0400

    TIKA-3524 -- add tika-pipes support for google cloud storage
---
 tika-parent/pom.xml                                |   1 +
 tika-pipes/tika-emitters/pom.xml                   |   1 +
 tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml  | 118 +++++++++++++
 .../apache/tika/pipes/emitter/gcs/GCSEmitter.java  | 184 +++++++++++++++++++++
 .../tika/pipes/emitter/gcs/TestGCSEmitter.java     |  51 ++++++
 .../src/test/resources/config/tika-config-gcs.xml  |  28 ++++
 tika-pipes/tika-fetchers/pom.xml                   |   1 +
 tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml  | 110 ++++++++++++
 .../apache/tika/pipes/fetcher/gcs/GCSFetcher.java  | 135 +++++++++++++++
 .../tika/pipes/fetcher/s3/TestGCSFetcher.java      |  63 +++++++
 .../src/test/resources/tika-config-gcs.xml         |  28 ++++
 tika-pipes/tika-pipes-iterators/pom.xml            |   1 +
 .../tika-pipes-iterator-gcs/pom.xml                | 112 +++++++++++++
 .../pipes/pipesiterator/gcs/GCSPipesIterator.java  | 123 ++++++++++++++
 .../pipesiterator/gcs/TestGCSPipesIterator.java    | 102 ++++++++++++
 .../src/test/resources/log4j.properties            |  22 +++
 16 files changed, 1080 insertions(+)

diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 0fe2354..753b0de 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -303,6 +303,7 @@
     <!-- fakeload versions &gt; 0.4.0 require java > 8 -->
     <fakeload.version>0.4.0</fakeload.version>
     <geoapi.version>3.0.1</geoapi.version>
+    <google.cloud.version>2.0.1</google.cloud.version>
     <gson.version>2.8.7</gson.version>
     <guava.version>30.1.1-jre</guava.version>
     <h2.version>1.4.200</h2.version>
diff --git a/tika-pipes/tika-emitters/pom.xml b/tika-pipes/tika-emitters/pom.xml
index e6a338e..db1a4b6 100644
--- a/tika-pipes/tika-emitters/pom.xml
+++ b/tika-pipes/tika-emitters/pom.xml
@@ -36,5 +36,6 @@
     <module>tika-emitter-s3</module>
     <module>tika-emitter-solr</module>
     <module>tika-emitter-opensearch</module>
+    <module>tika-emitter-gcs</module>
   </modules>
 </project>
\ No newline at end of file
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml b/tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml
new file mode 100644
index 0000000..9b17e51
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>tika-emitters</artifactId>
+    <groupId>org.apache.tika</groupId>
+    <version>2.1.0-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>tika-emitter-gcs</artifactId>
+  <name>Apache Tika GCS emitter</name>
+
+
+  <dependencies>
+    <!-- should serialization be provided or bundled? -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-serialization</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-storage</artifactId>
+      <version>${google.cloud.version}</version>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifestEntries>
+              <Automatic-Module-Name>org.apache.tika.pipes.emitter.gcs</Automatic-Module-Name>
+            </manifestEntries>
+          </archive>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>${maven.shade.version}</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <createDependencyReducedPom>
+                false
+              </createDependencyReducedPom>
+              <!-- <filters> -->
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*</exclude>
+                    <exclude>LICENSE.txt</exclude>
+                    <exclude>NOTICE.txt</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/LICENSE</resource>
+                  <file>target/classes/META-INF/LICENSE</file>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/NOTICE</resource>
+                  <file>target/classes/META-INF/NOTICE</file>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/DEPENDENCIES</resource>
+                  <file>target/classes/META-INF/DEPENDENCIES</file>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/main/java/org/apache/tika/pipes/emitter/gcs/GCSEmitter.java b/tika-pipes/tika-emitters/tika-emitter-gcs/src/main/java/org/apache/tika/pipes/emitter/gcs/GCSEmitter.java
new file mode 100644
index 0000000..e4c03c4
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/main/java/org/apache/tika/pipes/emitter/gcs/GCSEmitter.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.gcs;
+
+import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.List;
+import java.util.Map;
+
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.BlobInfo;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.pipes.emitter.AbstractEmitter;
+import org.apache.tika.pipes.emitter.StreamEmitter;
+import org.apache.tika.pipes.emitter.TikaEmitterException;
+import org.apache.tika.utils.StringUtils;
+
+
+public class GCSEmitter extends AbstractEmitter implements Initializable, StreamEmitter {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(GCSEmitter.class);
+    private String projectId;
+    private String bucket;
+    private String fileExtension = "json";
+    private String prefix = null;
+    private Storage storage;
+
+    /**
+     * Requires the src-bucket/path/to/my/file.txt in the {@link TikaCoreProperties#SOURCE_PATH}.
+     *
+     * @param metadataList
+     * @throws IOException
+     * @throws TikaException
+     */
+    @Override
+    public void emit(String emitKey, List<Metadata> metadataList)
+            throws IOException, TikaEmitterException {
+        if (metadataList == null || metadataList.isEmpty()) {
+            throw new TikaEmitterException("metadata list must not be null or of size 0");
+        }
+        //serialize the whole list to an in-memory UTF-8 JSON document before uploading
+        ByteArrayOutputStream jsonBytes = new ByteArrayOutputStream();
+        try (Writer jsonWriter = new OutputStreamWriter(jsonBytes, StandardCharsets.UTF_8)) {
+            JsonMetadataList.toJson(metadataList, jsonWriter);
+        } catch (IOException e) {
+            throw new TikaEmitterException("can't jsonify", e);
+        }
+        //the JSON payload carries the metadata; no user metadata is attached to the blob itself
+        write(emitKey, new Metadata(), jsonBytes.toByteArray());
+    }
+
+    /**
+     * @param path         -- object path, not including the bucket
+     * @param is           inputStream to copy
+     * @param userMetadata this will be written to the GCS blob's user metadata
+     * @throws TikaEmitterException or IOException if there is a Runtime gcs client exception
+     */
+    @Override
+    public void emit(String path, InputStream is, Metadata userMetadata)
+            throws IOException, TikaEmitterException {
+        //read the backing file when the TikaInputStream already has one; otherwise buffer the stream
+        byte[] bytes;
+        if (is instanceof TikaInputStream && ((TikaInputStream) is).hasFile()) {
+            bytes = Files.readAllBytes(((TikaInputStream) is).getPath());
+        } else {
+            bytes = IOUtils.toByteArray(is);
+        }
+        write(path, userMetadata, bytes);
+    }
+
+    /** Uploads bytes to [prefix/]path[.fileExtension]; stores the first value of each key. */
+    private void write(String path, Metadata userMetadata, byte[] bytes) {
+        if (!StringUtils.isBlank(prefix)) {
+            path = prefix + "/" + path;
+        }
+        if (!StringUtils.isBlank(fileExtension)) {
+            path += "." + fileExtension;
+        }
+        LOGGER.debug("about to emit to target bucket: ({}) path:({})", bucket, path);
+        //a freshly built BlobInfo has null metadata, so collect it and hand it to the builder
+        Map<String, String> blobMetadata = new java.util.HashMap<>();
+        for (String n : userMetadata.names()) {
+            String[] vals = userMetadata.getValues(n);
+            if (vals.length > 1) {
+                LOGGER.warn("Can only write the first value for key {}. I see {} values.", n,
+                        vals.length);
+            }
+            blobMetadata.put(n, vals[0]);
+        }
+        BlobInfo blobInfo =
+                BlobInfo.newBuilder(BlobId.of(bucket, path)).setMetadata(blobMetadata).build();
+        storage.create(blobInfo, bytes);
+    }
+
+
+    @Field
+    public void setProjectId(String projectId) {
+        this.projectId = projectId;
+    }
+
+    @Field
+    public void setBucket(String bucket) {
+        this.bucket = bucket;
+    }
+
+    @Field
+    public void setPrefix(String prefix) {
+        //strip final "/" if it exists; null is kept as-is and means "no prefix"
+        if (prefix != null && prefix.endsWith("/")) {
+            this.prefix = prefix.substring(0, prefix.length() - 1);
+        } else {
+            this.prefix = prefix;
+        }
+    }
+
+    /**
+     * If you want to customize the output file's file extension.
+     * Do not include the "."
+     *
+     * @param fileExtension
+     */
+    @Field
+    public void setFileExtension(String fileExtension) {
+        this.fileExtension = fileExtension;
+    }
+
+
+    /**
+     * This initializes the gcs client.
+     *
+     * @param params params to use for initialization
+     * @throws TikaConfigException
+     */
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //params have already been set...ignore them
+        //TODO -- add other params to the builder as needed
+        storage = StorageOptions.newBuilder().setProjectId(projectId).build().getService();
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        mustNotBeEmpty("bucket", this.bucket);
+        mustNotBeEmpty("projectId", this.projectId);
+    }
+
+}
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/java/org/apache/tika/pipes/emitter/gcs/TestGCSEmitter.java b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/java/org/apache/tika/pipes/emitter/gcs/TestGCSEmitter.java
new file mode 100644
index 0000000..aaee49e
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/java/org/apache/tika/pipes/emitter/gcs/TestGCSEmitter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.gcs;
+
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.emitter.Emitter;
+import org.apache.tika.pipes.emitter.EmitterManager;
+
+@Disabled("turn into an actual test")
+public class TestGCSEmitter {
+
+    @Test
+    public void testBasic() throws Exception {
+        EmitterManager emitterManager = EmitterManager.load(getConfig("tika-config-gcs.xml"));
+        Emitter emitter = emitterManager.getEmitter("gcs");
+        List<Metadata> metadataList = new ArrayList<>();
+        Metadata m = new Metadata();
+        m.set("k1", "v1");
+        m.add("k1", "v2");
+        m.set("k2", "v3");
+        metadataList.add(m);
+        emitter.emit("something-or-other/test-out", metadataList);
+    }
+
+    private Path getConfig(String configFile) throws URISyntaxException {
+        return Paths.get(this.getClass().getResource("/config/" + configFile).toURI());
+    }
+}
diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml
new file mode 100644
index 0000000..b45ec31
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <emitters>
+        <emitter class="org.apache.tika.pipes.emitter.gcs.GCSEmitter">
+            <params>
+                <name>gcs</name>
+                <projectId>My First Project</projectId>
+                <bucket>tika-tallison-test-bucket</bucket>
+            </params>
+        </emitter>
+    </emitters>
+</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml
index 8fa2681..3086175 100644
--- a/tika-pipes/tika-fetchers/pom.xml
+++ b/tika-pipes/tika-fetchers/pom.xml
@@ -34,5 +34,6 @@
   <modules>
     <module>tika-fetcher-http</module>
     <module>tika-fetcher-s3</module>
+    <module>tika-fetcher-gcs</module>
   </modules>
 </project>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml
new file mode 100644
index 0000000..0e47e64
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>tika-fetchers</artifactId>
+        <groupId>org.apache.tika</groupId>
+        <version>2.1.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>tika-fetcher-gcs</artifactId>
+    <name>Apache Tika Google Cloud Storage fetcher</name>
+
+    <dependencies>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.google.cloud</groupId>
+            <artifactId>google-cloud-storage</artifactId>
+            <version>${google.cloud.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifestEntries>
+                            <Automatic-Module-Name>org.apache.tika.pipes.fetcher.gcs</Automatic-Module-Name>
+                        </manifestEntries>
+                    </archive>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>${maven.shade.version}</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>
+                                false
+                            </createDependencyReducedPom>
+                            <!-- <filters> -->
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*</exclude>
+                                        <exclude>LICENSE.txt</exclude>
+                                        <exclude>NOTICE.txt</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                                    <resource>META-INF/LICENSE</resource>
+                                    <file>target/classes/META-INF/LICENSE</file>
+                                </transformer>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                                    <resource>META-INF/NOTICE</resource>
+                                    <file>target/classes/META-INF/NOTICE</file>
+                                </transformer>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                                    <resource>META-INF/DEPENDENCIES</resource>
+                                    <file>target/classes/META-INF/DEPENDENCIES</file>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
new file mode 100644
index 0000000..6881c5a
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.gcs;
+
+import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.Map;
+
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetcher.AbstractFetcher;
+
+/**
+ * Fetches files from google cloud storage. Must set projectId and bucket via the config.
+ */
+public class GCSFetcher extends AbstractFetcher implements Initializable {
+
+    private static String PREFIX = "gcs";
+    private static final Logger LOGGER = LoggerFactory.getLogger(GCSFetcher.class);
+    private String projectId;
+    private String bucket;
+    private boolean extractUserMetadata = true;
+    private Storage storage;
+    private boolean spoolToTemp = true;
+
+    /** Fetches fetchKey from the bucket; user metadata (if extracted) is added as "gcs:key". */
+    @Override
+    public InputStream fetch(String fetchKey, Metadata metadata) throws TikaException, IOException {
+        LOGGER.debug("about to fetch fetchkey={} from bucket ({})", fetchKey, bucket);
+        TemporaryResources tmpResources = new TemporaryResources();
+        try {
+            Blob blob = storage.get(BlobId.of(bucket, fetchKey));
+            if (blob == null) {
+                //Storage#get returns null for a missing object; report that instead of an NPE
+                throw new java.io.FileNotFoundException(bucket + "/" + fetchKey);
+            }
+            if (extractUserMetadata && blob.getMetadata() != null) {
+                blob.getMetadata().forEach((k, v) -> metadata.add(PREFIX + ":" + k, v));
+            }
+            if (!spoolToTemp) {
+                return TikaInputStream.get(blob.getContent());
+            }
+            long t0 = System.currentTimeMillis();
+            Path tmp = tmpResources.createTempFile();
+            blob.downloadTo(tmp);
+            LOGGER.debug("took {} ms to copy to local tmp file", System.currentTimeMillis() - t0);
+            return TikaInputStream.get(tmp, metadata, tmpResources);
+        } catch (Exception e) {
+            try {
+                tmpResources.close(); //delete the temp file rather than leaking it on failure
+            } catch (IOException suppressed) {
+                e.addSuppressed(suppressed);
+            }
+            throw new IOException("gcs storage exception", e);
+        }
+    }
+
+    @Field
+    public void setSpoolToTemp(boolean spoolToTemp) {
+        this.spoolToTemp = spoolToTemp;
+    }
+
+    @Field
+    public void setProjectId(String projectId) {
+        this.projectId = projectId;
+    }
+
+    @Field
+    public void setBucket(String bucket) {
+        this.bucket = bucket;
+    }
+
+    /**
+     * Whether or not to extract user metadata from the GCS blob
+     *
+     * @param extractUserMetadata
+     */
+    @Field
+    public void setExtractUserMetadata(boolean extractUserMetadata) {
+        this.extractUserMetadata = extractUserMetadata;
+    }
+
+    //TODO: parameterize extracting other blob metadata, eg. md5, crc, etc.
+
+    /**
+     * This initializes the gcs storage client.
+     *
+     * @param params params to use for initialization
+     * @throws TikaConfigException
+     */
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //params have already been set...ignore them
+        //TODO -- add other params to the builder as needed
+        storage = StorageOptions.newBuilder().setProjectId(projectId).build().getService();
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        mustNotBeEmpty("bucket", this.bucket);
+        mustNotBeEmpty("projectId", this.projectId);
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java
new file mode 100644
index 0000000..35aabbe
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.s3;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.fetcher.Fetcher;
+import org.apache.tika.pipes.fetcher.FetcherManager;
+
+@Disabled("write actual unit tests")
+public class TestGCSFetcher {
+
+    private static final String FETCH_STRING = "testExtraSpaces.pdf";
+    private static Path outputFile;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        outputFile = Files.createTempFile("tika-test", ".pdf");
+    }
+
+    @AfterAll
+    public static void tearDown() throws Exception {
+        Files.delete(outputFile);
+    }
+
+    @Test
+    public void testConfig() throws Exception {
+        FetcherManager fetcherManager = FetcherManager.load(
+                Paths.get(this.getClass().getResource("/tika-config-gcs.xml").toURI()));
+        Fetcher fetcher = fetcherManager.getFetcher("gcs");
+        Metadata metadata = new Metadata();
+        try (InputStream is = fetcher.fetch(FETCH_STRING, metadata)) {
+            Files.copy(is, outputFile, StandardCopyOption.REPLACE_EXISTING);
+        }
+        assertEquals(20743, Files.size(outputFile));
+    }
+}
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml
new file mode 100644
index 0000000..eee110d
--- /dev/null
+++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <!-- Test configuration with a single GCS fetcher; TestGCSFetcher looks it up
+         by the name "gcs". -->
+    <fetchers>
+        <fetcher class="org.apache.tika.pipes.fetcher.gcs.GCSFetcher">
+            <params>
+                <name>gcs</name>
+                <!-- NOTE(review): this value looks like a project display name rather than
+                     a project id; confirm which one GCSFetcher expects. -->
+                <projectId>My First Project</projectId>
+                <bucket>tika-tallison-test-bucket</bucket>
+            </params>
+        </fetcher>
+    </fetchers>
+</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-pipes-iterators/pom.xml b/tika-pipes/tika-pipes-iterators/pom.xml
index 72a5912..337147d 100644
--- a/tika-pipes/tika-pipes-iterators/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/pom.xml
@@ -38,5 +38,6 @@
     <module>tika-pipes-iterator-jdbc</module>
     <module>tika-pipes-iterator-s3</module>
     <module>tika-pipes-iterator-solr</module>
+    <module>tika-pipes-iterator-gcs</module>
   </modules>
 </project>
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/pom.xml b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/pom.xml
new file mode 100644
index 0000000..5b37973
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/pom.xml
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-pipes-iterators</artifactId>
+    <version>2.1.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>tika-pipes-iterator-gcs</artifactId>
+
+  <name>Apache Tika Fetch Iterator - Google Cloud Storage</name>
+  <url>https://tika.apache.org/</url>
+
+  <dependencies>
+    <!-- tika-core is declared with provided scope: it is needed to compile this module
+         but is expected to be supplied by the environment that runs it. -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <!-- Google Cloud Storage client used by GCSPipesIterator. The version property is
+         not defined in this file; presumably it comes from tika-parent. -->
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-storage</artifactId>
+      <version>${google.cloud.version}</version>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifestEntries>
+              <!-- Module name mirrors this module's base package
+                   (org.apache.tika.pipes.pipesiterator.gcs), not the S3 iterator's. -->
+              <Automatic-Module-Name>org.apache.tika.pipes.pipesiterator.gcs</Automatic-Module-Name>
+            </manifestEntries>
+          </archive>
+        </configuration>
+        <executions>
+          <!-- Also package the test classes as a test-jar. -->
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <!-- Builds a shaded jar for this module during the package phase. -->
+      <plugin>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>${maven.shade.version}</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <createDependencyReducedPom>
+                false
+              </createDependencyReducedPom>
+              <!-- Drop top-level META-INF entries and LICENSE.txt/NOTICE.txt from every
+                   shaded artifact; the transformers below add this module's own
+                   LICENSE, NOTICE and DEPENDENCIES files back in. -->
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*</exclude>
+                    <exclude>LICENSE.txt</exclude>
+                    <exclude>NOTICE.txt</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/LICENSE</resource>
+                  <file>target/classes/META-INF/LICENSE</file>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/NOTICE</resource>
+                  <file>target/classes/META-INF/NOTICE</file>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/DEPENDENCIES</resource>
+                  <file>target/classes/META-INF/DEPENDENCIES</file>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+  </build>
+</project>
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java
new file mode 100644
index 0000000..a9d052b
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.pipesiterator.gcs;
+
+import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+
+import com.google.api.gax.paging.Page;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.FetchEmitTuple;
+import org.apache.tika.pipes.HandlerConfig;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+import org.apache.tika.pipes.pipesiterator.PipesIterator;
+import org.apache.tika.utils.StringUtils;
+
+/**
+ * {@link PipesIterator} that lists the blobs of a Google Cloud Storage bucket and
+ * enqueues one {@link FetchEmitTuple} per blob whose size is not zero.
+ * <p>
+ * {@code bucket} and {@code projectId} must be set; {@code prefix} is optional and, when
+ * not blank, is passed to the listing call as a prefix option.
+ */
+public class GCSPipesIterator extends PipesIterator implements Initializable {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(GCSPipesIterator.class);
+
+    /** GCS client; created in {@link #initialize(Map)}. */
+    private Storage storage;
+
+    private String bucket;
+    private String projectId = "";
+    private String prefix = "";
+
+    @Field
+    public void setBucket(String bucket) {
+        this.bucket = bucket;
+    }
+
+    @Field
+    public void setPrefix(String prefix) {
+        this.prefix = prefix;
+    }
+
+    @Field
+    public void setProjectId(String projectId) {
+        this.projectId = projectId;
+    }
+
+    /**
+     * Builds the GCS client from the configured project id.
+     *
+     * @param params initialization params; not read by this implementation
+     * @throws TikaConfigException declared by the overridden method; not thrown directly here
+     */
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //TODO -- add other params to the builder as needed
+        StorageOptions options = StorageOptions.newBuilder().setProjectId(projectId).build();
+        storage = options.getService();
+    }
+
+    /**
+     * Runs the parent's checks, then checks {@code bucket} and {@code projectId}
+     * with {@code mustNotBeEmpty}.
+     */
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        super.checkInitialization(problemHandler);
+        mustNotBeEmpty("bucket", this.bucket);
+        mustNotBeEmpty("projectId", this.projectId);
+    }
+
+    /**
+     * Lists the bucket and hands every blob with a non-zero size to {@code tryToAdd},
+     * using the blob name as the tuple id, the fetch key and the emit key.
+     */
+    @Override
+    protected void enqueue() throws InterruptedException, IOException, TimeoutException {
+        final String fetcher = getFetcherName();
+        final String emitter = getEmitterName();
+        final long startMillis = System.currentTimeMillis();
+        final HandlerConfig handlerConfig = getHandlerConfig();
+
+        int enqueued = 0;
+        for (Blob blob : listBlobs().iterateAll()) {
+            // Zero-length entries are skipped as a stand-in for "is a directory";
+            // blob.isDirectory() did not appear to work for that purpose.
+            if (blob.getSize() == 0) {
+                continue;
+            }
+            String name = blob.getName();
+            LOGGER.debug("adding ({}) {} in {} ms", enqueued, name,
+                    System.currentTimeMillis() - startMillis);
+            //TODO -- allow user specified metadata as the "id"?
+            tryToAdd(new FetchEmitTuple(name, new FetchKey(fetcher, name),
+                    new EmitKey(emitter, name), new Metadata(), handlerConfig,
+                    getOnParseException()));
+            enqueued++;
+        }
+        LOGGER.info("finished enqueuing {} files in {} ms", enqueued,
+                System.currentTimeMillis() - startMillis);
+    }
+
+    /** Lists the whole bucket, or only the configured prefix when one is set. */
+    private Page<Blob> listBlobs() {
+        return StringUtils.isBlank(prefix)
+                ? storage.list(bucket)
+                : storage.list(bucket, Storage.BlobListOption.prefix(prefix));
+    }
+}
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/java/org/apache/tika/pipes/pipesiterator/gcs/TestGCSPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/java/org/apache/tika/pipes/pipesiterator/gcs/TestGCSPipesIterator.java
new file mode 100644
index 0000000..5fa51ab
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/java/org/apache/tika/pipes/pipesiterator/gcs/TestGCSPipesIterator.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.pipesiterator.gcs;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.pipes.FetchEmitTuple;
+import org.apache.tika.pipes.pipesiterator.PipesIterator;
+import org.apache.tika.pipes.pipesiterator.gcs.GCSPipesIterator;
+
+/**
+ * Drains a {@link GCSPipesIterator} into a bounded queue that is consumed by several
+ * mock fetchers, then checks the total number of tuples the consumers saw.
+ * <p>
+ * Disabled by default. NOTE(review): the expected count of 2 presumably reflects the
+ * contents of the author's test bucket under the "pdfs" prefix -- confirm before enabling.
+ */
+@Disabled("turn into an actual unit test")
+public class TestGCSPipesIterator {
+
+    @Test
+    public void testSimple() throws Exception {
+        GCSPipesIterator it = new GCSPipesIterator();
+        it.setFetcherName("gcs");
+        it.setBucket("tika-tallison-test-bucket");
+        it.setProjectId("My First Project");
+        it.setPrefix("pdfs");
+        it.initialize(Collections.emptyMap());
+        int numConsumers = 6;
+        ArrayBlockingQueue<FetchEmitTuple> queue = new ArrayBlockingQueue<>(10);
+
+        ExecutorService es = Executors.newFixedThreadPool(numConsumers + 1);
+        ExecutorCompletionService<Integer> c = new ExecutorCompletionService<>(es);
+        int completed = 0;
+        // Everything that can throw runs inside the try so the pool is always shut down.
+        try {
+            for (int i = 0; i < numConsumers; i++) {
+                c.submit(new MockFetcher(queue));
+            }
+            for (FetchEmitTuple t : it) {
+                System.out.println(t);
+                // put() blocks while the bounded queue is full; offer() would return
+                // false and silently drop the tuple.
+                queue.put(t);
+            }
+            // One completion marker per consumer; these must not be dropped either,
+            // or a consumer would never finish.
+            for (int i = 0; i < numConsumers; i++) {
+                queue.put(PipesIterator.COMPLETED_SEMAPHORE);
+            }
+            for (int finished = 0; finished < numConsumers; finished++) {
+                Future<Integer> f = c.take();
+                completed += f.get();
+            }
+        } finally {
+            es.shutdownNow();
+        }
+        assertEquals(2, completed);
+
+    }
+
+    /**
+     * Consumer that collects tuples from the shared queue until it sees
+     * {@link PipesIterator#COMPLETED_SEMAPHORE}, then returns how many it collected.
+     */
+    private static class MockFetcher implements Callable<Integer> {
+        private final ArrayBlockingQueue<FetchEmitTuple> queue;
+        private final List<FetchEmitTuple> pairs = new ArrayList<>();
+
+        private MockFetcher(ArrayBlockingQueue<FetchEmitTuple> queue) {
+            this.queue = queue;
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            while (true) {
+                FetchEmitTuple t = queue.poll(1, TimeUnit.HOURS);
+                if (t == null) {
+                    // poll() timed out; do not count the null as a tuple.
+                    throw new IllegalStateException(
+                            "timed out waiting for a tuple or the completion marker");
+                }
+                if (t == PipesIterator.COMPLETED_SEMAPHORE) {
+                    return pairs.size();
+                }
+                pairs.add(t);
+            }
+        }
+    }
+}
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/resources/log4j.properties b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/resources/log4j.properties
new file mode 100644
index 0000000..2b2da1a
--- /dev/null
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/test/resources/log4j.properties
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Root logger: level (debug, info, warn, error, fatal) followed by the appender name(s)
+log4j.rootLogger=info,stderr
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+log4j.appender.stderr.layout.ConversionPattern=%-5p [%t]: %m%n