You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/02/23 14:57:21 UTC

[tika] branch TIKA-3304 updated: allow instance credentials in s3 components

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3304
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/TIKA-3304 by this push:
     new 3eefaee  allow instance credentials in s3 components
3eefaee is described below

commit 3eefaeea4cc3c12cd0e7e82d791597d151acc01f
Author: tballison <ta...@apache.org>
AuthorDate: Tue Feb 23 09:57:08 2021 -0500

    allow instance credentials in s3 components
---
 tika-pipes/tika-emitters/tika-emitter-s3/pom.xml   |  9 ++++++
 .../apache/tika/pipes/emitter/s3/S3Emitter.java    | 32 +++++++++++++++++---
 .../tika-fetch-iterator-s3/pom.xml                 |  9 ++++++
 .../pipes/fetchiterator/s3/S3FetchIterator.java    | 34 +++++++++++++++++-----
 tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml   |  9 ++++++
 .../apache/tika/pipes/fetcher/s3/S3Fetcher.java    | 28 ++++++++++++++----
 6 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
index 62a79f0..7b7def3 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
@@ -60,9 +60,18 @@
                     <groupId>com.fasterxml.jackson.core</groupId>
                     <artifactId>jackson-databind</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
         <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>${commons.codec.version}</version>
+        </dependency>
+        <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-databind</artifactId>
             <version>${jackson.version}</version>
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java b/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
index cb97d20..4cae5c9 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
@@ -17,10 +17,13 @@
 package org.apache.tika.pipes.emitter.s3;
 
 import com.amazonaws.SdkClientException;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.auth.profile.ProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.model.ObjectMetadata;
+import org.apache.http.client.CredentialsProvider;
 import org.apache.tika.config.Field;
 import org.apache.tika.config.Initializable;
 import org.apache.tika.config.InitializableProblemHandler;
@@ -67,6 +70,8 @@ import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
  *                  &lt;!-- required --&gt;
  *                  &lt;param name="region" type="string"&gt;us-east-1&lt;/param&gt;
  *                  &lt;!-- required --&gt;
+ *                  &lt;param name="credentialsProvider" type="string"&gt;(profile|instance)&lt;/param&gt;
+ *                  &lt;!-- required if credentialsProvider=profile --&gt;
  *                  &lt;param name="profile" type="string"&gt;my-profile&lt;/param&gt;
  *                  &lt;!-- required --&gt;
  *                  &lt;param name="bucket" type="string"&gt;my-bucket&lt;/param&gt;
@@ -88,6 +93,7 @@ public class S3Emitter extends AbstractEmitter implements Initializable, StreamE
     private String region;
     private String profile;
     private String bucket;
+    private String credentialsProvider;
     private String fileExtension = "json";
     private boolean spoolToTemp = true;
     private String prefix = null;
@@ -222,6 +228,14 @@ public class S3Emitter extends AbstractEmitter implements Initializable, StreamE
         }
     }
 
+    @Field
+    public void setCredentialsProvider(String credentialsProvider) {
+        if (! credentialsProvider.equals("profile") && ! credentialsProvider.equals("instance")) {
+            throw new IllegalArgumentException("credentialsProvider must be either 'profile' or instance'");
+        }
+        this.credentialsProvider = credentialsProvider;
+    }
+
     /**
      * If you want to customize the output file's file extension.
      * Do not include the "."
@@ -232,20 +246,30 @@ public class S3Emitter extends AbstractEmitter implements Initializable, StreamE
         this.fileExtension = fileExtension;
     }
 
+
+
     @Override
     public void initialize(Map<String, Param> params) throws TikaConfigException {
-        //params have already been set
-        //ignore them
+        //params have already been set...ignore them
+        AWSCredentialsProvider provider = null;
+        if ("instance".equals(credentialsProvider)) {
+            provider = InstanceProfileCredentialsProvider.getInstance();
+        } else if ("profile".equals(credentialsProvider)){
+            provider = new ProfileCredentialsProvider(profile);
+        } else {
+            throw new TikaConfigException("credentialsProvider must be set and " +
+                    "must be either 'instance' or 'profile'");
+        }
+
         s3Client = AmazonS3ClientBuilder.standard()
                 .withRegion(region)
-                .withCredentials(new ProfileCredentialsProvider(profile))
+                .withCredentials(provider)
                 .build();
     }
 
     @Override
     public void checkInitialization(InitializableProblemHandler problemHandler) throws TikaConfigException {
         mustNotBeEmpty("bucket", this.bucket);
-        mustNotBeEmpty("profile", this.profile);
         mustNotBeEmpty("region", this.region);
     }
 
diff --git a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
index f8b2424..ef8854d 100644
--- a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
+++ b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
@@ -57,9 +57,18 @@
                     <groupId>com.fasterxml.jackson.core</groupId>
                     <artifactId>jackson-databind</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
         <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>${commons.codec.version}</version>
+        </dependency>
+        <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-databind</artifactId>
             <version>${jackson.version}</version>
diff --git a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
index dd14463..15fb849 100644
--- a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
+++ b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.pipes.fetchiterator.s3;
 
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.auth.profile.ProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@@ -44,8 +46,9 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
 
 
     private static final Logger LOGGER = LoggerFactory.getLogger(S3FetchIterator.class);
-    private String s3PathPrefix = "";
+    private String prefix = "";
     private String region;
+    private String credentialsProvider;
     private String profile;
     private String bucket;
 
@@ -67,18 +70,34 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
     }
 
     @Field
-    public void setS3PathPrefix(String s3PathPrefix) {
-        this.s3PathPrefix = s3PathPrefix;
+    public void setPrefix(String prefix) {
+        this.prefix = prefix;
+    }
+
+    @Field
+    public void setCredentialsProvider(String credentialsProvider) {
+        if (! credentialsProvider.equals("profile") && ! credentialsProvider.equals("instance")) {
+            throw new IllegalArgumentException("credentialsProvider must be either 'profile' or instance'");
+        }
+        this.credentialsProvider = credentialsProvider;
     }
 
     @Override
     public void initialize(Map<String, Param> params) throws TikaConfigException {
-        //params have already been set
-        //ignore them
+        //params have already been set...ignore them
+        AWSCredentialsProvider provider = null;
+        if ("instance".equals(credentialsProvider)) {
+            provider = InstanceProfileCredentialsProvider.getInstance();
+        } else if ("profile".equals(credentialsProvider)){
+            provider = new ProfileCredentialsProvider(profile);
+        } else {
+            throw new TikaConfigException("credentialsProvider must be set and " +
+                    "must be either 'instance' or 'profile'");
+        }
 
         s3Client = AmazonS3ClientBuilder.standard()
                 .withRegion(region)
-                .withCredentials(new ProfileCredentialsProvider(profile))
+                .withCredentials(provider)
                 .build();
     }
 
@@ -87,7 +106,6 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
             throws TikaConfigException {
         super.checkInitialization(problemHandler);
         mustNotBeEmpty("bucket", this.bucket);
-        mustNotBeEmpty("profile", this.profile);
         mustNotBeEmpty("region", this.region);
     }
 
@@ -96,7 +114,7 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
         String fetcherName = getFetcherName();
         long start = System.currentTimeMillis();
         int count = 0;
-        for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, s3PathPrefix)) {
+        for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, prefix)) {
 
             long elapsed = System.currentTimeMillis() - start;
             LOGGER.debug("adding ({}) {} in {} ms", count, summary.getKey(),
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
index 158e701..7d8288c 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
+++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
@@ -47,8 +47,17 @@
                     <groupId>com.fasterxml.jackson.core</groupId>
                     <artifactId>jackson-databind</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>${commons.codec.version}</version>
+        </dependency>
 
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
index 217881e..200e567 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.pipes.fetcher.s3;
 
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.auth.profile.ProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@@ -52,6 +54,7 @@ public class S3Fetcher extends AbstractFetcher implements Initializable {
     private String region;
     private String bucket;
     private String profile;
+    private String credentialsProvider;
     private boolean extractUserMetadata = true;
     private AmazonS3 s3Client;
     private boolean spoolToTemp = true;
@@ -143,21 +146,36 @@ public class S3Fetcher extends AbstractFetcher implements Initializable {
         this.extractUserMetadata = extractUserMetadata;
     }
 
+    @Field
+    public void setCredentialsProvider(String credentialsProvider) {
+        if (! credentialsProvider.equals("profile") && ! credentialsProvider.equals("instance")) {
+            throw new IllegalArgumentException("credentialsProvider must be either 'profile' or instance'");
+        }
+        this.credentialsProvider = credentialsProvider;
+    }
+
     @Override
     public void initialize(Map<String, Param> params) throws TikaConfigException {
-        //params have already been set
-        //ignore them
+        //params have already been set...ignore them
+        AWSCredentialsProvider provider = null;
+        if ("instance".equals(credentialsProvider)) {
+            provider = InstanceProfileCredentialsProvider.getInstance();
+        } else if ("profile".equals(credentialsProvider)){
+            provider = new ProfileCredentialsProvider(profile);
+        } else {
+            throw new TikaConfigException("credentialsProvider must be set and " +
+                    "must be either 'instance' or 'profile'");
+        }
+
         s3Client = AmazonS3ClientBuilder.standard()
                 .withRegion(region)
-                .withCredentials(new ProfileCredentialsProvider(profile))
+                .withCredentials(provider)
                 .build();
     }
 
     @Override
     public void checkInitialization(InitializableProblemHandler problemHandler) throws TikaConfigException {
         mustNotBeEmpty("bucket", this.bucket);
-        mustNotBeEmpty("profile", this.profile);
         mustNotBeEmpty("region", this.region);
-
     }
 }