You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/02/23 14:57:21 UTC
[tika] branch TIKA-3304 updated: allow instance credentials in s3 components
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3304
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-3304 by this push:
new 3eefaee allow instance credentials in s3 components
3eefaee is described below
commit 3eefaeea4cc3c12cd0e7e82d791597d151acc01f
Author: tballison <ta...@apache.org>
AuthorDate: Tue Feb 23 09:57:08 2021 -0500
allow instance credentials in s3 components
---
tika-pipes/tika-emitters/tika-emitter-s3/pom.xml | 9 ++++++
.../apache/tika/pipes/emitter/s3/S3Emitter.java | 32 +++++++++++++++++---
.../tika-fetch-iterator-s3/pom.xml | 9 ++++++
.../pipes/fetchiterator/s3/S3FetchIterator.java | 34 +++++++++++++++++-----
tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml | 9 ++++++
.../apache/tika/pipes/fetcher/s3/S3Fetcher.java | 28 ++++++++++++++----
6 files changed, 104 insertions(+), 17 deletions(-)
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
index 62a79f0..7b7def3 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
@@ -60,9 +60,18 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${commons.codec.version}</version>
+ </dependency>
+ <dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java b/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
index cb97d20..4cae5c9 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/src/main/java/org/apache/tika/pipes/emitter/s3/S3Emitter.java
@@ -17,10 +17,13 @@
package org.apache.tika.pipes.emitter.s3;
import com.amazonaws.SdkClientException;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectMetadata;
+import org.apache.http.client.CredentialsProvider;
import org.apache.tika.config.Field;
import org.apache.tika.config.Initializable;
import org.apache.tika.config.InitializableProblemHandler;
@@ -67,6 +70,8 @@ import static org.apache.tika.config.TikaConfig.mustNotBeEmpty;
* <!-- required -->
* <param name="region" type="string">us-east-1</param>
* <!-- required -->
+ * <param name="credentialsProvider" type="string">(profile|instance)</param>
+ * <!-- required if credentialsProvider=profile-->
* <param name="profile" type="string">my-profile</param>
* <!-- required -->
* <param name="bucket" type="string">my-bucket</param>
@@ -88,6 +93,7 @@ public class S3Emitter extends AbstractEmitter implements Initializable, StreamE
private String region;
private String profile;
private String bucket;
+ private String credentialsProvider;
private String fileExtension = "json";
private boolean spoolToTemp = true;
private String prefix = null;
@@ -222,6 +228,14 @@ public class S3Emitter extends AbstractEmitter implements Initializable, StreamE
}
}
+ @Field
+ public void setCredentialsProvider(String credentialsProvider) {
+ if (!"profile".equals(credentialsProvider) && !"instance".equals(credentialsProvider)) { // constant-first equals: null is rejected with the same message
+ throw new IllegalArgumentException("credentialsProvider must be either 'profile' or 'instance'");
+ }
+ this.credentialsProvider = credentialsProvider;
+ }
+
/**
* If you want to customize the output file's file extension.
* Do not include the "."
@@ -232,20 +246,30 @@ public class S3Emitter extends AbstractEmitter implements Initializable, StreamE
this.fileExtension = fileExtension;
}
+
+
@Override
public void initialize(Map<String, Param> params) throws TikaConfigException {
- //params have already been set
- //ignore them
+ //params have already been set...ignore them
+ AWSCredentialsProvider provider = null;
+ if ("instance".equals(credentialsProvider)) {
+ provider = InstanceProfileCredentialsProvider.getInstance();
+ } else if ("profile".equals(credentialsProvider)){
+ provider = new ProfileCredentialsProvider(profile);
+ } else {
+ throw new TikaConfigException("credentialsProvider must be set and " +
+ "must be either 'instance' or 'profile'");
+ }
+
s3Client = AmazonS3ClientBuilder.standard()
.withRegion(region)
- .withCredentials(new ProfileCredentialsProvider(profile))
+ .withCredentials(provider)
.build();
}
@Override
public void checkInitialization(InitializableProblemHandler problemHandler) throws TikaConfigException {
mustNotBeEmpty("bucket", this.bucket);
- mustNotBeEmpty("profile", this.profile);
mustNotBeEmpty("region", this.region);
}
diff --git a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
index f8b2424..ef8854d 100644
--- a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
+++ b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
@@ -57,9 +57,18 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${commons.codec.version}</version>
+ </dependency>
+ <dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
diff --git a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
index dd14463..15fb849 100644
--- a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
+++ b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
@@ -16,6 +16,8 @@
*/
package org.apache.tika.pipes.fetchiterator.s3;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@@ -44,8 +46,9 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
private static final Logger LOGGER = LoggerFactory.getLogger(S3FetchIterator.class);
- private String s3PathPrefix = "";
+ private String prefix = "";
private String region;
+ private String credentialsProvider;
private String profile;
private String bucket;
@@ -67,18 +70,34 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
}
@Field
- public void setS3PathPrefix(String s3PathPrefix) {
- this.s3PathPrefix = s3PathPrefix;
+ public void setPrefix(String prefix) {
+ this.prefix = prefix;
+ }
+
+ @Field
+ public void setCredentialsProvider(String credentialsProvider) {
+ if (!"profile".equals(credentialsProvider) && !"instance".equals(credentialsProvider)) { // constant-first equals: null is rejected with the same message
+ throw new IllegalArgumentException("credentialsProvider must be either 'profile' or 'instance'");
+ }
+ this.credentialsProvider = credentialsProvider;
}
@Override
public void initialize(Map<String, Param> params) throws TikaConfigException {
- //params have already been set
- //ignore them
+ //params have already been set...ignore them
+ AWSCredentialsProvider provider = null;
+ if ("instance".equals(credentialsProvider)) {
+ provider = InstanceProfileCredentialsProvider.getInstance();
+ } else if ("profile".equals(credentialsProvider)){
+ provider = new ProfileCredentialsProvider(profile);
+ } else {
+ throw new TikaConfigException("credentialsProvider must be set and " +
+ "must be either 'instance' or 'profile'");
+ }
s3Client = AmazonS3ClientBuilder.standard()
.withRegion(region)
- .withCredentials(new ProfileCredentialsProvider(profile))
+ .withCredentials(provider)
.build();
}
@@ -87,7 +106,6 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
throws TikaConfigException {
super.checkInitialization(problemHandler);
mustNotBeEmpty("bucket", this.bucket);
- mustNotBeEmpty("profile", this.profile);
mustNotBeEmpty("region", this.region);
}
@@ -96,7 +114,7 @@ public class S3FetchIterator extends FetchIterator implements Initializable {
String fetcherName = getFetcherName();
long start = System.currentTimeMillis();
int count = 0;
- for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, s3PathPrefix)) {
+ for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, prefix)) {
long elapsed = System.currentTimeMillis() - start;
LOGGER.debug("adding ({}) {} in {} ms", count, summary.getKey(),
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
index 158e701..7d8288c 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
+++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
@@ -47,8 +47,17 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${commons.codec.version}</version>
+ </dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
index 217881e..200e567 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
@@ -16,6 +16,8 @@
*/
package org.apache.tika.pipes.fetcher.s3;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@@ -52,6 +54,7 @@ public class S3Fetcher extends AbstractFetcher implements Initializable {
private String region;
private String bucket;
private String profile;
+ private String credentialsProvider;
private boolean extractUserMetadata = true;
private AmazonS3 s3Client;
private boolean spoolToTemp = true;
@@ -143,21 +146,36 @@ public class S3Fetcher extends AbstractFetcher implements Initializable {
this.extractUserMetadata = extractUserMetadata;
}
+ @Field
+ public void setCredentialsProvider(String credentialsProvider) {
+ if (!"profile".equals(credentialsProvider) && !"instance".equals(credentialsProvider)) { // constant-first equals: null is rejected with the same message
+ throw new IllegalArgumentException("credentialsProvider must be either 'profile' or 'instance'");
+ }
+ this.credentialsProvider = credentialsProvider;
+ }
+
@Override
public void initialize(Map<String, Param> params) throws TikaConfigException {
- //params have already been set
- //ignore them
+ //params have already been set...ignore them
+ AWSCredentialsProvider provider = null;
+ if ("instance".equals(credentialsProvider)) {
+ provider = InstanceProfileCredentialsProvider.getInstance();
+ } else if ("profile".equals(credentialsProvider)){
+ provider = new ProfileCredentialsProvider(profile);
+ } else {
+ throw new TikaConfigException("credentialsProvider must be set and " +
+ "must be either 'instance' or 'profile'");
+ }
+
s3Client = AmazonS3ClientBuilder.standard()
.withRegion(region)
- .withCredentials(new ProfileCredentialsProvider(profile))
+ .withCredentials(provider)
.build();
}
@Override
public void checkInitialization(InitializableProblemHandler problemHandler) throws TikaConfigException {
mustNotBeEmpty("bucket", this.bucket);
- mustNotBeEmpty("profile", this.profile);
mustNotBeEmpty("region", this.region);
-
}
}