You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/09/07 13:05:04 UTC
[tika] 01/02: TIKA-3831 -- client lock should not be static
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 96a27141cc6646f5d5c56ec289eb72ecdc49ebfd
Author: tballison <ta...@apache.org>
AuthorDate: Wed Sep 7 08:52:30 2022 -0400
TIKA-3831 -- client lock should not be static
---
.../org/apache/tika/pipes/fetcher/s3/S3Fetcher.java | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
index 4e2c641f9..e05c067fe 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
@@ -55,13 +55,12 @@ import org.apache.tika.utils.StringUtils;
* Fetches files from s3. Example file: s3://my_bucket/path/to/my_file.pdf
* The bucket must be specified via the tika-config or before
* initialization, and the fetch key is "path/to/my_file.pdf".
- * This will parse the bucket out of that string and retrieve the path.
*/
public class S3Fetcher extends AbstractFetcher implements Initializable, RangeFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(S3Fetcher.class);
private static final String PREFIX = "s3";
- private static final Object[] LOCK = new Object[0];
+ private final Object[] clientLock = new Object[0];
private String region;
private String bucket;
private String profile;
@@ -106,7 +105,11 @@ public class S3Fetcher extends AbstractFetcher implements Initializable, RangeFe
initialize(new HashMap<>());
}
try {
- return _fetch(theFetchKey, metadata, startRange, endRange);
+ long start = System.currentTimeMillis();
+ InputStream is = _fetch(theFetchKey, metadata, startRange, endRange);
+ long elapsed = System.currentTimeMillis() - start;
+ LOGGER.debug("total to fetch {}", elapsed);
+ return is;
} catch (AmazonClientException e ) {
//TODO -- filter exceptions -- if the file doesn't exist, don't retry
LOGGER.warn("client exception fetching on retry=" + tries, e);
@@ -124,15 +127,17 @@ public class S3Fetcher extends AbstractFetcher implements Initializable, RangeFe
Long startRange, Long endRange) throws IOException {
TemporaryResources tmp = null;
try {
+ long start = System.currentTimeMillis();
GetObjectRequest objectRequest = new GetObjectRequest(bucket, fetchKey);
if (startRange != null && endRange != null
&& startRange > -1 && endRange > -1) {
objectRequest.withRange(startRange, endRange);
}
S3Object s3Object = null;
- synchronized (LOCK) {
+ synchronized (clientLock) {
s3Object = s3Client.getObject(objectRequest);
}
+
if (extractUserMetadata) {
for (Map.Entry<String, String> e : s3Object.getObjectMetadata().getUserMetadata()
.entrySet()) {
@@ -142,13 +147,12 @@ public class S3Fetcher extends AbstractFetcher implements Initializable, RangeFe
if (!spoolToTemp) {
return TikaInputStream.get(s3Object.getObjectContent());
} else {
- long start = System.currentTimeMillis();
+ start = System.currentTimeMillis();
tmp = new TemporaryResources();
Path tmpPath = tmp.createTempFile();
Files.copy(s3Object.getObjectContent(), tmpPath, StandardCopyOption.REPLACE_EXISTING);
TikaInputStream tis = TikaInputStream.get(tmpPath, metadata, tmp);
- long elapsed = System.currentTimeMillis() - start;
- LOGGER.debug("took {} ms to copy to local tmp file", elapsed);
+ LOGGER.debug("took {} ms to copy to local tmp file", System.currentTimeMillis() - start);
return tis;
}
} catch (Throwable e) {
@@ -243,7 +247,7 @@ public class S3Fetcher extends AbstractFetcher implements Initializable, RangeFe
ClientConfiguration clientConfiguration = new ClientConfiguration()
.withMaxConnections(maxConnections);
try {
- synchronized (LOCK) {
+ synchronized (clientLock) {
s3Client = AmazonS3ClientBuilder.standard()
.withClientConfiguration(clientConfiguration)
.withRegion(region)