You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/26 21:57:37 UTC
[tika] branch main updated: TIKA-3776 -- don't overwrite file name if it exists in the metadata already. The goal is not to overwrite the file name with a temp file name.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 636adeaad TIKA-3776 -- don't overwrite file name if it exists in the metadata already. The goal is not to overwrite the file name with a temp file name.
636adeaad is described below
commit 636adeaadaf3bea4c54847aeebbd0c9ebc9b7065
Author: tallison <ta...@apache.org>
AuthorDate: Thu May 26 17:57:22 2022 -0400
TIKA-3776 -- don't overwrite file name if it exists in the metadata already. The goal is not to overwrite the file name with a temp file name.
---
.../main/java/org/apache/tika/io/TikaInputStream.java | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
index 331edb1f0..f10dc4ebc 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
@@ -42,6 +42,7 @@ import org.apache.commons.io.input.TaggedInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.Parser;
+import org.apache.tika.utils.StringUtils;
/**
* Input stream with extended capabilities. The purpose of this class is
@@ -321,6 +322,9 @@ public class TikaInputStream extends TaggedInputStream {
* Creates a TikaInputStream from the file at the given path. The file name
* and length are stored as input metadata in the given metadata instance.
* <p>
+ * If the metadata object already contains a non-blank {@link TikaCoreProperties#RESOURCE_NAME_KEY}
+ * value, that value is kept and is not overwritten with the path's file name.
+ * <p>
* Note that you must always explicitly close the returned stream to
* prevent leaking open file handles.
*
@@ -330,7 +334,9 @@ public class TikaInputStream extends TaggedInputStream {
* @throws IOException if an I/O error occurs
*/
public static TikaInputStream get(Path path, Metadata metadata) throws IOException {
- metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+ if (StringUtils.isBlank(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY))) {
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+ }
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(path)));
return new TikaInputStream(path);
}
@@ -338,7 +344,9 @@ public class TikaInputStream extends TaggedInputStream {
public static TikaInputStream get(Path path, Metadata metadata, TemporaryResources tmp)
throws IOException {
long length = Files.size(path);
- metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+ if (StringUtils.isBlank(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY))) {
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+ }
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(length));
return new TikaInputStream(path, tmp, length);
}
@@ -377,7 +385,9 @@ public class TikaInputStream extends TaggedInputStream {
*/
@Deprecated
public static TikaInputStream get(File file, Metadata metadata) throws FileNotFoundException {
- metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, file.getName());
+ if (StringUtils.isBlank(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY))) {
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, file.getName());
+ }
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.length()));
return new TikaInputStream(file);
}