You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/26 21:57:37 UTC

[tika] branch main updated: TIKA-3776 -- don't overwrite file name if it exists in the metadata already. The goal is not to overwrite the file name with a temp file name.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 636adeaad TIKA-3776 -- don't overwrite file name if it exists in the metadata already.  The goal is not to overwrite the file name with a temp file name.
636adeaad is described below

commit 636adeaadaf3bea4c54847aeebbd0c9ebc9b7065
Author: tallison <ta...@apache.org>
AuthorDate: Thu May 26 17:57:22 2022 -0400

    TIKA-3776 -- don't overwrite file name if it exists in the metadata already.  The goal is not to overwrite the file name with a temp file name.
---
 .../main/java/org/apache/tika/io/TikaInputStream.java    | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
index 331edb1f0..f10dc4ebc 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
@@ -42,6 +42,7 @@ import org.apache.commons.io.input.TaggedInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.utils.StringUtils;
 
 /**
  * Input stream with extended capabilities. The purpose of this class is
@@ -321,6 +322,9 @@ public class TikaInputStream extends TaggedInputStream {
      * Creates a TikaInputStream from the file at the given path. The file name
      * and length are stored as input metadata in the given metadata instance.
      * <p>
+     * A non-blank {@link TikaCoreProperties#RESOURCE_NAME_KEY} already present in the
+     * metadata object is kept; it is not overwritten with the path's file name.
+     * <p>
      * Note that you must always explicitly close the returned stream to
      * prevent leaking open file handles.
      *
@@ -330,7 +334,9 @@ public class TikaInputStream extends TaggedInputStream {
      * @throws IOException if an I/O error occurs
      */
     public static TikaInputStream get(Path path, Metadata metadata) throws IOException {
-        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+        if (StringUtils.isBlank(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY))) {
+            metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+        }
         metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(path)));
         return new TikaInputStream(path);
     }
@@ -338,7 +344,9 @@ public class TikaInputStream extends TaggedInputStream {
     public static TikaInputStream get(Path path, Metadata metadata, TemporaryResources tmp)
             throws IOException {
         long length = Files.size(path);
-        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+        if (StringUtils.isBlank(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY))) {
+            metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
+        }
         metadata.set(Metadata.CONTENT_LENGTH, Long.toString(length));
         return new TikaInputStream(path, tmp, length);
     }
@@ -377,7 +385,9 @@ public class TikaInputStream extends TaggedInputStream {
      */
     @Deprecated
     public static TikaInputStream get(File file, Metadata metadata) throws FileNotFoundException {
-        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, file.getName());
+        if (StringUtils.isBlank(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY))) {
+            metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, file.getName());
+        }
         metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.length()));
         return new TikaInputStream(file);
     }