You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/05/10 14:40:43 UTC

[tika] branch TIKA-4035 created (now f6a607b1d)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4035
in repository https://gitbox.apache.org/repos/asf/tika.git


      at f6a607b1d TIKA-4035 -- extract file system metadata

This branch includes the following new commits:

     new f6a607b1d TIKA-4035 -- extract file system metadata

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tika] 01/01: TIKA-4035 -- extract file system metadata

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4035
in repository https://gitbox.apache.org/repos/asf/tika.git

commit f6a607b1da4bced8bfffaf8092fd4a0a7d7b63d3
Author: tallison <ta...@apache.org>
AuthorDate: Wed May 10 10:39:34 2023 -0400

    TIKA-4035 -- extract file system metadata
---
 .../java/org/apache/tika/metadata/FileSystem.java  | 30 ++++++++++++++++++
 .../tika/pipes/fetcher/fs/FileSystemFetcher.java   | 37 ++++++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/FileSystem.java b/tika-core/src/main/java/org/apache/tika/metadata/FileSystem.java
new file mode 100644
index 000000000..87afab71c
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/FileSystem.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * Metadata keys for the timestamps that a file system reports for a file.
+ */
+public interface FileSystem {
+
+    // Namespace prepended to every key declared in this interface.
+    String PREFIX = "fs:";
+
+    Property CREATED = Property.externalDate(PREFIX + "created");
+    Property MODIFIED = Property.externalDate(PREFIX + "modified");
+    Property ACCESSED = Property.externalDate(PREFIX + "accessed");
+}
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
index c169aa815..70ad5ab86 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java
@@ -22,6 +22,9 @@ import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileTime;
+import java.util.Date;
 import java.util.Map;
 
 import org.slf4j.Logger;
@@ -34,7 +37,9 @@ import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.FileSystem;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.pipes.fetcher.AbstractFetcher;
 
@@ -45,6 +50,8 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable
     //Warning! basePath can be null!
     private Path basePath = null;
 
+    private boolean extractFileSystemMetadata = false;
+
     static boolean isDescendant(Path root, Path descendant) {
         return descendant.toAbsolutePath().normalize()
                 .startsWith(root.toAbsolutePath().normalize());
@@ -70,6 +77,7 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable
         }
 
         metadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey);
+        updateFileSystemMetadata(p, metadata);
 
         if (!Files.isRegularFile(p)) {
             if (basePath != null && !Files.isDirectory(basePath)) {
@@ -82,6 +90,24 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable
         return TikaInputStream.get(p, metadata);
     }
 
+    // Copies p's file times into metadata; skips non-regular paths so fetch() reports them.
+    private void updateFileSystemMetadata(Path p, Metadata metadata) throws IOException {
+        if (extractFileSystemMetadata && Files.isRegularFile(p)) {
+            BasicFileAttributes attrs = Files.readAttributes(p, BasicFileAttributes.class);
+            updateFileTime(FileSystem.CREATED, attrs.creationTime(), metadata);
+            updateFileTime(FileSystem.MODIFIED, attrs.lastModifiedTime(), metadata);
+            updateFileTime(FileSystem.ACCESSED, attrs.lastAccessTime(), metadata);
+            //TODO extract owner or group?
+        }
+    }
+
+    // Stores fileTime under property as a millisecond-precision Date; null times are skipped.
+    private void updateFileTime(Property property, FileTime fileTime, Metadata metadata) {
+        if (fileTime != null) {
+            metadata.set(property, new Date(fileTime.toMillis()));
+        }
+    }
+
     /**
      *
      * @return the basePath or <code>null</code> if no base path was set
@@ -102,6 +128,17 @@ public class FileSystemFetcher extends AbstractFetcher implements Initializable
         this.basePath = Paths.get(basePath);
     }
 
+    /**
+     * Controls whether the file's created, modified and accessed times are added to the
+     * metadata when a file is fetched. The default is <code>false</code>.
+     *
+     * @param extractFileSystemMetadata <code>true</code> to extract file system metadata
+     */
+    @Field
+    public void setExtractFileSystemMetadata(boolean extractFileSystemMetadata) {
+        this.extractFileSystemMetadata = extractFileSystemMetadata;
+    }
+
     @Override
     public void initialize(Map<String, Param> params) throws TikaConfigException {
         //no-op