You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/12/16 20:53:05 UTC

[tika] 02/02: TIKA-3242 -- allow users to pass metadata via httpheaders to tika-server

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a8293fee09ca7c5b922d50fe9d6b6a00696d074e
Author: tallison <ta...@apache.org>
AuthorDate: Wed Dec 16 15:52:40 2020 -0500

    TIKA-3242 -- allow users to pass metadata via httpheaders to tika-server
---
 .../tika/server/classic/DetectorResourceTest.java  |  2 +-
 .../classic/RecursiveMetadataResourceTest.java     | 20 +++++++++++++++++
 .../tika/server/core/resource/TikaResource.java    | 13 ++++++++++-
 .../apache/tika/server/core/TikaResourceTest.java  | 25 +++++++++++++++++-----
 4 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
index d531c2e..8f1afb5 100644
--- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
@@ -80,7 +80,7 @@ public class DetectorResourceTest extends CXFTestBase {
 
     @Test
     public void testDetectCsvNoExt() throws Exception {
-        String url = endPoint + DETECT_STREAM_PATH;
+
         Response response = WebClient
                 .create(endPoint + DETECT_STREAM_PATH)
                 .type("text/csv")
diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
index 3a32f13..a9be9a6 100644
--- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
@@ -19,11 +19,14 @@ package org.apache.tika.server.classic;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.tika.TikaTest.assertNotContained;
+import static org.apache.tika.TikaTest.debug;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
+import javax.ws.rs.core.MultivaluedHashMap;
+import javax.ws.rs.core.MultivaluedMap;
 import javax.ws.rs.core.Response;
 
 import java.io.InputStream;
@@ -138,6 +141,23 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
     }
 
     @Test
+    public void testHeaders() throws Exception {
+        MultivaluedMap<String, String> map = new MultivaluedHashMap<>();
+        map.addAll("meta_mymeta", "first", "second", "third");
+
+        Response response = WebClient
+                .create(endPoint + META_PATH)
+                .headers(map)
+                .accept("application/json")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+        Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+        List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+        assertEquals("first,second,third", metadataList.get(0).get("mymeta"));
+    }
+
+    @Test
     public void testPasswordProtected() throws Exception {
         Response response = WebClient
                 .create(endPoint + META_PATH)
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 627a12a..c8ee4da 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -72,7 +72,9 @@ import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -82,7 +84,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 public class TikaResource {
 
     private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-/_+\\.A-Z0-9 ]+$");
-
+    private static final String META_PREFIX = "meta_";
     public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n";
 
 
@@ -309,6 +311,15 @@ public class TikaResource {
                 }
             });
         }
+
+        for (Map.Entry<String, List<String>> e : httpHeaders.entrySet()) {
+            if (e.getKey().startsWith(META_PREFIX)) {
+                String tikaKey = e.getKey().substring(META_PREFIX.length());
+                for (String value: e.getValue()) {
+                    metadata.add(tikaKey, value);
+                }
+            }
+        }
     }
 
     public static void setDetector(Parser p, Detector detector) {
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
index d8f93ce..18b859a 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
@@ -22,19 +22,16 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
 import org.apache.tika.server.core.resource.TikaResource;
-import org.junit.Ignore;
 import org.junit.Test;
 
-import javax.ws.rs.ProcessingException;
+import javax.ws.rs.core.MultivaluedHashMap;
+import javax.ws.rs.core.MultivaluedMap;
 import javax.ws.rs.core.Response;
 import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.apache.cxf.helpers.HttpHeaderHelper.CONTENT_ENCODING;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 public class TikaResourceTest extends CXFTestBase {
@@ -68,6 +65,24 @@ public class TikaResourceTest extends CXFTestBase {
                 getStringFromInputStream((InputStream) response.getEntity()));
     }
 
+    @Test
+    public void testHeaders() throws Exception {
+        MultivaluedMap<String, String> map = new MultivaluedHashMap<>();
+        map.addAll("meta_mymeta", "first", "second", "third");
+        Response response = WebClient
+                .create(endPoint + TIKA_PATH)
+                .headers(map)
+                .accept("text/xml")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TEST_HELLO_WORLD));
+        String xml = getStringFromInputStream((InputStream) response.getEntity());
+        //Unclear why the values arrive comma-delimited in one string rather than
+        //as a true list -- is this the expected behavior? (Possibly the HTTP
+        //layer merges repeated headers; TODO confirm.) This at least tests that
+        //the pass-through basically works, though not that multi-values stay separate.
+        assertContains("<meta name=\"mymeta\" content=\"first,second,third\"/>",
+                xml);
+    }
 
     @Test
     public void testJAXBAndActivationDependency() {