You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/12/16 20:53:05 UTC
[tika] 02/02: TIKA-3242 -- allow users to pass metadata via httpheaders to tika-server
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
commit a8293fee09ca7c5b922d50fe9d6b6a00696d074e
Author: tallison <ta...@apache.org>
AuthorDate: Wed Dec 16 15:52:40 2020 -0500
TIKA-3242 -- allow users to pass metadata via httpheaders to tika-server
---
.../tika/server/classic/DetectorResourceTest.java | 2 +-
.../classic/RecursiveMetadataResourceTest.java | 20 +++++++++++++++++
.../tika/server/core/resource/TikaResource.java | 13 ++++++++++-
.../apache/tika/server/core/TikaResourceTest.java | 25 +++++++++++++++++-----
4 files changed, 53 insertions(+), 7 deletions(-)
diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
index d531c2e..8f1afb5 100644
--- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/DetectorResourceTest.java
@@ -80,7 +80,7 @@ public class DetectorResourceTest extends CXFTestBase {
@Test
public void testDetectCsvNoExt() throws Exception {
- String url = endPoint + DETECT_STREAM_PATH;
+
Response response = WebClient
.create(endPoint + DETECT_STREAM_PATH)
.type("text/csv")
diff --git a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
index 3a32f13..a9be9a6 100644
--- a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
+++ b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/RecursiveMetadataResourceTest.java
@@ -19,11 +19,14 @@ package org.apache.tika.server.classic;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.tika.TikaTest.assertNotContained;
+import static org.apache.tika.TikaTest.debug;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import javax.ws.rs.core.MultivaluedHashMap;
+import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import java.io.InputStream;
@@ -138,6 +141,23 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
}
@Test
+ public void testHeaders() throws Exception {
+ MultivaluedMap<String, String> map = new MultivaluedHashMap<>();
+ map.addAll("meta_mymeta", "first", "second", "third");
+
+ Response response = WebClient
+ .create(endPoint + META_PATH)
+ .headers(map)
+ .accept("application/json")
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+
+ Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
+ List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+ assertEquals("first,second,third", metadataList.get(0).get("mymeta"));
+ }
+
+ @Test
public void testPasswordProtected() throws Exception {
Response response = WebClient
.create(endPoint + META_PATH)
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 627a12a..c8ee4da 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -72,7 +72,9 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
+import java.util.List;
import java.util.Locale;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -82,7 +84,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
public class TikaResource {
private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-/_+\\.A-Z0-9 ]+$");
-
+ private static final String META_PREFIX = "meta_";
public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n";
@@ -309,6 +311,15 @@ public class TikaResource {
}
});
}
+
+ for (Map.Entry<String, List<String>> e : httpHeaders.entrySet()) {
+ if (e.getKey().startsWith(META_PREFIX)) {
+ String tikaKey = e.getKey().substring(META_PREFIX.length());
+ for (String value: e.getValue()) {
+ metadata.add(tikaKey, value);
+ }
+ }
+ }
}
public static void setDetector(Parser p, Detector detector) {
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
index d8f93ce..18b859a 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceTest.java
@@ -22,19 +22,16 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.server.core.resource.TikaResource;
-import org.junit.Ignore;
import org.junit.Test;
-import javax.ws.rs.ProcessingException;
+import javax.ws.rs.core.MultivaluedHashMap;
+import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
-import static org.apache.cxf.helpers.HttpHeaderHelper.CONTENT_ENCODING;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
public class TikaResourceTest extends CXFTestBase {
@@ -68,6 +65,24 @@ public class TikaResourceTest extends CXFTestBase {
getStringFromInputStream((InputStream) response.getEntity()));
}
+ @Test
+ public void testHeaders() throws Exception {
+ MultivaluedMap<String, String> map = new MultivaluedHashMap<>();
+ map.addAll("meta_mymeta", "first", "second", "third");
+ Response response = WebClient
+ .create(endPoint + TIKA_PATH)
+ .headers(map)
+ .accept("text/xml")
+ .put(ClassLoader
+ .getSystemResourceAsStream(TEST_HELLO_WORLD));
+ String xml = getStringFromInputStream((InputStream) response.getEntity());
+ //can't figure out why these values are comma-delimited, rather
+ //than a true list...is this really the expected behavior?
+ //this at least tests that the pass-through, basically works...
+ //except for multi-values... :D
+ assertContains("<meta name=\"mymeta\" content=\"first,second,third\"/>",
+ xml);
+ }
@Test
public void testJAXBAndActivationDependency() {