You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2013/01/27 20:17:35 UTC
svn commit: r1439143 - in /tika/trunk: CHANGES.txt
tika-server/src/main/java/org/apache/tika/server/TikaResource.java
tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
Author: mattmann
Date: Sun Jan 27 19:17:35 2013
New Revision: 1439143
URL: http://svn.apache.org/viewvc?rev=1439143&view=rev
Log:
Apply patch from Raimund Merkert and Chris Mattmann for TIKA-1047: Provide a JAX-RS endpoint that only detects the media type.
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1439143&r1=1439142&r2=1439143&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sun Jan 27 19:17:35 2013
@@ -1,5 +1,9 @@
Release 1.4 Current Development
+ * Added a new end-point to Tika's JAX-RS REST server that only detects
+ the media-type based on a small portion of the document submitted
+ (TIKA-1047).
+
* RTF: Ordered and unordered lists are now extracted (TIKA-1062).
Release 1.3 - 01/19/2013
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1439143&r1=1439142&r2=1439143&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Sun Jan 27 19:17:35 2013
@@ -36,8 +36,11 @@ import org.apache.tika.sax.BodyContentHa
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import javax.mail.internet.ContentDisposition;
+import javax.mail.internet.ParseException;
import javax.ws.rs.*;
import javax.ws.rs.core.*;
+
import java.io.*;
import java.util.List;
import java.util.Map;
@@ -79,10 +82,37 @@ public class TikaResource {
return parser;
}
- public static void fillMetadata(AutoDetectParser parser, Metadata metadata, HttpHeaders httpHeaders) {
+ public static String detectFilename(HttpHeaders httpHeaders) {
+
+ List<String> disposition = httpHeaders.getRequestHeader("Content-Disposition");
+ if (disposition != null && !disposition.isEmpty()) {
+ try {
+ ContentDisposition c = new ContentDisposition(disposition.get(0));
+
+ // only support "attachment" dispositions
+ if ("attachment".equals(c.getDisposition())) {
+ String fn = c.getParameter("filename");
+ if (fn != null) {
+ return fn;
+ }
+ }
+ } catch (ParseException e) {
+ // not a valid content-disposition field
+ }
+ }
+
+ // this really should not be used, since it's not an official field
List<String> fileName = httpHeaders.getRequestHeader("File-Name");
- if (fileName!=null && !fileName.isEmpty()) {
- metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName.get(0));
+ if (fileName != null && !fileName.isEmpty()) {
+ return fileName.get(0);
+ }
+ return null;
+ }
+
+ public static void fillMetadata(AutoDetectParser parser, Metadata metadata, HttpHeaders httpHeaders) {
+ String fileName = detectFilename(httpHeaders);
+ if (fileName != null) {
+ metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
}
javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1439143&r1=1439142&r2=1439143&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Sun Jan 27 19:17:35 2013
@@ -17,16 +17,7 @@
package org.apache.tika.server;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
+import org.apache.commons.cli.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.cxf.binding.BindingFactoryManager;
@@ -36,6 +27,11 @@ import org.apache.cxf.jaxrs.JAXRSServerF
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.Tika;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
public class TikaServerCli {
private static final Log logger = LogFactory.getLog(TikaServerCli.class);
public static final int DEFAULT_PORT = 9998;
@@ -77,11 +73,13 @@ public class TikaServerCli {
}
JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
- sf.setResourceClasses(MetadataResource.class, TikaResource.class, UnpackerResource.class, TikaVersion.class);
+ sf.setResourceClasses(MetadataEP.class,MetadataResource.class, TikaResource.class, UnpackerResource.class, TikaVersion.class);
List providers = new ArrayList();
providers.add(new TarWriter());
providers.add(new ZipWriter());
+ providers.add(new CSVMessageBodyWriter());
+ providers.add(new JSONMessageBodyWriter());
providers.add(new TikaExceptionMapper());
providers.add(new SingletonResourceProvider(new MetadataResource()));
providers.add(new SingletonResourceProvider(new TikaResource()));