You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2013/01/27 20:17:35 UTC

svn commit: r1439143 - in /tika/trunk: CHANGES.txt tika-server/src/main/java/org/apache/tika/server/TikaResource.java tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java

Author: mattmann
Date: Sun Jan 27 19:17:35 2013
New Revision: 1439143

URL: http://svn.apache.org/viewvc?rev=1439143&view=rev
Log:
Apply patch from Raimund Merkert and Chris Mattmann for TIKA-1047: Provide a JAX-RS endpoint that detects only the media type.

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1439143&r1=1439142&r2=1439143&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sun Jan 27 19:17:35 2013
@@ -1,5 +1,9 @@
 Release 1.4 Current Development
 
+  * Added a new end-point to Tika's JAX-RS REST server that only detects
+    the media-type based on a small portion of the document submitted
+    (TIKA-1047).
+
   * RTF: Ordered and unordered lists are now extracted (TIKA-1062).
 
 Release 1.3 - 01/19/2013

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1439143&r1=1439142&r2=1439143&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Sun Jan 27 19:17:35 2013
@@ -36,8 +36,11 @@ import org.apache.tika.sax.BodyContentHa
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import javax.mail.internet.ContentDisposition;
+import javax.mail.internet.ParseException;
 import javax.ws.rs.*;
 import javax.ws.rs.core.*;
+
 import java.io.*;
 import java.util.List;
 import java.util.Map;
@@ -79,10 +82,37 @@ public class TikaResource {
     return parser;
   }
 
-  public static void fillMetadata(AutoDetectParser parser, Metadata metadata, HttpHeaders httpHeaders) {
+  public static String detectFilename(HttpHeaders httpHeaders) {
+
+    List<String> disposition = httpHeaders.getRequestHeader("Content-Disposition");
+    if (disposition != null && !disposition.isEmpty()) {
+      try {
+        ContentDisposition c = new ContentDisposition(disposition.get(0));
+
+        // only support "attachment" dispositions
+        if ("attachment".equals(c.getDisposition())) {
+          String fn = c.getParameter("filename");
+          if (fn != null) {
+            return fn;
+          }
+        }
+      } catch (ParseException e) {
+        // not a valid content-disposition field
+      }
+    }
+
+    // this really should not be used, since it's not an official field
     List<String> fileName = httpHeaders.getRequestHeader("File-Name");
-    if (fileName!=null && !fileName.isEmpty()) {
-      metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName.get(0));
+    if (fileName != null && !fileName.isEmpty()) {
+      return fileName.get(0);
+    }
+    return null;
+  }
+
+  public static void fillMetadata(AutoDetectParser parser, Metadata metadata, HttpHeaders httpHeaders) {
+    String fileName = detectFilename(httpHeaders);
+    if (fileName != null) {
+      metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
     }
 
     javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1439143&r1=1439142&r2=1439143&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Sun Jan 27 19:17:35 2013
@@ -17,16 +17,7 @@
 
 package org.apache.tika.server;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
+import org.apache.commons.cli.*;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.cxf.binding.BindingFactoryManager;
@@ -36,6 +27,11 @@ import org.apache.cxf.jaxrs.JAXRSServerF
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
 import org.apache.tika.Tika;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
 public class TikaServerCli {
   private static final Log logger = LogFactory.getLog(TikaServerCli.class);
   public static final int DEFAULT_PORT = 9998;
@@ -77,11 +73,13 @@ public class TikaServerCli {
       }
 
       JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
-      sf.setResourceClasses(MetadataResource.class, TikaResource.class, UnpackerResource.class, TikaVersion.class);
+      sf.setResourceClasses(MetadataEP.class,MetadataResource.class, TikaResource.class, UnpackerResource.class, TikaVersion.class);
 
       List providers = new ArrayList();
       providers.add(new TarWriter());
       providers.add(new ZipWriter());
+      providers.add(new CSVMessageBodyWriter());
+      providers.add(new JSONMessageBodyWriter());
       providers.add(new TikaExceptionMapper());
       providers.add(new SingletonResourceProvider(new MetadataResource()));
       providers.add(new SingletonResourceProvider(new TikaResource()));