You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2013/05/27 19:05:02 UTC

svn commit: r1486665 - in /tika/trunk: CHANGES.txt tika-server/src/main/java/org/apache/tika/server/TikaResource.java tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java

Author: mattmann
Date: Mon May 27 17:05:01 2013
New Revision: 1486665

URL: http://svn.apache.org/r1486665
Log:
Patch for TIKA-1127 provided by Ali Mosavian.

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1486665&r1=1486664&r2=1486665&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon May 27 17:05:01 2013
@@ -1,5 +1,8 @@
 Release 1.4 Current Development
 
+  * Improvements to tika-server to allow it to produce text/html and
+    text/xml content (TIKA-1126, TIKA-1127).
+
   * Improvements were made to the Compressor Parser to handle g'zipped files
     that require the decompressConcatenated option set to true (TIKA-1096).
 

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1486665&r1=1486664&r2=1486665&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Mon May 27 17:05:01 2013
@@ -208,75 +208,145 @@ public class TikaResource {
   }
 
 
-    @PUT
-    @Consumes("*/*")
-    @Produces("text/html")
-    public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
-        final AutoDetectParser parser = createParser();
-        final Metadata metadata = new Metadata();
-
-        fillMetadata(parser, metadata, httpHeaders);
-
-        logRequest(logger, info, metadata);
-
-        return new StreamingOutput() {
-            public void write(OutputStream outputStream)
-            throws IOException, WebApplicationException {
-                Writer writer = new OutputStreamWriter(outputStream, "UTF-8");
-                ContentHandler content;
-
-                try {
-                    SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance( );
-                    TransformerHandler handler = factory.newTransformerHandler( );
-                    handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
-                    handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
-                    handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-                    handler.setResult(new StreamResult(writer));
-                    content = new ExpandedTitleContentHandler( handler );
-                }
-                catch ( TransformerConfigurationException e ) {
-                    throw new WebApplicationException( e );
-                }
-
-                TikaInputStream tis = TikaInputStream.get(is);
-
-                try {
-                    tis.getFile();
-                    parser.parse(tis, content, metadata);
-                }
-                catch (SAXException e) {
-                    throw new WebApplicationException(e);
-                }
-                catch (EncryptedDocumentException e) {
-                    logger.warn(String.format(
-                            "%s: Encrypted document",
-                            info.getPath()
-                    ), e);
-                    throw new WebApplicationException(e, Response.status(422).build());
-                }
-                catch (TikaException e) {
-                    logger.warn(String.format(
-                            "%s: Text extraction failed",
-                            info.getPath()
-                    ), e);
-
-                    if (e.getCause()!=null && e.getCause() instanceof WebApplicationException)
-                        throw (WebApplicationException) e.getCause();
-
-                    if (e.getCause()!=null && e.getCause() instanceof IllegalStateException)
-                        throw new WebApplicationException(Response.status(422).build());
-
-                    if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException)
-                        throw new WebApplicationException(Response.status(422).build());
-
-                    throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR);
-                }
-                finally {
-                    tis.close();
-                }
-            }
-        };
-    }
+  @PUT
+  @Consumes("*/*")
+  @Produces("text/html")
+  public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
+      final AutoDetectParser parser = createParser();
+      final Metadata metadata = new Metadata();
+
+      fillMetadata(parser, metadata, httpHeaders);
+
+      logRequest(logger, info, metadata);
+
+      return new StreamingOutput() {
+          public void write(OutputStream outputStream)
+          throws IOException, WebApplicationException {
+              Writer writer = new OutputStreamWriter(outputStream, "UTF-8");
+              ContentHandler content;
+
+              try {
+                  SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance( );
+                  TransformerHandler handler = factory.newTransformerHandler( );
+                  handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
+                  handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
+                  handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+                  handler.setResult(new StreamResult(writer));
+                  content = new ExpandedTitleContentHandler( handler );
+              }
+              catch ( TransformerConfigurationException e ) {
+                  throw new WebApplicationException( e );
+              }
+
+              TikaInputStream tis = TikaInputStream.get(is);
+
+              try {
+                  tis.getFile();
+                  parser.parse(tis, content, metadata);
+              }
+              catch (SAXException e) {
+                  throw new WebApplicationException(e);
+              }
+              catch (EncryptedDocumentException e) {
+                  logger.warn(String.format(
+                          "%s: Encrypted document",
+                          info.getPath()
+                  ), e);
+                  throw new WebApplicationException(e, Response.status(422).build());
+              }
+              catch (TikaException e) {
+                  logger.warn(String.format(
+                          "%s: Text extraction failed",
+                          info.getPath()
+                  ), e);
+
+                  if (e.getCause()!=null && e.getCause() instanceof WebApplicationException)
+                      throw (WebApplicationException) e.getCause();
+
+                  if (e.getCause()!=null && e.getCause() instanceof IllegalStateException)
+                      throw new WebApplicationException(Response.status(422).build());
+
+                  if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException)
+                      throw new WebApplicationException(Response.status(422).build());
+
+                  throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR);
+              }
+              finally {
+                  tis.close();
+              }
+          }
+      };
+  }
+
+  @PUT
+  @Consumes("*/*")
+  @Produces("text/xml")
+  public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
+    final AutoDetectParser parser = createParser();
+    final Metadata metadata = new Metadata();
+
+    fillMetadata(parser, metadata, httpHeaders);
+
+    logRequest(logger, info, metadata);
+
+    return new StreamingOutput() {
+      public void write(OutputStream outputStream)
+        throws IOException, WebApplicationException {
+        Writer writer = new OutputStreamWriter(outputStream, "UTF-8");
+        ContentHandler content;
+
+        try {
+          SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance( );
+          TransformerHandler handler = factory.newTransformerHandler( );
+          handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
+          handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
+          handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+          handler.setResult(new StreamResult(writer));
+          content = new ExpandedTitleContentHandler( handler );
+        }
+        catch ( TransformerConfigurationException e ) {
+          throw new WebApplicationException( e );
+        }
+
+        TikaInputStream tis = TikaInputStream.get(is);
+
+        try {
+          tis.getFile();
+          parser.parse(tis, content, metadata);
+        }
+        catch (SAXException e) {
+          throw new WebApplicationException(e);
+        }
+        catch (EncryptedDocumentException e) {
+          logger.warn(String.format(
+            "%s: Encrypted document",
+            info.getPath()
+          ), e);
+          throw new WebApplicationException(e, Response.status(422).build());
+        }
+        catch (TikaException e) {
+          logger.warn(String.format(
+            "%s: Text extraction failed",
+            info.getPath()
+          ), e);
+
+          if (e.getCause()!=null && e.getCause() instanceof WebApplicationException)
+            throw (WebApplicationException) e.getCause();
+
+          if (e.getCause()!=null && e.getCause() instanceof IllegalStateException)
+            throw new WebApplicationException(Response.status(422).build());
+
+          if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException)
+            throw new WebApplicationException(Response.status(422).build());
+
+          throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR);
+        }
+        finally {
+          tis.close();
+        }
+      }
+    };
+  }
 
   public static void logRequest(Log logger, UriInfo info, Metadata metadata) {
     if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)==null) {

Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java?rev=1486665&r1=1486664&r2=1486665&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java Mon May 27 17:05:01 2013
@@ -110,24 +110,45 @@ public class TikaResourceTest extends CX
 		assertEquals(UNPROCESSEABLE, response.getStatus());
 	}
 
-    @Test
-    public void testSimpleWordHTML() throws Exception {
-        Response response = WebClient.create(endPoint + TIKA_PATH)
-                .type("application/msword")
-                .accept("text/html")
-                .put(ClassLoader.getSystemResourceAsStream(TEST_DOC));
-        String responseMsg = getStringFromInputStream((InputStream) response
-                .getEntity());
-        assertTrue(responseMsg.contains("test"));
-    }
-
-    @Test
-    public void testPasswordXLSHTML() throws Exception {
-        Response response = WebClient.create(endPoint + TIKA_PATH)
-                .type("application/vnd.ms-excel")
-                .accept("text/html")
-                .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+  @Test
+  public void testSimpleWordHTML() throws Exception {
+      Response response = WebClient.create(endPoint + TIKA_PATH)
+              .type("application/msword")
+              .accept("text/html")
+              .put(ClassLoader.getSystemResourceAsStream(TEST_DOC));
+      String responseMsg = getStringFromInputStream((InputStream) response
+              .getEntity());
+      assertTrue(responseMsg.contains("test"));
+  }
 
-        assertEquals(UNPROCESSEABLE, response.getStatus());
-    }
+  @Test
+  public void testPasswordXLSHTML() throws Exception {
+      Response response = WebClient.create(endPoint + TIKA_PATH)
+              .type("application/vnd.ms-excel")
+              .accept("text/html")
+              .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+
+      assertEquals(UNPROCESSEABLE, response.getStatus());
+  }
+
+  @Test
+  public void testSimpleWordXML() throws Exception {
+    Response response = WebClient.create(endPoint + TIKA_PATH)
+      .type("application/msword")
+      .accept("text/xml")
+      .put(ClassLoader.getSystemResourceAsStream(TEST_DOC));
+    String responseMsg = getStringFromInputStream((InputStream) response
+      .getEntity());
+    assertTrue(responseMsg.contains("test"));
+  }
+
+  @Test
+  public void testPasswordXLSXML() throws Exception {
+    Response response = WebClient.create(endPoint + TIKA_PATH)
+      .type("application/vnd.ms-excel")
+      .accept("text/xml")
+      .put(ClassLoader.getSystemResourceAsStream("password.xls"));
+
+    assertEquals(UNPROCESSEABLE, response.getStatus());
+  }
 }