You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/02/24 10:39:19 UTC

svn commit: r1074088 - in /tika/trunk: ./ tika-server/ tika-server/src/ tika-server/src/main/ tika-server/src/main/java/ tika-server/src/main/java/org/ tika-server/src/main/java/org/apache/ tika-server/src/main/java/org/apache/tika/ tika-server/src/mai...

Author: maxcom
Date: Thu Feb 24 09:39:17 2011
New Revision: 1074088

URL: http://svn.apache.org/viewvc?rev=1074088&view=rev
Log:
TIKA-593: JAX-RS network server

Added:
    tika/trunk/tika-server/
    tika/trunk/tika-server/README
    tika/trunk/tika-server/pom.xml
    tika/trunk/tika-server/src/
    tika/trunk/tika-server/src/main/
    tika/trunk/tika-server/src/main/java/
    tika/trunk/tika-server/src/main/java/org/
    tika/trunk/tika-server/src/main/java/org/apache/
    tika/trunk/tika-server/src/main/java/org/apache/tika/
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
    tika/trunk/tika-server/src/main/resources/
    tika/trunk/tika-server/src/main/resources/commons-logging.properties
    tika/trunk/tika-server/src/test/
    tika/trunk/tika-server/src/test/java/
    tika/trunk/tika-server/src/test/java/org/
    tika/trunk/tika-server/src/test/java/org/apache/
    tika/trunk/tika-server/src/test/java/org/apache/tika/
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
    tika/trunk/tika-server/src/test/resources/
    tika/trunk/tika-server/src/test/resources/2exe.docx   (with props)
    tika/trunk/tika-server/src/test/resources/2pic.doc   (with props)
    tika/trunk/tika-server/src/test/resources/2pic.docx   (with props)
    tika/trunk/tika-server/src/test/resources/Doc1_ole.doc   (with props)
    tika/trunk/tika-server/src/test/resources/password.xls   (with props)
    tika/trunk/tika-server/src/test/resources/pic.xls   (with props)
    tika/trunk/tika-server/src/test/resources/pic.xlsx   (with props)
    tika/trunk/tika-server/src/test/resources/test.doc   (with props)
Modified:
    tika/trunk/NOTICE.txt
    tika/trunk/pom.xml

Modified: tika/trunk/NOTICE.txt
URL: http://svn.apache.org/viewvc/tika/trunk/NOTICE.txt?rev=1074088&r1=1074087&r2=1074088&view=diff
==============================================================================
--- tika/trunk/NOTICE.txt (original)
+++ tika/trunk/NOTICE.txt Thu Feb 24 09:39:17 2011
@@ -7,3 +7,8 @@ The Apache Software Foundation (http://w
 Copyright 1993-2010 University Corporation for Atmospheric Research/Unidata
 This software contains code derived from UCAR/Unidata's NetCDF library.
 
+The tika-server component uses CDDL-licensed dependencies: Jersey (http://jersey.java.net/) and
+Grizzly (http://grizzly.java.net/)
+
+OpenCSV: Copyright 2005 Bytecode Pty Ltd. Licensed under the Apache License, Version 2.0
+

Modified: tika/trunk/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/pom.xml?rev=1074088&r1=1074087&r2=1074088&view=diff
==============================================================================
--- tika/trunk/pom.xml (original)
+++ tika/trunk/pom.xml Thu Feb 24 09:39:17 2011
@@ -50,6 +50,7 @@
     <module>tika-parsers</module>
     <module>tika-app</module>
     <module>tika-bundle</module>
+    <module>tika-server</module>
   </modules>
 
   <build>

Added: tika/trunk/tika-server/README
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/README?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/README (added)
+++ tika/trunk/tika-server/README Thu Feb 24 09:39:17 2011
@@ -0,0 +1,35 @@
+This is a JAX-RS network server for Apache Tika
+(https://issues.apache.org/jira/browse/TIKA-593)
+
+Running
+-------
+java -jar target/tika-server-1.0-SNAPSHOT.jar
+
+Usage
+-----
+Usage examples from the command line with the curl utility:
+
+1) Extract plain text:
+
+curl -T price.xls http://localhost:9998/tika
+
+2) Extract text with mime-type hint:
+
+curl -v -H "Content-type: application/vnd.openxmlformats-officedocument.wordprocessingml.document" -T document.docx http://localhost:9998/tika
+
+3) Get all document attachments as a ZIP file:
+
+curl -v -T Doc1_ole.doc http://localhost:9998/unpacker > /var/tmp/x.zip
+
+4) Extract metadata to CSV format:
+
+curl -T price.xls http://localhost:9998/meta
+
+HTTP Codes
+----------
+200 - Ok
+204 - No content (for example, when unpacking a file without attachments)
+415 - Unknown file type
+422 - Unparsable document of a known type (password-protected documents and unsupported versions such as BIFF5 Excel)
+500 - Internal error
+

Added: tika/trunk/tika-server/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/pom.xml (added)
+++ tika/trunk/tika-server/pom.xml Thu Feb 24 09:39:17 2011
@@ -0,0 +1,183 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-parent</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <relativePath>../tika-parent/pom.xml</relativePath>
+  </parent>
+
+  <artifactId>tika-server</artifactId>
+  <packaging>bundle</packaging>
+  <version>1.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${project.version}</version>
+        </dependency>        
+
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-server</artifactId>
+            <version>1.0.3.1</version>
+        </dependency>        
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-core</artifactId>
+            <version>1.0.3.1</version>
+        </dependency>        
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-client</artifactId>
+            <version>1.0.3.1</version>
+        </dependency>        
+       <dependency>
+            <groupId>javax.ws.rs</groupId>
+            <artifactId>jsr311-api</artifactId>
+            <version>1.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.jersey.test.framework</groupId>
+            <artifactId>jersey-test-framework</artifactId>
+            <version>1.0.3.1</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.grizzly</groupId>
+            <artifactId>grizzly-servlet-webserver</artifactId>
+            <version>1.9.8</version>
+        </dependency>      
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <version>1.1.1</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>1.3</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-collections</groupId>
+            <artifactId>commons-collections</artifactId>
+            <version>3.2.1</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>1.2</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-lang</groupId>
+            <artifactId>commons-lang</artifactId>
+            <version>2.5</version>
+        </dependency>
+	<dependency>
+	    <groupId>net.sf.opencsv</groupId>
+	    <artifactId>opencsv</artifactId>
+	    <version>2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+            <version>4.8</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+           <!--  Maven Exec Plug-In: http://mojo.codehaus.org/exec-maven-plugin/  -->
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>1.1</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>java</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <!-- Must name the server entry point in this module's package
+                         (the same class the bundle plugin uses as Main-Class). -->
+                    <mainClass>org.apache.tika.server.TikaServerCli</mainClass>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <inherited>true</inherited>
+                <configuration>
+                    <source>1.6</source>
+                    <target>1.6</target>
+                </configuration>
+            </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>org.apache.tika.*</Export-Package>
+            <Embed-Dependency>
+            	!jersey-server;scope=compile;inline=META-INF/services/**|au/**|javax/**|org/**|com/**|Resources/**|font_metrics.properties|repackage/**|schema*/**,
+            	jersey-server;scope=compile;inline=com/** |META-INF/services/com.sun*|META-INF/services/javax.ws.rs.ext.RuntimeDelegate
+            </Embed-Dependency>
+            <Embed-Transitive>true</Embed-Transitive>
+            <Bundle-DocURL>${project.url}</Bundle-DocURL>
+                    <Main-Class>org.apache.tika.server.TikaServerCli</Main-Class>
+          </instructions>
+        </configuration>
+      </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <!-- The plugin version belongs at plugin level; inside <configuration>
+                     it is not a plugin coordinate and does not select the version. -->
+                <version>2.6</version>
+                <configuration>
+                    <redirectTestOutputToFile>true</redirectTestOutputToFile>
+                    <argLine>-da -XX:+HeapDumpOnOutOfMemoryError -Xmx512m</argLine>
+<!--                    <argLine>-agentlib:jprofilerti=port=8849  -Xbootclasspath/a:/arc/opt/jprofiler5/bin/agent.jar</argLine> -->
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+     <repositories>
+        <repository>
+            <id>maven2-repository.dev.java.net</id>
+            <name>Java.net Repository for Maven</name>
+            <url>http://download.java.net/maven/2/</url>
+            <layout>default</layout>
+        </repository>
+        <repository>
+            <id>maven-repository.dev.java.net</id>
+            <name>Java.net Maven 1 Repository (legacy)</name>
+            <url>http://download.java.net/maven/1</url>
+            <layout>legacy</layout>
+        </repository>
+    </repositories>
+</project>
+

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import au.com.bytecode.opencsv.CSVWriter;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypeException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.StreamingOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * JAX-RS resource mounted at {@code /meta}: a PUT request body is parsed and the
+ * resulting metadata is streamed back as CSV, one row per metadata name with the
+ * name in the first column followed by all of its values.
+ */
+@Path("/meta")
+public class MetadataResource {
+  private static final String CONTENT_LENGTH = "Content-Length";
+  private static final String FILE_NAME = "File-Name";
+  private static final String RESOURCE_NAME = "resourceName";
+
+  /** Fixed output encoding so the CSV does not depend on the JVM's default charset. */
+  private static final String CSV_ENCODING = "UTF-8";
+
+  /**
+   * Parses the request body and returns its metadata as CSV.
+   *
+   * @param is the request body to parse
+   * @param httpHeaders request headers; the media type steers type detection and the
+   *        {@code Content-Length} / {@code File-Name} headers are copied into the metadata
+   * @return a streaming writer that emits the collected metadata as CSV
+   * @throws Exception if the detector cannot be created or parsing fails
+   */
+  @PUT
+  @Produces("text/csv")
+  public StreamingOutput getMetadata( InputStream is, @Context HttpHeaders httpHeaders ) throws Exception {
+    final Detector detector = new HeaderTrustingDetectorFactory ().createDetector( httpHeaders );
+    final AutoDetectParser parser = new AutoDetectParser(detector);
+    final ParseContext context = new ParseContext();
+    context.set(Parser.class, parser);
+    final Metadata metadata = new Metadata();
+    parser.parse( is, new DefaultHandler(), metadata, context );
+    // NOTE(review): the header values are applied after parsing, so they are not
+    // available to the parser and replace any values stored under the same names;
+    // confirm this ordering is intended.
+    fillMetadata ( httpHeaders, metadata );
+
+    return new StreamingOutput() {
+      @Override
+      public void write(OutputStream outputStream) throws IOException, WebApplicationException {
+        CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, CSV_ENCODING));
+        try {
+          for (String name : metadata.names()) {
+            String[] values = metadata.getValues(name);
+            List<String> row = new ArrayList<String>(values.length + 1);
+            row.add(name);
+            row.addAll(Arrays.asList(values));
+            writer.writeNext(row.toArray(new String[row.size()]));
+          }
+        } finally {
+          // Close even when writing a row fails part-way through.
+          writer.close();
+        }
+      }
+    };
+  }
+
+  /**
+   * Copies the first {@code Content-Length} and {@code File-Name} request header
+   * values, when present, into the metadata (the latter under {@code resourceName}).
+   */
+  private void fillMetadata ( HttpHeaders httpHeaders, Metadata metadata ) {
+    final List < String > fileName = httpHeaders.getRequestHeader(FILE_NAME);
+    final List < String > cl = httpHeaders.getRequestHeader(CONTENT_LENGTH);
+
+    if ( cl != null && !cl.isEmpty() ) {
+      metadata.set( CONTENT_LENGTH, cl.get(0) );
+    }
+
+    if ( fileName != null && !fileName.isEmpty() ) {
+      metadata.set( RESOURCE_NAME, fileName.get(0) );
+    }
+  }
+
+  /**
+   * Builds a detector that returns the request's declared media type as-is, falling
+   * back to the Tika MIME repository when the request declares no media type or
+   * declares {@code application/octet-stream}.
+   */
+  private static class HeaderTrustingDetectorFactory {
+    public Detector createDetector( HttpHeaders httpHeaders ) throws IOException, MimeTypeException {
+      final javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
+      if (mediaType == null || mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE )) {
+        return (new TikaConfig()).getMimeRepository();
+      }
+
+      return new Detector() {
+        @Override
+        public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
+          return MediaType.parse( mediaType.toString() );
+        }
+      };
+    }
+  }
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import java.io.IOException;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * Callback that extracts a part of type {@code T} into a zip output stream.
+ *
+ * NOTE(review): no implementation is visible here; presumably implementations add
+ * one or more zip entries for the part - confirm against the callers.
+ *
+ * @param <T> the type of part handled by this extractor
+ */
+public interface PartExtractor<T> {
+  /**
+   * Extracts the given part into the given zip stream.
+   *
+   * @param part the part to extract
+   * @param output the zip stream that receives the extracted content
+   * @throws IOException declared for implementations to signal an I/O failure
+   */
+  void extract(T part, ZipOutputStream output) throws IOException;
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import org.apache.tika.exception.TikaException;
+
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.ext.ExceptionMapper;
+import javax.ws.rs.ext.Provider;
+
+/**
+ * Converts a {@link TikaException} into an HTTP response: when the exception wraps a
+ * {@link WebApplicationException} that exception's own response is returned,
+ * otherwise a generic server error response is built.
+ */
+@Provider
+public class TikaExceptionMapper implements ExceptionMapper<TikaException> {
+  @Override
+  public Response toResponse(TikaException e) {
+    final Throwable cause = e.getCause();
+
+    // instanceof is false for null, so a missing cause falls through to the server error.
+    if (!(cause instanceof WebApplicationException)) {
+      return Response.serverError().build();
+    }
+
+    return ((WebApplicationException) cause).getResponse();
+  }
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import javax.ws.rs.*;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.StreamingOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Set;
+
+@Path("/tika")
+public class TikaResource {
+  public static final String GREETING = "This is Tika Server. Please PUT\n";
+  private final Log logger = LogFactory.getLog(TikaResource.class);
+
+  @SuppressWarnings({"SameReturnValue"})
+  @GET
+  @Produces("text/plain")
+  public String getMessage() {
+    return GREETING;
+  }
+
+  public static AutoDetectParser createParser() {
+    final AutoDetectParser parser = new AutoDetectParser();
+
+    parser.setFallback(new Parser() {
+      public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
+        return parser.getSupportedTypes(parseContext);
+      }
+
+      @Override
+      public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
+        throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
+      }
+
+      @Override
+      public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata) {
+        throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
+      }
+    });
+
+    return parser;
+  }
+
+  public static void fillMetadata(AutoDetectParser parser, Metadata metadata, HttpHeaders httpHeaders) {
+    javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
+
+    if (mediaType !=null && !mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
+      metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString());
+
+      final Detector detector = parser.getDetector();
+
+      parser.setDetector(new Detector() {
+        @Override
+        public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
+          String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
+
+          if (ct!=null) {
+            return MediaType.parse(ct);
+          } else {
+            return detector.detect(inputStream, metadata);
+          }
+        }
+      });
+    }
+  }
+
+  @PUT
+  @Consumes("*/*")
+  @Produces("text/plain")
+  public StreamingOutput getText(final InputStream is, @Context HttpHeaders httpHeaders) {
+    final AutoDetectParser parser = createParser();
+    final Metadata metadata = new Metadata();
+
+    fillMetadata(parser, metadata, httpHeaders);
+
+    return new StreamingOutput() {
+      @Override
+      public void write(OutputStream outputStream) throws IOException, WebApplicationException {
+        BodyContentHandler body = new BodyContentHandler(outputStream);
+
+        try {
+          parser.parse(is, body, metadata);
+        } catch (SAXException e) {
+          throw new WebApplicationException(e);
+        } catch (TikaException e) {
+          if (e.getCause()!=null && e.getCause() instanceof WebApplicationException) {
+            throw (WebApplicationException) e.getCause();
+          }
+
+          if (e.getCause()!=null && e.getCause() instanceof IllegalStateException) {
+            throw new WebApplicationException(Response.status(422).build());
+          }
+
+          if (e.getCause()!=null && e.getCause() instanceof EncryptedDocumentException) {
+            throw new WebApplicationException(Response.status(422).build());
+          }
+
+          if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException) {
+            throw new WebApplicationException(Response.status(422).build());
+          }
+
+          logger.warn("Text extraction failed", e);
+
+          throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR);
+        }
+      }
+    };
+  }
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import com.sun.grizzly.http.SelectorThread;
+import com.sun.jersey.api.container.grizzly.GrizzlyWebContainerFactory;
+import org.apache.commons.cli.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import javax.ws.rs.core.UriBuilder;
+import java.net.URI;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Command-line entry point that starts the server on a Grizzly web container,
+ * scanning the {@code org.apache.tika.server} package for JAX-RS classes.
+ */
+public class TikaServerCli {
+  private static final Log logger = LogFactory.getLog(TikaServerCli.class);
+
+  public static final int DEFAULT_PORT = 9998;
+
+  /**
+   * Builds the supported command-line options: {@code -p/--port} and {@code -h/--help}.
+   */
+  private static Options getOptions() {
+    Options options = new Options();
+    options.addOption("p", "port", true, "listen port (default = "+DEFAULT_PORT+ ')');
+
+    options.addOption("h", "help", false, "this help message");
+
+    return options;
+  }
+
+  /**
+   * Parses the arguments and starts the server at {@code http://localhost:<port>/}.
+   * With {@code --help} the usage text is printed and the JVM exits with status -1;
+   * any startup failure is logged as fatal and also exits with status -1.
+   *
+   * @param args command-line arguments
+   */
+  public static void main(String[] args) {
+    try {
+      Options options = getOptions();
+
+      CommandLineParser cliParser = new GnuParser();
+      CommandLine line = cliParser.parse(options, args);
+
+      // Handle --help before reading other options, so that a malformed port value
+      // cannot prevent the usage text from being shown.
+      if (line.hasOption("help")) {
+        HelpFormatter helpFormatter = new HelpFormatter();
+        helpFormatter.printHelp("tikaserver", options);
+        System.exit(-1);
+      }
+
+      int port = DEFAULT_PORT;
+
+      if (line.hasOption("port")) {
+        port = Integer.parseInt(line.getOptionValue("port"));
+      }
+
+      Map<String, String> params = new HashMap<String, String>();
+
+      params.put("com.sun.jersey.config.property.packages", "org.apache.tika.server");
+
+      String baseUri = "http://localhost/";
+      URI buildUri = UriBuilder.fromUri(baseUri).port(port).build();
+      GrizzlyWebContainerFactory.create(buildUri, params);
+
+      logger.info("Started at " + buildUri);
+    } catch (Exception ex) {
+      logger.fatal("Can't start", ex);
+      System.exit(-1);
+    }
+  }
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import org.apache.commons.lang.mutable.MutableInt;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.IOUtils;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.OfficeParser;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.StreamingOutput;
+import java.io.*;
+import java.util.Collections;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * JAX-RS resource, mounted at {@code /unpacker}, that parses an uploaded
+ * document and returns its embedded documents packed into a zip archive.
+ */
+@Path("/unpacker")
+public class UnpackerResource {
+  private static final Log logger = LogFactory.getLog(UnpackerResource.class);
+
+  private final TikaConfig tikaConfig;
+
+  public UnpackerResource() {
+    tikaConfig = TikaConfig.getDefaultConfig();
+  }
+
+  /**
+   * Parses the request body and collects every embedded document the parser
+   * reports into a zip archive.
+   *
+   * <p>The parser is chosen from the request Content-Type. When that header
+   * is absent or is {@code application/octet-stream}, the type is detected
+   * from the stream content instead.
+   *
+   * @param is request body holding the document to unpack
+   * @param httpHeaders headers of the request, used for the Content-Type
+   * @return the zip archive holding the extracted parts
+   * @throws WebApplicationException with {@code UNSUPPORTED_MEDIA_TYPE} when
+   *         no parser is available for the type, or with {@code NO_CONTENT}
+   *         when the document yielded no embedded parts
+   * @throws Exception when parsing the document fails
+   */
+  @PUT
+  @Produces("application/zip")
+  public StreamingOutput getText(
+          InputStream is,
+          @Context HttpHeaders httpHeaders
+  ) throws Exception {
+    // NOTE(review): presumably content detection below needs mark/reset to
+    // peek at the stream without consuming it - confirm.
+    if (!is.markSupported()) {
+      is = new BufferedInputStream(is);
+    }
+
+    Parser parser;
+
+    javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
+    if (mediaType != null && !mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
+      parser = tikaConfig.getParser(new MediaType(mediaType.getType(), mediaType.getSubtype()));
+    } else {
+      MediaType type = tikaConfig.getMimeRepository().detect(is, new Metadata());
+      parser = tikaConfig.getParser(type);
+    }
+
+    if (parser == null) {
+      throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
+    }
+
+    // The text content is of no interest here, only the embedded documents.
+    ContentHandler ch = new DefaultHandler();
+
+    ParseContext pc = new ParseContext();
+
+    ZipOutput zout = new ZipOutput();
+    MutableInt count = new MutableInt();
+
+    pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, zout));
+
+    parser.parse(is, ch, new Metadata(), pc);
+
+    if (count.intValue() == 0) {
+      throw new WebApplicationException(Response.Status.NO_CONTENT);
+    }
+
+    return zout;
+  }
+
+  /**
+   * Extractor that, instead of parsing embedded documents, buffers each one
+   * in memory and registers it as an entry of the zip output.
+   */
+  private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
+    /** Number of parts stored so far; also used to build fallback names. */
+    private final MutableInt count;
+    private final ZipOutput zout;
+
+    MyEmbeddedDocumentExtractor(MutableInt count, ZipOutput zout) {
+      this.count = count;
+      this.zout = zout;
+    }
+
+    @Override
+    public boolean shouldParseEmbedded(Metadata metadata) {
+      return true;
+    }
+
+    /**
+     * Stores one embedded document in the zip output.
+     *
+     * <p>The entry name is the resource name from the metadata, or the
+     * running part number when there is none. A name without a dot gets the
+     * extension registered for the part's content type. OLE objects are
+     * unwrapped when they hold an OLE 1.0 native payload.
+     *
+     * @param inputStream content of the embedded document, read completely
+     * @param contentHandler not used
+     * @param metadata metadata of the embedded document (name, content type)
+     * @param b not used
+     */
+    @Override
+    public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b) throws SAXException, IOException {
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      IOUtils.copy(inputStream, bos);
+      byte[] data = bos.toByteArray();
+
+      String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
+      String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
+
+      if (name == null) {
+        name = Integer.toString(count.intValue());
+      }
+
+      // Without a content type there is nothing to look an extension up for.
+      // NOTE(review): forName presumably rejects a null name with an
+      // exception other than MimeTypeException - confirm.
+      if (contentType != null && !name.contains(".")) {
+        try {
+          String ext = tikaConfig.getMimeRepository().forName(contentType).getExtension();
+
+          if (ext != null) {
+            name += ext;
+          }
+        } catch (MimeTypeException e) {
+          logger.warn("Unexpected MimeTypeException", e);
+        }
+      }
+
+      if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) {
+        POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data));
+        OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
+
+        if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
+          try {
+            Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
+            if (ole.getDataSize() > 0) {
+              String label = ole.getLabel();
+
+              // prefix "ole-" labels with the part number
+              if (label.startsWith("ole-")) {
+                label = Integer.toString(count.intValue()) + '-' + label;
+              }
+
+              // store the wrapped payload under its own label
+              name = label;
+
+              data = ole.getDataBuffer();
+            }
+          } catch (Ole10NativeException ex) {
+            // the part is still stored, but as the raw OLE container
+            logger.warn("Skipping invalid part", ex);
+          }
+        } else {
+          name += '.' + type.getExtension();
+        }
+      }
+
+      final String finalName = name;
+
+      zout.put(new PartExtractor<byte[]>() {
+        @Override
+        public void extract(byte[] part, ZipOutputStream output) throws IOException {
+          ZipUtils.zipStoreBuffer(output, finalName, part);
+        }
+      }, Collections.singletonList(data));
+
+      count.increment();
+    }
+  }
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.StreamingOutput;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * {@link StreamingOutput} that writes previously registered parts to the
+ * response as a zip archive using the STORED (uncompressed) method.
+ *
+ * <p>Parts are written in the order in which they were registered.
+ */
+public class ZipOutput implements StreamingOutput {
+  // Insertion-ordered: the extractors are typically anonymous classes with
+  // identity hash codes, so a plain HashMap would make the entry order of
+  // the archive vary from run to run.
+  private final Map<PartExtractor, Collection> parts = new LinkedHashMap<PartExtractor, Collection>();
+
+  /**
+   * Registers a group of parts together with the extractor that knows how to
+   * write them. Empty groups are ignored. Registering the same extractor
+   * again replaces its previous group.
+   *
+   * @param extractor writes a single part into the zip stream
+   * @param parts the parts to hand to the extractor, one call per element
+   */
+  public <T> void put(PartExtractor<T> extractor, Collection<T> parts) {
+    if (parts.isEmpty()) {
+      return;
+    }
+
+    this.parts.put(extractor, parts);
+  }
+
+  /**
+   * Writes all registered parts to the given stream as a zip archive and
+   * closes the archive.
+   *
+   * @param outputStream destination of the archive
+   * @throws IOException if writing an entry or finishing the archive fails
+   */
+  @Override
+  public void write(OutputStream outputStream) throws IOException, WebApplicationException {
+    ZipOutputStream zip = new ZipOutputStream(outputStream);
+
+    zip.setMethod(ZipOutputStream.STORED);
+
+    addParts(zip);
+
+    zip.close();
+  }
+
+  // put() is the only way to add entries and it ties the element type of the
+  // collection to the extractor's type parameter, so the raw call is safe.
+  @SuppressWarnings("unchecked")
+  private void addParts(ZipOutputStream zip) throws IOException {
+    for (Map.Entry<PartExtractor, Collection> entry : parts.entrySet()) {
+      for (Object part : entry.getValue()) {
+        entry.getKey().extract(part, zip);
+      }
+    }
+  }
+
+  /**
+   * @return {@code true} if no parts have been registered
+   */
+  public boolean isEmpty() {
+    return parts.isEmpty();
+  }
+}

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import java.io.IOException;
+import java.util.zip.CRC32;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+import java.util.zip.ZipException;
+import java.util.UUID;
+
+/**
+ * Static helpers for building zip archives.
+ */
+public class ZipUtils {
+  private ZipUtils() {
+  }
+
+  /**
+   * Adds a buffer to the archive as an uncompressed (STORED) entry.
+   *
+   * <p>If the archive rejects the entry under the requested name, the buffer
+   * is stored under a randomly generated UUID name instead. If that attempt
+   * is rejected as well, the exception is propagated to the caller.
+   *
+   * @param zip archive to append to
+   * @param name entry name, or {@code null} to use a generated name
+   * @param dataBuffer complete content of the entry
+   * @throws IOException if the entry can not be written
+   */
+  public static void zipStoreBuffer(ZipOutputStream zip, String name, byte[] dataBuffer) throws IOException {
+    ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString());
+    zipEntry.setMethod(ZipOutputStream.STORED);
+
+    // size and checksum are set on the entry before it is added to the archive
+    zipEntry.setSize(dataBuffer.length);
+    CRC32 crc32 = new CRC32();
+    crc32.update(dataBuffer);
+    zipEntry.setCrc(crc32.getValue());
+
+    try {
+      zip.putNextEntry(zipEntry);
+    } catch (ZipException ex) {
+      if (name == null) {
+        // Already on the generated name: there is nothing left to retry
+        // with, so report the real cause instead of writing without an entry.
+        throw ex;
+      }
+
+      // retry once under a generated name
+      zipStoreBuffer(zip, null, dataBuffer);
+      return;
+    }
+
+    zip.write(dataBuffer);
+
+    zip.closeEntry();
+  }
+
+  /**
+   * Strips a single leading {@code '/'} from a name, if there is one.
+   *
+   * @param name name to clean up, may be empty but not {@code null}
+   * @return the name without its leading slash
+   */
+  public static String cleanupFilename(String name) {
+    // startsWith is safe for the empty string, unlike charAt(0)
+    if (name.startsWith("/")) {
+      name = name.substring(1);
+    }
+
+    return name;
+  }
+}

Added: tika/trunk/tika-server/src/main/resources/commons-logging.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/resources/commons-logging.properties?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/resources/commons-logging.properties (added)
+++ tika/trunk/tika-server/src/main/resources/commons-logging.properties Thu Feb 24 09:39:17 2011
@@ -0,0 +1 @@
+org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger

Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java (added)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import au.com.bytecode.opencsv.CSVReader;
+import com.sun.jersey.test.framework.JerseyTest;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Tests for the {@code /meta} resource, whose response is read as CSV rows
+ * of metadata name and value.
+ */
+public class MetadataResourceTest extends JerseyTest {
+  private static final String META_PATH = "/meta";
+
+  public MetadataResourceTest() throws Exception {
+    super("org.apache.tika.server");
+  }
+
+  /**
+   * Uploads a Word document and checks the author reported in its metadata.
+   */
+  @Test
+  public void testSimpleWord() throws Exception {
+    Reader reader =
+            webResource.path(META_PATH)
+            .type("application/msword")
+                    .put(Reader.class, ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+
+    Map<String,String> metadata = new HashMap<String, String>();
+
+    CSVReader csvReader = new CSVReader(reader);
+    try {
+      String[] nextLine;
+      while ((nextLine = csvReader.readNext()) != null) {
+        metadata.put(nextLine[0], nextLine[1]);
+      }
+    } finally {
+      // release the response reader even if reading a row fails
+      csvReader.close();
+    }
+
+    assertNotNull(metadata.get("Author"));
+    assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+  }
+  // NOTE(review): the test below was committed commented out; the reason is
+  // not recorded here.
+/*
+  @Test
+  public void testXLSX() throws Exception {
+    Reader reader =
+            webResource.path(META_PATH)
+            .type("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+                    .header("File-Name", TikaResourceTest.TEST_XLSX)
+                    .put(Reader.class, ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_XLSX));
+
+    CSVReader csvReader = new CSVReader(reader);
+
+    final Map < String, String > metadataActual = new HashMap < String, String > (),
+            metadataExpected = new HashMap < String, String > ();
+
+    String[] nextLine;
+    while ((nextLine = csvReader.readNext()) != null) {
+      metadataActual.put(nextLine[0], nextLine[1]);
+    }
+    metadataExpected.put("Author", "jet");
+    metadataExpected.put("Application-Name", "Microsoft Excel");
+    metadataExpected.put("description", "Тестовый комментарий");
+    metadataExpected.put("resourceName", TikaResourceTest.TEST_XLSX);
+    metadataExpected.put("protected", "false");
+    metadataExpected.put("Creation-Date", "2010-05-11T12:37:42Z");
+    metadataExpected.put("Last-Modified", "2010-05-11T14:46:20Z");
+    assertEquals( true, metadataActual.size() >= metadataExpected.size() );
+    for ( final Map.Entry < String, String > field : metadataExpected.entrySet() ) {
+      final String key = field.getKey(), valueActual = metadataActual.get(key), valueExpected = field.getValue();
+      assertNotNull( valueActual );
+      assertEquals( valueExpected, valueActual );
+    }
+  }
+*/
+}

Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java (added)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import com.sun.jersey.api.client.ClientResponse;
+import com.sun.jersey.core.header.MediaTypes;
+import com.sun.jersey.test.framework.JerseyTest;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+public class TikaResourceTest extends JerseyTest {
+  private static final String TIKA_PATH = "tika";
+  public static final String TEST_DOC = "test.doc";
+  public static final String TEST_XLSX = "16637.xlsx";
+  private static final int UNPROCESSEABLE = 422;
+
+  public TikaResourceTest() throws Exception {
+    super("org.apache.tika.server");
+  }
+
+  /**
+   * Test to see that the message "Hello World" is sent in the response.
+   */
+  @Test
+  public void testHelloWorld() {
+    String responseMsg = webResource.path(TIKA_PATH).get(String.class);
+    assertEquals(TikaResource.GREETING, responseMsg);
+  }
+
+  @Test
+  public void testSimpleWord() {
+    String responseMsg =
+            webResource.path(TIKA_PATH)
+            .type("application/msword")
+                    .put(String.class, ClassLoader.getSystemResourceAsStream(TEST_DOC));
+
+    assertTrue(responseMsg.contains("test"));
+  }
+
+  @Test
+  public void testApplicationWadl() {
+    String serviceWadl = webResource.path("application.wadl").
+            accept(MediaTypes.WADL).get(String.class);
+
+    assertTrue(serviceWadl.length() > 0);
+  }
+
+  @Test
+  public void testPasswordXLS() throws Exception {
+    ClientResponse cr =
+            webResource
+                    .path(TIKA_PATH)
+                    .type("application/vnd.ms-excel")                    
+                    .put(ClientResponse.class, ClassLoader.getSystemResourceAsStream("password.xls"));
+
+    assertEquals(UNPROCESSEABLE, cr.getStatus());
+  }
+}

Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java?rev=1074088&view=auto
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java (added)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java Thu Feb 24 09:39:17 2011
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server;
+
+import com.sun.jersey.api.client.ClientResponse;
+import com.sun.jersey.test.framework.JerseyTest;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.tika.io.IOUtils;
+import org.junit.Test;
+
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class UnpackerResourceTest extends JerseyTest {
+  private static final String UNPACKER_PATH = "/unpacker";
+
+  private static final String TEST_DOC_WAV = "Doc1_ole.doc";
+  private static final String WAV1_MD5 = "bdd0a78a54968e362445364f95d8dc96";
+  private static final String WAV1_NAME = "_1310388059/MSj00974840000[1].wav";
+  private static final String WAV2_MD5 = "3bbd42fb1ac0e46a95350285f16d9596";
+  private static final String WAV2_NAME = "_1310388058/MSj00748450000[1].wav";
+  private static final String APPLICATION_MSWORD = "application/msword";
+  private static final int NO_CONTENT = 204;
+  private static final String JPG_NAME = "image1.jpg";
+  private static final String XSL_IMAGE1_MD5 = "68ead8f4995a3555f48a2f738b2b0c3d";
+  private static final String JPG_MD5 = XSL_IMAGE1_MD5;
+  private static final String JPG2_NAME = "image2.jpg";
+  private static final String JPG2_MD5 = "b27a41d12c646d7fc4f3826cf8183c68";
+  private static final String TEST_DOCX_IMAGE = "2pic.docx";
+  private static final String DOCX_IMAGE1_MD5 = "5516590467b069fa59397432677bad4d";
+  private static final String DOCX_IMAGE2_MD5 = "a5dd81567427070ce0a2ff3e3ef13a4c";
+  private static final String DOCX_IMAGE1_NAME = "image1.jpeg";
+  private static final String DOCX_IMAGE2_NAME = "image2.jpeg";
+  private static final String DOCX_EXE1_MD5 = "d71ffa0623014df725f8fd2710de4411";
+  private static final String DOCX_EXE1_NAME = "GMapTool.exe";
+  private static final String DOCX_EXE2_MD5 = "2485435c7c22d35f2de9b4c98c0c2e1a";
+  private static final String DOCX_EXE2_NAME = "Setup.exe";
+  private static final String XSLX_IMAGE1_NAME = "image1.jpeg";
+  private static final String XSLX_IMAGE2_NAME = "image2.jpeg";
+  private static final String XSL_IMAGE2_MD5 = "8969288f4245120e7c3870287cce0ff3";
+  private static final String COVER_JPG_MD5SUM = "4d236dab6e711735ed11686641b1fba9";
+  private static final String COVER_JPG = "cover.jpg";
+  private static final String APPLICATION_XML = "application/xml";
+  private static final String CONTENT_TYPE = "Content-type";
+
+  public UnpackerResourceTest() throws Exception {
+    super("org.apache.tika.server");
+  }
+
+  @Test
+  public void testDocWAV() throws Exception {
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .type(APPLICATION_MSWORD)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(WAV1_MD5, data.get(WAV1_NAME));
+    assertEquals(WAV2_MD5, data.get(WAV2_NAME));
+  }
+
+  @Test
+  public void testDocPicture() throws Exception {
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .type(APPLICATION_MSWORD)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(JPG_MD5, data.get(JPG_NAME));
+  }
+
+  @Test
+  public void testDocPictureNoOle() throws Exception {
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .type(APPLICATION_MSWORD)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream("2pic.doc"));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(JPG2_MD5, data.get(JPG2_NAME));
+  }
+
+  @Test
+  public void testImageDOCX() throws Exception {
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOCX_IMAGE));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(DOCX_IMAGE1_MD5, data.get(DOCX_IMAGE1_NAME));
+    assertEquals(DOCX_IMAGE2_MD5, data.get(DOCX_IMAGE2_NAME));
+  }
+
+  @Test
+  public void testExeDOCX() throws Exception {
+    String TEST_DOCX_EXE = "2exe.docx";
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOCX_EXE));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(DOCX_EXE1_MD5, data.get(DOCX_EXE1_NAME));
+    assertEquals(DOCX_EXE2_MD5, data.get(DOCX_EXE2_NAME));
+  }
+/*
+  @Test
+  public void testImageXSLX() throws Exception {
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream("pic.xlsx"));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(XSL_IMAGE1_MD5, data.get(XSLX_IMAGE1_NAME));
+    assertEquals(XSL_IMAGE2_MD5, data.get(XSLX_IMAGE2_NAME));
+  }
+*/
+  @Test
+  public void testImageXSL() throws Exception {
+    InputStream is =
+            webResource
+                    .path(UNPACKER_PATH)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream("pic.xls"));
+
+    ZipInputStream zip = new ZipInputStream(is);
+
+    Map<String, String> data = readZip(zip);
+
+    assertEquals(XSL_IMAGE1_MD5, data.get("0.jpg"));
+    assertEquals(XSL_IMAGE2_MD5, data.get("1.jpg"));
+  }
+
+  private static Map<String, String> readZip(ZipInputStream zip) throws IOException {
+    Map<String, String> data = new HashMap<String, String>();
+
+    while (true) {
+      ZipEntry entry = zip.getNextEntry();
+
+      if (entry==null) {
+        break;
+      }
+
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+      IOUtils.copy(zip, bos);
+
+      data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray()));
+    }
+
+    return data;
+  }
+}

Added: tika/trunk/tika-server/src/test/resources/2exe.docx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/2exe.docx?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/2exe.docx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/2pic.doc
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/2pic.doc?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/2pic.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/2pic.docx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/2pic.docx?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/2pic.docx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/Doc1_ole.doc
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/Doc1_ole.doc?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/Doc1_ole.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/password.xls
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/password.xls?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/password.xls
------------------------------------------------------------------------------
    svn:executable = *

Propchange: tika/trunk/tika-server/src/test/resources/password.xls
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/pic.xls
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/pic.xls?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/pic.xls
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/pic.xlsx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/pic.xlsx?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/pic.xlsx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-server/src/test/resources/test.doc
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/test.doc?rev=1074088&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-server/src/test/resources/test.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream