You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/10/27 18:13:27 UTC

svn commit: r1189827 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/config/ tika-core/src/main/java/org/apache/tika/detect/ tika-core/src/main/java/org/apache/tika/io/ tika-core/src/main/java/org/apache/tika/metadata/ tika-core/src/main/java...

Author: jukka
Date: Thu Oct 27 16:13:26 2011
New Revision: 1189827

URL: http://svn.apache.org/viewvc?rev=1189827&view=rev
Log:
TIKA-703: Drop deprecated methods/classes/interfaces

Added:
    tika/trunk/tika-core/src/test/resources/org/apache/tika/io/
    tika/trunk/tika-core/src/test/resources/org/apache/tika/io/test.txt
      - copied, changed from r1189765, tika/trunk/tika-core/src/test/resources/org/apache/tika/metadata/test.txt
Removed:
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/ContainerAwareDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ExternalParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadataHelper.java
    tika/trunk/tika-core/src/test/resources/org/apache/tika/metadata/test.txt
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SafeContentHandler.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Thu Oct 27 16:13:26 2011
@@ -86,28 +86,10 @@ public class TikaConfig {
         this(getBuilder().parse(stream));
     }
 
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
-     */
-    public TikaConfig(InputStream stream, Parser delegate)
-            throws TikaException, IOException, SAXException {
-        this(stream);
-    }
-
     public TikaConfig(Document document) throws TikaException, IOException {
         this(document.getDocumentElement());
     }
 
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
-     */
-    public TikaConfig(Document document, Parser delegate)
-            throws TikaException, IOException {
-        this(document);
-    }
-
     public TikaConfig(Element element) throws TikaException, IOException {
         this(element, ServiceLoader.getContextClassLoader());
     }
@@ -165,8 +147,7 @@ public class TikaConfig {
             this.mimeTypes = MimeTypes.getDefaultMimeTypes();
             this.parser = new DefaultParser(
                     mimeTypes.getMediaTypeRegistry(), loader);
-            this.detector = new DefaultDetector(
-                  MimeTypes.getDefaultMimeTypes(), loader);
+            this.detector = new DefaultDetector(mimeTypes, loader);
         } else {
             InputStream stream;
             File file = new File(config);
@@ -198,15 +179,6 @@ public class TikaConfig {
         }
     }
 
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
-     */
-    public TikaConfig(Element element, Parser delegate)
-            throws TikaException, IOException {
-        this(element);
-    }
-
     private static String getText(Node node) {
         if (node.getNodeType() == Node.TEXT_NODE) {
             return node.getNodeValue();
@@ -223,13 +195,6 @@ public class TikaConfig {
     }
 
     /**
-     * @deprecated Use the {@link #getParser()} method instead
-     */
-    public Parser getParser(MediaType mimeType) {
-        return parser.getParsers().get(mimeType);
-    }
-
-    /**
      * Returns the configured parser instance.
      *
      * @return configured parser
@@ -239,13 +204,6 @@ public class TikaConfig {
     }
 
     /**
-     * @deprecated Use the {@link #getParser()} method instead
-     */
-    public Map<MediaType, Parser> getParsers() {
-        return parser.getParsers();
-    }
-
-    /**
      * Returns the configured detector instance.
      *
      * @return configured detector
@@ -281,15 +239,6 @@ public class TikaConfig {
         }
     }
 
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
-     */
-    public static TikaConfig getDefaultConfig(Parser delegate)
-            throws TikaException {
-        return getDefaultConfig();
-    }
-
     private static DocumentBuilder getBuilder() throws TikaException {
         try {
             return DocumentBuilderFactory.newInstance().newDocumentBuilder();

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Thu Oct 27 16:13:26 2011
@@ -119,13 +119,6 @@ public class TikaInputStream extends Tag
     }
 
     /**
-     * @deprecated Use the {@link #get(InputStream, TemporaryResources)} instead
-     */
-    public static TikaInputStream get(InputStream stream, TemporaryFiles tmp) {
-        return get(stream, (TemporaryResources) tmp);
-    }
-
-    /**
      * Casts or wraps the given stream to a TikaInputStream instance.
      * This method can be used to access the functionality of this class
      * even when given just a normal input stream instance.

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Thu Oct 27 16:13:26 2011
@@ -18,13 +18,11 @@ package org.apache.tika.mime;
 
 // JDK imports
 import java.io.ByteArrayInputStream;
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
-import java.net.URL;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Locale;
@@ -34,7 +32,6 @@ import java.util.TreeSet;
 
 import javax.xml.namespace.QName;
 
-import org.apache.tika.Tika;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.detect.TextDetector;
 import org.apache.tika.detect.XmlRootExtractor;
@@ -120,40 +117,13 @@ public final class MimeTypes implements 
     }
 
     /**
-     * Find the Mime Content Type of a file.
-     *
-     * @deprecated Use the {@link Tika#detect(File)} method
-     * @param file
-     *            to analyze.
-     * @return the Mime Content Type of the specified file, or <code>null</code>
-     *         if none is found.
-     */
-    public MimeType getMimeType(File file) {
-        return getMimeType(file.getName());
-    }
-
-    /**
-     * Find the Mime Content Type of a document from its URL.
-     *
-     * @deprecated Use the {@link Tika#detect(URL)} method
-     * @param url
-     *            of the document to analyze.
-     * @return the Mime Content Type of the specified document URL, or
-     *         <code>null</code> if none is found.
-     */
-    public MimeType getMimeType(URL url) {
-        return getMimeType(url.getPath());
-    }
-
-    /**
      * Find the Mime Content Type of a document from its name.
      * Returns application/octet-stream if no better match is found.
      *
-     * @deprecated Use the {@link Tika#detect(String)} method
      * @param name of the document to analyze.
      * @return the Mime Content Type of the specified document name
      */
-    public MimeType getMimeType(String name) {
+    private MimeType getMimeType(String name) {
         MimeType type = patterns.matches(name);
         if (type != null) {
             return type;
@@ -174,11 +144,10 @@ public final class MimeTypes implements 
      * The given byte array is expected to be at least {@link #getMinLength()}
      * long, or shorter only if the document stream itself is shorter.
      *
-     * @deprecated Use the {@link Tika#detect(byte[])} method
      * @param data first few bytes of a document stream
      * @return matching MIME type
      */
-    public MimeType getMimeType(byte[] data) {
+    private MimeType getMimeType(byte[] data) {
         if (data == null) {
             throw new IllegalArgumentException("Data is missing");
         } else if (data.length == 0) {
@@ -232,19 +201,6 @@ public final class MimeTypes implements 
     }
 
     /**
-     * Returns the MIME type that best matches the first few bytes of the
-     * given document stream.
-     *
-     * @deprecated Use the {@link Tika#detect(InputStream)} method
-     * @param stream document stream
-     * @return matching MIME type, or <code>null</code> if no match is found
-     * @throws IOException if the stream can be read
-     */
-    public MimeType getMimeType(InputStream stream) throws IOException {
-        return getMimeType(readMagicHeader(stream));
-    }
-
-    /**
      * Reads the first {@link #getMinLength()} bytes from the given stream.
      * If the stream is shorter, then the entire content of the stream is
      * returned.
@@ -280,98 +236,6 @@ public final class MimeTypes implements 
     }
 
     /**
-     * @deprecated Use the {@link Tika#detect(InputStream, Metadata))} method
-     */
-    public String getType(String typeName, String url, byte[] data) {
-        try {
-            Metadata metadata = new Metadata();
-            if (url != null) {
-                metadata.set(Metadata.RESOURCE_NAME_KEY, url);
-            }
-            if (typeName != null) {
-                metadata.set(Metadata.CONTENT_TYPE, typeName);
-            }
-            return detect(new ByteArrayInputStream(data), metadata).toString();
-        } catch (IOException e) {
-            throw new IllegalStateException(
-                    "ByteArrayInputStream throws an IOException!", e);
-        }
-    }
-
-    /**
-     * Determines the MIME type of the resource pointed to by the specified URL.
-     * Examines the file's header, and if it cannot determine the MIME type
-     * from the header, guesses the MIME type from the URL extension
-     * (e.g. "pdf).
-     *
-     * @deprecated Use the {@link Tika#detect(URL)} method
-     * @param url URL of the document
-     * @return type of the document
-     * @throws IOException if the document can not be accessed
-     */
-    public String getType(URL url) throws IOException {
-        InputStream stream = url.openStream();
-        try {
-            Metadata metadata = new Metadata();
-            metadata.set(Metadata.RESOURCE_NAME_KEY, url.toString());
-            return detect(stream, metadata).toString();
-        } finally {
-            stream.close();
-        }
-    }
-
-    /**
-     * Find the Mime Content Type of a document from its name and its content.
-     * The policy used to guess the Mime Content Type is:
-     * <ol>
-     * <li>Try to find the type based on the provided data.</li>
-     * <li>If a type is found, then return it, otherwise try to find the type
-     * based on the file name</li>
-     * </ol>
-     *
-     *
-     * @deprecated Use the {@link Tika#detect(byte[], String)} method
-     * @param name
-     *            of the document to analyze.
-     * @param data
-     *            are the first bytes of the document's content.
-     * @return the Mime Content Type of the specified document, or
-     *         <code>null</code> if none is found.
-     * @see #getMinLength()
-     */
-    public MimeType getMimeType(String name, byte[] data) {
-        // First, try to get the mime-type from the content
-        MimeType dataType = getMimeType(data);
-
-        // Then, try to get the mime-type from the document name
-        MimeType nameType = getMimeType(name);
-
-        // Use the more specific of the two types
-        if (registry.isSpecializationOf(
-                nameType.getType(), dataType.getType())) {
-            return nameType;
-        } else {
-            return dataType;
-        }
-    }
-
-    /**
-     * Returns the MIME type that best matches the given document name and
-     * the first few bytes of the given document stream.
-     *
-     * @deprecated Use the {@link Tika#detect(InputStream,String)} method
-     * @see #getMimeType(String, byte[])
-     * @param name document name
-     * @param stream document stream
-     * @return matching MIME type, or <code>null</code> if no match is found
-     * @throws IOException if the stream can not be read
-     */
-    public MimeType getMimeType(String name, InputStream stream)
-            throws IOException {
-        return getMimeType(name, readMagicHeader(stream));
-    }
-
-    /**
      * Returns the registered media type with the given name (or alias).
      * The named media type is automatically registered (and returned) if
      * it doesn't already exist.

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java Thu Oct 27 16:13:26 2011
@@ -40,7 +40,12 @@ public abstract class AbstractParser imp
     /**
      * Calls the
      * {@link Parser#parse(InputStream, ContentHandler, Metadata, ParseContext)}
-     * method with an empty {@link ParseContext}.
+     * method with an empty {@link ParseContext}. This method exists as a
+     * leftover from Tika 0.x when the three-argument parse() method still
+     * existed in the {@link Parser} interface. No new code should call this
+     * method anymore, it's only here for backwards compatibility.
+     *
+     * @deprecated use the {@link Parser#parse(InputStream, ContentHandler, Metadata, ParseContext)} method instead
      */
     public void parse(
             InputStream stream, ContentHandler handler, Metadata metadata)

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Thu Oct 27 16:13:26 2011
@@ -80,15 +80,6 @@ public class AutoDetectParser extends Co
     }
 
     /**
-     * @deprecated This method will be removed in Tika 1.0
-     */
-    public void setConfig(TikaConfig config) {
-        setParsers(config.getParsers());
-        setDetector(config.getDetector());
-        setMediaTypeRegistry(config.getMediaTypeRegistry());
-    }
-
-    /**
      * Returns the type detector used by this parser to auto-detect the type
      * of a document.
      *

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java Thu Oct 27 16:13:26 2011
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.parser;
 
-import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collections;
 import java.util.Set;
@@ -25,7 +24,6 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
 
 /**
  * Dummy parser that always throws a {@link TikaException} without even

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java Thu Oct 27 16:13:26 2011
@@ -67,16 +67,4 @@ public interface Parser extends Serializ
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException;
 
-    /**
-     * The parse() method from Tika 0.4 and earlier. Please use the
-     * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}
-     * method instead in new code. Calls to this backwards compatibility
-     * method are forwarded to the new parse() method with an empty parse
-     * context.
-     *
-     * @deprecated This method will be removed in Apache Tika 1.0.
-     */
-    void parse(InputStream stream, ContentHandler handler, Metadata metadata)
-        throws IOException, SAXException, TikaException;
-
 }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java Thu Oct 27 16:13:26 2011
@@ -101,7 +101,8 @@ public class ParsingReader extends Reade
      * @throws IOException if the document can not be parsed
      */
     public ParsingReader(InputStream stream) throws IOException {
-        this(new AutoDetectParser(), stream, new Metadata());
+        this(new AutoDetectParser(), stream, new Metadata(), new ParseContext());
+        context.set(Parser.class, parser);
     }
 
     /**
@@ -113,7 +114,8 @@ public class ParsingReader extends Reade
      * @throws IOException if the document can not be parsed
      */
     public ParsingReader(InputStream stream, String name) throws IOException {
-        this(new AutoDetectParser(), stream, getMetadata(name));
+        this(new AutoDetectParser(), stream, getMetadata(name), new ParseContext());
+        context.set(Parser.class, parser);
     }
 
     /**
@@ -203,27 +205,6 @@ public class ParsingReader extends Reade
     }
 
     /**
-     * @deprecated This method will be removed in Apache Tika 1.0
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
-     */
-    public ParsingReader(Parser parser, InputStream stream, Metadata metadata)
-            throws IOException {
-        this(parser, stream, metadata, new ParseContext());
-        context.set(Parser.class, parser);
-    }
-
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
-     */
-    public ParsingReader(
-            Parser parser, InputStream stream, Metadata metadata,
-            Executor executor) throws IOException {
-        this(parser, stream, metadata, new ParseContext(), executor);
-        context.set(Parser.class, parser);
-    }
-
-    /**
      * The background parsing task.
      */
     private class ParsingTask implements Runnable {

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SafeContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SafeContentHandler.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SafeContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SafeContentHandler.java Thu Oct 27 16:13:26 2011
@@ -188,13 +188,6 @@ public class SafeContentHandler extends 
     }
 
     /**
-     * @deprecated Use {@link #isInvalid(int)} instead
-     */
-    protected boolean isInvalid(char ch) {
-        return isInvalid((int) ch);
-    }
-
-    /**
      * Outputs the replacement for an invalid character. Subclasses can
      * override this method to use a custom replacement.
      *

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java Thu Oct 27 16:13:26 2011
@@ -24,6 +24,9 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.net.URL;
+
+import org.apache.tika.metadata.Metadata;
 
 import junit.framework.TestCase;
 
@@ -103,4 +106,14 @@ public class TikaInputStreamTest extends
         return buffer.toString("UTF-8");
     }
 
+    public void testGetMetadata() throws Exception {
+        URL url = TikaInputStreamTest.class.getResource("test.txt");
+        Metadata metadata = new Metadata();
+        TikaInputStream.get(url, metadata).close();
+        assertEquals("test.txt", metadata.get(Metadata.RESOURCE_NAME_KEY));
+        assertEquals(
+                Long.toString(new File(url.toURI()).length()),
+                metadata.get(Metadata.CONTENT_LENGTH));
+    }
+
 }

Copied: tika/trunk/tika-core/src/test/resources/org/apache/tika/io/test.txt (from r1189765, tika/trunk/tika-core/src/test/resources/org/apache/tika/metadata/test.txt)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/io/test.txt?p2=tika/trunk/tika-core/src/test/resources/org/apache/tika/io/test.txt&p1=tika/trunk/tika-core/src/test/resources/org/apache/tika/metadata/test.txt&r1=1189765&r2=1189827&rev=1189827&view=diff
==============================================================================
    (empty)

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Thu Oct 27 16:13:26 2011
@@ -22,9 +22,8 @@ import java.io.InputStream;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.utils.ParseUtils;
 import org.xml.sax.helpers.DefaultHandler;
 
 /**
@@ -34,62 +33,21 @@ public class TestParsers extends TikaTes
 
     private TikaConfig tc;
 
+    private Tika tika;
+
     public void setUp() throws Exception {
         tc = TikaConfig.getDefaultConfig();
-    }
-
-    public void testPDFExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testPDF.pdf");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "application/pdf");
-        String s3 = ParseUtils.getStringContent(file, TikaConfig
-                .getDefaultConfig());
-        assertEquals(s1, s2);
-        assertEquals(s1, s3);
-    }
-
-    public void testTXTExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testTXT.txt");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "text/plain");
-        assertEquals(s1, s2);
-    }
-
-    public void testXMLExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testXML.xml");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "application/xml");
-        assertEquals(s1, s2);
-    }
-
-    public void testPPTExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testPPT.ppt");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc,
-                "application/vnd.ms-powerpoint");
-        assertEquals(s1, s2);
-        Parser parser =
-            tc.getParser(MediaType.parse("application/vnd.ms-powerpoint"));
-        Metadata metadata = new Metadata();
-        InputStream stream = new FileInputStream(file);
-        try {
-            parser.parse(stream, new DefaultHandler(), metadata);
-        } finally {
-            stream.close();
-        }
-        assertEquals("Sample Powerpoint Slide", metadata.get(Metadata.TITLE));
+        tika = new Tika(tc);
     }
 
     public void testWORDxtraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testWORD.doc");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
-        assertEquals(s1, s2);
-        Parser parser = tc.getParser(MediaType.parse("application/msword"));
+        Parser parser = tika.getParser();
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
-            parser.parse(stream, new DefaultHandler(), metadata);
+            parser.parse(
+                    stream, new DefaultHandler(), metadata, new ParseContext());
         } finally {
             stream.close();
         }
@@ -99,75 +57,27 @@ public class TestParsers extends TikaTes
     public void testEXCELExtraction() throws Exception {
         final String expected = "Numbers and their Squares";
         File file = getResourceAsFile("/test-documents/testEXCEL.xls");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc,
-                "application/vnd.ms-excel");
-        assertEquals(s1, s2);
+        String s1 = tika.parseToString(file);
         assertTrue("Text does not contain '" + expected + "'", s1
                 .contains(expected));
-        Parser parser =
-            tc.getParser(MediaType.parse("application/vnd.ms-excel"));
+        Parser parser = tika.getParser();
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
-            parser.parse(stream, new DefaultHandler(), metadata);
+            parser.parse(
+                    stream, new DefaultHandler(), metadata, new ParseContext());
         } finally {
             stream.close();
         }
         assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
     }
 
-    public void testOOExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testOpenOffice2.odt");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc,
-                "application/vnd.oasis.opendocument.text");
-        assertEquals(s1, s2);
-    }
-
-    public void testOutlookExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/test-outlook.msg");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc,
-                "application/vnd.ms-outlook");
-        assertEquals(s1, s2);
-    }
-
-    public void testHTMLExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testHTML.html");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "text/html");
-        assertEquals(s1, s2);
-
-        Parser parser = tc.getParser(MediaType.parse("text/html"));
-        assertNotNull(parser);
-    }
-
-    public void testZipFileExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/test-documents.zip");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "application/zip");
-        assertEquals(s1, s2);
-
-        Parser parser = tc.getParser(MediaType.parse("application/zip"));
-        assertNotNull(parser);
-    }
-
-    public void testMP3Extraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testMP3id3v1.mp3");
-        String s1 = ParseUtils.getStringContent(file, tc);
-        String s2 = ParseUtils.getStringContent(file, tc, "audio/mpeg");
-        assertEquals(s1, s2);
-
-        Parser parser = tc.getParser(MediaType.parse("audio/mpeg"));
-        assertNotNull(parser);
-    }
-
     public void testOptionalHyphen() throws Exception {
-        final String[] extensions = new String[] {"ppt", "pptx", "doc", "docx", "rtf", "pdf"};
-        for(String extension : extensions) {
+        String[] extensions =
+                new String[] { "ppt", "pptx", "doc", "docx", "rtf", "pdf"};
+        for (String extension : extensions) {
             File file = getResourceAsFile("/test-documents/testOptionalHyphen." + extension);
-            String content = ParseUtils.getStringContent(file, tc);
+            String content = tika.parseToString(file);
             assertTrue("optional hyphen was not handled for '" + extension + "' file type: " + content,
                        content.contains("optionalhyphen") ||
                        content.contains("optional\u00adhyphen") ||   // soft hyphen
@@ -179,7 +89,7 @@ public class TestParsers extends TikaTes
 
     private void verifyComment(String extension, String fileName) throws Exception {
         File file = getResourceAsFile("/test-documents/" + fileName + "." + extension);
-        String content = ParseUtils.getStringContent(file, tc);
+        String content = tika.parseToString(file);
         assertTrue(extension + ": content=" + content + " did not extract text",
                    content.contains("Here is some text"));
         assertTrue(extension + ": content=" + content + " did not extract comment",

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Thu Oct 27 16:13:26 2011
@@ -27,15 +27,13 @@ import org.apache.poi.poifs.filesystem.N
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MimeTypes;
 
 /**
  * Junit test class for {@link ContainerAwareDetector}
  */
 public class TestContainerAwareDetector extends TestCase {
 
-    private final Detector detector =
-        new ContainerAwareDetector(MimeTypes.getDefaultMimeTypes());
+    private final Detector detector = new DefaultDetector();
 
     private void assertDetect(String file, String type) throws Exception {
         TikaInputStream stream = TikaInputStream.get(

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/MimeTypesTest.java Thu Oct 27 16:13:26 2011
@@ -19,8 +19,10 @@ package org.apache.tika.mime;
 import static org.apache.tika.mime.MediaType.OCTET_STREAM;
 import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
-import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
 
 import junit.framework.TestCase;
 
@@ -93,15 +95,9 @@ public class MimeTypesTest extends TestC
         assertTrue(html.compareTo(html) == 0);
     }
 
-    /** Test getMimeType(byte[]) */
-    public void testGetMimeType_byteArray() {
-        try {
-            types.getMimeType((byte[])null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
-
+    /** Test magic-based type detection from a byte array prefix.
+     * @throws IOException if type detection fails */
+    public void testGetMimeType_byteArray() throws IOException {
         // Plain text detection
         assertText(new byte[] { (byte) 0xFF, (byte) 0xFE });
         assertText(new byte[] { (byte) 0xFF, (byte) 0xFE });
@@ -111,28 +107,19 @@ public class MimeTypesTest extends TestC
         assertNotText(new byte[] { '\t', '\r', '\n', 0x0E, 0x1C });
     }
 
-    private void assertText(byte[] prefix) {
+    private void assertText(byte[] prefix) throws IOException {
         assertMagic("text/plain", prefix);
     }
 
-    private void assertNotText(byte[] prefix) {
+    private void assertNotText(byte[] prefix) throws IOException {
         assertMagic("application/octet-stream", prefix);
     }
 
-    private void assertMagic(String expected, byte[] prefix) {
-        MimeType type = types.getMimeType(prefix);
+    private void assertMagic(String expected, byte[] prefix) throws IOException {
+        MediaType type =
+                types.detect(new ByteArrayInputStream(prefix), new Metadata());
         assertNotNull(type);
-        assertEquals(expected, type.getName());
-    }
-
-    /** Test getMimeType(InputStream) */
-    public void testGetMimeType_InputStream() throws IOException {
-        try {
-            types.getMimeType((InputStream)null);
-            fail("Expected IllegalArgumentException");
-        } catch (IllegalArgumentException e) {
-            // expected result
-        }
+        assertEquals(expected, type.toString());
     }
 
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Thu Oct 27 16:13:26 2011
@@ -21,11 +21,11 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.MalformedURLException;
 import java.net.URL;
 
 import junit.framework.TestCase;
 
+import org.apache.tika.Tika;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 
@@ -36,35 +36,27 @@ import org.apache.tika.metadata.Metadata
  */
 public class TestMimeTypes extends TestCase {
 
-    private MimeTypes repo;
+    private Tika tika;
 
-    private static URL u;
+    private MimeTypes repo;
 
-    static {
-        try {
-            u = new URL("http://mydomain.com/x.pdf?x=y");
-        } catch (MalformedURLException e) {
-            fail(e.getMessage());
-        }
-    }
+    private URL u;
 
     private static final File f = new File("/a/b/c/x.pdf");
 
-    public TestMimeTypes() {
-        try {
-            repo = TikaConfig.getDefaultConfig().getMimeRepository();
-        } catch (Exception e) {
-            fail(e.getMessage());
-        }
-
+    protected void setUp() throws Exception{
+        TikaConfig config = TikaConfig.getDefaultConfig();
+        repo = config.getMimeRepository();
+        tika = new Tika(config);
+        u = new URL("http://mydomain.com/x.pdf?x=y");
     }
 
     public void testCaseSensitivity() {
-        MimeType type = repo.getMimeType("test.PDF");
+        String type = tika.detect("test.PDF");
         assertNotNull(type);
-        assertEquals(repo.getMimeType("test.pdf"), type);
-        assertEquals(repo.getMimeType("test.PdF"), type);
-        assertEquals(repo.getMimeType("test.pdF"), type);
+        assertEquals(type, tika.detect("test.pdf"));
+        assertEquals(type, tika.detect("test.PdF"));
+        assertEquals(type, tika.detect("test.pdF"));
     }
 
     public void testLoadMimeTypes() throws MimeTypeException {
@@ -77,8 +69,8 @@ public class TestMimeTypes extends TestC
      */
     public void testGuessMimeTypes() throws Exception {
         assertTypeByName("application/pdf", "x.pdf");
-        assertEquals("application/pdf", repo.getMimeType(u).getName());
-        assertEquals("application/pdf", repo.getMimeType(f).getName());
+        assertEquals("application/pdf", tika.detect(u.toExternalForm()));
+        assertEquals("application/pdf", tika.detect(f.getPath()));
         assertTypeByName("text/plain", "x.txt");
         assertTypeByName("text/html", "x.htm");
         assertTypeByName("text/html", "x.html");
@@ -361,15 +353,13 @@ public class TestMimeTypes extends TestC
         String pattern = "rtg_sst_grb_0\\.5\\.\\d{8}";
         this.repo.addPattern(testType, pattern, true);
         String testFileName = "rtg_sst_grb_0.5.12345678";
-        assertNotNull(this.repo.getMimeType(testFileName));
-        assertEquals(this.repo.getMimeType(testFileName).getName(), "foo/bar");
+        assertEquals("foo/bar", tika.detect(testFileName));
 
         MimeType testType2 = new MimeType(MediaType.parse("foo/bar2"));
         this.repo.add(testType2);
         assertNotNull(repo.forName("foo/bar2"));
         this.repo.addPattern(testType2, pattern, false);
-        assertNotNull(this.repo.getMimeType(testFileName));
-        assertNotSame("foo/bar2", this.repo.getMimeType(testFileName).getName());
+        assertNotSame("foo/bar2", tika.detect(testFileName));
     }
     
     public void testRawDetection() throws Exception {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ParsingReaderTest.java Thu Oct 27 16:13:26 2011
@@ -79,8 +79,8 @@ public class ParsingReaderTest extends T
         Metadata metadata = new Metadata();
         InputStream stream = ParsingReaderTest.class.getResourceAsStream(
                 "/test-documents/testEXCEL.xls");
-        Reader reader =
-            new ParsingReader(new AutoDetectParser(), stream, metadata);
+        Reader reader = new ParsingReader(
+                new AutoDetectParser(), stream, metadata, new ParseContext());
         try {
             // Metadata should already be available
             assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java Thu Oct 27 16:13:26 2011
@@ -21,6 +21,7 @@ import java.io.InputStream;
 
 //TIKA imports
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.hdf.HDFParser;
 import org.apache.tika.sax.BodyContentHandler;
@@ -54,7 +55,7 @@ public class HDFParserTest extends TestC
         InputStream stream = HDFParser.class
                 .getResourceAsStream("/test-documents/test.he5");
         try {
-            parser.parse(stream, handler, metadata);
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
             stream.close();
         }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java Thu Oct 27 16:13:26 2011
@@ -17,6 +17,8 @@
 package org.apache.tika.parser.image;
 
 import junit.framework.TestCase;
+
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.image.TiffParser;
 import org.apache.tika.metadata.Metadata;
@@ -34,7 +36,7 @@ public class TiffParserTest extends Test
         metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
         InputStream stream =
             getClass().getResourceAsStream("/test-documents/testTIFF.tif");
-        parser.parse(stream, new DefaultHandler(), metadata);
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
 
         assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
         		"more contributor license agreements.  See the NOTICE file " +

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java Thu Oct 27 16:13:26 2011
@@ -16,13 +16,13 @@
  */
 package org.apache.tika.parser.microsoft;
 
-import org.apache.tika.detect.ContainerAwareDetector;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
 import org.apache.tika.extractor.ContainerExtractor;
 import org.apache.tika.extractor.ParserContainerExtractor;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
@@ -35,9 +35,7 @@ public class TNEFParserTest extends Abst
    
    public void testBasics() throws Exception {
       TikaInputStream stream = getTestFile(file);
-      ContainerAwareDetector detector =
-         new ContainerAwareDetector(MimeTypes.getDefaultMimeTypes());
-      
+      Detector detector = new DefaultDetector();
       try {
          assertEquals(
                  MediaType.application("vnd.ms-tnef"),

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Thu Oct 27 16:13:26 2011
@@ -26,8 +26,6 @@ import javax.xml.transform.sax.Transform
 import javax.xml.transform.stream.StreamResult;
 
 import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.ContainerAwareDetector;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMetadataKeys;
@@ -38,16 +36,8 @@ import org.apache.tika.sax.BodyContentHa
 import org.xml.sax.ContentHandler;
 
 public class OOXMLParserTest extends TikaTest {
-    private Parser parser;
-   
-    @Override
-    protected void setUp() throws Exception {
-       TikaConfig config = TikaConfig.getDefaultConfig();
-       ContainerAwareDetector detector = new ContainerAwareDetector(
-             config.getMimeRepository()
-       );
-       parser = new AutoDetectParser(detector);
-    }
+
+    private Parser parser = new AutoDetectParser();
 
    public void testExcel() throws Exception {
         InputStream input = OOXMLParserTest.class

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java Thu Oct 27 16:13:26 2011
@@ -21,6 +21,7 @@ import java.io.InputStream;
 
 //TIKA imports
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
@@ -46,7 +47,7 @@ public class NetCDFParserTest extends Te
         InputStream stream = NetCDFParser.class
                 .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
         try {
-            parser.parse(stream, handler, metadata);
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
             stream.close();
         }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java?rev=1189827&r1=1189826&r2=1189827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java Thu Oct 27 16:13:26 2011
@@ -26,19 +26,18 @@ import javax.xml.transform.sax.SAXTransf
 import javax.xml.transform.sax.TransformerHandler;
 import javax.xml.transform.stream.StreamResult;
 
+import org.apache.tika.Tika;
 import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.WriteOutContentHandler;
-import org.apache.tika.utils.ParseUtils;
 
 /**
  * Junit test class for the Tika {@link RTFParser}
  */
 public class RTFParserTest extends TikaTest {
-    private RTFParser parser;
-    private static final TikaConfig defaultConfig = TikaConfig.getDefaultConfig();
+
+    private Tika tika = new Tika();
 
     private static class Result {
         public final String text;
@@ -50,16 +49,12 @@ public class RTFParserTest extends TikaT
         }
     }
 
-    public void setUp() throws Exception {
-        parser = new RTFParser();
-    }
-
     public void testBasicExtraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testRTF.rtf");
         
         Metadata metadata = new Metadata();
         StringWriter writer = new StringWriter();
-        parser.parse(
+        tika.getParser().parse(
                      new FileInputStream(file),
                      new WriteOutContentHandler(writer),
                      metadata,
@@ -100,10 +95,7 @@ public class RTFParserTest extends TikaT
 
     public void testTableCellSeparation() throws Exception {
         File file = getResourceAsFile("/test-documents/testRTFTableCellSeparation.rtf");
-        String s1 = ParseUtils.getStringContent(file, defaultConfig);
-        String s2 = ParseUtils.getStringContent(file, defaultConfig, "application/rtf");
-        assertEquals(s1, s2);
-        String content = s1;
+        String content = tika.parseToString(file);
         content = content.replaceAll("\\s+"," ");
         assertTrue(content.contains("a b c d \u00E4 \u00EB \u00F6 \u00FC"));
         assertContains("a b c d \u00E4 \u00EB \u00F6 \u00FC", content);
@@ -118,34 +110,21 @@ public class RTFParserTest extends TikaT
 
     public void testWordPadCzechCharactersExtraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testRTFWordPadCzechCharacters.rtf");
-        String s1 = ParseUtils.getStringContent(file, defaultConfig);
-        String s2 = ParseUtils.getStringContent(file, defaultConfig, "application/rtf");
-        assertEquals(s1, s2);
+        String s1 = tika.parseToString(file);
         assertTrue(s1.contains("\u010Cl\u00E1nek t\u00FDdne"));
         assertTrue(s1.contains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty"));
     }
 
     public void testWord2010CzechCharactersExtraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testRTFWord2010CzechCharacters.rtf");
-        String s1 = ParseUtils.getStringContent(file, defaultConfig);
-        String s2 = ParseUtils.getStringContent(file, defaultConfig, "application/rtf");
-        assertEquals(s1, s2);
+        String s1 = tika.parseToString(file);
         assertTrue(s1.contains("\u010Cl\u00E1nek t\u00FDdne"));
         assertTrue(s1.contains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty"));
     }
 
-    public void testExtraction() throws Exception {
-        File file = getResourceAsFile("/test-documents/testRTF.rtf");
-        String s1 = ParseUtils.getStringContent(file, defaultConfig);
-        String s2 = ParseUtils.getStringContent(file, defaultConfig, "application/rtf");
-        assertEquals(s1, s2);
-    }
-
     public void testMS932Extraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testRTF-ms932.rtf");
-        String s1 = ParseUtils.getStringContent(file, defaultConfig);
-        String s2 = ParseUtils.getStringContent(file, defaultConfig, "application/rtf");
-        assertEquals(s1, s2);
+        String s1 = tika.parseToString(file);
 
         // Hello in Japanese
         assertTrue(s1.contains("\u3053\u3093\u306b\u3061\u306f"));
@@ -157,17 +136,15 @@ public class RTFParserTest extends TikaT
 
     public void testUmlautSpacesExtraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testRTFUmlautSpaces.rtf");
-        String s1 = ParseUtils.getStringContent(file, defaultConfig);
-        String s2 = ParseUtils.getStringContent(file, defaultConfig, "application/rtf");
-        assertEquals(s1, s2);
+        String s1 = tika.parseToString(file);
         assertTrue(s1.contains("\u00DCbersicht"));
     }
 
     public void testGothic() throws Exception {
-    	String content = getText("testRTFUnicodeGothic.rtf");
-    	assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
+        String content = getText("testRTFUnicodeGothic.rtf");
+        assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
     }
-    
+
     public void testJapaneseText() throws Exception {
         Result r = getResult("testRTFJapanese.rtf");
         String content = r.text;
@@ -302,7 +279,7 @@ public class RTFParserTest extends TikaT
        
         Metadata metadata = new Metadata();
         StringWriter writer = new StringWriter();
-        parser.parse(
+        tika.getParser().parse(
                      new FileInputStream(file),
                      new WriteOutContentHandler(writer),
                      metadata,
@@ -335,7 +312,7 @@ public class RTFParserTest extends TikaT
         // Try with a document containing various tables and formattings
         InputStream input = getResourceAsStream("/test-documents/" + filename);
         try {
-            parser.parse(input, handler, metadata, new ParseContext());
+            tika.getParser().parse(input, handler, metadata, new ParseContext());
             return new XMLResult(sw.toString(), metadata);
         } finally {
             input.close();