You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/03/09 17:23:23 UTC
svn commit: r1079871 - /tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
Author: jukka
Date: Wed Mar 9 16:23:23 2011
New Revision: 1079871
URL: http://svn.apache.org/viewvc?rev=1079871&view=rev
Log:
TIKA-607: ParseUtils.getStringContent( ) of a text file - parser is null
Deprecate the ParseUtils class and make it use the Tika facade instead
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=1079871&r1=1079870&r2=1079871&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/utils/ParseUtils.java Wed Mar 9 16:23:23 2011
@@ -17,26 +17,25 @@
package org.apache.tika.utils;
//JDK imports
-import java.io.BufferedInputStream;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
+import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMimeKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
/**
* Contains utility methods for parsing documents. Intended to provide simple
* entry points into the Tika framework.
+ *
+ * @deprecated Use the {@link Tika} facade class instead.
*/
public class ParseUtils implements TikaMimeKeys {
@@ -50,6 +49,7 @@ public class ParseUtils implements TikaM
* the document's MIME type
* @return a parser appropriate to this MIME type
* @throws TikaException
+ * @deprecated
*/
public static Parser getParser(String mimeType, TikaConfig config)
throws TikaException {
@@ -68,6 +68,7 @@ public class ParseUtils implements TikaM
* @return a parser appropriate to this MIME type and ready to read input
* from the specified document
* @throws TikaException
+ * @deprecated
*/
public static Parser getParser(URL documentUrl, TikaConfig config)
throws TikaException {
@@ -87,6 +88,7 @@ public class ParseUtils implements TikaM
* @return a parser appropriate to this MIME type and ready to read input
* from the specified document
* @throws TikaException
+ * @deprecated
*/
public static Parser getParser(File documentFile, TikaConfig config)
throws TikaException {
@@ -102,18 +104,14 @@ public class ParseUtils implements TikaM
* @param config
* @param mimeType MIME type of the data
* @return the string content parsed from the document
+ * @deprecated Use the {@link Tika#parseToString(InputStream, Metadata)} method
*/
public static String getStringContent(
InputStream stream, TikaConfig config, String mimeType)
throws TikaException, IOException {
- try {
- Parser parser = config.getParser(MediaType.parse(mimeType));
- ContentHandler handler = new BodyContentHandler();
- parser.parse(stream, handler, new Metadata());
- return handler.toString();
- } catch (SAXException e) {
- throw new TikaException("Unexpected SAX error", e);
- }
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, mimeType);
+ return new Tika(config).parseToString(stream, metadata);
}
/**
@@ -123,12 +121,11 @@ public class ParseUtils implements TikaM
* URL pointing to the document to parse
* @param config
* @return the string content parsed from the document
+ * @deprecated Use the {@link Tika#parseToString(URL)} method
*/
public static String getStringContent(URL documentUrl, TikaConfig config)
throws TikaException, IOException {
- String mime = config.getMimeRepository().getMimeType(documentUrl)
- .getName();
- return getStringContent(documentUrl, config, mime);
+ return new Tika(config).parseToString(documentUrl);
}
/**
@@ -140,16 +137,14 @@ public class ParseUtils implements TikaM
* @param mimeType
* MIME type of the data
* @return the string content parsed from the document
+ * @deprecated Use the {@link Tika#parseToString(URL)} method
*/
public static String getStringContent(
URL documentUrl, TikaConfig config, String mimeType)
throws TikaException, IOException {
- InputStream stream = documentUrl.openStream();
- try {
- return getStringContent(stream, config, mimeType);
- } finally {
- stream.close();
- }
+ Metadata metadata = new Metadata();
+ InputStream stream = TikaInputStream.get(documentUrl, metadata);
+ return new Tika(config).parseToString(stream, metadata);
}
/**
@@ -161,17 +156,14 @@ public class ParseUtils implements TikaM
* @param mimeType
* MIME type of the data
* @return the string content parsed from the document
+ * @deprecated Use the {@link Tika#parseToString(File)} method
*/
public static String getStringContent(
File documentFile, TikaConfig config, String mimeType)
throws TikaException, IOException {
- InputStream stream = new BufferedInputStream(new FileInputStream(
- documentFile));
- try {
- return getStringContent(stream, config, mimeType);
- } finally {
- stream.close();
- }
+ Metadata metadata = new Metadata();
+ InputStream stream = TikaInputStream.get(documentFile, metadata);
+ return new Tika(config).parseToString(stream, metadata);
}
/**
@@ -181,12 +173,11 @@ public class ParseUtils implements TikaM
* File object pointing to the document to parse
* @param config
* @return the string content parsed from the document
+ * @deprecated Use the {@link Tika#parseToString(File)} method
*/
public static String getStringContent(File documentFile, TikaConfig config)
throws TikaException, IOException {
- String mime =
- config.getMimeRepository().getMimeType(documentFile).getName();
- return getStringContent(documentFile, config, mime);
+ return new Tika(config).parseToString(documentFile);
}
}