You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/10 08:33:52 UTC
svn commit: r1694962 -
/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
Author: nick
Date: Mon Aug 10 06:33:51 2015
New Revision: 1694962
URL: http://svn.apache.org/r1694962
Log:
Replace deprecated method calls and outdated practices in the example
Modified:
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1694962&r1=1694961&r2=1694962&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Mon Aug 10 06:33:51 2015
@@ -15,6 +15,7 @@
package org.apache.tika.example;
import java.io.File;
+import java.io.InputStream;
import org.apache.commons.io.FileUtils;
import org.apache.tika.config.TikaConfig;
@@ -35,10 +36,9 @@ import org.xml.sax.ContentHandler;
* Demonstrates how to call the different components within Tika: its
* {@link Detector} framework (aka MIME identification and repository), its
* {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
+ *
* It also shows the "easy way" via {@link AutoDetectParser}
*/
-
-@SuppressWarnings("deprecation")
public class MyFirstTika {
public static void main(String[] args) throws Exception {
String filename = args[0];
@@ -77,16 +77,18 @@ public class MyFirstTika {
System.out.println("Examining: [" + filename + "]");
+ metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
System.out.println("The MIME type (based on filename) is: ["
- + mimeRegistry.getMimeType(filename) + "]");
+ + mimeRegistry.detect(null, metadata) + "]");
+ InputStream stream = TikaInputStream.get(new File(filename));
System.out.println("The MIME type (based on MAGIC) is: ["
- + mimeRegistry.getMimeType(new File(filename)) + "]");
+ + mimeRegistry.detect(stream, metadata) + "]");
- Detector mimeDetector = (Detector) mimeRegistry;
+ stream = TikaInputStream.get(new File(filename));
+ Detector detector = tikaConfig.getDetector();
System.out.println("The MIME type (based on the Detector interface) is: ["
- + mimeDetector.detect(new File(filename).toURI().toURL()
- .openStream(), new Metadata()) + "]");
+ + detector.detect(stream, metadata) + "]");
LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile(
FileUtils.readFileToString(new File(filename))));
@@ -94,11 +96,14 @@ public class MyFirstTika {
System.out.println("The language of this content is: ["
+ lang.getLanguage() + "]");
- Parser parser = tikaConfig.getParser(
- MediaType.parse(mimeRegistry.getMimeType(filename).getName()));
+ // Get a non-detecting parser that handles all the types it can
+ Parser parser = tikaConfig.getParser();
+ // Tell it what we think the content is
+ MediaType type = detector.detect(stream, metadata);
+ metadata.set(Metadata.CONTENT_TYPE, type.toString());
+ // Have the file parsed to get the content and metadata
ContentHandler handler = new BodyContentHandler();
- parser.parse(new File(filename).toURI().toURL().openStream(), handler,
- metadata, new ParseContext());
+ parser.parse(stream, handler, metadata, new ParseContext());
return handler.toString();
}