You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/10 08:33:52 UTC

svn commit: r1694962 - /tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java

Author: nick
Date: Mon Aug 10 06:33:51 2015
New Revision: 1694962

URL: http://svn.apache.org/r1694962
Log:
Replace deprecated method calls and outdated practices in the example

Modified:
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1694962&r1=1694961&r2=1694962&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Mon Aug 10 06:33:51 2015
@@ -15,6 +15,7 @@
 package org.apache.tika.example;
 
 import java.io.File;
+import java.io.InputStream;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.tika.config.TikaConfig;
@@ -35,10 +36,9 @@ import org.xml.sax.ContentHandler;
  * Demonstrates how to call the different components within Tika: its
  * {@link Detector} framework (aka MIME identification and repository), its
  * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
+ * 
  * It also shows the "easy way" via {@link AutoDetectParser}
  */
-
-@SuppressWarnings("deprecation")
 public class MyFirstTika {
     public static void main(String[] args) throws Exception {
         String filename = args[0];
@@ -77,16 +77,18 @@ public class MyFirstTika {
 
         System.out.println("Examining: [" + filename + "]");
 
+        metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
         System.out.println("The MIME type (based on filename) is: ["
-                + mimeRegistry.getMimeType(filename) + "]");
+                + mimeRegistry.detect(null, metadata) + "]");
 
+        InputStream stream = TikaInputStream.get(new File(filename));
         System.out.println("The MIME type (based on MAGIC) is: ["
-                + mimeRegistry.getMimeType(new File(filename)) + "]");
+                + mimeRegistry.detect(stream, metadata) + "]");
 
-        Detector mimeDetector = (Detector) mimeRegistry;
+        stream = TikaInputStream.get(new File(filename));
+        Detector detector = tikaConfig.getDetector();
         System.out.println("The MIME type (based on the Detector interface) is: ["
-                + mimeDetector.detect(new File(filename).toURI().toURL()
-                        .openStream(), new Metadata()) + "]");
+                + detector.detect(stream, metadata) + "]");
 
         LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile(
                 FileUtils.readFileToString(new File(filename))));
@@ -94,11 +96,14 @@ public class MyFirstTika {
         System.out.println("The language of this content is: ["
                 + lang.getLanguage() + "]");
 
-        Parser parser = tikaConfig.getParser(
-                MediaType.parse(mimeRegistry.getMimeType(filename).getName()));
+        // Get a non-detecting parser that handles all the types it can
+        Parser parser = tikaConfig.getParser();
+        // Tell it what we think the content is
+        MediaType type = detector.detect(stream, metadata);
+        metadata.set(Metadata.CONTENT_TYPE, type.toString());
+        // Have the file parsed to get the content and metadata
         ContentHandler handler = new BodyContentHandler();
-        parser.parse(new File(filename).toURI().toURL().openStream(), handler,
-                metadata, new ParseContext());
+        parser.parse(stream, handler, metadata, new ParseContext());
         
         return handler.toString();
     }