You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/10/04 00:00:55 UTC

svn commit: r1004055 - /tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Author: jukka
Date: Sun Oct  3 22:00:55 2010
New Revision: 1004055

URL: http://svn.apache.org/viewvc?rev=1004055&view=rev
Log:
TIKA-411: Generate list of supported and detected types automatically

Add a --list-supported-types option to the Tika CLI and implement a basic listing of known types, aliases and matching parser classes.

Modified:
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1004055&r1=1004054&r2=1004055&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Sun Oct  3 22:00:55 2010
@@ -30,8 +30,8 @@ import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.Set;
 import java.util.Map.Entry;
+import java.util.Set;
 
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.TransformerConfigurationException;
@@ -49,6 +49,7 @@ import org.apache.tika.io.TikaInputStrea
 import org.apache.tika.language.ProfilingHandler;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -170,12 +171,13 @@ public class TikaCLI {
         } else if (arg.equals("--list-parser-detail") || arg.equals("--list-parser-details")) {
             pipeMode = false;
             displayParsers(true);
-        } 
-          else if(arg.equals("--list-met-models")){
+        } else if(arg.equals("--list-met-models")){
             pipeMode = false;
             displayMetModels();
-        }
-          else if (arg.startsWith("-e")) {
+        } else if(arg.equals("--list-supported-types")){
+            pipeMode = false;
+            displaySupportedTypes();
+        } else if (arg.startsWith("-e")) {
             encoding = arg.substring("-e".length());
         } else if (arg.startsWith("--encoding=")) {
             encoding = arg.substring("--encoding=".length());
@@ -243,6 +245,9 @@ public class TikaCLI {
         out.println("    --list-met-models");
         out.println("         List the available metadata models, and their supported keys");
         out.println();
+        out.println("    --list-supported-types");
+        out.println("         List all known media types and related information");
+        out.println();
         out.println("Description:");
         out.println("    Apache Tika will parse the file(s) specified on the");
         out.println("    command line and output the extracted text content");
@@ -320,6 +325,25 @@ public class TikaCLI {
     }
 
     /**
+     * Prints all the known media types, aliases and matching parser classes.
+     */
+    private void displaySupportedTypes() {
+        MediaTypeRegistry registry = parser.getMediaTypeRegistry();
+        Map<MediaType, Parser> parsers = parser.getParsers();
+
+        for (MediaType type : registry.getTypes()) {
+            System.out.println(type);
+            for (MediaType alias : registry.getAliases(type)) {
+                System.out.println("  alias: " + alias);
+            }
+            Parser parser = parsers.get(type);
+            if (parser != null) {
+                System.out.println("  parser: " + parser.getClass().getName());
+            }
+        }
+    }
+
+    /**
      * Returns a {@link System#out} writer with the given output encoding.
      *
      * @see <a href="https://issues.apache.org/jira/browse/TIKA-277">TIKA-277</a>