You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/10/04 00:00:55 UTC
svn commit: r1004055 -
/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Author: jukka
Date: Sun Oct 3 22:00:55 2010
New Revision: 1004055
URL: http://svn.apache.org/viewvc?rev=1004055&view=rev
Log:
TIKA-411: Generate list of supported and detected types automatically
Add a --list-supported-types option to the Tika CLI and implement a basic listing of known types, aliases, and matching parser classes.
Modified:
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1004055&r1=1004054&r2=1004055&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Sun Oct 3 22:00:55 2010
@@ -30,8 +30,8 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
+import java.util.Set;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
@@ -49,6 +49,7 @@ import org.apache.tika.io.TikaInputStrea
import org.apache.tika.language.ProfilingHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
@@ -170,12 +171,13 @@ public class TikaCLI {
} else if (arg.equals("--list-parser-detail") || arg.equals("--list-parser-details")) {
pipeMode = false;
displayParsers(true);
- }
- else if(arg.equals("--list-met-models")){
+ } else if(arg.equals("--list-met-models")){
pipeMode = false;
displayMetModels();
- }
- else if (arg.startsWith("-e")) {
+ } else if(arg.equals("--list-supported-types")){
+ pipeMode = false;
+ displaySupportedTypes();
+ } else if (arg.startsWith("-e")) {
encoding = arg.substring("-e".length());
} else if (arg.startsWith("--encoding=")) {
encoding = arg.substring("--encoding=".length());
@@ -243,6 +245,9 @@ public class TikaCLI {
out.println(" --list-met-models");
out.println(" List the available metadata models, and their supported keys");
out.println();
+ out.println(" --list-supported-types");
+ out.println(" List all known media types and related information");
+ out.println();
out.println("Description:");
out.println(" Apache Tika will parse the file(s) specified on the");
out.println(" command line and output the extracted text content");
@@ -320,6 +325,25 @@ public class TikaCLI {
}
/**
+ * Prints all the known media types, aliases and matching parser classes.
+ */
+ private void displaySupportedTypes() {
+ MediaTypeRegistry registry = parser.getMediaTypeRegistry();
+ Map<MediaType, Parser> parsers = parser.getParsers();
+
+ for (MediaType type : registry.getTypes()) {
+ System.out.println(type);
+ for (MediaType alias : registry.getAliases(type)) {
+ System.out.println(" alias: " + alias);
+ }
+ Parser parser = parsers.get(type);
+ if (parser != null) {
+ System.out.println(" parser: " + parser.getClass().getName());
+ }
+ }
+ }
+
+ /**
* Returns a {@link System#out} writer with the given output encoding.
*
* @see <a href="https://issues.apache.org/jira/browse/TIKA-277">TIKA-277</a>