You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2010/11/07 00:57:37 UTC

svn commit: r1032187 - in /tika/trunk: tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java

Author: mattmann
Date: Sat Nov  6 23:57:36 2010
New Revision: 1032187

URL: http://svn.apache.org/viewvc?rev=1032187&view=rev
Log:
- fix for TIKA-537: command line option --list-parsers should list 2nd-level parsers below CompositeParsers

Modified:
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1032187&r1=1032186&r2=1032187&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Sat Nov  6 23:57:36 2010
@@ -48,19 +48,22 @@ import org.apache.log4j.WriterAppender;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.ContainerAwareDetector;
 import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.gui.TikaGUI;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.language.ProfilingHandler;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
-import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
 /**
@@ -162,7 +165,7 @@ public class TikaCLI {
 
     private boolean pipeMode = true;
 
-    public TikaCLI() throws TransformerConfigurationException, IOException, MimeTypeException {
+    public TikaCLI() throws TransformerConfigurationException, IOException, TikaException, SAXException {
         context = new ParseContext();
         detector = (new TikaConfig()).getMimeRepository();
         initParser();
@@ -310,17 +313,40 @@ public class TikaCLI {
         }
     }
 
+    /*
+     * Displays the loaded parsers and, if requested, their MIME types.
+     * If a parser is a composite parser, its sub-parsers and their
+     * MIME types are listed instead.
+     */
     private void displayParsers(boolean includeMimeTypes) {
-        // Invert the map
-        Map<MediaType,Parser> supported = parser.getParsers();
-        Map<Parser,Set<MediaType>> parsers = new HashMap<Parser, Set<MediaType>>();
-        for(Entry<MediaType, Parser> e : supported.entrySet()) {
-            if (!parsers.containsKey(e.getValue())) {
-                parsers.put(e.getValue(), new HashSet<MediaType>());
+        displayParser(parser, includeMimeTypes, 0);
+    }
+     
+    private void displayParser(Parser p, boolean includeMimeTypes, int i) {
+        boolean isComposite = (p instanceof CompositeParser);
+        String name = (p instanceof ParserDecorator) ?
+                      ((ParserDecorator) p).getWrappedParser().getClass().getName() :
+                      p.getClass().getName();
+        System.out.println(indent(i) + name + (isComposite ? " (Composite Parser):" : ""));
+        if (includeMimeTypes && !isComposite) {
+            for (MediaType mt : p.getSupportedTypes(context)) {
+                System.out.println(indent(i+2) + mt);
             }
-            parsers.get(e.getValue()).add(e.getKey());
         }
+        
+        if (isComposite) {
+            Parser[] subParsers = sortParsers(invertMediaTypeMap(((CompositeParser) p).getParsers()));
+            for(Parser sp : subParsers) {
+                displayParser(sp, includeMimeTypes, i+2);
+            }
+        }
+    }
 
+    private String indent(int indent) {
+        return "                     ".substring(0, indent);
+    }
+
+    private Parser[] sortParsers(Map<Parser, Set<MediaType>> parsers) {
         // Get a nicely sorted list of the parsers
         Parser[] sortedParsers = parsers.keySet().toArray(new Parser[parsers.size()]);
         Arrays.sort(sortedParsers, new Comparator<Parser>() {
@@ -330,16 +356,18 @@ public class TikaCLI {
                 return name1.compareTo(name2);
             }
         });
+        return sortedParsers;
+    }
 
-        // Display
-        for (Parser p : sortedParsers) {
-            System.out.println(p.getClass().getName());
-            if (includeMimeTypes) {
-                for (MediaType mt : parsers.get(p)) {
-                    System.out.println("  " + mt);
-                }
+    private Map<Parser, Set<MediaType>> invertMediaTypeMap(Map<MediaType, Parser> supported) {
+        Map<Parser,Set<MediaType>> parsers = new HashMap<Parser, Set<MediaType>>();
+        for(Entry<MediaType, Parser> e : supported.entrySet()) {
+            if (!parsers.containsKey(e.getValue())) {
+                parsers.put(e.getValue(), new HashSet<MediaType>());
             }
+            parsers.get(e.getValue()).add(e.getKey());
         }
+        return parsers;
     }
 
     /**

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java?rev=1032187&r1=1032186&r2=1032187&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java Sat Nov  6 23:57:36 2010
@@ -99,5 +99,13 @@ public class ParserDecorator implements 
             throws IOException, SAXException, TikaException {
         parse(stream, handler, metadata, new ParseContext());
     }
+    
+    /**
+     * Gets the parser wrapped by this ParserDecorator.
+     * @return the wrapped parser
+     */
+    public Parser getWrappedParser() {
+        return this.parser;
+    }
 
 }