You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/09/23 23:04:08 UTC

svn commit: r1704934 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/language/translate/ tika-example/src/main/java/org/apache/tika/example/ tika-example/src/test/java/org/apache/tika/example/

Author: nick
Date: Wed Sep 23 21:04:08 2015
New Revision: 1704934

URL: http://svn.apache.org/viewvc?rev=1704934&view=rev
Log:
TIKA-1657 Update the example of dumping a Tika Config to support different output modes, for Translators and Detectors

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java?rev=1704934&r1=1704933&r2=1704934&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java Wed Sep 23 21:04:08 2015
@@ -99,6 +99,19 @@ public class DefaultTranslator implement
         }
         throw new TikaException("No translators currently available");
     }
+    
+    /**
+     * Returns all available translators
+     */
+    public List<Translator> getTranslators() {
+        return getDefaultTranslators(loader);
+    }
+    /**
+     * Returns the current translator
+     */
+    public Translator getTranslator() {
+        return getFirstAvailable(loader);
+    }
 
     public boolean isAvailable() {
         return getFirstAvailable(loader) != null;

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1704934&r1=1704933&r2=1704934&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java Wed Sep 23 21:04:08 2015
@@ -17,7 +17,8 @@
 
 package org.apache.tika.example;
 
-import java.io.File;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
@@ -29,6 +30,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
+
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.transform.OutputKeys;
@@ -38,6 +40,7 @@ import javax.xml.transform.dom.DOMSource
 import javax.xml.transform.stream.StreamResult;
 
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.CompositeDetector;
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
@@ -51,8 +54,6 @@ import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 
 /**
  * This class shows how to dump a TikaConfig object to a configuration file.
@@ -70,21 +71,21 @@ public class DumpTikaConfigExample {
      * @param writer writer to which to write
      * @throws Exception
      */
-    public void dump(TikaConfig config, Writer writer, String encoding) throws Exception {
+    public void dump(TikaConfig config, Mode mode, Writer writer, String encoding) throws Exception {
         DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
         DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
+        
         // root elements
         Document doc = docBuilder.newDocument();
         Element rootElement = doc.createElement("properties");
 
         doc.appendChild(rootElement);
-        addMimeComment(rootElement, doc);
-        addTranslator(rootElement, doc, config);
-        addDetectors(rootElement, doc, config);
-        addParsers(rootElement, doc, config);
+        addMimeComment(mode, rootElement, doc);
+        addTranslator(mode, rootElement, doc, config);
+        addDetectors(mode, rootElement, doc, config);
+        addParsers(mode, rootElement, doc, config);
 
-
-        //now write
+        // now write
         TransformerFactory transformerFactory = TransformerFactory.newInstance();
         Transformer transformer = transformerFactory.newTransformer();
         transformer.setOutputProperty(OutputKeys.INDENT, "yes");
@@ -96,33 +97,50 @@ public class DumpTikaConfigExample {
         transformer.transform(source, result);
     }
 
-    private void addTranslator(Element rootElement, Document doc, TikaConfig config) {
-        //TikaConfig only reads the first translator from the list,
-        //but it looks like it expects a list
+    private void addTranslator(Mode mode, Element rootElement, Document doc, TikaConfig config) {
+        // TikaConfig only reads the first translator from the list,
+        //  but it looks like it expects a list
         Translator translator = config.getTranslator();
-        if (translator instanceof DefaultTranslator) {
+        if (mode == Mode.MINIMAL && translator instanceof DefaultTranslator) {
             Node mimeComment = doc.createComment(
                     "for example: <translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
             rootElement.appendChild(mimeComment);
         } else {
-            Element translatorElement = doc.createElement("translator");
-            translatorElement.setAttribute("class", translator.getClass().getCanonicalName());
-            rootElement.appendChild(translatorElement);
+            if (translator instanceof DefaultTranslator && mode == Mode.STATIC) {
+                translator = ((DefaultTranslator)translator).getTranslator();
+            }
+            if (translator != null) {
+                Element translatorElement = doc.createElement("translator");
+                translatorElement.setAttribute("class", translator.getClass().getCanonicalName());
+                rootElement.appendChild(translatorElement);
+            } else {
+                rootElement.appendChild(doc.createComment("No translators available"));
+            }
         }
     }
 
-    private void addMimeComment(Element rootElement, Document doc) {
+    private void addMimeComment(Mode mode, Element rootElement, Document doc) {
         Node mimeComment = doc.createComment(
                 "for example: <mimeTypeRepository resource=\"/org/apache/tika/mime/tika-mimetypes.xml\"/>");
         rootElement.appendChild(mimeComment);
     }
 
-    private void addDetectors(Element rootElement, Document doc, TikaConfig config) throws Exception {
+    private void addDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
         Detector detector = config.getDetector();
+        
+        if (mode == Mode.MINIMAL && detector instanceof DefaultDetector) {
+            // Don't output anything, all using defaults
+            return;
+        }
+        
         Element detectorsElement = doc.createElement("detectors");
-
-        if (detector instanceof DefaultDetector) {
-            List<Detector> children = ((DefaultDetector) detector).getDetectors();
+        if (mode == Mode.CURRENT && detector instanceof DefaultDetector ||
+            ! (detector instanceof CompositeDetector)) {
+            Element detectorElement = doc.createElement("detector");
+            detectorElement.setAttribute("class", detector.getClass().getCanonicalName());
+            detectorsElement.appendChild(detectorElement);
+        } else {
+            List<Detector> children = ((CompositeDetector)detector).getDetectors();
             for (Detector d : children) {
                 Element detectorElement = doc.createElement("detector");
                 detectorElement.setAttribute("class", d.getClass().getCanonicalName());
@@ -132,7 +150,7 @@ public class DumpTikaConfigExample {
         rootElement.appendChild(detectorsElement);
     }
 
-    private void addParsers(Element rootElement, Document doc, TikaConfig config) throws Exception {
+    private void addParsers(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
         Map<String, Parser> parsers = getConcreteParsers(config.getParser());
 
         Element parsersElement = doc.createElement("parsers");
@@ -187,18 +205,42 @@ public class DumpTikaConfigExample {
      */
     public static void main(String[] args) throws Exception {
         Charset encoding = UTF_8;
+        Mode mode = Mode.CURRENT;
+        String filename = null;
+        
+        for (String arg : args) {
+            if (arg.startsWith("-")) {
+                if (arg.contains("-dump-minimal")) {
+                    mode = Mode.MINIMAL;
+                } else if (arg.contains("-dump-current")) {
+                    mode = Mode.CURRENT;
+                } else if (arg.contains("-dump-static")) {
+                    mode = Mode.STATIC;
+                } else {
+                    System.out.println("Use:");
+                    System.out.println("  DumpTikaConfig [--dump-minimal] [--dump-current] [--dump-static] [filename] [encoding]");
+                    System.out.println("");
+                    System.out.println("--dump-minimal    Produce the minimal config file");
+                    System.out.println("--dump-current    The current (with defaults) config file");
+                    System.out.println("--dump-static     Convert dynamic parts to static");
+                    return;
+                }
+            } else if (filename == null) {
+                filename = arg;
+            } else {
+                encoding = Charset.forName(arg);
+            }
+        }
+        
         Writer writer = null;
-        if (args.length > 0) {
-            writer = new OutputStreamWriter(new FileOutputStream(new File(args[0])), encoding);
+        if (filename != null) {
+            writer = new OutputStreamWriter(new FileOutputStream(filename), encoding);
         } else {
             writer = new StringWriter();
         }
-
-        if (args.length > 1) {
-            encoding = Charset.forName(args[1]);
-        }
+        
         DumpTikaConfigExample ex = new DumpTikaConfigExample();
-        ex.dump(TikaConfig.getDefaultConfig(), writer, encoding.name());
+        ex.dump(TikaConfig.getDefaultConfig(), mode, writer, encoding.name());
 
         writer.flush();
 
@@ -207,4 +249,7 @@ public class DumpTikaConfigExample {
         }
         writer.close();
     }
+    protected enum Mode {
+        MINIMAL, CURRENT, STATIC;
+    }
 }

Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java?rev=1704934&r1=1704933&r2=1704934&view=diff
==============================================================================
--- tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java (original)
+++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/DumpTikaConfigExampleTest.java Wed Sep 23 21:04:08 2015
@@ -17,10 +17,9 @@ package org.apache.tika.example;
  * limitations under the License.
  */
 
-
 import static java.nio.charset.StandardCharsets.UTF_16LE;
 import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.io.File;
@@ -32,6 +31,7 @@ import java.nio.charset.Charset;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.example.DumpTikaConfigExample.Mode;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
@@ -39,7 +39,6 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-
 public class DumpTikaConfigExampleTest {
     private File configFile;
     @Before
@@ -65,22 +64,26 @@ public class DumpTikaConfigExampleTest {
     public void testDump() throws Exception {
         DumpTikaConfigExample ex = new DumpTikaConfigExample();
         for (Charset charset : new Charset[]{UTF_8, UTF_16LE}) {
-            Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), charset);
-            ex.dump(TikaConfig.getDefaultConfig(), writer, charset.name());
-            writer.flush();
-            writer.close();
-
-            TikaConfig c = new TikaConfig(configFile);
-            assertEquals(CompositeParser.class, c.getParser().getClass());
-            assertEquals(CompositeDetector.class, c.getDetector().getClass());
-
-            CompositeParser p = (CompositeParser) c.getParser();
-            assertTrue("enough parsers?", p.getParsers().size() > 130);
-
-            CompositeDetector d = (CompositeDetector) c.getDetector();
-            assertTrue("enough detectors?", d.getDetectors().size() > 3);
-            //just try to load it into autodetect to make sure no errors are thrown
-            Parser auto = new AutoDetectParser(c);
+            for (Mode mode : Mode.values()) {
+                Writer writer = new OutputStreamWriter(new FileOutputStream(configFile), charset);
+                ex.dump(TikaConfig.getDefaultConfig(), mode, writer, charset.name());
+                writer.flush();
+                writer.close();
+    
+                TikaConfig c = new TikaConfig(configFile);
+                assertTrue(c.getParser().toString(), c.getParser() instanceof CompositeParser);
+                assertTrue(c.getDetector().toString(), c.getDetector() instanceof CompositeDetector);
+    
+                CompositeParser p = (CompositeParser) c.getParser();
+                assertTrue("enough parsers?", p.getParsers().size() > 130);
+    
+                CompositeDetector d = (CompositeDetector) c.getDetector();
+                assertTrue("enough detectors?", d.getDetectors().size() > 3);
+                
+                //just try to load it into autodetect to make sure no errors are thrown
+                Parser auto = new AutoDetectParser(c);
+                assertNotNull(auto);
+            }
         }
     }