You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/01 16:50:56 UTC

svn commit: r1693710 - in /tika/trunk: tika-core/src/test/java/org/apache/tika/config/ tika-parsers/src/test/java/org/apache/tika/config/ tika-parsers/src/test/resources/org/apache/tika/config/

Author: nick
Date: Sat Aug  1 14:50:56 2015
New Revision: 1693710

URL: http://svn.apache.org/r1693710
Log:
Start on detector config tests for TIKA-1702

Added:
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
    tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-detector-blacklist.xml
Modified:
    tika/trunk/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java?rev=1693710&r1=1693709&r2=1693710&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java Sat Aug  1 14:50:56 2015
@@ -36,6 +36,13 @@ import static org.junit.Assert.assertNot
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+/**
+ * Tests for the Tika Config, which don't require real parsers /
+ *  detectors / etc.
+ * There's also {@link TikaParserConfigTest} and {@link TikaDetectorConfigTest}
+ *  over in the Tika Parsers project, which do further Tika Config
+ *  testing using real parsers and detectors.
+ */
 public class TikaConfigTest {
 
     /**

Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java?rev=1693710&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/AbstractTikaConfigTest.java Sat Aug  1 14:50:56 2015
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import java.net.URL;
+
+import org.apache.tika.parser.ParseContext;
+import org.junit.After;
+
+/**
+ * Parent of Junit test classes for {@link TikaConfig}, which cover 
+ *  things that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of "real" classes of parsers / detectors
+ */
+public abstract class AbstractTikaConfigTest {
+    protected static ParseContext context = new ParseContext();
+    
+    protected static TikaConfig getConfig(String config) throws Exception {
+        URL url = TikaConfig.class.getResource(config);
+        System.setProperty("tika.config", url.toExternalForm());
+        return new TikaConfig();
+    }
+    
+    @After
+    public void resetConfig() {
+        System.clearProperty("tika.config");
+    }
+}

Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java?rev=1693710&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java Sat Aug  1 14:50:56 2015
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.EmptyDetector;
+import org.apache.tika.parser.microsoft.POIFSContainerDetector;
+import org.apache.tika.parser.pkg.ZipContainerDetector;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of detectors
+ */
+public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
+    @Test
+    @Ignore // TODO Finish support
+    public void testDetectorExcludeFromDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-detector-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        CompositeDetector detector = (CompositeDetector)config.getDetector();
+        
+        // Should be wrapping two detectors
+        assertEquals(2, detector.getDetectors().size());
+
+        
+        // First should be DefaultDetector, second EmptyDetector, in that order
+        assertEquals(DefaultDetector.class, detector.getDetectors().get(0).getClass());
+        assertEquals(EmptyDetector.class,   detector.getDetectors().get(1).getClass());
+        
+        
+        // Get the DefaultDetector from the config
+        DefaultDetector confDetecotor = (DefaultDetector)detector.getDetectors().get(0);
+        
+        // Get a fresh "default" DefaultDetector
+        DefaultDetector normDetector = new DefaultDetector(config.getMimeRepository());
+        
+        
+        // The default one will offer the Zip and POIFS detectors
+        boolean hasZip = false;
+        boolean hasPOIFS = false;
+        for (Detector d : normDetector.getDetectors()) {
+            if (d instanceof ZipContainerDetector) {
+                hasZip = true;
+            }
+            if (d instanceof POIFSContainerDetector) {
+                hasPOIFS = true;
+            }
+        }
+        assertTrue(hasZip);
+        assertTrue(hasPOIFS);
+        
+        
+        // The one from the config won't, as we excluded those
+        for (Detector d : confDetecotor.getDetectors()) {
+            if (d instanceof ZipContainerDetector)
+                fail("Shouldn't have the ZipContainerDetector from config");
+            if (d instanceof POIFSContainerDetector)
+                fail("Shouldn't have the POIFSContainerDetector from config");
+        }
+    }
+}

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java?rev=1693710&r1=1693709&r2=1693710&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java Sat Aug  1 14:50:56 2015
@@ -23,41 +23,29 @@ import static org.junit.Assert.assertNot
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.net.URL;
 import java.util.List;
 
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.EmptyParser;
-import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.executable.ExecutableParser;
 import org.apache.tika.parser.xml.XMLParser;
-import org.junit.After;
 import org.junit.Test;
 
 /**
  * Junit test class for {@link TikaConfig}, which cover things
- *  that {@link TikaConfigTest} can't do due to a need for the
+ *  that {@link AbstractTikaConfigTest} can't do due to a need for the
  *  full set of parsers
  */
-public class TikaParserConfigTest {
-    protected static ParseContext context = new ParseContext();
-    protected static TikaConfig getConfig(String config) throws Exception {
-        URL url = TikaConfig.class.getResource(config);
-        System.setProperty("tika.config", url.toExternalForm());
-        return new TikaConfig();
-    }
-    @After
-    public void resetConfig() {
-        System.clearProperty("tika.config");
-    }
-    
+public class TikaParserConfigTest extends AbstractTikaConfigTest {
     @Test
     public void testMimeExcludeInclude() throws Exception {
         TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
         Parser parser = config.getParser();
         
         MediaType PDF = MediaType.application("pdf");
@@ -95,6 +83,8 @@ public class TikaParserConfigTest {
     @Test
     public void testParserExcludeFromDefault() throws Exception {
         TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
         CompositeParser parser = (CompositeParser)config.getParser();
         
         MediaType PE_EXE = MediaType.application("x-msdownload");
@@ -140,6 +130,8 @@ public class TikaParserConfigTest {
     @Test
     public void defaultParserBlacklist() throws Exception {
         TikaConfig config = new TikaConfig();
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
         CompositeParser cp = (CompositeParser) config.getParser();
         List<Parser> parsers = cp.getAllComponentParsers();
 

Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-detector-blacklist.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-detector-blacklist.xml?rev=1693710&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-detector-blacklist.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-detector-blacklist.xml Sat Aug  1 14:50:56 2015
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <!-- Explicitly request default parsers -->
+  <parsers/>
+  <detectors>
+    <!-- All detectors except built-in container ones -->
+    <detector class="org.apache.tika.detect.DefaultDetector">
+      <detector-exclude class="org.apache.tika.parser.pkg.ZipContainerDetector"/>
+      <detector-exclude class="org.apache.tika.parser.microsoft.POIFSContainerDetector"/>
+    </detector>
+    <!-- One other detector, to check ordering -->
+    <detector class="org.apache.tika.detect.EmptyDetector">
+    </detector>
+  </detectors>
+</properties>