You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/05/18 18:02:47 UTC

svn commit: r1124309 - in /tika/trunk/tika-core/src: main/java/org/apache/tika/parser/CompositeParser.java test/java/org/apache/tika/parser/ test/java/org/apache/tika/parser/CompositeParserTest.java

Author: jukka
Date: Wed May 18 16:02:47 2011
New Revision: 1124309

URL: http://svn.apache.org/viewvc?rev=1124309&view=rev
Log:
TIKA-660: Remove logging of duplicate parser definitions

Move this functionality into an explicit findDuplicateParsers() method.

Added:
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1124309&r1=1124308&r2=1124309&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Wed May 18 16:02:47 2011
@@ -25,8 +25,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryFiles;
@@ -81,19 +79,44 @@ public class CompositeParser extends Abs
         Map<MediaType, Parser> map = new HashMap<MediaType, Parser>();
         for (Parser parser : parsers) {
             for (MediaType type : parser.getSupportedTypes(context)) {
+                map.put(registry.normalize(type), parser);
+            }
+        }
+        return map;
+    }
+
+    /**
+     * Utility method that goes through all the component parsers and finds
+     * all media types for which more than one parser declares support. This
+     * is useful in tracking down conflicting parser definitions.
+     *
+     * @since Apache Tika 1.0
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-660">TIKA-660</a>
+     * @param context parsing context
+     * @return media types that are supported by at least two component parsers
+     */
+    public Map<MediaType, List<Parser>> findDuplicateParsers(
+            ParseContext context) {
+        Map<MediaType, Parser> types = new HashMap<MediaType, Parser>();
+        Map<MediaType, List<Parser>> duplicates =
+            new HashMap<MediaType, List<Parser>>();
+        for (Parser parser : parsers) {
+            for (MediaType type : parser.getSupportedTypes(context)) {
                 MediaType canonicalType = registry.normalize(type);
-                if (map.containsKey(canonicalType)) {
-                   if (map.get(canonicalType) != parser) {
-                      Logger.getLogger(getClass().getName()).log(
-                            Level.INFO, "Duplicate parser definition for " + type + 
-                            " (" + canonicalType + "), using " + parser
-                      );
-                   }
+                if (types.containsKey(canonicalType)) {
+                    List<Parser> list = duplicates.get(canonicalType);
+                    if (list == null) {
+                        list = new ArrayList<Parser>();
+                        list.add(types.get(canonicalType));
+                        duplicates.put(canonicalType, list);
+                    }
+                    list.add(parser);
+                } else {
+                    types.put(canonicalType, parser);
                 }
-                map.put(canonicalType, parser);
             }
         }
-        return map;
+        return duplicates;
     }
 
     /**

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java?rev=1124309&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/CompositeParserTest.java Wed May 18 16:02:47 2011
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+public class CompositeParserTest extends TestCase {
+
+    public void testFindDuplicateParsers() {
+        Parser a = new EmptyParser() {
+            public Set<MediaType> getSupportedTypes(ParseContext context) {
+                return Collections.singleton(MediaType.TEXT_PLAIN);
+            }
+        };
+        Parser b = new EmptyParser() {
+            public Set<MediaType> getSupportedTypes(ParseContext context) {
+                return Collections.singleton(MediaType.TEXT_PLAIN);
+            }
+        };
+        Parser c = new EmptyParser() {
+            public Set<MediaType> getSupportedTypes(ParseContext context) {
+                return Collections.singleton(MediaType.OCTET_STREAM);
+            }
+        };
+
+        CompositeParser composite = new CompositeParser(
+                MediaTypeRegistry.getDefaultRegistry(), a, b, c);
+        Map<MediaType, List<Parser>> duplicates =
+            composite.findDuplicateParsers(new ParseContext());
+        assertEquals(1, duplicates.size());
+        List<Parser> parsers = duplicates.get(MediaType.TEXT_PLAIN);
+        assertNotNull(parsers);
+        assertEquals(2, parsers.size());
+        assertEquals(a, parsers.get(0));
+        assertEquals(b, parsers.get(1));
+    }
+
+}