You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/03/01 18:49:32 UTC

svn commit: r1663136 - in /tika/trunk: CHANGES.txt tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Author: nick
Date: Sun Mar  1 17:49:31 2015
New Revision: 1663136

URL: http://svn.apache.org/r1663136
Log:
When looking at the file(1) magic dir, check children for magic too, as sometimes they have it, and update the changelog

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1663136&r1=1663135&r2=1663136&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sun Mar  1 17:49:31 2015
@@ -1,5 +1,10 @@
 Release 1.8 - Current Development
 
+  * TikaCLI option --compare-file-magic to report mime types known to
+    the file(1) tool but unknown, or only partly known, to Tika
+
+  * MediaTypeRegistry support for returning known child types
+
   * Support for excluding (blacklisting) certain Parsers from being
     used by DefaultParser via the Tika Config file, using the new
     parser-exclude tag (TIKA-1558)

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1663136&r1=1663135&r2=1663136&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Sun Mar  1 17:49:31 2015
@@ -767,18 +767,31 @@ public class TikaCLI {
         MediaTypeRegistry registry = config.getMediaTypeRegistry();
         for (String mime : fileMimes) {
             try {
-                MimeType type = mimeTypes.getRegisteredMimeType(mime);
+                final MimeType type = mimeTypes.getRegisteredMimeType(mime);
                 
                 if (type == null) {
                     // Tika doesn't know about this one
                     tikaLacking.add(mime);
                 } else {
                     // Tika knows about this one!
-                    // Check for magic on this, or parents
-                    // TODO What about magic on children?
-                    boolean hasMagic = false;
-                    while (type != null && !hasMagic) {
-                        if (type.hasMagic()) {
+                    
+                    // Does Tika have magic for it?
+                    boolean hasMagic = type.hasMagic();
+                    
+                    // How about the children?
+                    if (!hasMagic) {
+                        for (MediaType child : registry.getChildTypes(type.getType())) {
+                            MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
+                            if (childType != null && childType.hasMagic()) {
+                                hasMagic = true;
+                            }
+                        }
+                    }
+                    
+                    // How about the parents?
+                    MimeType parentType = type;
+                    while (parentType != null && !hasMagic) {
+                        if (parentType.hasMagic()) {
                             // Has magic, fine
                             hasMagic = true;
                         } else {
@@ -791,9 +804,9 @@ public class TikaCLI {
                                 parent = null;
                             }
                             if (parent != null) {
-                                type = mimeTypes.getRegisteredMimeType(parent.toString());
+                                parentType = mimeTypes.getRegisteredMimeType(parent.toString());
                             } else {
-                                type = null;
+                                parentType = null;
                             }
                         }
                     }