You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/03/01 18:49:32 UTC
svn commit: r1663136 - in /tika/trunk: CHANGES.txt
tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Author: nick
Date: Sun Mar 1 17:49:31 2015
New Revision: 1663136
URL: http://svn.apache.org/r1663136
Log:
When comparing against the file(1) magic directory, also check a type's children for magic, since sometimes only the child types define it. Also update the changelog.
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1663136&r1=1663135&r2=1663136&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sun Mar 1 17:49:31 2015
@@ -1,5 +1,10 @@
Release 1.8 - Current Development
+ * TikaCLI option --compare-file-magic to report mime types known to
+ the file(1) tool but not known / fully known to Tika
+
+ * MediaTypeRegistry support for returning known child types
+
* Support for excluding (blacklisting) certain Parsers from being
used by DefaultParser via the Tika Config file, using the new
parser-exclude tag (TIKA-1558)
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1663136&r1=1663135&r2=1663136&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Sun Mar 1 17:49:31 2015
@@ -767,18 +767,31 @@ public class TikaCLI {
MediaTypeRegistry registry = config.getMediaTypeRegistry();
for (String mime : fileMimes) {
try {
- MimeType type = mimeTypes.getRegisteredMimeType(mime);
+ final MimeType type = mimeTypes.getRegisteredMimeType(mime);
if (type == null) {
// Tika doesn't know about this one
tikaLacking.add(mime);
} else {
// Tika knows about this one!
- // Check for magic on this, or parents
- // TODO What about magic on children?
- boolean hasMagic = false;
- while (type != null && !hasMagic) {
- if (type.hasMagic()) {
+
+ // Does Tika have magic for it?
+ boolean hasMagic = type.hasMagic();
+
+ // How about the children?
+ if (!hasMagic) {
+ for (MediaType child : registry.getChildTypes(type.getType())) {
+ MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
+ if (childType != null && childType.hasMagic()) {
+ hasMagic = true;
+ }
+ }
+ }
+
+ // How about the parents?
+ MimeType parentType = type;
+ while (parentType != null && !hasMagic) {
+ if (parentType.hasMagic()) {
// Has magic, fine
hasMagic = true;
} else {
@@ -791,9 +804,9 @@ public class TikaCLI {
parent = null;
}
if (parent != null) {
- type = mimeTypes.getRegisteredMimeType(parent.toString());
+ parentType = mimeTypes.getRegisteredMimeType(parent.toString());
} else {
- type = null;
+ parentType = null;
}
}
}