You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/02/14 02:27:46 UTC

svn commit: r1070359 - in /tika/trunk: CHANGES.txt tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Author: mattmann
Date: Mon Feb 14 01:27:45 2011
New Revision: 1070359

URL: http://svn.apache.org/viewvc?rev=1070359&view=rev
Log:
- fix for TIKA-596 NetCDF and HDF files don't parse correctly from the command line via tika-app

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1070359&r1=1070358&r2=1070359&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Feb 14 01:27:45 2011
@@ -4,6 +4,10 @@ Release 0.8 - 11/07/2010
 
 The most notable changes in Tika 0.8 over previous releases are:
 
+ * A critical bug that prevented metadata from being printed to
+   the command line when the underlying Parser didn't generate
+   XHTML output was fixed. (TIKA-596)
+
  * Language identification is now dynamically configurable, 
    managed via a config file loaded from the classpath. (TIKA-490)
 

Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1070359&r1=1070358&r2=1070359&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Mon Feb 14 01:27:45 2011
@@ -103,14 +103,24 @@ public class TikaCLI {
             if (fork) {
                 p = new ForkParser(TikaCLI.class.getClassLoader(), p);
             }
-            p.parse(input, getContentHandler(output), metadata, context);
+            ContentHandler handler = getContentHandler(output);
+            p.parse(input, handler, metadata, context);   
+            // fix for TIKA-596: if a parser doesn't generate
+            // XHTML output, the lack of an output document prevents
+            // metadata from being output: this fixes that
+            if (handler instanceof NoDocumentMetHandler){
+                NoDocumentMetHandler metHandler = (NoDocumentMetHandler)handler;
+                if(!metHandler.metOutput()){
+                    metHandler.endDocument();
+                }
+            }
         }
 
         protected ContentHandler getContentHandler(OutputStream output)
                 throws Exception {
             throw new UnsupportedOperationException();
         }
-
+        
     }
 
     private final OutputType XML = new OutputType() {
@@ -158,16 +168,7 @@ public class TikaCLI {
                 throws Exception {
             final PrintWriter writer =
                 new PrintWriter(getOutputWriter(output, encoding));
-            return new DefaultHandler() {
-                public void endDocument() {
-                    String[] names = metadata.names();
-                    Arrays.sort(names);
-                    for (String name : names) {
-                        writer.println(name + ": " + metadata.get(name));
-                    }
-                    writer.flush();
-                }
-            };
+            return new NoDocumentMetHandler(writer);
         }
     };
 
@@ -613,5 +614,33 @@ public class TikaCLI {
         }
 
     }
+    // Handler that prints the metadata when the document ends and remembers that it has done so.
+    private class NoDocumentMetHandler extends DefaultHandler{
+        // Destination for the printed metadata lines; flushed at the end of endDocument().
+        private PrintWriter writer;
+        // Set to true by endDocument() once the metadata has been written.
+        private boolean metOutput;
+        
+        public NoDocumentMetHandler(PrintWriter writer){
+            this.writer = writer;
+            this.metOutput = false;
+        }
+        // Prints each name of metadata (declared outside this class) in sorted order as "name: value".
+        @Override
+        public void endDocument() {
+            String[] names = metadata.names();
+            Arrays.sort(names);
+            for (String name : names) {
+                writer.println(name + ": " + metadata.get(name));
+            }
+            writer.flush();
+            this.metOutput = true;
+        }        
+        // Returns true if endDocument() has already run on this handler.
+        public boolean metOutput(){
+            return this.metOutput;
+        }
+        
+    }
 
 }