You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/02/14 02:27:46 UTC
svn commit: r1070359 - in /tika/trunk: CHANGES.txt
tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Author: mattmann
Date: Mon Feb 14 01:27:45 2011
New Revision: 1070359
URL: http://svn.apache.org/viewvc?rev=1070359&view=rev
Log:
- fix for TIKA-596: NetCDF and HDF files don't parse correctly from the command line via tika-app
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1070359&r1=1070358&r2=1070359&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Feb 14 01:27:45 2011
@@ -4,6 +4,10 @@ Release 0.8 - 11/07/2010
The most notable changes in Tika 0.8 over previous releases are:
+ * A critical bug that prevented metadata from printing to the
+ command line when the underlying Parser didn't generate
+ XHTML output was fixed. (TIKA-596)
+
* Language identification is now dynamically configurable,
managed via a config file loaded from the classpath. (TIKA-490)
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1070359&r1=1070358&r2=1070359&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Mon Feb 14 01:27:45 2011
@@ -103,14 +103,24 @@ public class TikaCLI {
if (fork) {
p = new ForkParser(TikaCLI.class.getClassLoader(), p);
}
- p.parse(input, getContentHandler(output), metadata, context);
+ ContentHandler handler = getContentHandler(output);
+ p.parse(input, handler, metadata, context);
+ // fix for TIKA-596: if a parser doesn't generate XHTML
+ // output, the handler's endDocument() has not run and the
+ // metadata was never printed, so invoke it explicitly here
+ if (handler instanceof NoDocumentMetHandler){
+ NoDocumentMetHandler metHandler = (NoDocumentMetHandler)handler;
+ if(!metHandler.metOutput()){
+ metHandler.endDocument();
+ }
+ }
}
protected ContentHandler getContentHandler(OutputStream output)
throws Exception {
throw new UnsupportedOperationException();
}
-
+
}
private final OutputType XML = new OutputType() {
@@ -158,16 +168,7 @@ public class TikaCLI {
throws Exception {
final PrintWriter writer =
new PrintWriter(getOutputWriter(output, encoding));
- return new DefaultHandler() {
- public void endDocument() {
- String[] names = metadata.names();
- Arrays.sort(names);
- for (String name : names) {
- writer.println(name + ": " + metadata.get(name));
- }
- writer.flush();
- }
- };
+ return new NoDocumentMetHandler(writer);
}
};
@@ -613,5 +614,33 @@ public class TikaCLI {
}
}
+
+ private class NoDocumentMetHandler extends DefaultHandler{
+
+ private PrintWriter writer;
+
+ private boolean metOutput;
+
+ public NoDocumentMetHandler(PrintWriter writer){
+ this.writer = writer;
+ this.metOutput = false;
+ }
+
+ @Override
+ public void endDocument() {
+ String[] names = metadata.names();
+ Arrays.sort(names);
+ for (String name : names) {
+ writer.println(name + ": " + metadata.get(name));
+ }
+ writer.flush();
+ this.metOutput = true;
+ }
+
+ public boolean metOutput(){
+ return this.metOutput;
+ }
+
+ }
}