You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/30 18:12:59 UTC
svn commit: r1003132 -
/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Author: jukka
Date: Thu Sep 30 16:12:59 2010
New Revision: 1003132
URL: http://svn.apache.org/viewvc?rev=1003132&view=rev
Log:
TIKA-383: new option for TIKA CLI to get only the languages of a document
Use the ProfilingHandler class to implement the TikaCLI --language option
Modified:
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Modified: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=1003132&r1=1003131&r2=1003132&view=diff
==============================================================================
--- tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Thu Sep 30 16:12:59 2010
@@ -46,6 +46,7 @@ import org.apache.log4j.SimpleLayout;
import org.apache.log4j.WriterAppender;
import org.apache.tika.gui.TikaGUI;
import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.language.ProfilingHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
@@ -124,20 +125,9 @@ public class TikaCLI {
public ContentHandler getContentHandler() throws Exception{
final PrintWriter writer =
new PrintWriter(getSystemOutWriter(encoding));
- return new DefaultHandler() {
+ return new ProfilingHandler() {
public void endDocument() {
- String language = metadata.get(Metadata.LANGUAGE);
- if (language == null) {
- language = "No language detected";
- }
- String contentLanguage =
- metadata.get(Metadata.CONTENT_LANGUAGE);
- if (contentLanguage == null) {
- contentLanguage = "No language detected";
- }
- writer.println(Metadata.LANGUAGE + ": " + language);
- writer.println(
- Metadata.CONTENT_LANGUAGE + ": " + contentLanguage);
+ writer.println(getLanguage().getLanguage());
writer.flush();
}
};