You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/11/17 16:09:02 UTC
svn commit: r881320 -
/lucene/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Author: jukka
Date: Tue Nov 17 15:09:01 2009
New Revision: 881320
URL: http://svn.apache.org/viewvc?rev=881320&view=rev
Log:
TIKA-324: Tika CLI mangles utf-8 content in text (-t) mode (on Mac OS X)
Use UTF-8 as the default output encoding on Mac OS X when no encoding is explicitly specified. The Java platform default encoding on Mac OS X is still MacRoman, even though most parts of OS X already use UTF-8.
Modified:
lucene/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Modified: lucene/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java?rev=881320&r1=881319&r2=881320&view=diff
==============================================================================
--- lucene/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java (original)
+++ lucene/tika/trunk/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java Tue Nov 17 15:09:01 2009
@@ -228,6 +228,10 @@
throws UnsupportedEncodingException {
if (encoding != null) {
return new OutputStreamWriter(System.out, encoding);
+ } else if (System.getProperty("os.name")
+ .toLowerCase().startsWith("mac os x")) {
+ // TIKA-324: Override the default encoding on Mac OS X
+ return new OutputStreamWriter(System.out, "UTF-8");
} else {
return new OutputStreamWriter(System.out);
}