You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/11/13 04:49:28 UTC

svn commit: r835726 - /lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java

Author: jukka
Date: Fri Nov 13 03:49:27 2009
New Revision: 835726

URL: http://svn.apache.org/viewvc?rev=835726&view=rev
Log:
TIKA-319: HtmlParser - use encoding hint only if charset is supported

Fixed as suggested by Piotr B.

Modified:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=835726&r1=835725&r2=835726&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Fri Nov 13 03:49:27 2009
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.Charset;
 import java.util.Collections;
 import java.util.Map;
 
@@ -46,7 +47,7 @@
         // Prepare the input source using the encoding hint if available
         InputSource source = new InputSource(stream); 
         String encoding = metadata.get(Metadata.CONTENT_ENCODING); 
-        if (encoding != null) { 
+        if (encoding != null && Charset.isSupported(encoding)) { 
             source.setEncoding(encoding);
         }