You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2008/08/29 17:01:07 UTC

svn commit: r690282 - /jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java

Author: mreutegg
Date: Fri Aug 29 08:01:07 2008
New Revision: 690282

URL: http://svn.apache.org/viewvc?rev=690282&view=rev
Log:
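JCR-1727: HTMLTextExtractor modifying UTF-8 encoded String (the input stream is now wrapped in an InputStreamReader, using the supplied encoding when one is given, before it is handed to the HTML parser)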

Modified:
    jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java

Modified: jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java?rev=690282&r1=690281&r2=690282&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java Fri Aug 29 08:01:07 2008
@@ -31,6 +31,7 @@
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.StringReader;
+import java.io.InputStreamReader;
 
 /**
  * Text extractor for HyperText Markup Language (HTML).
@@ -64,7 +65,13 @@
             HTMLParser parser = new HTMLParser();
             SAXResult result = new SAXResult(new DefaultHandler());
 
-            SAXSource source = new SAXSource(parser, new InputSource(stream));
+            Reader reader;
+            if (encoding != null) {
+                reader = new InputStreamReader(stream, encoding);
+            } else {
+                reader = new InputStreamReader(stream);
+            }
+            SAXSource source = new SAXSource(parser, new InputSource(reader));
             transformer.transform(source, result);
 
             return new StringReader(parser.getContents());