You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2008/10/28 18:23:34 UTC

svn commit: r708616 - in /jackrabbit/branches/1.5: ./ jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ jackrabbit-text-extractors/src/test/resources/ jac...

Author: jukka
Date: Tue Oct 28 10:23:33 2008
New Revision: 708616

URL: http://svn.apache.org/viewvc?rev=708616&view=rev
Log:
1.5: Merged revisions 707303, 707304, 707307, and 707310 (JCR-1832, JCR-1829, JCR-1833, and JCR-1830)

Added:
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/RTFTextExtractorTest.java
      - copied unchanged from r707307, jackrabbit/trunk/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/RTFTextExtractorTest.java
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/
      - copied from r707303, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/log4j.properties
      - copied unchanged from r707303, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/log4j.properties
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/org/
      - copied from r707307, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/org/
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/org/apache/
      - copied from r707307, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/org/apache/
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/org/apache/jackrabbit/
      - copied from r707307, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/org/apache/jackrabbit/
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/org/apache/jackrabbit/extractor/
      - copied from r707307, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/org/apache/jackrabbit/extractor/
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/resources/org/apache/jackrabbit/extractor/test.rtf
      - copied unchanged from r707307, jackrabbit/trunk/jackrabbit-text-extractors/src/test/resources/org/apache/jackrabbit/extractor/test.rtf
Modified:
    jackrabbit/branches/1.5/   (props changed)
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/PlainTextExtractor.java
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/XMLTextExtractor.java
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/PlainTextExtractorTest.java
    jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/XMLTextExtractorTest.java

Propchange: jackrabbit/branches/1.5/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Oct 28 10:23:33 2008
@@ -1,2 +1,2 @@
 /jackrabbit/branches/1.3:631261
-/jackrabbit/trunk:703899-704158,704165,704167,704324,704358,704361,704864,704933,704939,705010,705033,705243,705496,705522,705579,705925,705932,705934,705937-705938,705961,706242,706273,706285-706286,706562,706606,706649,706655,706660,706697,706918
+/jackrabbit/trunk:703899-704158,704165,704167,704324,704358,704361,704864,704933,704939,705010,705033,705243,705496,705522,705579,705925,705932,705934,705937-705938,705961,706242,706273,706285-706286,706562,706606,706649,706655,706660,706697,706918,707303-707304,707307,707310

Modified: jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/PlainTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/PlainTextExtractor.java?rev=708616&r1=708615&r2=708616&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/PlainTextExtractor.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/PlainTextExtractor.java Tue Oct 28 10:23:33 2008
@@ -49,8 +49,8 @@
     /**
      * Wraps the given input stream to an {@link InputStreamReader} using
      * the given encoding, or the platform default encoding if the encoding
-     * is not given. Closes the stream and returns an empty reader if the
-     * given encoding is not supported.
+     * is not given or is unsupported. Logs a warning, but keeps the stream
+     * open, if the given encoding is not supported.
      *
      * @param stream binary stream
      * @param type ignored
@@ -64,14 +64,12 @@
         try {
             if (encoding != null) {
                 return new InputStreamReader(stream, encoding);
-            } else {
-                return new InputStreamReader(stream);
             }
         } catch (UnsupportedEncodingException e) {
-            logger.warn("Failed to extract plain text content", e);
-            stream.close();
-            return new StringReader("");
+            logger.warn("Unsupported encoding '{}', using default ({}) instead.",
+                    new Object[]{encoding, System.getProperty("file.encoding")});
         }
+        return new InputStreamReader(stream);
     }
 
 }

Modified: jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/XMLTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/XMLTextExtractor.java?rev=708616&r1=708615&r2=708616&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/XMLTextExtractor.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/XMLTextExtractor.java Tue Oct 28 10:23:33 2008
@@ -23,6 +23,7 @@
 import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
+import java.nio.charset.Charset;
 
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
@@ -95,7 +96,13 @@
                 }
             });
             if (encoding != null) {
-                source.setEncoding(encoding);
+                try {
+                    Charset.forName(encoding);
+                    source.setEncoding(encoding);
+                } catch (Exception e) {
+                    logger.warn("Unsupported encoding '{}', using default ({}) instead.",
+                            new Object[]{encoding, System.getProperty("file.encoding")});
+                }
             }
             reader.parse(source);
 

Modified: jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/PlainTextExtractorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/PlainTextExtractorTest.java?rev=708616&r1=708615&r2=708616&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/PlainTextExtractorTest.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/PlainTextExtractorTest.java Tue Oct 28 10:23:33 2008
@@ -92,7 +92,7 @@
             Reader reader = extractor.extractText(
                     new ByteArrayInputStream(text.getBytes()),
                     "text/plain", "unsupported");
-            assertEquals("", ExtractorHelper.read(reader));
+            assertEquals(text, ExtractorHelper.read(reader));
         } catch (UnsupportedEncodingException e) {
             fail("PlainTextExtractor does not handle unsupported encodings");
         }

Modified: jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/XMLTextExtractorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/XMLTextExtractorTest.java?rev=708616&r1=708615&r2=708616&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/XMLTextExtractorTest.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/XMLTextExtractorTest.java Tue Oct 28 10:23:33 2008
@@ -110,7 +110,7 @@
             Reader reader = extractor.extractText(
                     new ByteArrayInputStream(xml.getBytes()),
                     "text/xml", "unsupported");
-            assertEquals("", ExtractorHelper.read(reader));
+            assertEquals("attribute value text content", ExtractorHelper.read(reader));
         } catch (UnsupportedEncodingException e) {
             fail("XMLTextExtractor does not handle unsupported encodings");
         } catch (IOException e) {