You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2014/02/26 02:29:32 UTC

svn commit: r1571896 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/txt/CharsetDetector.java test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java

Author: kkrugler
Date: Wed Feb 26 01:29:32 2014
New Revision: 1571896

URL: http://svn.apache.org/r1571896
Log:
TIKA-1248: handle empty/null declaredEncoding when calling CharsetDetector.getReader

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java?rev=1571896&r1=1571895&r2=1571896&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java Wed Feb 26 01:29:32 2014
@@ -349,6 +349,10 @@ public class CharsetDetector {
      * @param encoding - name of character encoding
      */
     private void setCanonicalDeclaredEncoding(String encoding) {
+        if ((encoding == null) || encoding.isEmpty()) {
+            return;
+        }
+        
         Charset cs = Charset.forName(encoding);
         if (cs != null) {
             fDeclaredEncoding = cs.name();

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java?rev=1571896&r1=1571895&r2=1571896&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java Wed Feb 26 01:29:32 2014
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTru
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.Reader;
 
 public class CharsetDetectorTest {
   
@@ -46,4 +47,25 @@ public class CharsetDetectorTest {
       in.close();
     }
   }
+  
+  /* https://issues.apache.org/jira/browse/TIKA-1248
+   * Verify empty or null declaredEncoding doesn't cause an exception
+   * 
+   */
+  
+  @Test
+  public void testEmptyOrNullDeclaredCharset() throws IOException {
+    InputStream in = CharsetDetectorTest.class.getResourceAsStream( "/test-documents/resume.html" );
+      
+    try {
+      CharsetDetector detector = new CharsetDetector();
+      Reader reader = detector.getReader(in, null);
+      assertTrue(reader.ready());
+      
+      reader = detector.getReader(in, "");
+      assertTrue(reader.ready());
+    } finally {
+      in.close();
+    }
+  }
 }