You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2014/02/26 02:29:32 UTC
svn commit: r1571896 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/txt/CharsetDetector.java
test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
Author: kkrugler
Date: Wed Feb 26 01:29:32 2014
New Revision: 1571896
URL: http://svn.apache.org/r1571896
Log:
TIKA-1248: handle empty/null declaredEncoding with call to CharsetDetector.getReader
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java?rev=1571896&r1=1571895&r2=1571896&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetDetector.java Wed Feb 26 01:29:32 2014
@@ -349,6 +349,10 @@ public class CharsetDetector {
* @param encoding - name of character encoding
*/
private void setCanonicalDeclaredEncoding(String encoding) {
+ if ((encoding == null) || encoding.isEmpty()) {
+ return;
+ }
+
Charset cs = Charset.forName(encoding);
if (cs != null) {
fDeclaredEncoding = cs.name();
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java?rev=1571896&r1=1571895&r2=1571896&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java Wed Feb 26 01:29:32 2014
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTru
import java.io.IOException;
import java.io.InputStream;
+import java.io.Reader;
public class CharsetDetectorTest {
@@ -46,4 +47,25 @@ public class CharsetDetectorTest {
in.close();
}
}
+
+ /* https://issues.apache.org/jira/browse/TIKA-1248
+ * Verify empty or null declaredEncoding doesn't cause an exception
+ *
+ */
+
+ @Test
+ public void testEmptyOrNullDeclaredCharset() throws IOException {
+ InputStream in = CharsetDetectorTest.class.getResourceAsStream( "/test-documents/resume.html" );
+
+ try {
+ CharsetDetector detector = new CharsetDetector();
+ Reader reader = detector.getReader(in, null);
+ assertTrue(reader.ready());
+
+ reader = detector.getReader(in, "");
+ assertTrue(reader.ready());
+ } finally {
+ in.close();
+ }
+ }
}