You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by kr...@apache.org on 2015/06/21 18:04:26 UTC

svn commit: r1686739 - in /commons/proper/io/trunk/src: main/java/org/apache/commons/io/input/ test/java/org/apache/commons/io/input/ test/resources/

Author: krosenvold
Date: Sun Jun 21 16:04:25 2015
New Revision: 1686739

URL: http://svn.apache.org/r1686739
Log:
IO-471 Support for additional encodings in ReversedLinesFileReader

Patch by Leandro Reis, applied with patch adjustments to trunk

Added:
    commons/proper/io/trunk/src/test/resources/test-file-gbk.bin
    commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin
    commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin
    commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin
Modified:
    commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java
    commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java
    commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java

Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java?rev=1686739&r1=1686738&r2=1686739&view=diff
==============================================================================
--- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java (original)
+++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java Sun Jun 21 16:04:25 2015
@@ -121,9 +121,11 @@ public class ReversedLinesFileReader imp
             // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
             // http://en.wikipedia.org/wiki/UTF-8
             byteDecrement = 1;
-        } else if (charset == Charset.forName("Shift_JIS")) {
-            // Same as for UTF-8
-            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
+        } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
+                charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
+                charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
+                charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
+                charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
             byteDecrement = 1;
         } else if (charset == Charsets.UTF_16BE || charset == Charsets.UTF_16LE) {
             // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
@@ -356,4 +358,4 @@ public class ReversedLinesFileReader imp
         }
     }
 
-}
\ No newline at end of file
+}

Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java?rev=1686739&r1=1686738&r2=1686739&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java Sun Jun 21 16:04:25 2015
@@ -59,6 +59,18 @@ public class ReversedLinesFileReaderTest
     private static final String TEST_LINE_SHIFT_JIS1 = "Hiragana letters: \u3041\u3042\u3043\u3044\u3045";
     // Kanji letters: 明輸子京
     private static final String TEST_LINE_SHIFT_JIS2 = "Kanji letters: \u660E\u8F38\u5B50\u4EAC";
+    // windows-31j characters
+    private static final String TEST_LINE_WINDOWS_31J_1 = "\u3041\u3042\u3043\u3044\u3045";
+    private static final String TEST_LINE_WINDOWS_31J_2 = "\u660E\u8F38\u5B50\u4EAC";
+    // gbk characters (Simplified Chinese)
+    private static final String TEST_LINE_GBK_1 = "\u660E\u8F38\u5B50\u4EAC";
+    private static final String TEST_LINE_GBK_2 = "\u7B80\u4F53\u4E2D\u6587";
+    // x-windows-949 characters (Korean)
+    private static final String TEST_LINE_X_WINDOWS_949_1 = "\uD55C\uAD6D\uC5B4";
+    private static final String TEST_LINE_X_WINDOWS_949_2 = "\uB300\uD55C\uBBFC\uAD6D";
+    // x-windows-950 characters (Traditional Chinese)
+    private static final String TEST_LINE_X_WINDOWS_950_1 = "\u660E\u8F38\u5B50\u4EAC";
+    private static final String TEST_LINE_X_WINDOWS_950_2 = "\u7E41\u9AD4\u4E2D\u6587";
 
 
     @After
@@ -127,6 +139,38 @@ public class ReversedLinesFileReaderTest
         assertEqualsAndNoLineBreaks(TEST_LINE_SHIFT_JIS1, reversedLinesFileReader.readLine());
     }
 
+    @Test
+    public void testWindows31jFile() throws URISyntaxException, IOException {
+        final File testFileWindows31J = new File(this.getClass().getResource("/test-file-windows-31j.bin").toURI());
+        reversedLinesFileReader = new ReversedLinesFileReader(testFileWindows31J, testParamBlockSize, "windows-31j");
+        assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_2, reversedLinesFileReader.readLine());
+        assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_1, reversedLinesFileReader.readLine());
+    }
+
+    @Test
+    public void testGBK() throws URISyntaxException, IOException {
+        final File testFileGBK = new File(this.getClass().getResource("/test-file-gbk.bin").toURI());
+        reversedLinesFileReader = new ReversedLinesFileReader(testFileGBK, testParamBlockSize, "GBK");
+        assertEqualsAndNoLineBreaks(TEST_LINE_GBK_2, reversedLinesFileReader.readLine());
+        assertEqualsAndNoLineBreaks(TEST_LINE_GBK_1, reversedLinesFileReader.readLine());
+    }
+
+    @Test
+    public void testxWindows949File() throws URISyntaxException, IOException {
+        final File testFilexWindows949 = new File(this.getClass().getResource("/test-file-x-windows-949.bin").toURI());
+        reversedLinesFileReader = new ReversedLinesFileReader(testFilexWindows949, testParamBlockSize, "x-windows-949");
+        assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_2, reversedLinesFileReader.readLine());
+        assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_1, reversedLinesFileReader.readLine());
+    }
+
+    @Test
+    public void testxWindows950File() throws URISyntaxException, IOException {
+        final File testFilexWindows950 = new File(this.getClass().getResource("/test-file-x-windows-950.bin").toURI());
+        reversedLinesFileReader = new ReversedLinesFileReader(testFilexWindows950, testParamBlockSize, "x-windows-950");
+        assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_2, reversedLinesFileReader.readLine());
+        assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_1, reversedLinesFileReader.readLine());
+    }
+
     @Test // this test is run 3x for same block size as we want to test with 10
     public void testFileSizeIsExactMultipleOfBlockSize() throws URISyntaxException, IOException {
         final int blockSize = 10;

Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java?rev=1686739&r1=1686738&r2=1686739&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java Sun Jun 21 16:04:25 2015
@@ -57,6 +57,10 @@ public class ReversedLinesFileReaderTest
                 {"test-file-utf8-win-linebr.bin", "UTF-8", 3},
                 {"test-file-utf8-win-linebr.bin", "UTF-8", 4},
                 {"test-file-utf8.bin", "UTF-8", null},
+                {"test-file-windows-31j.bin", "windows-31j", null},
+                {"test-file-gbk.bin", "gbk", null},
+                {"test-file-x-windows-949.bin", "x-windows-949", null},
+                {"test-file-x-windows-950.bin", "x-windows-950", null},
         });
     }
 

Added: commons/proper/io/trunk/src/test/resources/test-file-gbk.bin
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-gbk.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-gbk.bin (added)
+++ commons/proper/io/trunk/src/test/resources/test-file-gbk.bin Sun Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+Ã÷ݔ×Ó¾©
+¼òÌåÖÐÎÄ

Added: commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin (added)
+++ commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin Sun Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+‚Ÿ‚ ‚¡‚¢‚£
+–¾—AŽq‹ž

Added: commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin (added)
+++ commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin Sun Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+Çѱ¹¾î
+´ëÇѹα¹

Added: commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin (added)
+++ commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin Sun Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+©ú¿é¤l¨Ê
+ÁcÅ餤¤å