You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2017/05/10 03:54:15 UTC

[13/43] metamodel git commit: METAMODEL-1109: Fixed

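METAMODEL-1109: Fixed character decoding in FixedWidthReader so that input is read through a charset-aware Reader rather than byte-by-byte, allowing multi-byte characters (e.g. UTF-8 diacritics) to be parsed correctly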

Fixes #122

Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/7e29fb89
Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/7e29fb89
Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/7e29fb89

Branch: refs/heads/5.x
Commit: 7e29fb895703508fd793ba9604f3e893f55adf82
Parents: 7e355a1
Author: Jakub Horcicka <ja...@humaninference.com>
Authored: Wed Aug 10 20:38:25 2016 -0700
Committer: Kasper Sørensen <i....@gmail.com>
Committed: Wed Aug 10 20:39:54 2016 -0700

----------------------------------------------------------------------
 .../metamodel/fixedwidth/EbcdicReader.java      |  4 +++
 .../metamodel/fixedwidth/FixedWidthReader.java  | 37 ++++++++++++--------
 .../fixedwidth/FixedWidthReaderTest.java        | 28 +++++++++++++++
 .../test/resources/example_diacritics_utf8.txt  |  4 +++
 4 files changed, 59 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
index a7639fc..9e22dac 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
@@ -26,6 +26,8 @@ import java.io.IOException;
  */
 class EbcdicReader extends FixedWidthReader {
 
+    private final BufferedInputStream _stream;
+    private final String _charsetName;
     private final boolean _skipEbcdicHeader;
     private final boolean _eolPresent;
     private boolean _headerSkipped;
@@ -33,6 +35,8 @@ class EbcdicReader extends FixedWidthReader {
     public EbcdicReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
             boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
         super(stream, charsetName, valueWidths, failOnInconsistentLineWidth);
+        _stream = stream;
+        _charsetName = charsetName;
         _skipEbcdicHeader = skipEbcdicHeader;
         _eolPresent = eolPresent;
     }

http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
index da17ff1..9f65ac7 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
@@ -19,9 +19,13 @@
 package org.apache.metamodel.fixedwidth;
 
 import java.io.BufferedInputStream;
+import java.io.BufferedReader;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
 import java.text.CharacterIterator;
 import java.text.StringCharacterIterator;
 import java.util.ArrayList;
@@ -35,14 +39,13 @@ class FixedWidthReader implements Closeable {
     private static final int LINE_FEED = '\n';
     private static final int CARRIAGE_RETURN = '\r';
     
-    protected final String _charsetName;
     private final int _fixedValueWidth;
     private final int[] _valueWidths;
     private int _valueIndex = 0;
     private final boolean _failOnInconsistentLineWidth;
     private final boolean _constantWidth;
     private volatile int _rowNumber;
-    protected final BufferedInputStream _stream;
+    protected final Reader _reader;
     protected final int _expectedLineLength;
 
     public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth,
@@ -52,8 +55,7 @@ class FixedWidthReader implements Closeable {
 
     private FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth,
             boolean failOnInconsistentLineWidth) {
-        _stream = stream;
-        _charsetName = charsetName;
+        _reader = initReader(stream, charsetName);
         _fixedValueWidth = fixedValueWidth;
         _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
         _rowNumber = 0;
@@ -69,8 +71,7 @@ class FixedWidthReader implements Closeable {
 
     FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
             boolean failOnInconsistentLineWidth) {
-        _stream = stream;
-        _charsetName = charsetName;
+        _reader = initReader(stream, charsetName);
         _fixedValueWidth = -1;
         _valueWidths = valueWidths;
         _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
@@ -85,6 +86,15 @@ class FixedWidthReader implements Closeable {
         _expectedLineLength = expectedLineLength;
     }
 
+    private Reader initReader(BufferedInputStream stream, String charsetName) {
+        try {
+            InputStreamReader inputStreamReader = new InputStreamReader(stream, charsetName);
+            return new BufferedReader(inputStreamReader);
+        } catch (UnsupportedEncodingException e) {
+            throw new IllegalArgumentException(String.format("Encoding '%s' was not recognized. ", charsetName));
+        }
+    }
+    
     /**
      * This reads and returns the next record from the file. Usually, it is a line but in case the new line characters
      * are not present, the length of the content depends on the column-widths setting.
@@ -106,7 +116,6 @@ class FixedWidthReader implements Closeable {
      * Empty hook that enables special behavior in sub-classed readers (by overriding this method). 
      */
     protected void beforeReadLine() {
-        return;
     }
 
     private String[] getValues() throws IOException {
@@ -167,8 +176,8 @@ class FixedWidthReader implements Closeable {
         StringBuilder line = new StringBuilder();
         int ch;
 
-        for (ch = _stream.read(); !isEndingCharacter(ch); ch = _stream.read()) {
-            line.append((char) ch);
+        for (ch = _reader.read(); !isEndingCharacter(ch); ch = _reader.read()) {
+            line.append((char)ch);
         }
 
         if (ch == CARRIAGE_RETURN) {
@@ -179,10 +188,10 @@ class FixedWidthReader implements Closeable {
     }
     
     private void readLineFeedIfFollows() throws IOException {
-        _stream.mark(1);
-
-        if (_stream.read() != LINE_FEED) {
-            _stream.reset();
+        _reader.mark(1);
+        
+        if (_reader.read() != LINE_FEED) {
+            _reader.reset();
         }
     }
 
@@ -247,6 +256,6 @@ class FixedWidthReader implements Closeable {
 
     @Override
     public void close() throws IOException {
-        _stream.close();
+        _reader.close();
     }
 }

http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
index 8f40c1d..29b4b06 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
@@ -37,6 +37,34 @@ public class FixedWidthReaderTest {
     public final ExpectedException exception = ExpectedException.none();
     
     @Test
+    public void testDiacritics() throws IOException {
+        assertExpectedDiacritics(CHARSET);
+    }
+
+    @Test(expected=AssertionError.class)
+    public void testDiacriticsFails() throws IOException {
+        assertExpectedDiacritics("Windows-1250");
+    }
+
+    private void assertExpectedDiacritics(String charset) throws IOException {
+        final File file = new File("src/test/resources/example_diacritics_utf8.txt");
+        final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+        int[] widths = new int[] { 10, 10 };
+        final String[] expectedValues = {
+                "[name, surname]",
+                "[Štěpán, Knížek]",
+                "[Lukáš, Žáček]",
+                "[Přemysl, Hývl]",
+        };
+        try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, charset, widths, false)) {
+            for (String expectedLine : expectedValues) {
+                final String[] line = fixedWidthReader.readLine();
+                assertEquals(expectedLine, Arrays.asList(line).toString());
+            }
+        }
+    }
+
+    @Test
     public void testBufferedReader1() throws IOException {
         final File file = new File("src/test/resources/example_simple1.txt");
         final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));

http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/test/resources/example_diacritics_utf8.txt
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/resources/example_diacritics_utf8.txt b/fixedwidth/src/test/resources/example_diacritics_utf8.txt
new file mode 100644
index 0000000..65b6a63
--- /dev/null
+++ b/fixedwidth/src/test/resources/example_diacritics_utf8.txt
@@ -0,0 +1,4 @@
+name      surname   
+Štěpán    Knížek
+Lukáš     Žáček
+Přemysl   Hývl
\ No newline at end of file