You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2017/05/10 03:54:15 UTC
[13/43] metamodel git commit: METAMODEL-1109: Fixed
METAMODEL-1109: Fixed
Fixes #122
Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/7e29fb89
Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/7e29fb89
Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/7e29fb89
Branch: refs/heads/5.x
Commit: 7e29fb895703508fd793ba9604f3e893f55adf82
Parents: 7e355a1
Author: Jakub Horcicka <ja...@humaninference.com>
Authored: Wed Aug 10 20:38:25 2016 -0700
Committer: Kasper Sørensen <i....@gmail.com>
Committed: Wed Aug 10 20:39:54 2016 -0700
----------------------------------------------------------------------
.../metamodel/fixedwidth/EbcdicReader.java | 4 +++
.../metamodel/fixedwidth/FixedWidthReader.java | 37 ++++++++++++--------
.../fixedwidth/FixedWidthReaderTest.java | 28 +++++++++++++++
.../test/resources/example_diacritics_utf8.txt | 4 +++
4 files changed, 59 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
index a7639fc..9e22dac 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
@@ -26,6 +26,8 @@ import java.io.IOException;
*/
class EbcdicReader extends FixedWidthReader {
+ private final BufferedInputStream _stream;
+ private final String _charsetName;
private final boolean _skipEbcdicHeader;
private final boolean _eolPresent;
private boolean _headerSkipped;
@@ -33,6 +35,8 @@ class EbcdicReader extends FixedWidthReader {
public EbcdicReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
super(stream, charsetName, valueWidths, failOnInconsistentLineWidth);
+ _stream = stream;
+ _charsetName = charsetName;
_skipEbcdicHeader = skipEbcdicHeader;
_eolPresent = eolPresent;
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
index da17ff1..9f65ac7 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
@@ -19,9 +19,13 @@
package org.apache.metamodel.fixedwidth;
import java.io.BufferedInputStream;
+import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
@@ -35,14 +39,13 @@ class FixedWidthReader implements Closeable {
private static final int LINE_FEED = '\n';
private static final int CARRIAGE_RETURN = '\r';
- protected final String _charsetName;
private final int _fixedValueWidth;
private final int[] _valueWidths;
private int _valueIndex = 0;
private final boolean _failOnInconsistentLineWidth;
private final boolean _constantWidth;
private volatile int _rowNumber;
- protected final BufferedInputStream _stream;
+ protected final Reader _reader;
protected final int _expectedLineLength;
public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth,
@@ -52,8 +55,7 @@ class FixedWidthReader implements Closeable {
private FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth,
boolean failOnInconsistentLineWidth) {
- _stream = stream;
- _charsetName = charsetName;
+ _reader = initReader(stream, charsetName);
_fixedValueWidth = fixedValueWidth;
_failOnInconsistentLineWidth = failOnInconsistentLineWidth;
_rowNumber = 0;
@@ -69,8 +71,7 @@ class FixedWidthReader implements Closeable {
FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
boolean failOnInconsistentLineWidth) {
- _stream = stream;
- _charsetName = charsetName;
+ _reader = initReader(stream, charsetName);
_fixedValueWidth = -1;
_valueWidths = valueWidths;
_failOnInconsistentLineWidth = failOnInconsistentLineWidth;
@@ -85,6 +86,15 @@ class FixedWidthReader implements Closeable {
_expectedLineLength = expectedLineLength;
}
+ private Reader initReader(BufferedInputStream stream, String charsetName) {
+ try {
+ InputStreamReader inputStreamReader = new InputStreamReader(stream, charsetName);
+ return new BufferedReader(inputStreamReader);
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalArgumentException(String.format("Encoding '%s' was not recognized. ", charsetName));
+ }
+ }
+
/**
* This reads and returns the next record from the file. Usually, it is a line but in case the new line characters
* are not present, the length of the content depends on the column-widths setting.
@@ -106,7 +116,6 @@ class FixedWidthReader implements Closeable {
* Empty hook that enables special behavior in sub-classed readers (by overriding this method).
*/
protected void beforeReadLine() {
- return;
}
private String[] getValues() throws IOException {
@@ -167,8 +176,8 @@ class FixedWidthReader implements Closeable {
StringBuilder line = new StringBuilder();
int ch;
- for (ch = _stream.read(); !isEndingCharacter(ch); ch = _stream.read()) {
- line.append((char) ch);
+ for (ch = _reader.read(); !isEndingCharacter(ch); ch = _reader.read()) {
+ line.append((char)ch);
}
if (ch == CARRIAGE_RETURN) {
@@ -179,10 +188,10 @@ class FixedWidthReader implements Closeable {
}
private void readLineFeedIfFollows() throws IOException {
- _stream.mark(1);
-
- if (_stream.read() != LINE_FEED) {
- _stream.reset();
+ _reader.mark(1);
+
+ if (_reader.read() != LINE_FEED) {
+ _reader.reset();
}
}
@@ -247,6 +256,6 @@ class FixedWidthReader implements Closeable {
@Override
public void close() throws IOException {
- _stream.close();
+ _reader.close();
}
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
index 8f40c1d..29b4b06 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
@@ -37,6 +37,34 @@ public class FixedWidthReaderTest {
public final ExpectedException exception = ExpectedException.none();
@Test
+ public void testDiacritics() throws IOException {
+ assertExpectedDiacritics(CHARSET);
+ }
+
+ @Test(expected=AssertionError.class)
+ public void testDiacriticsFails() throws IOException {
+ assertExpectedDiacritics("Windows-1250");
+ }
+
+ private void assertExpectedDiacritics(String charset) throws IOException {
+ final File file = new File("src/test/resources/example_diacritics_utf8.txt");
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ int[] widths = new int[] { 10, 10 };
+ final String[] expectedValues = {
+ "[name, surname]",
+ "[Štěpán, Knížek]",
+ "[Lukáš, Žáček]",
+ "[Přemysl, Hývl]",
+ };
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, charset, widths, false)) {
+ for (String expectedLine : expectedValues) {
+ final String[] line = fixedWidthReader.readLine();
+ assertEquals(expectedLine, Arrays.asList(line).toString());
+ }
+ }
+ }
+
+ @Test
public void testBufferedReader1() throws IOException {
final File file = new File("src/test/resources/example_simple1.txt");
final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
http://git-wip-us.apache.org/repos/asf/metamodel/blob/7e29fb89/fixedwidth/src/test/resources/example_diacritics_utf8.txt
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/resources/example_diacritics_utf8.txt b/fixedwidth/src/test/resources/example_diacritics_utf8.txt
new file mode 100644
index 0000000..65b6a63
--- /dev/null
+++ b/fixedwidth/src/test/resources/example_diacritics_utf8.txt
@@ -0,0 +1,4 @@
+name surname
+Štěpán Knížek
+Lukáš Žáček
+Přemysl Hývl
\ No newline at end of file